~sircmpwn/hare-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
1

[PATCH hautils] uniq: new command

Details
Message ID
<20220425093045.10478-1-amk@amk.ie>
DKIM signature
missing
Download raw message
Patch: +162 -0
This implements most of uniq, excluding the `-f` flag.

spec: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uniq.html
---
 .gitignore |   1 +
 Makefile   |   2 +
 uniq.ha    | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 162 insertions(+)
 create mode 100644 uniq.ha

diff --git a/.gitignore b/.gitignore
index 9e1875f..757c334 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,5 @@ sleep
tee
true
uname
uniq
wc
diff --git a/Makefile b/Makefile
index 5062904..c568580 100644
--- a/Makefile
+++ b/Makefile
@@ -17,6 +17,7 @@ utils=\
	tee \
	true \
	uname \
	uniq \
	wc

all: $(utils)
@@ -43,4 +44,5 @@ sleep: sleep.ha main/main.ha
tee: tee.ha main/main.ha
true: true.ha
uname: uname.ha main/main.ha
uniq: uniq.ha main/main.ha
wc: wc.ha main/main.ha
diff --git a/uniq.ha b/uniq.ha
new file mode 100644
index 0000000..7699fda
--- /dev/null
+++ b/uniq.ha
@@ -0,0 +1,159 @@
use fmt;
use strings;
use bufio;
use main;
use os;
use io;
use getopt;
use strconv;
use fs;

// Runtime options for one uniq invocation. utilmain fills this in from the
// command line and passes it to uniq.
type config = struct {
	// Set by -c: maybeprint prefixes each printed line with its count.
	showcount: bool,
	// Set by -d: maybeprint drops lines whose count is exactly 1.
	removesingletons: bool,
	// Set by -s: how many leading characters comparisonstring skips.
	skipchars: uint,
	// Set by -u: maybeprint drops lines whose count is not 1.
	onlyunique: bool,

	// Handle that uniq reads lines from.
	input: io::handle,
	// Handle that maybeprint writes lines to.
	output: io::handle,
};

// Writes line to cfg.output unless the configured filters suppress it.
//
// count is the number of consecutive occurrences of line. A line seen once is
// dropped when cfg.removesingletons is set; a line seen more than once is
// dropped when cfg.onlyunique is set. With cfg.showcount the line is written
// as "<count> <line>". Write errors abort the program.
fn maybeprint(cfg: *config, line: str, count: int) void = {
	// Exactly one of the two filters applies, depending on whether the
	// line was repeated.
	const suppressed = if (count == 1) cfg.removesingletons
		else cfg.onlyunique;
	if (suppressed) {
		return;
	};
	if (!cfg.showcount) {
		fmt::fprintln(cfg.output, line)!;
		return;
	};
	fmt::fprintf(cfg.output, "{} {}\n", count, line)!;
};

// Returns the part of line that takes part in duplicate detection: line with
// its first cfg.skipchars characters removed. The result borrows from line and
// must not outlive it.
//
// If line holds fewer than cfg.skipchars characters, the empty string is
// returned.
fn comparisonstring(cfg: *config, line: str) str = {
	// strings::sub indexes by rune, whereas len(line) is a byte count, so
	// the length check has to walk the runes. Comparing against len(line)
	// lets multi-byte lines through with an out-of-range start index.
	let it = strings::iter(line);
	for (let skipped = 0u; skipped < cfg.skipchars; skipped += 1) {
		match (strings::next(&it)) {
		case rune =>
			continue;
		case =>
			// Ran out of runes before skipping enough of them.
			return "";
		};
	};
	return strings::sub(line, cfg.skipchars, strings::end);
};

// Reads one line from input, without its trailing newline.
//
// Returns io::EOF once the input is exhausted and forwards any io::error from
// bufio::scanline. On success the returned string is heap allocated and the
// caller must free it.
fn readline(input: io::handle) (str | io::error | io::EOF) = {
	const rawline = match (bufio::scanline(input)) {
	case let err: io::error =>
		return err;
	case io::EOF =>
		return io::EOF;
	case let rawline: []u8 =>
		yield rawline;
	};
	// scanline already hands us an allocated buffer, so pass its ownership
	// to the caller rather than duplicating it and freeing the original.
	return strings::fromutf8(rawline);
};

// Reads lines from cfg.input and collapses each run of adjacent lines that
// compare equal (after comparisonstring has removed the skipped prefix) into
// a single candidate, which is passed to maybeprint together with the length
// of the run. The first line of a run is the one that gets printed.
//
// Returns void on end of input, or the io::error reported by readline.
fn uniq(cfg: *config)  (main::error | void) =  {
	let lastline = match (readline(cfg.input)) {
	case let err: io::error =>
		return err;
	case io::EOF =>
		// Empty input: nothing to print.
		return void;
	case let s: str =>
		yield s;
	};
	// Runs at function exit and frees whichever line lastline holds then.
	defer free(lastline);
	let count = 1;
	for (true) {
		const line = match (readline(cfg.input)) {
		case let err: io::error =>
			return err;
		case io::EOF =>
			// Flush the run that was still being counted.
			maybeprint(cfg, lastline, count);
			return void;
		case let s: str =>
			yield s;
		};

		const oldcomparison = comparisonstring(cfg, lastline);
		const newcomparison = comparisonstring(cfg, line);
		if (oldcomparison == newcomparison) {
			// Same run: only the count is needed, so release the
			// freshly read line instead of leaking it.
			free(line);
			count += 1;
			continue;
		};

		// A new run starts. Emit the previous one and take ownership of
		// line directly; no copy is needed since readline allocated it.
		maybeprint(cfg, lastline, count);
		free(lastline);
		lastline = line;
		count = 1;
	};
};

// Entry point of the uniq utility.
//
// Parses the command line into a config, selects the input (stdin, or the
// first operand unless it is "-") and the output (stdout, or the second
// operand), and then runs uniq. Invalid -s arguments and files that cannot be
// opened print the usage text and terminate the program; -f is not
// implemented and terminates the program as well.
export fn utilmain() (main::error | void) = {
	const help: []getopt::help = [
		"report or filter out repeated lines in a file",
		('c', "prefix line with occurance count"),
		('d', "remove lines without duplicates"),
		('f', "fields", "skip comparing the first N fields"),
		('s', "chars", "skip the first N characters"),
		('u', "remove lines with duplicates"),
		"[input file [output file]]",
	];
	const cmd = getopt::parse(os::args, help...);
	defer getopt::finish(&cmd);

	let cfg = &config{...};

	for (let i = 0z; i < len(cmd.opts); i += 1) {
		const opt = cmd.opts[i];
		switch (opt.0) {
		case 'c' =>
			cfg.showcount = true;
		case 'd' =>
			cfg.removesingletons = true;
		case 'f' =>
			fmt::fatal("Unimplemented");
		case 's' =>
			cfg.skipchars = match (strconv::stou(opt.1)) {
			case (strconv::invalid | strconv::overflow) =>
				getopt::printusage(os::stderr, os::args[0], help);
				fmt::fatal("Error: invalid argument for -s");
			case let skipchars: uint =>
				yield skipchars;
			};
		case 'u' =>
			cfg.onlyunique = true;
		case => abort();
		};
	};

	// The stream buffers are static so that four BUFSIZ-sized arrays do
	// not have to live on the stack.
	static let stdin_rbuf: [os::BUFSIZ]u8 = [0...];
	static let stdin_wbuf: [os::BUFSIZ]u8 = [0...];
	cfg.input = os::stdin;
	if (len(cmd.args) >= 1 && cmd.args[0] != "-") {
		match (os::open(cmd.args[0])) {
		case let err: fs::error =>
			getopt::printusage(os::stderr, os::args[0], help);
			fmt::fatal("Error opening '{}': {}",
				cmd.args[0], fs::strerror(err));
		case let file: io::file =>
			cfg.input = &bufio::buffered(file, stdin_rbuf, stdin_wbuf);
		};
	};
	defer io::close(cfg.input);

	cfg.output = os::stdout;
	static let stdout_rbuf: [os::BUFSIZ]u8 = [0...];
	static let stdout_wbuf: [os::BUFSIZ]u8 = [0...];
	if (len(cmd.args) == 2) {
		// TRUNC discards the previous contents of an existing output
		// file; without it, a longer old file would keep its stale tail
		// after the new output.
		match (os::create(cmd.args[1], 0o666,
				fs::flags::WRONLY | fs::flags::TRUNC)) {
		case let err: fs::error =>
			getopt::printusage(os::stderr, os::args[0], help);
			fmt::fatal("Error opening '{}': {}",
				cmd.args[1], fs::strerror(err));
		case let file: io::file =>
			cfg.output = &bufio::buffered(file, stdout_rbuf, stdout_wbuf);
		};
	};
	defer io::close(cfg.output);

	uniq(cfg)?;
};
-- 
2.36.0
Details
Message ID
<CJJ8JNJLVUCY.YULQLECRJ70Q@desktop>
In-Reply-To
<20220425093045.10478-1-amk@amk.ie> (view parent)
DKIM signature
missing
Download raw message
On Mon Apr 25, 2022 at 12:30 PM MSK, Alex McGrath wrote:
> +		let oldcomparison = comparisonstring(cfg, lastline);
> +		let newcomparison = comparisonstring(cfg, line);
> +		if (strings::compare(oldcomparison, newcomparison) != 0) {

This can be simplified to:

	if (oldcomparison != newcomparison) { ... };

> +	const stdin_rbuf: [os::BUFSIZ]u8 = [0...];
> +	const stdin_wbuf: [os::BUFSIZ]u8 = [0...];

These buffers use a lot of stack space; you should probably make them
static:

	static const stdin_rbuf: [os::BUFSIZ]u8 = [0...];
	static const stdin_wbuf: [os::BUFSIZ]u8 = [0...];

> +	const stdout_rbuf: [os::BUFSIZ]u8 = [0...];
> +	const stdout_wbuf: [os::BUFSIZ]u8 = [0...];

Same issue with buffers as above.
Reply to thread Export thread (mbox)