This implements most of uniq excluding the `-f` flag
spec: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uniq.html
---
Resolves some comments (static buffers, strings::compare -> ==)
.gitignore | 1 +
Makefile | 2 +
uniq.ha | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 162 insertions(+)
create mode 100644 uniq.ha
diff --git a/.gitignore b/.gitignore
index 9e1875f..757c334 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,5 @@ sleep
tee
true
uname
+uniq
wc
diff --git a/Makefile b/Makefile
index 5062904..c568580 100644
--- a/Makefile
+++ b/Makefile
@@ -17,6 +17,7 @@ utils=\
tee \
true \
uname \
+ uniq \
wc
all: $(utils)
@@ -43,4 +44,5 @@ sleep: sleep.ha main/main.ha
tee: tee.ha main/main.ha
true: true.ha
uname: uname.ha main/main.ha
+uniq: uniq.ha main/main.ha
wc: wc.ha main/main.ha
diff --git a/uniq.ha b/uniq.ha
new file mode 100644
index 0000000..114817f
--- /dev/null
+++ b/uniq.ha
@@ -0,0 +1,159 @@
+use fmt;
+use strings;
+use bufio;
+use main;
+use os;
+use io;
+use getopt;
+use strconv;
+use fs;
+
+// Options and streams shared by the functions that implement uniq.
+type config = struct {
+ // Set by -c: maybeprint prefixes each printed line with its count.
+ showcount: bool,
+ // Set by -d: maybeprint skips lines whose count is 1.
+ removesingletons: bool,
+ // Set by -s: number of leading characters comparisonstring leaves out.
+ skipchars: uint,
+ // Set by -u: maybeprint skips lines whose count is not 1.
+ onlyunique: bool,
+
+ // Stream uniq reads lines from, and stream maybeprint writes them to.
+ input: io::handle,
+ output: io::handle,
+};
+
+// Writes line to cfg.output unless the -d / -u filters reject it. count is
+// the number of adjacent occurrences of the line; with -c it is printed in
+// front of the line, separated by a single space.
+fn maybeprint(cfg: *config, line: str, count: int) void = {
+	const single = count == 1;
+	// -d rejects lines seen once, -u rejects lines seen more than once.
+	const suppressed = if (single) cfg.removesingletons
+		else cfg.onlyunique;
+	if (suppressed) {
+		return;
+	};
+
+	if (!cfg.showcount) {
+		fmt::fprintln(cfg.output, line)!;
+		return;
+	};
+	fmt::fprintf(cfg.output, "{} {}\n", count, line)!;
+};
+
+// Returns the part of line that takes part in the equality check: everything
+// after the first cfg.skipchars characters (-s), or "" when the line has fewer
+// characters than that. The result borrows from line and must not be freed.
+fn comparisonstring(cfg: *config, line: str) str = {
+	// len(line) is a byte count while strings::sub indexes by rune, so a
+	// byte-length check lets short multi-byte lines through to an
+	// out-of-range sub. Count runes instead.
+	let it = strings::iter(line);
+	for (let i = 0u; i < cfg.skipchars; i += 1) {
+		if (strings::next(&it) is void) {
+			return "";
+		};
+	};
+	return strings::sub(line, cfg.skipchars, strings::end);
+};
+
+// Reads the next line from input with bufio::scanline and returns it as a
+// separately allocated string, which the caller owns. I/O errors and io::EOF
+// are passed through unchanged.
+fn readline(input: io::handle) (str | io::error | io::EOF) = {
+	match (bufio::scanline(input)) {
+	case let err: io::error =>
+		return err;
+	case io::EOF =>
+		return io::EOF;
+	case let buf: []u8 =>
+		// The scanned bytes are only needed until the copy is made.
+		defer free(buf);
+		return strings::dup(strings::fromutf8(buf));
+	};
+};
+
+// Reads cfg.input line by line and passes each run of adjacent lines whose
+// comparison strings are equal to maybeprint, together with the length of the
+// run. The first line of a run is the one handed to maybeprint. Empty input
+// produces no output. Read errors are returned to the caller.
+fn uniq(cfg: *config) (main::error | void) = {
+	// lastline holds the first line of the current run. It is owned here
+	// and released by the defer below on every return path.
+	let lastline = match (readline(cfg.input)) {
+	case let err: io::error =>
+		return err;
+	case io::EOF =>
+		return;
+	case let s: str =>
+		yield s;
+	};
+	defer free(lastline);
+	let count = 1;
+	for (true) {
+		const line = match (readline(cfg.input)) {
+		case let err: io::error =>
+			return err;
+		case io::EOF =>
+			maybeprint(cfg, lastline, count);
+			return;
+		case let s: str =>
+			yield s;
+		};
+
+		let oldcomparison = comparisonstring(cfg, lastline);
+		let newcomparison = comparisonstring(cfg, line);
+		if (oldcomparison == newcomparison) {
+			// Same run: only the count changes, so release the
+			// string readline allocated instead of leaking it.
+			free(line);
+			count += 1;
+			continue;
+		};
+
+		maybeprint(cfg, lastline, count);
+		free(lastline);
+		// line is already a private copy; adopt it rather than
+		// duplicating it and losing track of the original.
+		lastline = line;
+		count = 1;
+	};
+};
+
+// Entry point: parses the command line, opens the optional input and output
+// operands and runs uniq over them.
+//
+// Lines are read from os::stdin unless an input operand other than "-" is
+// given, and written to os::stdout unless an output operand is given. More
+// than two operands, an invalid -s argument, or a file that cannot be opened
+// print the usage text and terminate the program. -f is listed in the help
+// but is not implemented and terminates the program as well.
+export fn utilmain() (main::error | void) = {
+	const help: []getopt::help = [
+		"report or filter out repeated lines in a file",
+		('c', "prefix line with occurrence count"),
+		('d', "remove lines without duplicates"),
+		('f', "fields", "skip comparing the first N fields"),
+		('s', "chars", "skip the first N characters"),
+		('u', "remove lines with duplicates"),
+		"[input file [output file]]",
+	];
+	const cmd = getopt::parse(os::args, help...);
+	defer getopt::finish(&cmd);
+
+	let cfg = config { ... };
+
+	for (let i = 0z; i < len(cmd.opts); i += 1) {
+		const opt = cmd.opts[i];
+		switch (opt.0) {
+		case 'c' =>
+			cfg.showcount = true;
+		case 'd' =>
+			cfg.removesingletons = true;
+		case 'f' =>
+			fmt::fatal("Unimplemented");
+		case 's' =>
+			cfg.skipchars = match (strconv::stou(opt.1)) {
+			case (strconv::invalid | strconv::overflow) =>
+				getopt::printusage(os::stderr, os::args[0], help);
+				fmt::fatal("Error: invalid argument for -s");
+			case let skipchars: uint =>
+				yield skipchars;
+			};
+		case 'u' =>
+			cfg.onlyunique = true;
+		case => abort();
+		};
+	};
+
+	// The synopsis allows at most an input and an output operand; refuse
+	// anything beyond that instead of silently ignoring the output file.
+	if (len(cmd.args) > 2) {
+		getopt::printusage(os::stderr, os::args[0], help);
+		fmt::fatal("Error: too many operands");
+	};
+
+	// Backing storage for the buffered input file stream.
+	static const stdin_rbuf: [os::BUFSIZ]u8 = [0...];
+	static const stdin_wbuf: [os::BUFSIZ]u8 = [0...];
+	cfg.input = os::stdin;
+	if (len(cmd.args) >= 1 && cmd.args[0] != "-") {
+		match (os::open(cmd.args[0])) {
+		case let err: fs::error =>
+			getopt::printusage(os::stderr, os::args[0], help);
+			fmt::fatal("Error opening '{}': {}",
+				cmd.args[0], fs::strerror(err));
+		case let file: io::file =>
+			cfg.input = &bufio::buffered(file,
+				stdin_rbuf, stdin_wbuf);
+		};
+	};
+	defer io::close(cfg.input);
+
+	cfg.output = os::stdout;
+	// Backing storage for the buffered output file stream.
+	static const stdout_rbuf: [os::BUFSIZ]u8 = [0...];
+	static const stdout_wbuf: [os::BUFSIZ]u8 = [0...];
+	if (len(cmd.args) == 2) {
+		// TRUNC: an existing, longer output file must not keep its old
+		// tail after the new contents have been written.
+		match (os::create(cmd.args[1], 0o666,
+			fs::flags::WRONLY | fs::flags::TRUNC)) {
+		case let err: fs::error =>
+			getopt::printusage(os::stderr, os::args[0], help);
+			fmt::fatal("Error opening '{}': {}",
+				cmd.args[1], fs::strerror(err));
+		case let file: io::file =>
+			cfg.output = &bufio::buffered(file,
+				stdout_rbuf, stdout_wbuf);
+		};
+	};
+	defer io::close(cfg.output);
+
+	uniq(&cfg)?;
+};
--
2.36.0
Sorry for the second review round, I was pretty tired when doing the
first one and missed a few things. Hope you're not annoyed by this.
On Mon Apr 25, 2022 at 2:56 PM MSK, Alex McGrath wrote:
>+ fmt::fprintf(cfg.output, "{} {}\n", count, line)!;
Tip: you can suffix the fmt function with ln to make it add a newline:
fmt::fprintfln(cfg.output, "{} {}", count, line)!;
>+ let lastline = match(readline(cfg.input)) {
>+ case let err: io::error =>
>+ return err;
>+ case io::EOF =>
>+ return void;
This can be just 'return;', void is the implicit return value.
>+ const line = match(readline(cfg.input)) {
>+ case let err: io::error =>
>+ return err;
>+ case io::EOF =>
>+ maybeprint(cfg, lastline, count);
>+ return void;
And this one as well.
> + let cfg = &config{...};
This notation can be a little bit confusing; I think it's better to
initialize it as 'let cfg = config { ... };' and then take its address
when necessary.
> + uniq(cfg)?;
For example, this would become uniq(&cfg)
On Mon Apr 25, 2022 at 5:26 PM UTC, Alexey Yerin wrote:
> On Mon Apr 25, 2022 at 2:56 PM MSK, Alex McGrath wrote:
> >+ fmt::fprintf(cfg.output, "{} {}\n", count, line)!;
>
> Tip: you can suffix the fmt function with ln to make it add a newline:
>
> fmt::fprintfln(cfg.output, "{} {}", count, line)!;
In addition, since you're just printing out space-separated values, you
can avoid the format string using fprintln:
fmt::fprintln(cfg.output, count, line)!;