---
.gitignore | 1 +
Makefile | 1 +
cut.ha | 237 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 239 insertions(+)
create mode 100644 cut.ha
diff --git a/.gitignore b/.gitignore
index bfa47d4..a003319 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
basename
cat
+cut
dirname
env
false
diff --git a/Makefile b/Makefile
index ec9316d..9c44d41 100644
--- a/Makefile
+++ b/Makefile
@@ -6,6 +6,7 @@ HAREFLAGS=
utils=\
basename \
cat \
+ cut \
dirname \
env \
false \
diff --git a/cut.ha b/cut.ha
new file mode 100644
index 0000000..0e07021
--- /dev/null
+++ b/cut.ha
@@ -0,0 +1,237 @@
+use bufio;
+use encoding::utf8;
+use fmt;
+use fs;
+use getopt;
+use io;
+use main;
+use os;
+use strconv;
+use strings;
+use types;
+
+type open_start = uint; // LIST entry "-N": positions 1 through N
+type open_end = uint; // LIST entry "N-": positions N and above
+type position = (uint | (uint, uint) | open_start | open_end); // one LIST entry
+
+// Entry point for cut: parses the options, compiles LIST into positions and
+// runs cut() over every operand, or over stdin when no operand is given.
+export fn utilmain() (main::error | void) = {
+	const cmd = getopt::parse(os::args,
+		('b', "LIST", "output only bytes from LIST"),
+		('c', "LIST", "output only characters from LIST"),
+		('d', "SEP", "set the field delimiter"),
+		('f', "LIST", "output only fields from LIST"),
+		('n', "do not split characters"),
+		('s', "output only lines containing delimiter"),
+		"[file...]");
+	defer getopt::finish(&cmd);
+	let mode = ' ';
+	let list = "";
+	let delim = '\t';
+	let nosplit = false;
+	let skip_nodelim = false;
+	for (let i = 0z; i < len(cmd.opts); i += 1) {
+		const opt = cmd.opts[i];
+		switch (opt.0) {
+		case 'b', 'c', 'f' =>
+			if (mode != ' ') {
+				fmt::fatal("Only one list may be specified");
+			};
+			mode = opt.0;
+			list = opt.1;
+		case 'd' =>
+			const runes = strings::torunes(opt.1);
+			defer free(runes);
+			if (len(runes) != 1) {
+				fmt::fatal("The delimiter must be a single character");
+			};
+			delim = runes[0];
+		case 'n' =>
+			nosplit = true;
+		case 's' =>
+			skip_nodelim = true;
+		case =>
+			abort();
+		};
+	};
+	if (mode == ' ') {
+		fmt::fatal("One of the -b, -c, or -f options must be specified");
+	};
+
+	list = strings::multireplace(list, (" ", ","), ("\t", ","));
+	defer free(list);
+	let outlist: []position = [];
+	defer free(outlist);
+	const tokenizer = strings::tokenize(list, ",");
+	for (true) {
+		const part = match (strings::next_token(&tokenizer)) {
+		case let s: str =>
+			yield s;
+		case void =>
+			break;
+		};
+		if (strings::hasprefix(part, '-')) {
+			const n = stou(strings::trimprefix(part, "-"));
+			append(outlist, n: open_start);
+		} else if (strings::hassuffix(part, '-')) {
+			const n = stou(strings::trimsuffix(part, "-"));
+			append(outlist, n: open_end);
+		} else if (strings::contains(part, '-')) {
+			const sides = strings::splitn(part, "-", 2);
+			defer free(sides);
+			const start = stou(sides[0]);
+			const end = stou(sides[1]);
+			if (start > end) fmt::fatal("Ranges must be increasing");
+			append(outlist, (start, end));
+		} else {
+			const n = stou(part);
+			append(outlist, n);
+		};
+	};
+
+	if (len(cmd.args) == 0) {
+		return cut(os::stdin, mode, outlist, delim, nosplit, skip_nodelim);
+	};
+	for (let i = 0z; i < len(cmd.args); i += 1z) {
+		const file = open(cmd.args[i]);
+		cut(file, mode, outlist, delim, nosplit, skip_nodelim)?;
+		// "-" maps to the shared stdin handle, which must stay open.
+		if (cmd.args[i] != "-") io::close(file)?;
+	};
+};
+
+// Parses a 1-based LIST position; bad or zero input ends the program.
+fn stou(s: str) uint = {
+	const value = match (strconv::stou(s)) {
+	case let parsed: uint =>
+		yield parsed;
+	case =>
+		fmt::fatalf("Invalid byte/char/field position '{}'", s);
+	};
+	if (value >= 1) return value;
+	fmt::fatal("Byte/char/field positions are indexed from 1");
+};
+
+// Resolves an operand to a readable handle: "-" yields os::stdin, any other
+// path is opened with os::open. Exits with a diagnostic if opening fails.
+fn open(path: str) io::handle = {
+	if (path == "-") return os::stdin;
+
+	return match (os::open(path)) {
+	case let opened: io::file =>
+		yield opened;
+	case let cause: fs::error =>
+		fmt::fatalf("Error opening '{}': {}", path, fs::strerror(cause));
+	};
+};
+
+// Copies the selected parts of every line read from h to standard output.
+//
+// mode is 'b' (bytes), 'c' (characters) or 'f' (fields split on delim); any
+// other value aborts. outlist holds the 1-based positions to keep. In field
+// mode a line without delim is echoed whole unless skip_nodelim is set.
+// nosplit is accepted for the -n flag but is not consulted yet.
+//
+// Read and byte-write errors are returned; invalid UTF-8 input is fatal.
+fn cut(h: io::handle,
+	mode: rune,
+	outlist: []position,
+	delim: rune,
+	nosplit: bool,
+	skip_nodelim: bool
+) (main::error | void) = {
+	const scanner = bufio::newscanner(h, types::SIZE_MAX);
+	// The scanner owns a heap buffer; release it once this input is done.
+	defer bufio::finish(&scanner);
+	let str_delim = strings::fromrunes([delim]);
+	defer free(str_delim);
+	for (true) {
+		const line = match (bufio::scan_line(&scanner)) {
+		case let s: const str =>
+			yield s;
+		case io::EOF =>
+			break;
+		case let err: io::error =>
+			return err;
+		case utf8::invalid =>
+			fmt::fatal("Error: invalid UTF-8");
+		};
+		switch (mode) {
+		case 'b' =>
+			let out: []u8 = [];
+			defer free(out);
+			const bytes = strings::toutf8(line);
+			for (let i = 0z; i < len(bytes); i += 1) {
+				if (test_position(outlist, i)) append(out, bytes[i]);
+			};
+			// A byte list may split a multi-byte character, so the
+			// selection is written raw instead of being re-decoded.
+			io::writeall(os::stdout, out)?;
+			fmt::println()!;
+		case 'c' =>
+			let out: []rune = [];
+			defer free(out);
+			const iter = strings::iter(line);
+			for (let i = 0z; true; i += 1) {
+				match (strings::next(&iter)) {
+				case let r: rune =>
+					if (test_position(outlist, i)) append(out, r);
+				case void =>
+					break;
+				};
+			};
+			const s = strings::fromrunes(out);
+			defer free(s);
+			fmt::println(s)!;
+		case 'f' =>
+			if (!strings::contains(line, str_delim)) {
+				if (!skip_nodelim) fmt::println(line)!;
+				continue;
+			};
+			let tokenizer = strings::tokenize(line, str_delim);
+			let fields: []str = [];
+			defer free(fields);
+			for (let i = 0z; true; i += 1) {
+				const field = match (strings::next_token(&tokenizer)) {
+				case let s: str =>
+					yield s;
+				case void =>
+					break;
+				};
+				if (test_position(outlist, i)) append(fields, field);
+			};
+			const joined = strings::join(str_delim, fields...);
+			defer free(joined);
+			fmt::println(joined)!;
+		case =>
+			abort();
+		};
+	};
+};
+
+// Reports whether the zero-based index pos is selected by outlist.
+//
+// pos is converted to the 1-based numbering used by LIST before it is
+// compared. An open_start entry matches everything up to its bound, an
+// open_end entry everything from its bound on, a pair matches its inclusive
+// range and a bare uint matches that single position.
+fn test_position(outlist: []position, pos: size) bool = {
+	const want = pos + 1;
+	for (let k = 0z; k < len(outlist); k += 1) {
+		const hit = match (outlist[k]) {
+		case let limit: open_start =>
+			yield want <= limit;
+		case let first: open_end =>
+			yield want >= first;
+		case let span: (uint, uint) =>
+			yield span.0 <= want && want <= span.1;
+		case let exact: uint =>
+			yield want == exact;
+		};
+		if (hit) {
+			return true;
+		};
+	};
+	return false;
+};
--
2.42.1