~sircmpwn/hare-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
1

[PATCH] cut: new command

Details
Message ID
<20231121231936.5860-1-sam@samnystrom.dev>
DKIM signature
missing
Download raw message
Patch: +239 -0
---
 .gitignore |   1 +
 Makefile   |   1 +
 cut.ha     | 237 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 239 insertions(+)
 create mode 100644 cut.ha

diff --git a/.gitignore b/.gitignore
index bfa47d4..a003319 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
basename
cat
cut
dirname
env
false
diff --git a/Makefile b/Makefile
index ec9316d..9c44d41 100644
--- a/Makefile
+++ b/Makefile
@@ -6,6 +6,7 @@ HAREFLAGS=
utils=\
	basename \
	cat \
	cut \
	dirname \
	env \
	false \
diff --git a/cut.ha b/cut.ha
new file mode 100644
index 0000000..0e07021
--- /dev/null
+++ b/cut.ha
@@ -0,0 +1,237 @@
use bufio;
use encoding::utf8;
use fmt;
use fs;
use getopt;
use io;
use main;
use os;
use strconv;
use strings;
use types;

// Bound parsed from a "-N" list entry. test_position matches every 1-based
// position that is less than or equal to this value.
type open_start = uint;
// Bound parsed from an "N-" list entry. test_position matches every 1-based
// position that is greater than or equal to this value.
type open_end = uint;
// One entry of the LIST argument given to -b, -c or -f: a single position,
// an inclusive (start, end) range, or one of the two half-open forms.
type position = (uint | (uint, uint) | open_start | open_end);

// Entry point for cut. Parses the command line, converts the LIST argument
// of -b, -c or -f into a slice of positions, and then runs cut over standard
// input (when no operands are given) or over every file operand in order.
//
// Exits through fmt::fatal when no list or more than one list is given, when
// the -d argument is not exactly one character, or when a range decreases.
// Errors returned by cut and io::close are propagated to the caller.
export fn utilmain() (main::error | void) = {
	const cmd = getopt::parse(os::args,
		('b', "LIST", "output only bytes from LIST"),
		('c', "LIST", "output only characters from LIST"),
		('d', "SEP", "set the field delimiter"),
		('f', "LIST", "output only fields from LIST"),
		('n', "do not split characters"),
		('s', "output only lines containing delimiter"),
		"[file...]");
	defer getopt::finish(&cmd);

	// ' ' is the "no mode selected yet" sentinel.
	let mode = ' ';
	let list = "";
	let delim = '\t';
	let nosplit = false;
	let skip_nodelim = false;
	for (let i = 0z; i < len(cmd.opts); i += 1) {
		const opt = cmd.opts[i];
		switch (opt.0) {
		case 'b', 'c', 'f' =>
			if (mode != ' ') {
				fmt::fatal("Only one list may be specified");
			};
			mode = opt.0;
			list = opt.1;
		case 'd' =>
			const runes = strings::torunes(opt.1);
			// torunes returns a caller-owned slice; the rune itself
			// is copied into delim before the slice is released.
			defer free(runes);
			if (len(runes) != 1) {
				fmt::fatal("The delimiter must be a single character");
			};
			delim = runes[0];
		case 'n' =>
			nosplit = true;
		case 's' =>
			skip_nodelim = true;
		case =>
			abort();
		};
	};
	if (mode == ' ') {
		fmt::fatal("One of the -b, -c, or -f options must be specified");
	};

	// Blanks are accepted as list separators; normalise them to commas so
	// that a single tokenizer pass handles both forms.
	list = strings::multireplace(list, (" ", ","), ("\t", ","));
	defer free(list);
	let outlist: []position = [];
	defer free(outlist);
	const tokenizer = strings::tokenize(list, ",");
	for (true) {
		const part = match (strings::next_token(&tokenizer)) {
		case let s: str =>
			yield s;
		case void =>
			break;
		};
		if (strings::hasprefix(part, '-')) {
			// "-N": everything up to and including N.
			const n = stou(strings::trimprefix(part, "-"));
			append(outlist, n: open_start);
		} else if (strings::hassuffix(part, '-')) {
			// "N-": everything from N onwards.
			const n = stou(strings::trimsuffix(part, "-"));
			append(outlist, n: open_end);
		} else if (strings::contains(part, '-')) {
			const sides = strings::splitn(part, "-", 2);
			// splitn returns a caller-owned slice; both bounds are
			// converted to integers before it is released.
			defer free(sides);
			const start = stou(sides[0]);
			const end = stou(sides[1]);
			if (start > end) {
				fmt::fatal("Ranges must be increasing");
			};
			append(outlist, (start, end));
		} else {
			const n = stou(part);
			append(outlist, n);
		};
	};

	if (len(cmd.args) == 0) {
		cut(os::stdin, mode, outlist, delim, nosplit, skip_nodelim)?;
		return;
	};

	for (let i = 0z; i < len(cmd.args); i += 1z) {
		const file = open(cmd.args[i]);
		cut(file, mode, outlist, delim, nosplit, skip_nodelim)?;
		// "-" maps to os::stdin, which this loop did not open. Leave
		// it alone so that a later "-" operand does not read from a
		// closed handle.
		if (cmd.args[i] != "-") {
			io::close(file)?;
		};
	};
};

// Converts one list position from its decimal string form to a uint.
// Terminates the program through fmt::fatalf when the string cannot be
// converted, and through fmt::fatal when the value is zero, because
// positions are counted from 1.
fn stou(s: str) uint = {
	const value = match (strconv::stou(s)) {
	case let parsed: uint =>
		yield parsed;
	case =>
		fmt::fatalf("Invalid byte/char/field position '{}'", s);
	};
	if (value == 0) {
		fmt::fatal("Byte/char/field positions are indexed from 1");
	};
	return value;
};

// Resolves a file operand to a readable handle. The operand "-" yields
// os::stdin; any other operand is opened with os::open. A failure to open
// terminates the program through fmt::fatalf with the path and the reason.
fn open(path: str) io::handle = {
	if (path != "-") {
		match (os::open(path)) {
		case let err: fs::error =>
			fmt::fatalf("Error opening '{}': {}", path, fs::strerror(err));
		case let opened: io::file =>
			return opened;
		};
	};
	return os::stdin;
};

// Reads h line by line and prints the selected parts of each line, followed
// by a newline, to standard output.
//
// mode selects the unit that outlist refers to: 'b' for bytes, 'c' for
// characters (runes) and 'f' for fields separated by delim. Any other mode
// aborts. In 'f' mode a line that does not contain delim is printed
// unchanged, or skipped entirely when skip_nodelim is set. nosplit is
// accepted for the -n option but is not acted upon yet.
//
// Read errors from the scanner are returned to the caller; input that is not
// valid UTF-8 terminates the program through fmt::fatal.
fn cut(h: io::handle,
	mode: rune,
	outlist: []position,
	delim: rune,
	nosplit: bool,
	skip_nodelim: bool
) (main::error | void) = {
	const scanner = bufio::newscanner(h, types::SIZE_MAX);
	// The scanner owns an internal buffer; release it on every exit path
	// so that processing many operands does not leak one buffer per file.
	defer bufio::finish(&scanner);
	let str_delim = strings::fromrunes([delim]);
	defer free(str_delim);
	for (true) {
		const line = match (bufio::scan_line(&scanner)) {
		case let s: const str =>
			yield s;
		case io::EOF =>
			break;
		case let err: io::error =>
			return err;
		case utf8::invalid =>
			fmt::fatal("Error: invalid UTF-8");
		};
		switch (mode) {
		case 'b' =>
			let out: []u8 = [];
			defer free(out);
			const bytes = strings::toutf8(line);
			for (let i = 0z; i < len(bytes); i += 1) {
				if (test_position(outlist, i)) {
					append(out, bytes[i]);
				};
			};
			// Byte selection may cut a multibyte character in half,
			// so the result is not necessarily valid UTF-8. Write
			// the bytes verbatim instead of converting them back to
			// a string, which would reject such output.
			io::writeall(os::stdout, out)!;
			fmt::println("")!;
		case 'c' =>
			let out: []rune = [];
			defer free(out);
			const iter = strings::iter(line);
			for (let i = 0z; true; i += 1) {
				match (strings::next(&iter)) {
				case let r: rune =>
					if (test_position(outlist, i)) {
						append(out, r);
					};
				case void =>
					break;
				};
			};
			const s = strings::fromrunes(out);
			defer free(s);
			fmt::println(s)!;
		case 'f' =>
			if (!strings::contains(line, str_delim)) {
				if (!skip_nodelim) {
					fmt::println(line)!;
				};
				continue;
			};
			let tokenizer = strings::tokenize(line, str_delim);
			// The collected fields borrow from line, so only the
			// slice itself has to be freed.
			let fields: []str = [];
			defer free(fields);
			for (let i = 0z; true; i += 1) {
				const field = match (strings::next_token(&tokenizer)) {
				case let s: str =>
					yield s;
				case void =>
					break;
				};
				if (test_position(outlist, i)) {
					append(fields, field);
				};
			};
			const joined = strings::join(str_delim, fields...);
			defer free(joined);
			fmt::println(joined)!;
		case =>
			abort();
		};
	};
};

// Reports whether the zero-based index pos is selected by any entry of
// outlist. The index is converted to a one-based ordinal first, because list
// entries are counted from 1. Returns false when no entry matches, which
// includes the case of an empty outlist.
fn test_position(outlist: []position, pos: size) bool = {
	const ordinal = pos + 1;
	for (let k = 0z; k < len(outlist); k += 1) {
		const selected = match (outlist[k]) {
		case let exact: uint =>
			yield ordinal == exact;
		case let span: (uint, uint) =>
			yield span.0 <= ordinal && ordinal <= span.1;
		case let upper: open_start =>
			yield ordinal <= upper;
		case let lower: open_end =>
			yield ordinal >= lower;
		};
		if (selected) {
			return true;
		};
	};
	return false;
};
-- 
2.42.1
Details
Message ID
<CXFJ2R7REJ3S.NFKX93F4AJE0@taiga>
In-Reply-To
<20231121231936.5860-1-sam@samnystrom.dev> (view parent)
DKIM signature
missing
Download raw message
Thanks!

To git@git.sr.ht:~sircmpwn/hautils
   56651ec..b2ce3ec  master -> master
Reply to thread Export thread (mbox)