~sircmpwn/hare-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
5 4

[PATCH hare v5] wordexp: new module

Details
Message ID
<20231121092232.1318306-1-sir@cmpwn.com>
DKIM signature
missing
Download raw message
Patch: +163 -0
Implements: https://todo.sr.ht/~sircmpwn/hare/274
Signed-off-by: Drew DeVault <sir@cmpwn.com>
---
v5 adds copyright headers.

 wordexp/+test.ha   | 52 +++++++++++++++++++++++++++++++
 wordexp/README     |  9 ++++++
 wordexp/error.ha   | 26 ++++++++++++++++
 wordexp/wordexp.ha | 76 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 163 insertions(+)
 create mode 100644 wordexp/+test.ha
 create mode 100644 wordexp/README
 create mode 100644 wordexp/error.ha
 create mode 100644 wordexp/wordexp.ha

diff --git a/wordexp/+test.ha b/wordexp/+test.ha
new file mode 100644
index 00000000..f9fb4da6
--- /dev/null
+++ b/wordexp/+test.ha
@@ -0,0 +1,52 @@
// SPDX-License-Identifier: MIT
// (c) Hare authors <https://harelang.org>

use os;
use strings;

fn streq(a: []str, b: []str) bool = {
	if (len(a) != len(b)) {
		return false;
	};
	for (let i = 0z; i < len(a); i += 1) {
		if (a[i] != b[i]) {
			return false;
		};
	};
	return true;
};

@test fn wordexp() void = {
	os::setenv("TESTVAR", "test value")!;
	os::unsetenv("UNSET")!;
	static const cases: [_](str, []str) = [
		(``, []),
		(`hello world`, ["hello", "world"]),
		(`hello $TESTVAR`, ["hello", "test", "value"]),
		(`hello "$TESTVAR"`, ["hello", "test value"]),
		(`hello $(echo world)`, ["hello", "world"]),
		(`hello $((2+2))`, ["hello", "4"]),
		(`hello $UNSET`, ["hello"]),
	];

	for (let i = 0z; i < len(cases); i += 1) {
		const (in, out) = cases[i];
		const words = wordexp(in, flag::NONE)!;
		defer strings::freeall(words);
		assert(streq(words, out));
	};
};

@test fn wordexp_error() void = {
	os::unsetenv("UNSET")!;
	static const cases: [_](str, flag) = [
		(`hello $UNSET`, flag::UNDEF),
		(`hello $(`, 0),
	];

	for (let i = 0z; i < len(cases); i += 1) {
		const (in, flag) = cases[i];
		const result = wordexp(in, flag);
		assert(result is sh_error);
	};
};
diff --git a/wordexp/README b/wordexp/README
new file mode 100644
index 00000000..a56cd8ff
--- /dev/null
+++ b/wordexp/README
@@ -0,0 +1,9 @@
The wordexp module implements word expansion using shell semantics, similar to
POSIX wordexp(3). Word expansion is performed with the platform-specific system
shell, which is generally POSIX sh(1) compatible on Unix-like systems.

When used with a POSIX shell, the IFS variable is unconditionally unset in the
environment, causing the shell to assume the default value of " \t\n".

Note that, by design, this module runs arbitrary shell commands from
user-supplied inputs. It must only be used in a trusted environment.
diff --git a/wordexp/error.ha b/wordexp/error.ha
new file mode 100644
index 00000000..857d9820
--- /dev/null
+++ b/wordexp/error.ha
@@ -0,0 +1,26 @@
// SPDX-License-Identifier: MIT
// (c) Hare authors <https://harelang.org>

use encoding::utf8;
use io;
use os::exec;

// Tagged union of possible wordexp error conditions.
export type error = !(io::error | exec::error | utf8::invalid | sh_error);

// An error occured during shell expansion.
export type sh_error = !void;

// Converts an [[error]] to a human-friendly string.
export fn strerror(err: error) const str = {
	match (err) {
	case let err: io::error =>
		return io::strerror(err);
	case let err: exec::error =>
		return exec::strerror(err);
	case utf8::invalid =>
		return "Word expansion returned invalid UTF-8 data";
	case sh_error =>
		return "An error occured during shell expansion";
	};
};
diff --git a/wordexp/wordexp.ha b/wordexp/wordexp.ha
new file mode 100644
index 00000000..84e6e1bf
--- /dev/null
+++ b/wordexp/wordexp.ha
@@ -0,0 +1,76 @@
// SPDX-License-Identifier: MIT
// (c) Hare authors <https://harelang.org>
// (c) 2005-2020 Rich Felker, et al
// Based on the musl libc implementation

use bufio;
use io;
use os;
use os::exec;
use strings;
use types;

// Flags applicable to a [[wordexp]] operation.
export type flag = enum uint {
	NONE = 0,
	// DOOFFS = (1 << 0),  // not implemented
	// APPEND = (1 << 1),  // not implemented
	// REUSE  = (1 << 3),  // not implemented
	// NOCMD   = (1 << 2), // not implemented
	SHOWERR = (1 << 4),
	UNDEF   = (1 << 5),
};

// Performs shell expansion and word splitting on the provided string, returning
// a list of expanded words, similar to POSIX wordexp(3). Note that this
// function, by design, will execute arbitrary commands from the input string.
//
// Pass the return value to [[strings::freeall]] to free resources associated
// with the return value.
export fn wordexp(s: str, flags: flag) ([]str | error) = {
	const (rd, wr) = exec::pipe();
	const child = match (exec::fork()?) {
	case let proc: exec::process =>
		io::close(wr)!;
		yield proc;
	case void =>
		// "x" is added to detect syntax errors, it is discarded later
		const cmd = exec::cmd("/bin/sh",
			if (flags & flag::UNDEF != 0) "-uc" else "-c",
			`eval "printf %s\\\\0 x $1"`, "sh", s)!;
		exec::unsetenv(&cmd, "IFS")!;
		exec::addfile(&cmd, os::stdout_file, wr);
		if (flags & flag::SHOWERR == 0) {
			exec::addfile(&cmd, os::stderr_file, exec::nullfd);
		};
		io::close(rd)!;
		exec::exec(&cmd);
	};

	const scan = bufio::newscanner(rd, types::SIZE_MAX);
	defer bufio::finish(&scan);

	match (bufio::scan_string(&scan, "\0")?) {
	case io::EOF =>
		return sh_error;
	case => void; // Discard the first "x" argument
	};

	let words: []str = [];
	for (true) {
		match (bufio::scan_string(&scan, "\0")?) {
		case io::EOF => break;
		case let word: const str =>
			append(words, strings::dup(word));
		};
	};

	io::close(rd)!;
	const st = exec::wait(&child)!;
	match (exec::check(&st)) {
	case !exec::exit_status =>
		return sh_error;
	case void =>
		return words;
	};
};
-- 
2.42.1

[hare/patches] build success

builds.sr.ht <builds@sr.ht>
Details
Message ID
<CX4DQO7BQCD2.1UEFKPP9FWO05@cirno2>
In-Reply-To
<20231121092232.1318306-1-sir@cmpwn.com> (view parent)
DKIM signature
missing
Download raw message
hare/patches: SUCCESS in 1m56s

[wordexp: new module][0] v5 from [Drew DeVault][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/46860
[1]: sir@cmpwn.com

✓ #1097985 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1097985
✓ #1097984 SUCCESS hare/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/1097984
Details
Message ID
<CX6EVVAUNPF9.2RNA27QUL3VD5@notmylaptop>
In-Reply-To
<20231121092232.1318306-1-sir@cmpwn.com> (view parent)
DKIM signature
missing
Download raw message
LGTM

cc ecs: any objections to merging this?

(FWIW I'm still not *that* happy with having wordexp in the stdlib, but
I also think this is the best compromise)

Also: thoughts on putting a Hare-native POSIX wordexp implementation in
the extlib? I'm not entirely sure this is a good idea myself, but I
figured I'd bring it up.
Details
Message ID
<CX6F49Q1JF5U.2CV0EHO1IA0DB@taiga>
In-Reply-To
<CX6EVVAUNPF9.2RNA27QUL3VD5@notmylaptop> (view parent)
DKIM signature
missing
Download raw message
On Thu Nov 23, 2023 at 7:53 PM CET, Sebastian wrote:
> Also: thoughts on putting a Hare-native POSIX wordexp implementation in
> the extlib? I'm not entirely sure this is a good idea myself, but I
> figured I'd bring it up.

I have no objection to that, but fwiw I don't really dislike the
shelling-out approach (hah) so I'm not personally interested in doing
the work for that.
Details
Message ID
<CX6F5VKRZUBJ.1KJN8OZ5HNWWU@notmylaptop>
In-Reply-To
<CX6F49Q1JF5U.2CV0EHO1IA0DB@taiga> (view parent)
DKIM signature
missing
Download raw message
On Thu Nov 23, 2023 at 1:54 PM EST, Drew DeVault wrote:
> I have no objection to that, but fwiw I don't really dislike the
> shelling-out approach (hah) so I'm not personally interested in doing
> the work for that.

I've already done most of the work for it :)
https://lists.sr.ht/~sircmpwn/hare-dev/<20230930022154.26689-1-sebastian%40sebsite.pw>
Details
Message ID
<CX6OUARWQLGI.2BU72Z3BQTSHE@d2evs.net>
In-Reply-To
<20231121092232.1318306-1-sir@cmpwn.com> (view parent)
DKIM signature
missing
Download raw message
hm, alright. this implementation looks pretty straightforward, i'm open
to having it in the stdlib. a few small questions:

On Tue Nov 21, 2023 at 9:22 AM UTC, Drew DeVault wrote:
> diff --git a/wordexp/wordexp.ha b/wordexp/wordexp.ha
> @@ -0,0 +1,76 @@
-%<-
> +export fn wordexp(s: str, flags: flag) ([]str | error) = {
> +	const (rd, wr) = exec::pipe();
> +	const child = match (exec::fork()?) {

is there a reason we can't use the normal spawn-esque exec:: interface,
rather than exec::fork()? unless i'm missing something, it should just
work with exec::addfile(&cmd, rd, exec::nullfd);

> +	case let proc: exec::process =>
> +		io::close(wr)!;
> +		yield proc;
> +	case void =>
> +		// "x" is added to detect syntax errors, it is discarded later

why is this necessary? shouldn't the syntax error cause a nonzero exit
status anyways?
Reply to thread Export thread (mbox)