~sircmpwn/hare-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
2 2

[PATCH hare v3] wordexp: new module

Details
Message ID
<20231120105202.409691-1-sir@cmpwn.com>
DKIM signature
missing
Download raw message
Patch: +156 -0
Implements: https://todo.sr.ht/~sircmpwn/hare/274
Signed-off-by: Drew DeVault <sir@cmpwn.com>
---
This version abandons NOCMD, removes any attempt at syntax checking,
defines the shell as the system shell rather than a POSIX shell, and
more adequately explains the trusted user input constraints.

Is this a hack? Well, it's a small hack, but it works fine. I think the
interface is useful and not really replacable by another solution in
many of the use-cases to which it is suited, and I'd rather have a
little hack here in the stdlib where we can be certain that we've done
our little hack correctly rather than allow copy-pasted versions of this
code to proliferate throughout the ecosystem.

 wordexp/+test.ha   | 49 ++++++++++++++++++++++++++++++++
 wordexp/README     |  6 ++++
 wordexp/error.ha   | 31 ++++++++++++++++++++
 wordexp/wordexp.ha | 70 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 156 insertions(+)
 create mode 100644 wordexp/+test.ha
 create mode 100644 wordexp/README
 create mode 100644 wordexp/error.ha
 create mode 100644 wordexp/wordexp.ha

diff --git a/wordexp/+test.ha b/wordexp/+test.ha
new file mode 100644
index 00000000..52837d76
--- /dev/null
+++ b/wordexp/+test.ha
@@ -0,0 +1,49 @@
use os;
use strings;

fn streq(a: []str, b: []str) bool = {
	if (len(a) != len(b)) {
		return false;
	};
	for (let i = 0z; i < len(a); i += 1) {
		if (a[i] != b[i]) {
			return false;
		};
	};
	return true;
};

@test fn wordexp() void = {
	os::setenv("TESTVAR", "test value")!;
	os::unsetenv("UNSET")!;
	static const cases: [_](str, []str) = [
		(``, []),
		(`hello world`, ["hello", "world"]),
		(`hello $TESTVAR`, ["hello", "test", "value"]),
		(`hello "$TESTVAR"`, ["hello", "test value"]),
		(`hello $(echo world)`, ["hello", "world"]),
		(`hello $((2+2))`, ["hello", "4"]),
		(`hello $UNSET`, ["hello"]),
	];

	for (let i = 0z; i < len(cases); i += 1) {
		const (in, out) = cases[i];
		const words = wordexp(in, flag::NONE)!;
		defer strings::freeall(words);
		assert(streq(words, out));
	};
};

@test fn wordexp_error() void = {
	os::unsetenv("UNSET")!;
	static const cases: [_](str, we_error, flag) = [
		(`hello $UNSET`, we_error::BADVAL, flag::UNDEF),
		(`hello $(`, we_error::SYNTAX, 0),
	];

	for (let i = 0z; i < len(cases); i += 1) {
		const (in, err, flag) = cases[i];
		const result = wordexp(in, flag) as we_error;
		assert(result == err);
	};
};
diff --git a/wordexp/README b/wordexp/README
new file mode 100644
index 00000000..c8a8aead
--- /dev/null
+++ b/wordexp/README
@@ -0,0 +1,6 @@
The wordexp module implements word expansion using shell semantics, similar to
POSIX wordexp(3). Word expansion is performed with the platform-specific system
shell, which is generally POSIX sh(1) compatible on Unix-like systems.

Note that, by design, this module runs arbitrary shell commands from
user-supplied inputs. It must only be used in a trusted environment.
diff --git a/wordexp/error.ha b/wordexp/error.ha
new file mode 100644
index 00000000..223ca0db
--- /dev/null
+++ b/wordexp/error.ha
@@ -0,0 +1,31 @@
use encoding::utf8;
use io;
use os::exec;

// Tagged union of possible wordexp error conditions.
export type error = !(io::error | exec::error | utf8::invalid | we_error);

// wordexp-specific errors
export type we_error = enum uint {
	BADVAL,
	SYNTAX,
};

// Converts an [[error]] to a human-friendly string.
export fn strerror(err: error) const str = {
	match (err) {
	case let err: io::error =>
		return io::strerror(err);
	case let err: exec::error =>
		return exec::strerror(err);
	case utf8::invalid =>
		return "Word expansion returned invalid UTF-8 data";
	case let err: we_error =>
		switch (err) {
		case we_error::BADVAL =>
			return "Undefined shell variable in word expansion";
		case we_error::SYNTAX =>
			return "Shell syntax error in word expansion";
		};
	};
};
diff --git a/wordexp/wordexp.ha b/wordexp/wordexp.ha
new file mode 100644
index 00000000..477e1d0b
--- /dev/null
+++ b/wordexp/wordexp.ha
@@ -0,0 +1,70 @@
// Based on the musl libc implementation
// Copyright © 2005-2020 Rich Felker, et al
// MIT license
use bufio;
use io;
use os;
use os::exec;
use strings;
use types;

// Flags applicable to a [[wordexp]] operation.
export type flag = enum uint {
	NONE = 0,
	// DOOFFS = (1 << 0),  // not implemented
	// APPEND = (1 << 1),  // not implemented
	// REUSE  = (1 << 3),  // not implemented
	// NOCMD   = (1 << 2), // not implemented
	SHOWERR = (1 << 4),
	UNDEF   = (1 << 5),
};

// Performs shell expansion and word splitting on the provied string, returning
// a list of expanded words, similar to POSIX wordexp(3). Note that this
// function, by design, will execute arbitrary commands from the input string.
//
// Pass the return value to [[strings::freeall]] to free resources associated
// with the return value.
export fn wordexp(s: str, flags: flag) ([]str | error) = {
	const (rd, wr) = exec::pipe();
	const child = match (exec::fork()?) {
	case let proc: exec::process =>
		io::close(wr)!;
		yield proc;
	case void =>
		// "x" is added to detect syntax errors, it is discarded later
		const cmd = exec::cmd("/bin/sh",
			if (flags & flag::UNDEF != 0) "-uc" else "-c",
			`eval "printf %s\\\\0 x $1"`, "sh", s)!;
		exec::addfile(&cmd, os::stdout_file, wr);
		if (flags & flag::SHOWERR == 0) {
			exec::addfile(&cmd, os::stderr_file, exec::nullfd);
		};
		io::close(rd)!;
		exec::exec(&cmd);
	};
	defer io::close(rd)!;
	defer exec::wait(&child)!;

	const scan = bufio::newscanner(rd, types::SIZE_MAX);
	defer bufio::finish(&scan);

	match (bufio::scan_string(&scan, "\0")?) {
	case io::EOF =>
		if (flags & flag::UNDEF != 0) {
			return we_error::BADVAL;
		};
		return we_error::SYNTAX;
	case => yield; // Discard the first "x" argument
	};

	let words: []str = [];
	for (true) {
		match (bufio::scan_string(&scan, "\0")?) {
		case io::EOF => break;
		case let word: const str =>
			append(words, strings::dup(word));
		};
	};
	return words;
};
-- 
2.42.1

[hare/patches] build failed

builds.sr.ht <builds@sr.ht>
Details
Message ID
<CX3L0E9WTKKN.32ZG6KX773UP2@cirno2>
In-Reply-To
<20231120105202.409691-1-sir@cmpwn.com> (view parent)
DKIM signature
missing
Download raw message
hare/patches: FAILED in 1m41s

[wordexp: new module][0] v3 from [Drew DeVault][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/46831
[1]: sir@cmpwn.com

✓ #1097493 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1097493
✗ #1097492 FAILED  hare/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/1097492
Details
Message ID
<CX3YNE76FHUB.2N3UED0AMZMST@notmylaptop>
In-Reply-To
<20231120105202.409691-1-sir@cmpwn.com> (view parent)
DKIM signature
missing
Download raw message
On Mon Nov 20, 2023 at 5:50 AM EST, Drew DeVault wrote:
> This version abandons NOCMD, removes any attempt at syntax checking,
> defines the shell as the system shell rather than a POSIX shell, and
> more adequately explains the trusted user input constraints.

This implementation is definitely an improvement; I'd be fine merging a
later revision of this.

> Is this a hack? Well, it's a small hack, but it works fine. I think the
> interface is useful and not really replacable by another solution in
> many of the use-cases to which it is suited, and I'd rather have a
> little hack here in the stdlib where we can be certain that we've done
> our little hack correctly rather than allow copy-pasted versions of this
> code to proliferate throughout the ecosystem.

Fair enough. I do think this argument works better for making wordexp an
extended library rather than a stdlib module, but of course that's not
an option if we use it for hare.ini, so I guess it's fine.

As an aside: it would be neat if rc had a flag to only perform word
expansion, so the printf hack won't be necessary on gaia at all.

> +@test fn wordexp() void = {
> +	os::setenv("TESTVAR", "test value")!;
> +	os::unsetenv("UNSET")!;
> +	static const cases: [_](str, []str) = [
> +		(``, []),
> +		(`hello world`, ["hello", "world"]),
> +		(`hello $TESTVAR`, ["hello", "test", "value"]),
> +		(`hello "$TESTVAR"`, ["hello", "test value"]),
> +		(`hello $(echo world)`, ["hello", "world"]),
> +		(`hello $((2+2))`, ["hello", "4"]),
> +		(`hello $UNSET`, ["hello"]),
> +	];
> +
> +	for (let i = 0z; i < len(cases); i += 1) {
> +		const (in, out) = cases[i];
> +		const words = wordexp(in, flag::NONE)!;
> +		defer strings::freeall(words);
> +		assert(streq(words, out));
> +	};
> +};

Either this test should os::unsetenv("IFS"), or wordexp should just
always temporarily unset IFS. I'm not sure which makes more sense; I'll
leave that to your judgement.

> +// Tagged union of possible wordexp error conditions.
> +export type error = !(io::error | exec::error | utf8::invalid | we_error);
> +
> +// wordexp-specific errors
> +export type we_error = enum uint {
> +	BADVAL,
> +	SYNTAX,
> +};

If we support both badval and syntax, then IMO they should be two
separate types (but I wrote some more comments about this below). Also,
syntax should be changed to a more accurate name, like sh_error or
something like that. Even besides things like OOM, there are other
instances where the shell may fail due to a non-syntax-error, like
${UNSET?error}.

> +// Converts an [[error]] to a human-friendly string.
> +export fn strerror(err: error) const str = {
> +	match (err) {
> +	case let err: io::error =>
> +		return io::strerror(err);
> +	case let err: exec::error =>
> +		return exec::strerror(err);
> +	case utf8::invalid =>
> +		return "Word expansion returned invalid UTF-8 data";
> +	case let err: we_error =>
> +		switch (err) {
> +		case we_error::BADVAL =>
> +			return "Undefined shell variable in word expansion";
> +		case we_error::SYNTAX =>
> +			return "Shell syntax error in word expansion";

And likewise, this error message should be more generic.

> +// Performs shell expansion and word splitting on the provied string, returning
> +// a list of expanded words, similar to POSIX wordexp(3). Note that this
> +// function, by design, will execute arbitrary commands from the input string.

Typo: s/provied/provided/

> +	match (bufio::scan_string(&scan, "\0")?) {
> +	case io::EOF =>
> +		if (flags & flag::UNDEF != 0) {
> +			return we_error::BADVAL;
> +		};
> +		return we_error::SYNTAX;

This, isn't right. Syntax errors can still occur if flag::UNDEF is set.
Without re-introducing syntax-checking logic, it's not possible afaict
to differentiate between these errors. So I think we may be better off
just combining them into one.

Another idea: sh_error (or whatever it's called) could be defined as
!str, and could hold the contents written to stderr (possibly with some
default error string for when a failure occurs but stderr is empty).
Then we could also get rid of flag::SHOW_ERR.

> +	case => yield; // Discard the first "x" argument

s/yield/void/

> +	};
> +
> +	let words: []str = [];
> +	for (true) {
> +		match (bufio::scan_string(&scan, "\0")?) {
> +		case io::EOF => break;
> +		case let word: const str =>
> +			append(words, strings::dup(word));
> +		};
> +	};

Should the exit status of the shell be checked here? I feel like it
might make sense to do that, rather than just returning an empty slice.
Reply to thread Export thread (mbox)