~sircmpwn/hare-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
3 3

[PATCH hare v2] bytes,strings: make {cut,rcut} return a tagged union value

Details
Message ID
<20230421123908.6820-1-autumnull@posteo.net>
DKIM signature
pass
Download raw message
Patch: +78 -115
additionally removes the requirement that the delimiter can't be empty.

Signed-off-by: Autumn! <autumnull@posteo.net>
---
 bytes/tokenize.ha   | 54 ++++++++++++----------------------
 cmd/haredoc/env.ha  | 11 ++++---
 net/uri/query.ha    |  5 +++-
 regex/regex.ha      | 70 +++++++++++++--------------------------------
 strings/tokenize.ha | 53 ++++++++++++++++++----------------
 5 files changed, 78 insertions(+), 115 deletions(-)

diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha
index 317f5b8a..13eb8ac1 100644
--- a/bytes/tokenize.ha
+++ b/bytes/tokenize.ha
@@ -184,61 +184,45 @@ export fn remaining_tokens(s: *tokenizer) []u8 = {
};

// Returns the input slice "cut" along the first instance of a delimiter,
// returning everything up to the delimiter, and everything after the delimiter,
// in a tuple. The contents are borrowed from the input slice.
//
// The caller must ensure that 'delimiter' is not an empty slice.
export fn cut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = {
	let ln = if (delim is u8) {
		yield 1z;
	} else {
		let ln = len(delim: []u8);
		assert(ln > 0, "bytes::cut called with empty delimiter");
		yield ln;
	};
// returning everything up to the delimiter, and everything after the
// delimiter, in a tuple. If the delimiter is not found, returns void.
// The contents are borrowed from the input slice.
export fn cut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = {
	let ln = if (delim is u8) 1z else len(delim: []u8);
	match (index(in, delim)) {
	case let i: size =>
		return (in[..i], in[i + ln..]);
	case void =>
		return (in, []);
		return void;
	};
};

// Returns the input slice "cut" along the last instance of a delimiter,
// returning everything up to the delimiter, and everything after the delimiter,
// in a tuple. The contents are borrowed from the input slice.
//
// The caller must ensure that 'delimiter' is not an empty slice.
export fn rcut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = {
	let ln = if (delim is u8) {
		yield 1z;
	} else {
		let ln = len(delim: []u8);
		assert(ln > 0, "bytes::rcut called with empty delimiter");
		yield ln;
	};
// returning everything up to the delimiter, and everything after the
// delimiter, in a tuple. If the delimiter is not found, returns void.
// The contents are borrowed from the input slice.
export fn rcut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = {
	let ln = if (delim is u8) 1z else len(delim: []u8);
	match (rindex(in, delim)) {
	case let i: size =>
		return (in[..i], in[i + ln..]);
	case void =>
		return (in, []);
		return void;
	};
};

@test fn cut() void = {
	const c = cut(['a', 'b', 'c'], ['b']);
	const c = cut(['a', 'b', 'c'], ['b']) as ([]u8, []u8);
	assert(equal(c.0, ['a']) && equal(c.1, ['c']));
	const c = cut(['a', 'b', 'c'], 'b');
	const c = cut(['a', 'b', 'c'], 'b') as ([]u8, []u8);
	assert(equal(c.0, ['a']) && equal(c.1, ['c']));
	const c = cut(['a', 'b', 'c', 'b', 'a'], 'b');
	const c = cut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8);
	assert(equal(c.0, ['a']) && equal(c.1, ['c', 'b', 'a']));
	const c = cut(['a', 'b', 'c'], 'x');
	assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, []));
	const c = cut([], 'x');
	assert(equal(c.0, []) && equal(c.1, []));
	assert(cut(['a', 'b', 'c'], 'x') is void);
	assert(cut([], 'x') is void);

	const c = rcut(['a', 'b', 'c'], ['b']);
	const c = rcut(['a', 'b', 'c'], ['b']) as ([]u8, []u8);
	assert(equal(c.0, ['a']) && equal(c.1, ['c']));
	const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b');
	const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8);
	assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, ['a']));
};
diff --git a/cmd/haredoc/env.ha b/cmd/haredoc/env.ha
index d1d6a186..1d86f17d 100644
--- a/cmd/haredoc/env.ha
+++ b/cmd/haredoc/env.ha
@@ -42,10 +42,13 @@ fn default_tags() ([]module::tag | error) = {
	for (true) match (bufio::scanline(pipe.0)?) {
	case let b: []u8 =>
		defer free(b);
		const (k, v) = strings::cut(strings::fromutf8(b)!, "\t");
		if (k == "Build tags") {
			tags = module::parsetags(v) as []module::tag;
			break;
		match (strings::cut(strings::fromutf8(b)!, "\t")) {
		case void => void;
		case let s: (str, str) =>
			if (s.0 == "Build tags") {
				tags = module::parsetags(s.1) as []module::tag;
				break;
			};
		};
	case io::EOF =>
		// process exited with failure; handled below
diff --git a/net/uri/query.ha b/net/uri/query.ha
index 8c9dd0e4..2ba7fc6b 100644
--- a/net/uri/query.ha
+++ b/net/uri/query.ha
@@ -30,7 +30,10 @@ export fn query_next(dec: *query_decoder) ((str, str) | invalid | void) = {
	case => return;
	};

	const raw = strings::cut(tok, "=");
	const raw: (str, str) = match (strings::cut(tok, "=")) {
	case let s: (str, str) => yield s;
	case void => yield (tok, "");
	};
	strio::reset(&dec.bufs.0);
	percent_decode_static(&dec.bufs.0, raw.0)?;
	strio::reset(&dec.bufs.1);
diff --git a/regex/regex.ha b/regex/regex.ha
index c194adc6..c07c0b86 100644
--- a/regex/regex.ha
+++ b/regex/regex.ha
@@ -399,65 +399,35 @@ export fn compile(expr: str) (regex | error) = {
	};
};

// returns min, max, and length of string matched
fn parse_repetition(
	s: str
) (((void | size), (void | size), size) | error) = {
	const first_comma = strings::index(s, ",");
	const first_endbrace = strings::index(s, "}");
	if (first_endbrace is void) {
		return `Repetition expression syntax error '{n}'`: error;
	};
	const first_endbrace = first_endbrace as size;

	let min_str = "";
	let max_str = "";
	let is_single_arg = false;
	if (first_comma is void || first_endbrace < first_comma as size) {
		const cut = strings::cut(s, "}");
		min_str = cut.0;
		max_str = cut.0;
		is_single_arg = true;
	} else {
		const cut = strings::cut(s, ",");
		min_str = cut.0;
		max_str = strings::cut(cut.1, "}").0;
) ((size, size, size) | error) = {
	const brace_cut = match (strings::cut(s, "}")) {
	case void => return `Repetition expression syntax error '{n}'`: error;
	case let s: (str, str) => yield s;
	};

	let min: (void | size) = void;
	let max: (void | size) = void;

	if (len(min_str) > 0) {
		min = match (strconv::stoi(min_str)) {
		case let res: int =>
			yield if (res < 0) {
				return `Negative repitition count '{-n}'`: error;
			} else {
				yield res: size;
			};
		case => return `Repetition expression syntax error '{n}'`: error;
		};
	} else {
		min = 0;
	const (min_str, max_str) = match (strings::cut(brace_cut.0, ",")) {
	case void =>
		let n = parse_repnum(brace_cut.0)?;
		return (n, n, len(brace_cut.0));
	case let s: (str, str) => yield s;
	};

	if (len(max_str) > 0) {
		max = match (strconv::stoi(max_str)) {
		case let res: int =>
			yield if (res < 0) {
				return `Negative repitition count '{-n}'`: error;
			} else {
				yield res: size;
			};
		case => return `Repetition expression syntax error '{n}'`: error;
		};
	};
	const min = if (len(min_str) == 0) 0: size else parse_repnum(min_str)?;
	const max = if (len(max_str) == 0) -1: size else parse_repnum(max_str)?;
	return (min, max, len(brace_cut.0));
};

	const rep_len = if (is_single_arg) {
		yield len(min_str);
fn parse_repnum(s: str) (size | error) = match (strconv::stoi(s)) {
case let res: int =>
	if (res < 0) {
		return `Negative repetition count '{-n}'`: error;
	} else {
		yield len(min_str) + 1 + len(max_str);
		return res: size;
	};
	return (min, max, rep_len);
case => return `Repetition expression syntax error '{n}'`: error;
};

fn delete_thread(i: size, threads: *[]thread) void = {
diff --git a/strings/tokenize.ha b/strings/tokenize.ha
index 673e0c3c..da4f707d 100644
--- a/strings/tokenize.ha
+++ b/strings/tokenize.ha
@@ -179,44 +179,47 @@ export fn split(in: str, delim: str) []str = splitn(in, delim, types::SIZE_MAX);
	};
};

// Returns a string "cut" along the first instance of a delimiter, returning
// everything up to the delimiter, and everything after the delimiter, in a
// tuple.
// Returns a string "cut" along the first instance of a delimiter,
// returning everything up to the delimiter, and everything after the
// delimiter, in a tuple. If the delimiter is not found, returns void.
//
// 	strings::cut("hello=world=foobar", "=")	// ("hello", "world=foobar")
// 	strings::cut("hello world", "=")	// ("hello world", "")
// 	strings::cut("hello world", "=")	// void
//
// The return value is borrowed from the 'in' parameter.  The caller must ensure
// that 'delimiter' is not an empty string.
export fn cut(in: str, delim: str) (str, str) = {
	let c = bytes::cut(toutf8(in), toutf8(delim));
	return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1));
// The return value is borrowed from the 'in' parameter.
export fn cut(in: str, delim: str) ((str, str) | void) = {
	match (bytes::cut(toutf8(in), toutf8(delim))) {
	case void => return void;
	case let bs: ([]u8, []u8) =>
		return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1));
	};
};

// Returns a string "cut" along the last instance of a delimiter, returning
// everything up to the delimiter, and everything after the delimiter, in a
// tuple.
// Returns a string "cut" along the last instance of a delimiter,
// returning everything up to the delimiter, and everything after the
// delimiter, in a tuple. If the delimiter is not found, returns void.
//
// 	strings::rcut("hello=world=foobar", "=")	// ("hello=world", "foobar")
// 	strings::rcut("hello world", "=")	// ("hello world", "")
// 	strings::rcut("hello world", "=")	// void
//
// The return value is borrowed from the 'in' parameter.  The caller must ensure
// that 'delimiter' is not an empty string.
export fn rcut(in: str, delim: str) (str, str) = {
	let c = bytes::rcut(toutf8(in), toutf8(delim));
	return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1));
// The return value is borrowed from the 'in' parameter.
export fn rcut(in: str, delim: str) ((str, str) | void) = {
	match (bytes::rcut(toutf8(in), toutf8(delim))) {
	case void => return void;
	case let bs: ([]u8, []u8) =>
		return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1));
	};
};

@test fn cut() void = {
	const sample = cut("hello=world", "=");
	const sample = cut("hello=world", "=") as (str, str);
	assert(sample.0 == "hello" && sample.1 == "world");
	const sample = cut("hello=world=foobar", "=");
	const sample = cut("hello=world=foobar", "=") as (str, str);
	assert(sample.0 == "hello" && sample.1 == "world=foobar");
	const sample = cut("hello world", "=");
	assert(sample.0 == "hello world" && sample.1 == "");
	const sample = cut("", "=");
	assert(sample.0 == "" && sample.1 == "");
	assert(cut("hello world", "=") is void);
	assert(cut("", "=") is void);

	const sample = rcut("hello=world=foobar", "=");
	const sample = rcut("hello=world=foobar", "=") as (str, str);
	assert(sample.0 == "hello=world" && sample.1 == "foobar");
};
-- 
2.40.0

[hare/patches] build success

builds.sr.ht <builds@sr.ht>
Details
Message ID
<CS2FWMCY9E11.38UIM7IXALV6B@cirno2>
In-Reply-To
<20230421123908.6820-1-autumnull@posteo.net> (view parent)
DKIM signature
missing
Download raw message
hare/patches: SUCCESS in 1m48s

[bytes,strings: make {cut,rcut} return a tagged union value][0] v2 from [Autumn!][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/40558
[1]: autumnull@posteo.net

✓ #977236 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/977236
✓ #977235 SUCCESS hare/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/977235
Details
Message ID
<CSI4RIVRNS8E.HXDNRDQNBIL7@monch>
In-Reply-To
<20230421123908.6820-1-autumnull@posteo.net> (view parent)
DKIM signature
pass
Download raw message
thanks!

To git@git.sr.ht:~sircmpwn/hare
   da442e0b..311c4195  master -> master
Details
Message ID
<CSMP4H7OR7FM.3TL6XP7TIR6HE@taiga>
In-Reply-To
<CSI4RIVRNS8E.HXDNRDQNBIL7@monch> (view parent)
DKIM signature
pass
Download raw message
I have reverted this with the following explanation:

    Return tuple directly from strings,bytes::cut,rcut
    
    strings::cut et al are convenience functions which aim to address the
    common 95% of cases, an approach which is common to much of the standard
    library's design. It is not important for this interface to be
    exhaustive; other tools are available for those who need to treat the
    presence or absence of the delimiter differently. The convenience of
    this convenience function is greatly diminished should the 95% of users
    who do not need to distinguish these cases be required to add `as (str,
    str)` -- a full 25% of the 80-character line width budget -- for every
    call.
    
    This reverts commit da442e0bf76cac19a137a3f779b5e0d838b94c8a.
    This reverts commit aa9d6b57fed162be8d5d1c59ef3fb0614e504bba.

Ember, please don't merge something with open objections; such patches
lack consensus.
Reply to thread Export thread (mbox)