~sircmpwn/hare-dev

hare: regex: add replacen and rawreplacen v1 SUPERSEDED

Sebastian: 1
 regex: add replacen and rawreplacen

 4 files changed, 77 insertions(+), 36 deletions(-)
#1003021 alpine.yml success
#1003022 freebsd.yml success
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~sircmpwn/hare-dev/patches/41702/mbox | git am -3
Learn more about email & git

[PATCH hare] regex: add replacen and rawreplacen Export this patch

Signed-off-by: Sebastian <sebastian@sebsite.pw>
---
 regex/+test.ha     | 62 +++++++++++++++++++++++++++-------------------
 regex/regex.ha     | 42 +++++++++++++++++++++++++------
 scripts/gen-stdlib |  5 ++--
 stdlib.mk          |  4 +--
 4 files changed, 77 insertions(+), 36 deletions(-)

diff --git a/regex/+test.ha b/regex/+test.ha
index e4ea24c2..a71517aa 100644
--- a/regex/+test.ha
+++ b/regex/+test.ha
@@ -2,6 +2,7 @@
// (c) 2022 Vlad-Stefan Harbuz <vlad@vladh.net>
use fmt;
use strings;
use types;

type matchres = enum { MATCH, NOMATCH, ERROR };

@@ -137,6 +138,7 @@ fn run_replace_case(
	expr: str,
	string: str,
	target: str,
	n: size,
	expected: (str | void),
) void = {
	const re = match (compile(expr)) {
@@ -149,25 +151,25 @@ fn run_replace_case(
	};
	defer finish(&re);

	match (replace(&re, string, target)) {
	match (replacen(&re, string, target, n)) {
	case let e: error =>
		if (expected is str) {
			fmt::errorln(e)!;
			fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" expected=\"{}\"",
				expr, string, target, expected as str)!;
			fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\"",
				expr, string, target, n, expected as str)!;
			abort();
		};
	case let s: str =>
		defer free(s);
		if (expected is void) {
			fmt::errorln("Expected replace to fail, but it did not")!;
			fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" return=\"{}\"",
				expr, string, target, s)!;
			fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} return=\"{}\"",
				expr, string, target, n, s)!;
			abort();
		};
		if (expected as str != s) {
			fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" expected=\"{}\" return=\"{}\"",
				expr, string, target, expected as str, s)!;
			fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\" return=\"{}\"",
				expr, string, target, n, expected as str, s)!;
			abort();
		};
	};
@@ -177,6 +179,7 @@ fn run_rawreplace_case(
	expr: str,
	string: str,
	target: str,
	n: size,
	expected: str,
) void = {
	const re = match (compile(expr)) {
@@ -189,11 +192,11 @@ fn run_rawreplace_case(
	};
	defer finish(&re);

	const s = rawreplace(&re, string, target);
	const s = rawreplacen(&re, string, target, n);
	defer free(s);
	if (expected != s) {
		fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" expected=\"{}\" return=\"{}\"",
			expr, string, target, expected, s)!;
		fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\" return=\"{}\"",
			expr, string, target, n, expected, s)!;
		abort();
	};
};
@@ -676,46 +679,55 @@ fn run_rawreplace_case(
};

@test fn replace() void = {
	const cases: [_](str, str, str, (str | void)) = [
	const cases: [_](str, str, str, size, (str | void)) = [
		(`ab.`, "hello abc and abあ test abq thanks", `xyz`,
			"hello xyz and xyz test xyz thanks"),
			types::SIZE_MAX, "hello xyz and xyz test xyz thanks"),
		(`([Hh])ello`, "Hello world and hello Hare.", `\1owdy`,
			"Howdy world and howdy Hare."),
			types::SIZE_MAX, "Howdy world and howdy Hare."),
		(`fo{2,}`, "fo foo fooofoof oofoo", `\0bar`,
			"fo foobar fooobarfoobarf oofoobar"),
			types::SIZE_MAX, "fo foobar fooobarfoobarf oofoobar"),
		(`(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)`, "12345678910", `\10`,
			"10"),
			types::SIZE_MAX, "10"),
		(`...?`, "abcdefgh", `\7\0\8`,
			"abcdefgh"),
		(`...?`, "abcdefgh", `\7\0\`, void),
			types::SIZE_MAX, "abcdefgh"),
		(`...?`, "abcdefgh", `\7\0\`, types::SIZE_MAX, void),
		(`ab.`, "hello abc and abあ test abq thanks", `xyz`,
			2, "hello xyz and xyz test abq thanks"),
		(`.`, "blablabla", `x`, 0, "blablabla"),
		(`([[:digit:]])([[:digit:]])`, "1234", `\2`, 1, "234"),
	];

	for (let i = 0z; i < len(cases); i += 1) {
		const expr = cases[i].0;
		const string = cases[i].1;
		const target = cases[i].2;
		const expected = cases[i].3;
		run_replace_case(expr, string, target, expected);
		const n = cases[i].3;
		const expected = cases[i].4;
		run_replace_case(expr, string, target, n, expected);
	};
};

@test fn rawreplace() void = {
	const cases = [
		(`ab.`, "hello abc and abあ test abq thanks", "xyz",
			"hello xyz and xyz test xyz thanks"),
			types::SIZE_MAX, "hello xyz and xyz test xyz thanks"),
		(`([Hh])ello`, "Hello world and hello Hare.", `\howdy\`,
			`\howdy\ world and \howdy\ Hare.`),
			types::SIZE_MAX, `\howdy\ world and \howdy\ Hare.`),
		(`fo{2,}`, "fo foo fooofoof oofoo", `\0bar`,
			`fo \0bar \0bar\0barf oo\0bar`),
			types::SIZE_MAX, `fo \0bar \0bar\0barf oo\0bar`),
		(`\\\\`, `\\\\\\\\`, `\00\1`,
			`\00\1\00\1\00\1\00\1`),
			types::SIZE_MAX, `\00\1\00\1\00\1\00\1`),
		(`ab.`, "hello abc and abあ test abq thanks", `xyz`,
			2, "hello xyz and xyz test abq thanks"),
		(`.`, "blablabla", `x`, 0, "blablabla"),
	];

	for (let i = 0z; i < len(cases); i += 1) {
		const expr = cases[i].0;
		const string = cases[i].1;
		const target = cases[i].2;
		const expected = cases[i].3;
		run_rawreplace_case(expr, string, target, expected);
		const n = cases[i].3;
		const expected = cases[i].4;
		run_rawreplace_case(expr, string, target, n, expected);
	};
};
diff --git a/regex/regex.ha b/regex/regex.ha
index 4c25023a..162cf978 100644
--- a/regex/regex.ha
+++ b/regex/regex.ha
@@ -7,6 +7,7 @@ use errors;
use io;
use strconv;
use strings;
use types;

// An error string describing a compilation error.
export type error = !str;
@@ -829,19 +830,35 @@ export fn findall(re: *regex, string: str) []result = {
//
// An error is only returned if 'targetstr' isn't formatted correctly.
export fn replace(re: *regex, string: str, targetstr: str) (str | error) = {
	// Delegate to replacen, passing types::SIZE_MAX as the replacement
	// count 'n' so that the number of replacements is not capped.
	return replacen(re, string, targetstr, types::SIZE_MAX);
};

// Replaces the first 'n' non-overlapping matches of a regular expression
// against a string with 'targetstr', in the same manner as [[replace]].
export fn replacen(
	re: *regex,
	string: str,
	targetstr: str,
	n: size,
) (str | error) = {
	const target = parse_replace_target(targetstr)?;
	defer free(target);
	// Check if n == 0 after parse_replace_target so errors are propagated
	if (n == 0) {
		return strings::dup(string);
	};

	const matches = findall(re, string);
	if (len(matches) == 0) {
		return strings::dup(string);
	};
	defer result_freeall(matches);

	const target = parse_replace_target(targetstr)?;
	defer free(target);

	const bytes = strings::toutf8(string);
	let buf = alloc(bytes[..matches[0][0].start_bytesize]...);

	for (let i = 0z; i < len(matches); i += 1) {
	const n = if (len(matches) > n) n else len(matches);
	for (let i = 0z; i < n; i += 1) {
		for (let j = 0z; j < len(target); j += 1) {
			match (target[j]) {
			case let b: []u8 =>
@@ -853,7 +870,7 @@ export fn replace(re: *regex, string: str, targetstr: str) (str | error) = {
			};
		};
		const start = matches[i][0].end_bytesize;
		const end = if (i == len(matches) - 1) len(bytes)
		const end = if (i == n - 1) len(bytes)
			else matches[i + 1][0].start_bytesize;
		append(buf, bytes[start..end]...);
	};
@@ -907,6 +924,16 @@ fn parse_replace_target(targetstr: str) ([]([]u8 | size) | error) = {
// with 'targetstr'. 'targetstr' isn't interpreted in any special way; all
// backslashes are treated literally.
export fn rawreplace(re: *regex, string: str, targetstr: str) str = {
	// Delegate to rawreplacen, passing types::SIZE_MAX as the replacement
	// count 'n' so that the number of replacements is not capped.
	return rawreplacen(re, string, targetstr, types::SIZE_MAX);
};

// Replaces the first 'n' non-overlapping matches of a regular expression
// against a string with 'targetstr', in the same manner as [[rawreplace]].
export fn rawreplacen(re: *regex, string: str, targetstr: str, n: size) str = {
	if (n == 0) {
		return strings::dup(string);
	};

	const matches = findall(re, string);
	if (len(matches) == 0) {
		return strings::dup(string);
@@ -918,14 +945,15 @@ export fn rawreplace(re: *regex, string: str, targetstr: str) str = {
	let buf: []u8 = [];

	append(buf, bytes[..matches[0][0].start_bytesize]...);
	for (let i = 1z; i < len(matches); i += 1) {
	const n = if (len(matches) > n) n else len(matches);
	for (let i = 1z; i < n; i += 1) {
		append(buf, target...);
		const start = matches[i - 1][0].end_bytesize;
		const end = matches[i][0].start_bytesize;
		append(buf, bytes[start..end]...);
	};
	append(buf, target...);
	append(buf, bytes[matches[len(matches) - 1][0].end_bytesize..]...);
	append(buf, bytes[matches[n - 1][0].end_bytesize..]...);

	return strings::fromutf8(buf)!;
};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
index 1ea4b457..236639a6 100755
--- a/scripts/gen-stdlib
+++ b/scripts/gen-stdlib
@@ -1255,10 +1255,11 @@ regex() {
	if [ $testing -eq 0 ]; then
		gen_srcs regex regex.ha
		gen_ssa regex ascii bufio encoding::utf8 errors io strconv \
			strings bufio
			strings bufio types
	else
		gen_srcs regex regex.ha +test.ha
		gen_ssa regex encoding::utf8 errors strconv strings fmt io os bufio
		gen_ssa regex encoding::utf8 errors strconv strings fmt io os \
			bufio types
	fi
}

diff --git a/stdlib.mk b/stdlib.mk
index db2a7121..f7229942 100644
--- a/stdlib.mk
+++ b/stdlib.mk
@@ -2024,7 +2024,7 @@ $(HARECACHE)/path/path-any.ssa: $(stdlib_path_any_srcs) $(stdlib_rt) $(stdlib_st
stdlib_regex_any_srcs = \
	$(STDLIB)/regex/regex.ha

$(HARECACHE)/regex/regex-any.ssa: $(stdlib_regex_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_errors_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM))
$(HARECACHE)/regex/regex-any.ssa: $(stdlib_regex_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_errors_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_types_$(PLATFORM))
	@printf 'HAREC \t$@\n'
	@mkdir -p $(HARECACHE)/regex
	@$(stdlib_env) $(HAREC) $(HAREFLAGS) -o $@ -Nregex \
@@ -4541,7 +4541,7 @@ testlib_regex_any_srcs = \
	$(STDLIB)/regex/regex.ha \
	$(STDLIB)/regex/+test.ha

$(TESTCACHE)/regex/regex-any.ssa: $(testlib_regex_any_srcs) $(testlib_rt) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_errors_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_bufio_$(PLATFORM))
$(TESTCACHE)/regex/regex-any.ssa: $(testlib_regex_any_srcs) $(testlib_rt) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_errors_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_types_$(PLATFORM))
	@printf 'HAREC \t$@\n'
	@mkdir -p $(TESTCACHE)/regex
	@$(testlib_env) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nregex \
-- 
2.40.1
hare/patches: SUCCESS in 1m43s

[regex: add replacen and rawreplacen][0] from [Sebastian][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/41702
[1]: mailto:sebastian@sebsite.pw

✓ #1003022 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1003022
✓ #1003021 SUCCESS hare/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/1003021