~sircmpwn/hare-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
5 3

[PATCH hare 1/4] regex: rename regex_finish to finish

Details
Message ID
<20220514162502.8713-1-vlad@vladh.net>
DKIM signature
pass
Download raw message
Patch: +2 -2
Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
---
 regex/README   | 2 +-
 regex/regex.ha | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/regex/README b/regex/README
index 4ec6455e..276c1091 100644
--- a/regex/README
+++ b/regex/README
@@ -18,7 +18,7 @@ This module implements the POSIX match disambiguation rules by returning
the longest match among the leftmost matches.

	const re = regex::compile(`[Hh]are`)!;
-	defer regex::regex_finish(&re);
+	defer regex::finish(&re);

	const does_match = regex::test(&re, "Hello Hare, hello Hare.")!;
	fmt::printfln("matched? {}", does_match)!;
diff --git a/regex/regex.ha b/regex/regex.ha
index 41202490..0d6bc27e 100644
--- a/regex/regex.ha
+++ b/regex/regex.ha
@@ -99,7 +99,7 @@ export type regex = struct {
};

// Frees the memory used by a regex.
-export fn regex_finish(re: *regex) void = {
+export fn finish(re: *regex) void = {
	free(re.insts);
	for (let i = 0z; i < len(re.charsets); i += 1) {
		free(re.charsets[i]);
-- 
2.32.0

[PATCH hare 2/4] ioctlgen: update with regex_finish name change

Details
Message ID
<20220514162502.8713-2-vlad@vladh.net>
In-Reply-To
<20220514162502.8713-1-vlad@vladh.net> (view parent)
DKIM signature
pass
Download raw message
Patch: +2 -2
Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
---
 cmd/ioctlgen/main.ha | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmd/ioctlgen/main.ha b/cmd/ioctlgen/main.ha
index a26f9a25..83e0ad95 100644
--- a/cmd/ioctlgen/main.ha
+++ b/cmd/ioctlgen/main.ha
@@ -19,8 +19,8 @@ let typedefre: regex::regex = regex::regex { ... };
};

@fini fn fini() void = {
-	regex::regex_finish(&ioctlre);
-	regex::regex_finish(&typedefre);
+	regex::finish(&ioctlre);
+	regex::finish(&typedefre);
};

type dir = enum u32 {
-- 
2.32.0

[PATCH hare 4/4] ioctlgen: update with regex error handling changes

Details
Message ID
<20220514162502.8713-4-vlad@vladh.net>
In-Reply-To
<20220514162502.8713-1-vlad@vladh.net> (view parent)
DKIM signature
pass
Download raw message
Patch: +2 -2
Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
---
 cmd/ioctlgen/main.ha | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmd/ioctlgen/main.ha b/cmd/ioctlgen/main.ha
index 83e0ad95..5c75c4f4 100644
--- a/cmd/ioctlgen/main.ha
+++ b/cmd/ioctlgen/main.ha
@@ -46,14 +46,14 @@ export fn main() void = {
		};
		defer free(line);

-		if (regex::test(&typedefre, line)!) {
+		if (regex::test(&typedefre, line)) {
			bufio::unreadrune(os::stdin, '\n');
			bufio::unread(os::stdin, strings::toutf8(line));
			loadtype(store);
			continue;
		};

-		let groups = match (regex::find(&ioctlre, line)!) {
+		let groups = match (regex::find(&ioctlre, line)) {
		case void =>
			fmt::println(line)!;
			continue;
-- 
2.32.0

[PATCH hare 3/4] regex: find/findall/test can no longer error

Details
Message ID
<20220514162502.8713-3-vlad@vladh.net>
In-Reply-To
<20220514162502.8713-1-vlad@vladh.net> (view parent)
DKIM signature
pass
Download raw message
Patch: +38 -72
Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
---
 regex/+test.ha | 58 +++++++++++++++-----------------------------------
 regex/README   |  6 +++---
 regex/regex.ha | 46 ++++++++++++++++-----------------------
 3 files changed, 38 insertions(+), 72 deletions(-)

diff --git a/regex/+test.ha b/regex/+test.ha
index 53681f2f..5cf668c4 100644
--- a/regex/+test.ha
+++ b/regex/+test.ha
@@ -16,37 +16,34 @@ fn run_find_case(
	case let e: error =>
		if (expected == matchres::MATCH) {
			fmt::println(e)!;
-			fmt::fatalf("Expected expression /{}/ to match, but it errored",
+			fmt::fatalf("Expected expression /{}/ to match string \"{}\", but it errored",
				expr, string);
		};
		if (expected == matchres::NOMATCH) {
			fmt::println(e)!;
-			fmt::fatalf("Expected expression /{}/ to not match, but it errored",
+			fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it errored",
				expr, string);
		};
		return;
	};

+	if (expected == matchres::ERROR) {
+		fmt::fatalf("Expected expression /{}/ to have error caught during compilation, but it did not",
+			expr);
+	};

	match (find(&re, string)) {
	case void =>
		if (expected == matchres::MATCH) {
			fmt::fatalf("Expected expression /{}/ to match string \"{}\", but it did not",
				expr, string);
		};
-		if (expected == matchres::ERROR) {
-			fmt::fatalf("Expression /{}/ failed to match, but should have errored",
-				expr, string);
-		};

	case let m: []capture =>
		if (expected == matchres::NOMATCH) {
			fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it did",
				expr, string);
		};
-		if (expected == matchres::ERROR) {
-			fmt::fatalf("Expression /{}/ matched, but should have errored",
-				expr, string);
-		};
		if (start: size != m[0].start) {
			fmt::fatalf("Expected start of main capture to be {} but it was {}",
				start, m[0].start);
@@ -55,16 +52,6 @@ fn run_find_case(
			fmt::fatalf("Expected end of main capture to be {} but it was {}",
				end, m[0].end);
		};

-	case let e: error =>
-		if (expected == matchres::MATCH) {
-			fmt::fatalf("Expected expression /{}/ to match, but it errored",
-				expr, string);
-		};
-		if (expected == matchres::NOMATCH) {
-			fmt::fatalf("Expected expression /{}/ to not match, but it errored",
-				expr, string);
-		};
	};
};

@@ -90,40 +77,27 @@ fn run_findall_case(
		return;
	};

+	if (expected == matchres::ERROR) {
+		fmt::fatalf("Expected expression /{}/ to have error caught during compilation, but it did not",
+			expr);
+	};

	match (findall(&re, string)) {
	case void =>
		if (expected == matchres::MATCH) {
			fmt::fatalf("Expected expression /{}/ to match string \"{}\", but it did not",
				expr, string);
		};
-		if (expected == matchres::ERROR) {
-			fmt::fatalf("Expression /{}/ failed to match, but should have errored",
-				expr, string);
-		};

	case let groupsets: [][]capture =>
		if (expected == matchres::NOMATCH) {
			fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it did",
				expr, string);
		};
-		if (expected == matchres::ERROR) {
-			fmt::fatalf("Expression /{}/ matched, but should have errored",
-				expr, string);
-		};
		if (count: size != len(groupsets)) {
			fmt::fatalf("Expected to find {} matches but found {}",
				count, len(groupsets));
		};

-	case let e: error =>
-		if (expected == matchres::MATCH) {
-			fmt::fatalf("Expected expression /{}/ to match, but it errored",
-				expr, string);
-		};
-		if (expected == matchres::NOMATCH) {
-			fmt::fatalf("Expected expression /{}/ to not match, but it errored",
-				expr, string);
-		};
	};
};

@@ -309,9 +283,10 @@ fn run_findall_case(
		(`^x(abc){1,2}$`, "xabc", matchres::MATCH, 0, -1),
		(`^x(abc){1,2}$`, "xabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){1,2}$`, "xabcabcabc", matchres::NOMATCH, 0, -1),
-		(`^x(abc){,2}$`, "xabc", matchres::ERROR, 0, -1),
-		(`^x(abc){,2}$`, "xabcabc", matchres::ERROR, 0, -1),
-		(`^x(abc){,2}$`, "xabcabcabc", matchres::ERROR, 0, -1),
+		(`^x(abc){,2}$`, "xabc", matchres::MATCH, 0, -1),
+		(`^x(abc){,2}$`, "xabcabc", matchres::MATCH, 0, -1),
+		(`^x(abc){,2}`, "xabcabcabc", matchres::MATCH, 0, 7),
+		(`^x(abc){,2}$`, "xabcabcabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){1,}$`, "xabc", matchres::MATCH, 0, -1),
		(`^x(abc){1,}$`, "xabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){3,}$`, "xabcabc", matchres::NOMATCH, 0, -1),
@@ -481,6 +456,7 @@ fn run_findall_case(
		// (`a|b|c|d|e`, "e", matchres::MATCH, 0, -1),
		// (`(a|b|c|d|e)f`, "ef", matchres::MATCH, 0, -1),
		// TODO: nested capture groups
+		(`((a))`, "abc", matchres::ERROR, 0, -1),
		// (`((a))`, "abc", matchres::MATCH, 0, -1),
		// (`((a)(b)c)(d)`, "abcd", matchres::MATCH, 0, -1),
		// (`(bc+d$|ef*g.|h?i(j|k))`, "effgz", matchres::MATCH, 0, -1),
diff --git a/regex/README b/regex/README
index 276c1091..ea0a3513 100644
--- a/regex/README
+++ b/regex/README
@@ -20,10 +20,10 @@ the longest match among the leftmost matches.
	const re = regex::compile(`[Hh]are`)!;
	defer regex::finish(&re);

-	const does_match = regex::test(&re, "Hello Hare, hello Hare.")!;
+	const does_match = regex::test(&re, "Hello Hare, hello Hare.");
	fmt::printfln("matched? {}", does_match)!;

-	const first_match = regex::find(&re, "Hello Hare, hello Hare.")!;
+	const first_match = regex::find(&re, "Hello Hare, hello Hare.");
	match (first_match) {
	case void => void;
	case let captures: []regex::capture =>
@@ -35,7 +35,7 @@ the longest match among the leftmost matches.
			captures[0].end)!;
	};

-	const all_matches = regex::findall(&re, "Hello Hare, hello Hare.")!;
+	const all_matches = regex::findall(&re, "Hello Hare, hello Hare.");
	match (all_matches) {
	case void => void;
	case let matches: [][]regex::capture =>
diff --git a/regex/regex.ha b/regex/regex.ha
index 0d6bc27e..0ccc89f7 100644
--- a/regex/regex.ha
+++ b/regex/regex.ha
@@ -90,7 +90,6 @@ const charclass_fns: [](charclass, *fn(c: rune) bool) = [
	(charclass::UPPER, &ascii::isupper),
	(charclass::XDIGIT, &ascii::isxdigit),
];
-const multibyte_err: error = "Character ranges do not support characters larger than one byte.";

export type regex = struct {
	insts: []inst,
@@ -188,15 +187,11 @@ fn handle_bracket(
		};
	} else if (is_range) {
		const start_enc = utf8::encoderune(r);
-		if (len(start_enc) > 1) {
-			return multibyte_err;
-		};
+		assert(len(start_enc) == 1, "Character ranges do not currently support characters larger than one byte");
		const start_b = start_enc[0];

		const end_enc = utf8::encoderune(range_end as rune);
-		if (len(end_enc) > 1) {
-			return multibyte_err;
-		};
+		assert(len(end_enc) == 1, "Character ranges do not currently support characters larger than one byte");
		const end_b = end_enc[0];

		if (end_b < start_b) {
@@ -285,6 +280,9 @@ export fn compile(expr: str) (regex | error) = {
				append(insts, r: inst_lit);
			};
		case '(' =>
+			if (n_groupstarts > 0) {
+				return "Found nested capture groups in expression, which are not supported": error;
+			};
			append(insts, void: inst_groupstart);
			n_groupstarts += 1;
		case ')' =>
@@ -441,6 +439,8 @@ fn parse_repetition(
			};
		case => return "Invalid repetition minimum value": error;
		};
+	} else {
+		min = 0;
	};

	if (len(max_str) > 0) {
@@ -455,10 +455,6 @@ fn parse_repetition(
		};
	};

-	if (len(min_str) == 0 && len(max_str) > 0) {
-		return "Invalid repetition minimum value": error;
-	};

	const rep_len = if (is_single_arg) {
		yield len(min_str);
	} else {
@@ -509,7 +505,7 @@ fn run_thread(
	threads: *[]thread,
	r_or_end: (rune | void),
	str_idx: int
-) (void | error | newmatch) = {
+) (void | newmatch) = {
	if (threads[i].matched) {
		return;
	};
@@ -546,16 +542,12 @@ fn run_thread(
			threads[i].matched = true;
			return newmatch;
		case inst_groupstart =>
-			if (threads[i].curr_capture_inited) {
-				return "Found nested capture groups in expression, which are not supported": error;
-			};
+			assert(!threads[i].curr_capture_inited, "Found nested capture groups in expression, which are not supported");
			threads[i].curr_capture.start = str_idx: size;
			threads[i].curr_capture_inited = true;
			threads[i].pc += 1;
		case inst_groupend =>
-			if (!threads[i].curr_capture_inited) {
-				return `Found a groupend token ")" without having previously seen a groupstart token "("`: error;
-			};
+			assert(threads[i].curr_capture_inited, `Found a groupend token ")" without having previously seen a groupstart token "(". Please report this as a bug`);
			threads[i].curr_capture.end = str_idx: size;
			// TODO: This is a perf issue for large strings
			threads[i].curr_capture.content = strings::sub(string,
@@ -615,9 +607,7 @@ fn run_thread(
			};
		case let range: charset_range_item =>
			const r_enc = utf8::encoderune(r);
-			if (len(r_enc) > 1) {
-				return multibyte_err;
-			};
+			assert(len(r_enc) == 1, "Character ranges do not currently support characters larger than one byte");
			const r_b = r_enc[0];
			if (r_b >= range.0 && r_b <= range.1) {
				// Succeeded if positive match
@@ -653,7 +643,7 @@ fn search(
	str_iter: *strings::iterator,
	str_idx: *int,
	need_captures: bool
-) (void | []capture | error) = {
+) (void | []capture) = {
	let threads: []thread = alloc([
		thread { captures = alloc([]), ... }
	]);
@@ -712,7 +702,7 @@ fn search(

		for (let i = 0z; i < len(threads); i += 1) {
			const res = run_thread(i, re, string, &threads,
-				r_or_end, *str_idx)?;
+				r_or_end, *str_idx);
			const matchlen = threads[i].root_capture.end
				- threads[i].root_capture.start;
			if (res is newmatch && matchlen > 0 && !need_captures) {
@@ -773,10 +763,10 @@ fn search(
};

// Returns whether or not a regex matches a string.
-export fn test(re: *regex, string: str) (bool | error) = {
+export fn test(re: *regex, string: str) bool = {
	let str_idx = -1;
	let str_iter = strings::iter(string);
-	match (search(re, string, &str_iter, &str_idx, false)?) {
+	match (search(re, string, &str_iter, &str_idx, false)) {
	case void => return false;
	case []capture => return true;
	};
@@ -785,7 +775,7 @@ export fn test(re: *regex, string: str) (bool | error) = {

// Attempts to match a regular expression against a string and returns the
// longest leftmost match, or void if there is no match.
-export fn find(re: *regex, string: str) (void | []capture | error) = {
+export fn find(re: *regex, string: str) (void | []capture) = {
	let str_idx = -1;
	let str_iter = strings::iter(string);
	return search(re, string, &str_iter, &str_idx, true);
@@ -793,12 +783,12 @@ export fn find(re: *regex, string: str) (void | []capture | error) = {

// Attempts to match a regular expression against a string and returns all
// non-overlapping matches, or void if there are no matches.
-export fn findall(re: *regex, string: str) (void | [][]capture | error) = {
+export fn findall(re: *regex, string: str) (void | [][]capture) = {
	let res: [][]capture = alloc([]);
	let str_idx = -1;
	let str_iter = strings::iter(string);
	for (true) {
-		const findres = search(re, string, &str_iter, &str_idx, true)?;
+		const findres = search(re, string, &str_iter, &str_idx, true);
		match (findres) {
		case let m: []capture =>
			append(res, m);
-- 
2.32.0

[hare/patches] build success

builds.sr.ht <builds@sr.ht>
Details
Message ID
<CJZMJ4S12TWH.14H4M4A3LJLAR@cirno>
In-Reply-To
<20220514162502.8713-3-vlad@vladh.net> (view parent)
DKIM signature
missing
Download raw message
hare/patches: SUCCESS in 1m29s

[regex: rename regex_finish to finish][0] from [Vlad-Stefan Harbuz][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/32275
[1]: vlad@vladh.net

✓ #758186 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/758186
✓ #758185 SUCCESS hare/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/758185

Re: [PATCH hare 4/4] ioctlgen: update with regex error handling changes

Details
Message ID
<CJZMJYA0R9YX.1FIVZSZRKG0IF@taiga>
In-Reply-To
<20220514162502.8713-4-vlad@vladh.net> (view parent)
DKIM signature
fail
Download raw message
DKIM signature: fail
Thanks!

To git@git.sr.ht:~sircmpwn/hare
   cc91bb81..a0ecfc62  master -> master
Reply to thread Export thread (mbox)