~sircmpwn/hare-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
3 2

[PATCH hare] regex: implement multiple alternation

Details
Message ID
<20240415154809.11041-2-max@mxsr.de>
DKIM signature
pass
Download raw message
Patch: +17 -15
Example: The regex pattern `(ha|py|sh)` is implemented using the
following Hare Regular Expression Engine Virtual Machine NFA
Representation:

 0 groupstart
 1 split → 5 (split)
 2 lit h
 3 lit a
 4 jump → 11 (groupend)
 5 split → 9 (lit s)
 6 lit p
 7 lit y
 8 jump → 11 (groupend)
 9 lit s
10 lit h
11 groupend

Implements: https://todo.sr.ht/~sircmpwn/hare/696
Signed-off-by: Max Schillinger <max@mxsr.de>
---
 regex/+test.ha |  6 +++---
 regex/regex.ha | 26 ++++++++++++++------------
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/regex/+test.ha b/regex/+test.ha
index 45600e0d..19a10e81 100644
--- a/regex/+test.ha
+++ b/regex/+test.ha
@@ -581,9 +581,9 @@ fn run_rawreplace_case(
 		(`ab|cd`, "cd", matchres::MATCH, 0, 2),
 		(`ab|cd`, "abc", matchres::MATCH, 0, 2),
 		(`ab|cd`, "abcd", matchres::MATCH, 0, 2),
-		// TODO: multiple alternation
-		// (`a|b|c|d|e`, "e", matchres::MATCH, 0, -1),
-		// (`(a|b|c|d|e)f`, "ef", matchres::MATCH, 0, -1),
+		// multiple alternation
+		(`a|b|c|d|e`, "e", matchres::MATCH, 0, -1),
+		(`(a|b|c|d|e)f`, "ef", matchres::MATCH, 0, -1),
 		// TODO: nested capture groups
 		(`((a))`, "abc", matchres::ERROR, 0, -1),
 		// (`((a))`, "abc", matchres::MATCH, 0, -1),
diff --git a/regex/regex.ha b/regex/regex.ha
index d0140feb..14f3db09 100644
--- a/regex/regex.ha
+++ b/regex/regex.ha
@@ -223,7 +223,7 @@ export fn compile(expr: str) (regex | error) = {
 	let iter = strings::iter(expr);
 	let r_idx = 0z;
 	let anchored = false;
-	let curr_alt_jump_idx = -1;
+	let jump_idxs: []size = [];
 	let in_bracket = false;
 	let skip_charclass_rest = false;
 	let bracket_idx = -1;
@@ -292,12 +292,11 @@ export fn compile(expr: str) (regex | error) = {
 			};
 			n_groupstarts -= 1;
 			append(insts, void: inst_groupend);
-			if (curr_alt_jump_idx != -1) {
-				assert(insts[curr_alt_jump_idx] is inst_jump);
-				insts[curr_alt_jump_idx] =
-					(len(insts) - 1): inst_jump;
-				curr_alt_jump_idx = -1;
+			for (let jump_idx .. jump_idxs) {
+				assert(insts[jump_idx] is inst_jump);
+				insts[jump_idx] = (len(insts) - 1): inst_jump;
 			};
+			jump_idxs = [];
 		case '|' =>
 			append(insts, types::SIZE_MAX: inst_jump);
 			const origin = match (find_last_groupstart(&insts)) {
@@ -307,8 +306,11 @@ export fn compile(expr: str) (regex | error) = {
 				yield sz + 1;
 			};
 			const newinst = (len(insts) + 1): inst_split;
-			insert(insts[origin], newinst);
-			curr_alt_jump_idx = (len(insts) - 1): int;
+			// add split after last jump (if any) or at origin
+			const split_idx = if (len(jump_idxs) > 0)
+				jump_idxs[len(jump_idxs) - 1] + 1 else origin;
+			insert(insts[split_idx], newinst);
+			append(jump_idxs, len(insts) - 1);
 		case '{' =>
 			let origin = len(insts) - 1;
 			if (insts[origin] is inst_groupend) {
@@ -400,11 +402,11 @@ export fn compile(expr: str) (regex | error) = {
 	};

 	// handle whole expression alternation
-	if (curr_alt_jump_idx != -1) {
-		assert(insts[curr_alt_jump_idx] is inst_jump);
-		insts[curr_alt_jump_idx] = len(insts): inst_jump;
-		curr_alt_jump_idx = -1;
+	for (let jump_idx .. jump_idxs) {
+		assert(insts[jump_idx] is inst_jump);
+		insts[jump_idx] = len(insts): inst_jump;
 	};
+	jump_idxs = [];

 	append(insts, anchored: inst_match);

-- 
2.44.0

[hare/patches] build failed

builds.sr.ht <builds@sr.ht>
Details
Message ID
<D0KTD6C7WPSB.1JI81BOPLMHDS@fra01>
In-Reply-To
<20240415154809.11041-2-max@mxsr.de> (view parent)
DKIM signature
missing
Download raw message
hare/patches: FAILED in 57s

[regex: implement multiple alternation][0] from [Max Schillinger][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/51032
[1]: max@mxsr.de

✗ #1196487 FAILED  hare/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/1196487
✓ #1196488 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1196488
✓ #1196489 SUCCESS hare/patches/openbsd.yml https://builds.sr.ht/~sircmpwn/job/1196489

[hare/patches] build failed

builds.sr.ht <builds@sr.ht>
Details
Message ID
<D0KTD6D0O2U5.3TLVG3O4P3YWZ@fra01>
In-Reply-To
<20240415154809.11041-2-max@mxsr.de> (view parent)
DKIM signature
missing
Download raw message
hare/patches: FAILED in 57s

[regex: implement multiple alternation][0] from [Max Schillinger][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/51032
[1]: max@mxsr.de

✗ #1196487 FAILED  hare/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/1196487
✓ #1196488 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1196488
✓ #1196489 SUCCESS hare/patches/openbsd.yml https://builds.sr.ht/~sircmpwn/job/1196489
Details
Message ID
<D0KTP6NPCGGV.30X8IUPHJ7I10@vladh.net>
In-Reply-To
<20240415154809.11041-2-max@mxsr.de> (view parent)
DKIM signature
pass
Download raw message
Perfect, thank you! :)

to git@git.sr.ht:~sircmpwn/hare
  51d4feb9..bf1e316a  master -> master
Reply to thread Export thread (mbox)