[PATCH hare v3] regex: allow ^ at start of every whole-expression alternation
Export this patch
Signed-off-by: Max Schillinger <max@mxsr.de>
---
This revision (v3) implements Vlad's suggestion from the review of v2.
regex/+test.ha | 7 +++++++
regex/regex.ha | 17 ++++++++++++++---
2 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/regex/+test.ha b/regex/+test.ha
index f5db579b..279eeea7 100644
--- a/regex/+test.ha
+++ b/regex/+test.ha
@@ -585,6 +585,9 @@ fn run_rawreplace_case(
(`ab$|cd`, "ab", matchres::MATCH, 0, 2),
(`ab$|cd`, "abc", matchres::NOMATCH, 0, 0),
(`ab|cd$`, "cde", matchres::NOMATCH, 0, 0),
+ (`ab|^cd`, "bcd", matchres::NOMATCH, 0, 0),
+ (`ab|^cd`, "cde", matchres::MATCH, 0, 2),
+ (`ab\|^cd`, "cde", matchres::ERROR, 0, 0),
// multiple alternation
(`a|b|c|d|e`, "e", matchres::MATCH, 0, -1),
(`a|b|c|d|e`, "xe", matchres::MATCH, 1, -1),
@@ -593,6 +596,10 @@ fn run_rawreplace_case(
(`a|b$|c$|d$|e`, "ax", matchres::MATCH, 0, 1),
(`a|b$|c$|d$|e`, "cx", matchres::NOMATCH, 0, 0),
(`a|b$|c$|d$|e`, "ex", matchres::MATCH, 0, 1),
+ (`a|^b|^c|^d|e`, "cd", matchres::MATCH, 0, 1),
+ (`a|^b|^c|^d|e`, "xa", matchres::MATCH, 1, 2),
+ (`a|^b|^c|^d|e`, "xc", matchres::NOMATCH, 0, 0),
+ (`a|^b|^c|^d|e`, "xe", matchres::MATCH, 1, 2),
// TODO: nested capture groups
(`((a))`, "abc", matchres::ERROR, 0, -1),
// (`((a))`, "abc", matchres::MATCH, 0, -1),
diff --git a/regex/regex.ha b/regex/regex.ha
index aabc5cbc..540c11fb 100644
--- a/regex/regex.ha
+++ b/regex/regex.ha
@@ -227,6 +227,7 @@ export fn compile(expr: str) (regex | error) = {
let skip_charclass_rest = false;
let bracket_idx = -1;
let is_charset_positive = true;
+ let was_prev_rune_pipe = false;
let n_reps = 0z;
let n_groupstarts = 0;
@@ -267,8 +268,11 @@ export fn compile(expr: str) (regex | error) = {
r_idx += 1;
};
case '^' =>
- if (r_idx != 0) {
- return `Anchor '^' not at start`: error;
+ if (n_groupstarts > 0) {
+ return `Anchor '^' in capture groups is unsupported`: error;
+ };
+ if (!(r_idx == 0 || was_prev_rune_pipe)) {
+ return `Anchor '^' not at start of whole pattern or alternation`: error;
};
case '$' =>
if (n_groupstarts > 0) {
@@ -319,7 +323,13 @@ export fn compile(expr: str) (regex | error) = {
append(jump_idxs, len(insts) - 1);
// add skip if it's a whole-expression alternation
if (origin == 0) {
- append(insts, inst_skip);
+ const peek1 = strings::next(&iter);
+ if (peek1 is rune) {
+ if (peek1 as rune != '^') {
+ append(insts, inst_skip);
+ };
+ strings::prev(&iter);
+ };
};
case '{' =>
let origin = len(insts) - 1;
@@ -408,6 +418,7 @@ export fn compile(expr: str) (regex | error) = {
case =>
append(insts, r: inst_lit);
};
+ was_prev_rune_pipe = (r == '|');
r_idx += 1;
};
--
2.45.2
hare/patches: FAILED in 1m20s
[regex: allow ^ at start of every whole-expression alternation][0] v3 from [Max Schillinger][1]
[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/54013
[1]: mailto:max@mxsr.de
✗ #1280954 FAILED hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1280954
✗ #1280955 FAILED hare/patches/netbsd.yml https://builds.sr.ht/~sircmpwn/job/1280955
✗ #1280953 FAILED hare/patches/alpine.yml https://builds.sr.ht/~sircmpwn/job/1280953
✗ #1280956 FAILED hare/patches/openbsd.yml https://builds.sr.ht/~sircmpwn/job/1280956
Looks great, thank you! :) Applied both patches.
to git@git.sr.ht:~sircmpwn/hare
2e40a947..dc65431e master -> master