Per the POSIX ERE specification[0]:
> The <hyphen-minus> character shall be treated as itself if it occurs
> first (after an initial '^', if any) or last in the list, or as an
> ending range point in a range expression.
[0]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
Implements: https://todo.sr.ht/~sircmpwn/hare/624
Signed-off-by: Nolan Prescott <mail@nprescott.com>
---
regex/+test.ha | 8 ++++++++
regex/regex.ha | 3 ++-
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/regex/+test.ha b/regex/+test.ha
index d28b41cc..d8e91e5c 100644
--- a/regex/+test.ha
+++ b/regex/+test.ha
@@ -351,6 +351,14 @@ fn run_findall_case(
"M15 4QN",
matchres::MATCH, 0, -1
),
+ (`^[^-a]`, "-bcd", matchres::NOMATCH, 0, 0),
+ (`^[-a]`, "-bcd", matchres::MATCH, 0, 1),
+ (`[^ac-]`, "bde", matchres::MATCH, 0, 1),
+ (`[-ac]`, "foo-de", matchres::MATCH, 3, 4),
+ (`[-ac]`, "def", matchres::NOMATCH, 0, 0),
+ (`foo[-ac]bar`, "foo-bar", matchres::MATCH, 0, 7),
+ (`[ac-]$`, "bde-", matchres::MATCH, 3, 4),
+ (`^[A-Za-z_-]+$`, "foo", matchres::MATCH, 0, 3),
// tests from perl
(`abc`, "abc", matchres::MATCH, 0, -1),
(`abc`, "xbc", matchres::NOMATCH, 0, 0),
diff --git a/regex/regex.ha b/regex/regex.ha
index 0a7b36a0..89f3ce9a 100644
--- a/regex/regex.ha
+++ b/regex/regex.ha
@@ -154,7 +154,8 @@ fn handle_bracket(
};
const is_range = peek1 is rune && peek1 as rune == '-'
- && !(peek2 is void) && !(peek3 is void);
+ && !(peek2 is void) && !(peek3 is void)
+ && !(peek2 as rune == ']');
const range_end = peek2;
const is_first_char = *bracket_idx == 0 || *bracket_idx == 1
&& !*is_charset_positive;
--
2.32.0