Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
---
regex/README | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/regex/README b/regex/README
index ee43c1db..3af18b1d 100644
--- a/regex/README
+++ b/regex/README
@@ -24,8 +24,8 @@ the longest match among the leftmost matches.
case void => void;
case let groups: []regex::matchgroup =>
defer free(groups);
- // The match groups provide the content, start index and end index of
- // the main match, as well as all submatches.
+ // The match groups provide the content, start index and end
+ // index of the main match, as well as all submatches.
};
const all_matches = regex::findall(&re, "Hello Hare, hello Hare.")!;
@@ -33,8 +33,8 @@ the longest match among the leftmost matches.
case void => void;
case let groupsets: [][]regex::matchgroup =>
defer regex::freeall(groupsets);
- // A slice of multiple match group sets, which can be used similarly
- // to the find() example.
+ // A slice of multiple match group sets, which can be used
+ // similarly to the find() example.
};
[0]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04
--
2.32.0
[PATCH hare v2 3/9] regex: add some printing to README example to make usage clearer
Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
---
regex/README | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/regex/README b/regex/README
index 3af18b1d..ac755eb5 100644
--- a/regex/README
+++ b/regex/README
@@ -26,6 +26,9 @@ the longest match among the leftmost matches.
defer free(groups);
// The match groups provide the content, start index and end
// index of the main match, as well as all submatches.
+ fmt::printfln("{} ({}, {})", groups[0].content,
+ groups[0].start,
+ groups[0].end)!;
};
const all_matches = regex::findall(&re, "Hello Hare, hello Hare.")!;
@@ -35,6 +38,11 @@ the longest match among the leftmost matches.
defer regex::freeall(groupsets);
// A slice of multiple match group sets, which can be used
// similarly to the find() example.
+ for (let i = 0z; i < len(groupsets); i += 1) {
+ fmt::printfln("{} ({}, {})", groupsets[i][0].content,
+ groupsets[i][0].start,
+ groupsets[i][0].end)!;
+ };
};
[0]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04
--
2.32.0
Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
---
regex/+test.ha | 8 ++---
regex/README | 41 ++++++++++++------------
regex/regex.ha | 84 +++++++++++++++++++++++++-------------------------
3 files changed, 68 insertions(+), 65 deletions(-)
diff --git a/regex/+test.ha b/regex/+test.ha
index 82e66394..5a5f5c4c 100644
--- a/regex/+test.ha
+++ b/regex/+test.ha
@@ -41,7 +41,7 @@ fn run_find_case(
expr, string);
};
- case let m: []matchgroup =>
+ case let m: []capture =>
if (expected == matchres::NOMATCH) {
fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it did",
expr, string);
@@ -51,11 +51,11 @@ fn run_find_case(
expr, string);
};
if (start: size != m[0].start) {
- fmt::fatalf("Expected start of main match group to be {} but it was {}",
+ fmt::fatalf("Expected start of main capture to be {} but it was {}",
start, m[0].start);
};
if (end: size != m[0].end) {
- fmt::fatalf("Expected end of main match group to be {} but it was {}",
+ fmt::fatalf("Expected end of main capture to be {} but it was {}",
end, m[0].end);
};
@@ -104,7 +104,7 @@ fn run_findall_case(
expr, string);
};
- case let groupsets: [][]matchgroup =>
+ case let groupsets: [][]capture =>
if (expected == matchres::NOMATCH) {
fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it did",
expr, string);
diff --git a/regex/README b/regex/README
index ac755eb5..56c3643d 100644
--- a/regex/README
+++ b/regex/README
@@ -6,12 +6,13 @@ By default, matches will be found anywhere in the given string. The ^ and $
characters can be used to anchor the match to the beginning or end of the
string.
-find() returns a slice of [[matchgroup]]s for the first match. The first
-[[matchgroup]] represents the entire match, while the rest represent the
-submatches, specified in the expression using (parens).
+find() returns a slice of [[capture]]s for the first match. The first
+[[capture]] represents the entire matching string, while the rest represent the
+matching substrings for the subexpressions, specified in the regular expression
+using parentheses.

findall() finds all non-overlapping matches in the given string and returns
-a slice of slices of [[matchgroup]]s.
+a slice of slices of [[capture]]s.

This module implements the POSIX match disambiguation rules by returning
the longest match among the leftmost matches.
@@ -22,26 +23,28 @@ the longest match among the leftmost matches.
const first_match = regex::find(&re, "Hello Hare, hello Hare.")!;
match (first_match) {
case void => void;
- case let groups: []regex::matchgroup =>
- defer free(groups);
- // The match groups provide the content, start index and end
- // index of the main match, as well as all submatches.
- fmt::printfln("{} ({}, {})", groups[0].content,
- groups[0].start,
- groups[0].end)!;
+ case let captures: []regex::capture =>
+ defer free(captures);
+ // captures[0]: The full matching string.
+ // captures[1...]: A capture for every capture group.
+ fmt::printfln("{} ({}, {})", captures[0].content,
+ captures[0].start,
+ captures[0].end)!;
};
const all_matches = regex::findall(&re, "Hello Hare, hello Hare.")!;
match (all_matches) {
case void => void;
- case let groupsets: [][]regex::matchgroup =>
- defer regex::freeall(groupsets);
- // A slice of multiple match group sets, which can be used
- // similarly to the find() example.
- for (let i = 0z; i < len(groupsets); i += 1) {
- fmt::printfln("{} ({}, {})", groupsets[i][0].content,
- groupsets[i][0].start,
- groupsets[i][0].end)!;
+ case let matches: [][]regex::capture =>
+ defer regex::freeall(matches);
+ // matches[0]: All captures for the first match.
+ // matches[0][0]: The full matching string for the first match.
+ // matches[0][1...]: A capture for every capture group in the
+ // first match.
+ for (let i = 0z; i < len(matches); i += 1) {
+ fmt::printfln("{} ({}, {})", matches[i][0].content,
+ matches[i][0].start,
+ matches[i][0].end)!;
};
};
diff --git a/regex/regex.ha b/regex/regex.ha
index 3804ec11..ea6baf6f 100644
--- a/regex/regex.ha
+++ b/regex/regex.ha
@@ -31,7 +31,7 @@ export type inst = (inst_lit | inst_any | inst_split | inst_jump |
inst_repeat);
// A (sub)match found as a result of matching a certain string against a regex.
-export type matchgroup = struct {
+export type capture = struct {
content: str,
start: size,
end: size,
@@ -40,10 +40,10 @@ export type matchgroup = struct {
type thread = struct {
pc: size,
start_idx: size,
- root_group: matchgroup,
- groups: []matchgroup,
- curr_group: matchgroup,
- curr_group_inited: bool,
+ root_capture: capture,
+ captures: []capture,
+ curr_capture: capture,
+ curr_capture_inited: bool,
rep_counters: []size,
matched: bool,
failed: bool,
@@ -468,7 +468,7 @@ fn parse_repetition(
};
fn delete_thread(i: size, threads: *[]thread) void = {
- free(threads[i].groups);
+ free(threads[i].captures);
free(threads[i].rep_counters);
delete(threads[i]);
};
@@ -492,11 +492,11 @@ fn add_thread(threads: *[]thread, parent_idx: size, new_pc: size) void = {
append(threads, thread {
pc = new_pc,
start_idx = threads[parent_idx].start_idx,
- curr_group = threads[parent_idx].curr_group,
- curr_group_inited = threads[parent_idx].curr_group_inited,
+ curr_capture = threads[parent_idx].curr_capture,
+ curr_capture_inited = threads[parent_idx].curr_capture_inited,
matched = threads[parent_idx].matched,
failed = threads[parent_idx].failed,
- groups = alloc(threads[parent_idx].groups...),
+ captures = alloc(threads[parent_idx].captures...),
rep_counters = alloc(threads[parent_idx].rep_counters...),
...
});
@@ -535,7 +535,7 @@ fn run_thread(
threads[i].failed = true;
return;
};
- threads[i].root_group = matchgroup {
+ threads[i].root_capture = capture {
start = threads[i].start_idx,
end = str_idx: size,
// TODO: This is a perf issue for large strings
@@ -546,24 +546,24 @@ fn run_thread(
threads[i].matched = true;
return newmatch;
case inst_groupstart =>
- if (threads[i].curr_group_inited) {
+ if (threads[i].curr_capture_inited) {
return "Found nested capture groups in expression, which are not supported": error;
};
- threads[i].curr_group.start = str_idx: size;
- threads[i].curr_group_inited = true;
+ threads[i].curr_capture.start = str_idx: size;
+ threads[i].curr_capture_inited = true;
threads[i].pc += 1;
case inst_groupend =>
- if (!threads[i].curr_group_inited) {
+ if (!threads[i].curr_capture_inited) {
return `Found a groupend token ")" without having previously seen a groupstart token "("`: error;
};
- threads[i].curr_group.end = str_idx: size;
+ threads[i].curr_capture.end = str_idx: size;
// TODO: This is a perf issue for large strings
- threads[i].curr_group.content = strings::sub(string,
- threads[i].curr_group.start,
- threads[i].curr_group.end);
- append(threads[i].groups, threads[i].curr_group);
- threads[i].curr_group = matchgroup { ... };
- threads[i].curr_group_inited = false;
+ threads[i].curr_capture.content = strings::sub(string,
+ threads[i].curr_capture.start,
+ threads[i].curr_capture.end);
+ append(threads[i].captures, threads[i].curr_capture);
+ threads[i].curr_capture = capture { ... };
+ threads[i].curr_capture_inited = false;
threads[i].pc += 1;
case let ir: inst_repeat =>
assert(ir.id < len(threads[i].rep_counters));
@@ -652,16 +652,16 @@ fn search(
string: str,
str_iter: *strings::iterator,
str_idx: *int
-) (void | []matchgroup | error) = {
+) (void | []capture | error) = {
let threads: []thread = alloc([
- thread { groups = [], ... }
+ thread { captures = [], ... }
]);
if (re.n_reps > 0) {
threads[0].rep_counters = alloc([0...], re.n_reps);
};
defer {
for (let i = 0z; i < len(threads); i += 1) {
- free(threads[i].groups);
+ free(threads[i].captures);
free(threads[i].rep_counters);
};
free(threads);
@@ -684,25 +684,25 @@ fn search(
if (all_matched) {
let best_len = 0z;
- let best_n_groups = 0z;
+ let best_n_captures = 0z;
let best_idx = 0z;
for (let i = 0z; i < len(threads); i += 1) {
- let match_len = threads[i].root_group.end
- - threads[i].root_group.start;
+ let match_len = threads[i].root_capture.end
+ - threads[i].root_capture.start;
const is_better = match_len > best_len
|| match_len == best_len
- && len(threads[i].groups)
- > best_n_groups;
+ && len(threads[i].captures)
+ > best_n_captures;
if (is_better) {
best_len = match_len;
best_idx = i;
- best_n_groups = len(threads[i].groups);
+ best_n_captures = len(threads[i].captures);
};
};
- let res: []matchgroup = alloc([],
- len(threads[best_idx].groups) + 1);
- append(res, threads[best_idx].root_group);
- append(res, threads[best_idx].groups...);
+ let res: []capture = alloc([],
+ len(threads[best_idx].captures) + 1);
+ append(res, threads[best_idx].root_capture);
+ append(res, threads[best_idx].captures...);
return res;
};
@@ -712,8 +712,8 @@ fn search(
for (let i = 0z; i < len(threads); i += 1) {
const res = run_thread(i, re, string, &threads,
r_or_end, *str_idx)?;
- const matchlen = threads[i].root_group.end
- - threads[i].root_group.start;
+ const matchlen = threads[i].root_capture.end
+ - threads[i].root_capture.start;
const is_better = res is newmatch && matchlen > 0
&& (first_match_idx is void
|| threads[i].start_idx
@@ -771,7 +771,7 @@ fn search(
// Attempts to match a regular expression against a string and returns the
// longest leftmost match, or void if there is no match.
-export fn find(re: *regex, string: str) (void | []matchgroup | error) = {
+export fn find(re: *regex, string: str) (void | []capture | error) = {
let str_idx = -1;
let str_iter = strings::iter(string);
return search(re, string, &str_iter, &str_idx);
@@ -779,14 +779,14 @@ export fn find(re: *regex, string: str) (void | []matchgroup | error) = {
// Attempts to match a regular expression against a string and returns all
// non-overlapping matches, or void if there are no matches.
-export fn findall(re: *regex, string: str) (void | [][]matchgroup | error) = {
- let res: [][]matchgroup = [];
+export fn findall(re: *regex, string: str) (void | [][]capture | error) = {
+ let res: [][]capture = [];
let str_idx = -1;
let str_iter = strings::iter(string);
for (true) {
const findres = search(re, string, &str_iter, &str_idx)?;
match (findres) {
- case let m: []matchgroup =>
+ case let m: []capture =>
append(res, m);
assert(str_idx: size >= m[0].end);
for (str_idx: size > m[0].end) {
@@ -805,8 +805,8 @@ export fn findall(re: *regex, string: str) (void | [][]matchgroup | error) = {
return res;
};
-// Frees all the matches in a slice and the slice itself.
-export fn freeall(s: [][]matchgroup) void = {
+// Frees each match in a slice of matches, as well as the slice itself.
+export fn freeall(s: [][]capture) void = {
for (let i = 0z; i < len(s); i += 1) {
free(s[i]);
};
--
2.32.0
[PATCH hare v2 6/9] regex: update free_*() functions
Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
---
regex/README | 4 ++--
regex/regex.ha | 9 +++++++--
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/regex/README b/regex/README
index 56c3643d..52d62000 100644
--- a/regex/README
+++ b/regex/README
@@ -24,7 +24,7 @@ the longest match among the leftmost matches.
match (first_match) {
case void => void;
case let captures: []regex::capture =>
- defer free(captures);
+ defer regex::free_captures(captures);
// captures[0]: The full matching string.
// captures[1...]: A capture for every capture group.
fmt::printfln("{} ({}, {})", captures[0].content,
@@ -36,7 +36,7 @@ the longest match among the leftmost matches.
match (all_matches) {
case void => void;
case let matches: [][]regex::capture =>
- defer regex::freeall(matches);
+ defer regex::free_matches(matches);
// matches[0]: All captures for the first match.
// matches[0][0]: The full matching string for the first match.
// matches[0][1...]: A capture for every capture group in the
diff --git a/regex/regex.ha b/regex/regex.ha
index aa80b3bb..21e4bd6d 100644
--- a/regex/regex.ha
+++ b/regex/regex.ha
@@ -805,10 +805,15 @@ export fn findall(re: *regex, string: str) (void | [][]capture | error) = {
return res;
};
+// Frees a slice of captures.
+export fn free_captures(s: []capture) void = {
+ free(s);
+};
+
// Frees each match in a slice of matches, as well as the slice itself.
-export fn freeall(s: [][]capture) void = {
+export fn free_matches(s: [][]capture) void = {
for (let i = 0z; i < len(s); i += 1) {
- free(s[i]);
+ free_captures(s[i]);
};
free(s);
};
--
2.32.0