Alexey Yerin: 1 himitsu::query: rewrite parser to mitigate key name restrictions 1 files changed, 185 insertions(+), 39 deletions(-)
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~sircmpwn/himitsu-devel/patches/33046/mbox | git am -3
Learn more about email & git
It allows non-ASCII characters, '[]', '()', etc in keys. TODO: fix up key serialization if they have a '!' in them. --- himitsu/query/parse.ha | 224 ++++++++++++++++++++++++++++++++++------- 1 file changed, 185 insertions(+), 39 deletions(-) diff --git a/himitsu/query/parse.ha b/himitsu/query/parse.ha index a936042..fe1ec23 100644 --- a/himitsu/query/parse.ha +++ b/himitsu/query/parse.ha @@ -1,21 +1,9 @@ use bufio; use encoding::utf8; +use fmt; use io; -use regex; -use shlex; use strings; -use fmt; - -let keyre: regex::regex = regex::regex { ... }; - -@init fn init() void = { - // Yes, this is a valid POSIX regular expression - keyre = regex::compile(`^[]A-Za-z-_[]+$`)!; -}; - -@fini fn fini() void = { - regex::finish(&keyre); -}; +use strio; // A parsed Himitsu query. export type query = struct { @@ -37,6 +25,7 @@ export type invalid = !void; // return value to [[finish]] when they are done with it. export fn parse(in: io::handle) (query | invalid | io::error) = { const data = io::drain(in)?; + defer free(data); const data = match (strings::try_fromutf8(data)) { case let data: str => yield data; @@ -44,44 +33,182 @@ export fn parse(in: io::handle) (query | invalid | io::error) = { return invalid; }; - const items = match (shlex::split(data)) { - case let items: []str => - yield items; - case shlex::syntaxerr => - return invalid; + let query = query { ... }; + let iter = strings::iter(data); + for (true) { + match (parse_item(&iter, &query, false)?) { + case void => + yield; + case io::EOF => + break; + }; }; - defer strings::freeall(items); - return parse_items(items); + return query; }; // Parses a list of key/value pairs which has already been split with shlex (or // a shell, for example when parsing a query from argv). export fn parse_items(items: []str) (query | invalid) = { - // XXX: Should do something about the case where the user specifies both - // ? and ! let query = query { ... 
}; for (let i = 0z; i < len(items); i += 1) { - const (key, value) = strings::cut(items[i], "="); - let optional = false, private = false; - if (strings::hassuffix(key, "!")) { - private = true; + const item = strings::trim(items[i]); + if (len(item) == 0) { + continue; }; - if (strings::hassuffix(key, "?")) { + let iter = strings::iter(item); + parse_item(&iter, &query, true)? as void; + }; + return query; +}; + +fn parse_item( + iter: *strings::iterator, + q: *query, + split: bool, +) (void | io::EOF | invalid) = { + // XXX: Should do something about the case where the user specifies both + // ? and ! + const key = match (parse_value(iter, true, split)?) { + case let s: str => + yield s; + case io::EOF => + return io::EOF; + }; + let optional = false, private = false; + match (strings::next(iter)) { + case let r: rune => + switch (r) { + case '!' => + private = true; + case '?' => optional = true; + case => + strings::prev(iter); + }; + case void => + yield; + }; + const value = match (strings::next(iter)) { + case let r: rune => + yield if (r == '=') { + yield parse_value(iter, false, split)? as str; + } else if (!split && r == ' ' || r == '\t') { + strings::prev(iter); + yield ""; + } else { + return invalid; + }; + case void => + yield ""; + }; + + append(q.items, pair { + key = key, + value = value, + private = private, + optional = optional, + }); +}; + +// Minimal version of shlex::split +fn parse_value( + iter: *strings::iterator, + key: bool, + split: bool, +) (str | io::EOF | invalid) = { + const buf = strio::dynamic(); + defer io::close(&buf)!; + for (true) { + const r = match (strings::next(iter)) { + case let r: rune => + yield r; + case void => + if (key && len(strio::string(&buf)) == 0) { + return io::EOF; + }; + break; + }; + if (key && r == '!' || r == '?' 
|| r == '=') { + strings::prev(iter); + break; }; - key = strings::trim(key, '?', '!'); - if (!regex::test(&keyre, key)) { + + switch (r) { + case ' ', '\t' => + if (split) { + strio::appendrune(&buf, r)!; + } else if (key) { + for (true) match (strings::next(iter)) { + case let r: rune => + if (r != ' ' && r != '\t') { + strings::prev(iter); + break; + }; + case void => + break; + }; + continue; + } else { + strings::prev(iter); + break; + }; + case '\\' => + scan_backslash(&buf, iter)?; + case '"' => + scan_double(&buf, iter)?; + case '\'' => + scan_single(&buf, iter)?; + case => + strio::appendrune(&buf, r)!; + }; + }; + return strings::dup(strio::string(&buf)); +}; + +fn scan_double(out: io::handle, iter: *strings::iterator) (void | invalid) = { + for (true) { + const r = match (strings::next(iter)) { + case let r: rune => + yield r; + case void => return invalid; }; - append(query.items, pair { - key = strings::dup(key), - value = strings::dup(value), - private = private, - optional = optional, - }); + switch (r) { + case '"' => + break; + case '\\' => + scan_backslash(out, iter)?; + case => + strio::appendrune(out, r)!; + }; + }; +}; + +fn scan_backslash(out: io::handle, iter: *strings::iterator) (void | invalid) = { + const r = match (strings::next(iter)) { + case let r: rune => + yield r; + case void => + return invalid; + }; + strio::appendrune(out, r)!; +}; + +fn scan_single(out: io::handle, iter: *strings::iterator) (void | invalid) = { + for (true) { + const r = match (strings::next(iter)) { + case let r: rune => + yield r; + case void => + return invalid; + }; + + if (r == '\'') { + break; + }; + strio::appendrune(out, r)!; }; - return query; }; // Frees resources associated with this query. 
@@ -103,6 +230,14 @@ export fn finish(q: *query) void = { ("foo", "", false, true), ("bar", "baz", true, false), ]), + (`user[email]=me@test.org 'user=password'!=hunter2`, [ + ("user[email]", "me@test.org", false, false), + ("user=password", "hunter2", true, false), + ]), + (`Êmail²=hunter@example.org 'Späced password'!=nothunter2`, [ + ("Êmail²", "hunter@example.org", false, false), + ("Späced password", "nothunter2", true, false), + ]), ]; for (let i = 0z; i < len(cases); i += 1) { @@ -116,8 +251,8 @@ export fn finish(q: *query) void = { assert(len(expected) == len(result.items)); for (let j = 0z; j < len(result.items); j += 1) { - const got = result.items[i]; - const expect = &expected[i]; + const got = result.items[j]; + const expect = &expected[j]; assert(got.key == expect.0); assert(got.value == expect.1); assert(got.private == expect.2); @@ -125,3 +260,14 @@ export fn finish(q: *query) void = { }; }; }; + +@test fn query_parse_split() void = { + const result = parse_items([`Spaced password!=nothunter2`])!; + defer finish(&result); + + assert(len(result.items) == 1); + assert(result.items[0].key == "Spaced password"); + assert(result.items[0].value == "nothunter2"); + assert(result.items[0].private); + assert(!result.items[0].optional); +}; -- 2.36.1