Alexey Yerin: 1 himtisu::query: rewrite the parser 6 files changed, 345 insertions(+), 160 deletions(-)
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~sircmpwn/himitsu-devel/patches/41478/mbox | git am -3Learn more about email & git
The old parser based on shlex::split had a few serious disadvantages: * "x!!"=y and "x!"!=y produced the same result because shlex::split would split both of them as ["x!!=y"]. Though the first one is a public pair "x!!"="y", while the second is private pair "x!"="y". The same applies for '?'. * Similar to the previous one, placing '=' inside a key is impossible because the parser can't distinguish between "a=b"=c and "a"="b=c" * Keys were for some reason limited to the regex ^[-_A-Za-z]+$, which disallowed any non-ASCII characters as well as special characters such as '[' or ']'. This limitation prevented a lot of website credentials (e.g. GitLab) from being stored in a form useful to himitsu-firefox. * Newlines could not be supported because the only way to get a newline into shlex is provide a literal newline character, which was not possible because himitsu's protocol is line-oriented. The new parser will no longer operate on slices of strings returned from shlex::split (this is still supported via parse_items), but rather on full strings. Word splitting will be done during parsing automatically. The parser operates a smaller version of shlex::split that, will unwrap any quotes and stop at unquoted '=', '!', '?' or unquoted whitespace when parsing the key, then will parse the value until unquoted whitespace. The backslash was also expanded to provide a literal newline on "\n". himitsu::query::quote is also provided for safe escaping of newlines. The old parser was also used differently in different places. With the new parser everything had to be unified: * himitsud will parse input from clients as a query * himitsu::client::next will automatically parse the query instead of returning a string * hiq will use himitsu::client instead of its custom client --- cmd/himitsud/cmd.ha | 70 ++++++----- cmd/hiq/main.ha | 104 ++++++---------- himitsu/client/client.ha | 7 +- himitsu/query/parse.ha | 249 ++++++++++++++++++++++++++++++++------- himitsu/query/unparse.ha | 65 +++++++++- secstore/serialize.ha | 10 +- 6 files changed, 345 insertions(+), 160 deletions(-) diff --git a/cmd/himitsud/cmd.ha b/cmd/himitsud/cmd.ha index 6bb9ec0..868052e 100644 --- a/cmd/himitsud/cmd.ha +++ b/cmd/himitsud/cmd.ha @@ -35,20 +35,30 @@ fn strerror(err: cmderror) const str = { fn exec(serv: *server, client: *client, cmd: str) (void | servererror) = { // TODO: Better logging of client activity - const args = match (shlex::split(cmd)) { - case shlex::syntaxerr => + const q = match (query::parse_string(cmd)) { + case let q: query::query => + yield q; + case => writefmt(client, "error Invalid command syntax"); return; - case let items: []str => - yield items; }; - defer strings::freeall(args); - if (len(args) == 0) { + defer query::finish(&q); + if (len(q.items) == 0) { writefmt(client, "error Invalid command syntax"); return; }; - const cmd = switch (args[0]) { + // FIXME: this is not very good + if (len(q.items[0].value) != 0 || q.items[0].private + || q.items[0].optional) { + writefmt(client, "error Unknown command"); + return; + }; + const cmd = q.items[0].key; + defer free(cmd); + delete(q.items[0]); + + const cmd = switch (cmd) { case "add" => yield &exec_add; case "del" => @@ -62,7 +72,7 @@ fn exec(serv: *server, client: *client, cmd: str) (void | servererror) = { return; }; - match (cmd(serv, client, args)) { + match (cmd(serv, client, &q)) { case let err: cmderror => // XXX: Probably a harec bug match (err) { @@ -79,7 +89,7 @@ fn exec(serv: *server, client: *client, cmd: str) (void | servererror) = { }; }; -fn exec_add(serv: *server, client: *client, args: []str) (void | cmderror) = { +fn exec_add(serv: *server, client: *client, q: *query::query) (void | cmderror) = { if (serv.store.state != secstore::state::UNLOCKED) { const prompter = prompt::newprompter(serv.conf.prompter[0], serv.conf.prompter[1..])?; @@ -91,10 +101,8 @@ fn exec_add(serv: *server, client: *client, args: []str) (void | cmderror) = { prompt::close(&prompter)?; }; - const q = query::parse_items(args[1..])?; - defer query::finish(&q); // TODO: Prompt user to fill in incomplete keys - let entry = secstore::add(serv.store, &q)!; + let entry = secstore::add(serv.store, q)!; let buf = bufio::dynamic(io::mode::WRITE); fmt::fprint(&buf, "key ")?; secstore::write(serv.store, &buf, entry, false)?; @@ -102,10 +110,7 @@ fn exec_add(serv: *server, client: *client, args: []str) (void | cmderror) = { write(client, bufio::buffer(&buf)); }; -fn exec_del(serv: *server, client: *client, args: []str) (void | cmderror) = { - const q = query::parse_items(args[1..])?; - defer query::finish(&q); - +fn exec_del(serv: *server, client: *client, q: *query::query) (void | cmderror) = { let buf = bufio::dynamic(io::mode::WRITE); let prompter: (prompt::prompter | void) = void; @@ -119,7 +124,7 @@ fn exec_del(serv: *server, client: *client, args: []str) (void | cmderror) = { }; }; - const iter = secstore::query(serv.store, &q); + const iter = secstore::query(serv.store, q); let matches: []*secstore::entry = []; for (true) { const item = match (secstore::next(serv.store, &iter)) { @@ -151,7 +156,7 @@ fn exec_del(serv: *server, client: *client, args: []str) (void | cmderror) = { return; }; - secstore::del(serv.store, &q)!; + secstore::del(serv.store, q)!; } else { match (prompter) { case let p: prompt::prompter => @@ -164,21 +169,15 @@ fn exec_del(serv: *server, client: *client, args: []str) (void | cmderror) = { writebuf(client, bufio::buffer(&buf)); }; -fn exec_query(serv: *server, client: *client, args: []str) (void | cmderror) = { - const cmd = getopt::parse(args, - "query the key store", - ('d', "decrypt private keys"), - "query..." - ); - defer getopt::finish(&cmd); - +fn exec_query(serv: *server, client: *client, q: *query::query) (void | cmderror) = { let decrypt = false; - for (let i = 0z; i < len(cmd.opts); i += 1) { - switch (cmd.opts[i].0) { - case 'd' => - decrypt = true; - case => abort(); - }; + // FIXME: this is not very good + if (len(q.items) > 0 && q.items[0].key == "-d" + && len(q.items[0].value) == 0 && !q.items[0].private + && !q.items[0].optional) { + free(q.items[0].key); + delete(q.items[0]); + decrypt = true; }; let prompter: (prompt::prompter | void) = void; @@ -196,10 +195,7 @@ fn exec_query(serv: *server, client: *client, args: []str) (void | cmderror) = { prompter = new; }; - const q = query::parse_items(cmd.args)?; - defer query::finish(&q); - - const iter = secstore::query(serv.store, &q); + const iter = secstore::query(serv.store, q); let matches: []*secstore::entry = []; for (true) { const item = match (secstore::next(serv.store, &iter)) { @@ -249,7 +245,7 @@ fn exec_query(serv: *server, client: *client, args: []str) (void | cmderror) = { writebuf(client, bufio::buffer(&buf)); }; -fn exec_quit(serv: *server, client: *client, args: []str) (void | cmderror) = { +fn exec_quit(serv: *server, client: *client, q: *query::query) (void | cmderror) = { if (!serv.daemonized) { writefmt(client, "error Server is not damonized, use a service manager"); return; diff --git a/cmd/hiq/main.ha b/cmd/hiq/main.ha index 57e8f6d..cff4f8a 100644 --- a/cmd/hiq/main.ha +++ b/cmd/hiq/main.ha @@ -22,18 +22,15 @@ type flag = enum uint { fn write_item( out: io::handle, - items: str, + q: *query::query, field: str ) (void | io::error) = { if (field == "") { - fmt::fprintln(out, items)!; + query::unparse(out, q)?; return; }; - let items = shlex::split(items)!; - defer strings::freeall(items); - let query = query::parse_items(items)!; - for (let i = 0z; i < len(query.items); i += 1) { - let item = query.items[i]; + for (let i = 0z; i < len(q.items); i += 1) { + let item = q.items[i]; if (item.key != field) { continue; }; @@ -74,16 +71,11 @@ export fn main() void = { }; }; - let buf = path::init()!; - // TODO: Bubble up dirs::runtime errors - const sockpath = path::set(&buf, dirs::runtime()!, "himitsu")!; - let conn = match (unix::connect(sockpath)) { - case let s: net::socket => - yield s; - case errors::noentry => - fmt::fatal("error: himitsud connection failed (is it running?)"); - case let e: net::error => - fmt::fatal("error:", net::strerror(e)); + const conn = match (client::connect()) { + case let socket: net::socket => + yield socket; + case let err: client::error => + fmt::fatal("Error:", client::strerror(err)); }; defer io::close(conn)!; @@ -98,25 +90,28 @@ export fn main() void = { // NB. Can't defer free(line), causes a // use-after-free in fmt::fatal const line = strings::fromutf8(line)!; - const query = match (shlex::split(line)) { - case let q: []str => + const q = match (query::parse_string(line)) { + case let q: query::query => yield q; case => fmt::fatal("Invalid query:", line); }; - defer strings::freeall(query); + defer query::finish(&q); free(line); - send(conn, client::operation::ADD, flags, field, query); + send(conn, client::operation::ADD, flags, field, + &q); }; }; } else { - let query = alloc(cmd.args...); - defer free(query); - if (flags & flag::DECRYPT != 0) { - insert(query[0], "-d"); + const q = match (query::parse_items(cmd.args)) { + case let q: query::query => + yield q; + case => + fmt::fatal("Invalid query"); // TODO }; - send(conn, op, flags, field, query); + defer query::finish(&q); + send(conn, op, flags, field, &q); }; }; @@ -125,50 +120,25 @@ fn send( op: client::operation, flags: flag, field: str, - query: []str, + q: *query::query, ) void = { - fmt::fprint(conn, switch (op) { - case client::operation::QUERY => - yield "query"; - case client::operation::ADD => - yield "add"; - case client::operation::DEL => - yield "del"; - case client::operation::QUIT => - yield "quit"; - })!; - for (let i = 0z; i < len(query); i += 1) { - fmt::fprint(conn, " ")!; - shlex::quote(conn, query[i])!; + // TODO: error handling + const client_flags: client::flags = if (flags & flag::DECRYPT != 0) { + yield client::flags::DECRYPT; + } else { + yield 0; }; - fmt::fprintln(conn)!; - - let buf = strio::dynamic(); - defer io::close(&buf)!; + const iter = client::query(conn, op, q, client_flags)!; - for (let n = 0; true; n += 1) match (bufio::scanline(conn)!) { - case io::EOF => break; - case let line: []u8 => - // NB. Can't defer free(line), causes a use-after-free in - // fmt::fatal - let resp = strings::fromutf8(line)!; - let resp = strings::cut(resp, " "); - switch (resp.0) { - case "key" => - if (flags & flag::ONE != 0 && n > 0) { - fmt::fatal("error: Ambiguous match"); - }; - write_item(&buf, resp.1, field)!; - case "error" => - fmt::fatal("error:", resp.1); - case "end" => - free(line); - break; - case => - break; + for (let n = 0; true; n += 1) match (client::next(&iter)) { + case let resp: query::query => + if (flags & flag::ONE != 0 && n > 0) { + fmt::fatal("error: Ambiguous match"); }; - free(line); + write_item(os::stdout, &resp, field)!; + case let err: client::error => + fmt::fatal("Error:", client::strerror(err)); + case void => + break; }; - - fmt::print(strio::string(&buf))!; }; diff --git a/himitsu/client/client.ha b/himitsu/client/client.ha index 12fbe4a..12e5f9e 100644 --- a/himitsu/client/client.ha +++ b/himitsu/client/client.ha @@ -81,7 +81,7 @@ export fn query( fmt::fprint(&buf, "quit ")!; }; - if (flags & flags::DECRYPT != 0) { + if (op == operation::QUERY && flags & flags::DECRYPT != 0) { fmt::fprint(&buf, "-d ")!; }; @@ -97,7 +97,7 @@ export fn query( // Returns the next key from a key iterator, or void if no further keys are // provided. -export fn next(iter: *keyiter) (const str | void | error) = { +export fn next(iter: *keyiter) (query::query | void | error) = { strio::reset(&iter.buf); match (bufio::scanline(iter.conn)?) { case let buf: []u8 => @@ -117,7 +117,8 @@ export fn next(iter: *keyiter) (const str | void | error) = { return strings::sub(string, 6, strings::end): hierror: error; }; if (strings::hasprefix(string, "key ")) { - return strings::sub(string, 4, strings::end); + return query::parse_string( + strings::sub(string, 4, strings::end))!; }; abort("himitsu returned unexpected response"); }; diff --git a/himitsu/query/parse.ha b/himitsu/query/parse.ha index 0bfec8c..ea11c2e 100644 --- a/himitsu/query/parse.ha +++ b/himitsu/query/parse.ha @@ -1,20 +1,9 @@ use bufio; use encoding::utf8; +use fmt; use io; -use regex; -use shlex; use strings; -use fmt; - -let keyre: regex::regex = regex::regex { ... }; - -@init fn init() void = { - keyre = regex::compile(`^[-_A-Za-z]+$`)!; -}; - -@fini fn fini() void = { - regex::finish(&keyre); -}; +use strio; // A parsed Himitsu query. export type query = struct { @@ -36,51 +25,198 @@ export type invalid = !void; // return value to [[finish]] when they are done with it. export fn parse(in: io::handle) (query | invalid | io::error) = { const data = io::drain(in)?; - const data = match (strings::fromutf8(data)) { + defer free(data); + match (strings::fromutf8(data)) { case let data: str => - yield data; + return parse_string(data); case utf8::invalid => return invalid; }; +}; - const items = match (shlex::split(data)) { - case let items: []str => - yield items; - case shlex::syntaxerr => - return invalid; +// Parses a query, returning its key/value pairs. The caller must pass the +// return value to [[finish]] when they are done with it. +export fn parse_string(in: str) (query | invalid) = { + let query = query { ... }; + let iter = strings::iter(in); + for (true) { + match (parse_item(&iter, &query, false)?) { + case void => + yield; + case io::EOF => + break; + }; }; - defer strings::freeall(items); - return parse_items(items); + return query; }; // Parses a list of key/value pairs which has already been split with shlex (or // a shell, for example when parsing a query from argv). export fn parse_items(items: []str) (query | invalid) = { - // XXX: Should do something about the case where the user specifies both - // ? and ! let query = query { ... }; for (let i = 0z; i < len(items); i += 1) { - const (key, value) = strings::cut(items[i], "="); - let optional = false, private = false; - if (strings::hassuffix(key, "!")) { - private = true; + const item = strings::trim(items[i]); + if (len(item) == 0) { + continue; }; - if (strings::hassuffix(key, "?")) { + let iter = strings::iter(item); + parse_item(&iter, &query, true)? as void; + }; + return query; +}; + +fn parse_item( + iter: *strings::iterator, + q: *query, + split: bool, +) (void | io::EOF | invalid) = { + // XXX: Should do something about the case where the user specifies both + // ? and ! + const key = match (parse_value(iter, true, split)?) { + case let s: str => + yield s; + case io::EOF => + return io::EOF; + }; + let optional = false, private = false; + match (strings::next(iter)) { + case let r: rune => + switch (r) { + case '!' => + private = true; + case '?' => optional = true; + case => + strings::prev(iter); + }; + case void => + yield; + }; + const value = match (strings::next(iter)) { + case let r: rune => + yield if (r == '=') { + yield parse_value(iter, false, split)? as str; + } else { + strings::prev(iter); + yield ""; + }; + case void => + yield ""; + }; + + append(q.items, pair { + key = key, + value = value, + private = private, + optional = optional, + }); +}; + +fn parse_value( + iter: *strings::iterator, + key: bool, + split: bool, +) (str | io::EOF | invalid) = { + if (!split) for (true) match (strings::next(iter)) { + case let r: rune => + if (r != ' ' && r != '\t') { + strings::prev(iter); + break; + }; + case => + break; + }; + + const buf = strio::dynamic(); + defer io::close(&buf)!; + for (true) { + const r = match (strings::next(iter)) { + case let r: rune => + yield r; + case void => + if (key && len(strio::string(&buf)) == 0) { + return io::EOF; + }; + break; }; - key = strings::trim(key, '?', '!'); - if (!regex::test(&keyre, key)) { + switch (r) { + case '!', '?', '=' => + if (key) { + strings::prev(iter); + break; + } else { + return invalid; + }; + case ' ', '\t' => + if (split) { + strio::appendrune(&buf, r)!; + } else { + break; + }; + case '\\' => + scan_backslash(&buf, iter)?; + case '"' => + scan_double(&buf, iter)?; + case '\'' => + scan_single(&buf, iter)?; + case => + strio::appendrune(&buf, r)!; + }; + }; + if (key && len(strio::string(&buf)) == 0) { + return io::EOF; + }; + return strings::dup(strio::string(&buf)); +}; + +fn scan_double(out: io::handle, iter: *strings::iterator) (void | invalid) = { + for (true) { + const r = match (strings::next(iter)) { + case let r: rune => + yield r; + case void => return invalid; }; - append(query.items, pair { - key = strings::dup(key), - value = strings::dup(value), - private = private, - optional = optional, - }); + switch (r) { + case '"' => + break; + case '\\' => + scan_backslash(out, iter)?; + case => + strio::appendrune(out, r)!; + }; + }; +}; + +fn scan_backslash(out: io::handle, iter: *strings::iterator) (void | invalid) = { + const r = match (strings::next(iter)) { + case let r: rune => + yield r; + case void => + return invalid; + }; + if (r == 'n') { + strio::appendrune(out, '\n')!; + } else { + strio::appendrune(out, r)!; + }; +}; + +fn scan_single(out: io::handle, iter: *strings::iterator) (void | invalid) = { + for (true) { + const r = match (strings::next(iter)) { + case let r: rune => + yield r; + case void => + return invalid; + }; + + if (r == '\'') { + break; + }; + strio::appendrune(out, r)!; }; - return query; }; // Frees resources associated with this query. @@ -102,21 +238,33 @@ export fn finish(q: *query) void = { ("foo", "", false, true), ("bar", "baz", true, false), ]), + (`query -d `, [ + ("query", "", false, false), + ("-d", "", false, false), + ]), + (`user[email]=me@test.org 'user=password'!=hunter2`, [ + ("user[email]", "me@test.org", false, false), + ("user=password", "hunter2", true, false), + ]), + (`user[email]=me@test.org 'user=password'!=hunter2`, [ + ("user[email]", "me@test.org", false, false), + ("user=password", "hunter2", true, false), + ]), + (`Êmail²=hunter@example.org 'Späced password'!=nothunter2`, [ + ("Êmail²", "hunter@example.org", false, false), + ("Späced password", "nothunter2", true, false), + ]), ]; for (let i = 0z; i < len(cases); i += 1) { const expected = cases[i].1; - const input = cases[i].0; - const input = bufio::fixed(strings::toutf8(input), - io::mode::READ); - - const result = parse(&input)!; + const result = parse_string(cases[i].0)!; defer finish(&result); assert(len(expected) == len(result.items)); for (let j = 0z; j < len(result.items); j += 1) { - const got = result.items[i]; - const expect = &expected[i]; + const got = result.items[j]; + const expect = &expected[j]; assert(got.key == expect.0); assert(got.value == expect.1); assert(got.private == expect.2); @@ -124,3 +272,14 @@ export fn finish(q: *query) void = { }; }; }; + +@test fn query_parse_split() void = { + const result = parse_items([`Spaced password!=nothunter2`])!; + defer finish(&result); + + assert(len(result.items) == 1); + assert(result.items[0].key == "Spaced password"); + assert(result.items[0].value == "nothunter2"); + assert(result.items[0].private); + assert(!result.items[0].optional); +}; diff --git a/himitsu/query/unparse.ha b/himitsu/query/unparse.ha index d0a8c74..05167e5 100644 --- a/himitsu/query/unparse.ha +++ b/himitsu/query/unparse.ha @@ -1,6 +1,9 @@ +use ascii; +use encoding::utf8; use fmt; use io; -use shlex; +use strings; +use strio; // Converts a Himitsu query into a string, including a newline, and writes it to // the given I/O handle. @@ -9,7 +12,7 @@ export fn unparse(sink: io::handle, q: *query) (size | io::error) = { for (let i = 0z; i < len(q.items); i += 1) { const pair = &q.items[i]; - z += fmt::fprintf(sink, "{}", pair.key)?; + z += quote(sink, pair.key)?; if (pair.private) { z += fmt::fprintf(sink, "!")?; @@ -19,7 +22,7 @@ export fn unparse(sink: io::handle, q: *query) (size | io::error) = { }; if (pair.value != "") { z += fmt::fprintf(sink, "=")?; - z += shlex::quote(sink, pair.value)?; + z += quote(sink, pair.value)?; }; if (i + 1 < len(q.items)) { @@ -29,3 +32,59 @@ export fn unparse(sink: io::handle, q: *query) (size | io::error) = { z += fmt::fprintln(sink)?; return z; }; + +// Quotes a string for [[parse]] and writes it to the provided [[io::handle]]. +export fn quote(sink: io::handle, s: str) (size | io::error) = { + if (len(s) == 0) { + return io::writeall(sink, strings::toutf8(`''`))?; + }; + if (is_safe(s)) { + return io::writeall(sink, strings::toutf8(s))?; + }; + + let z = io::writeall(sink, ['\''])?; + + const iter = strings::iter(s); + for (true) { + const rn = match (strings::next(&iter)) { + case let rn: rune => + yield rn; + case void => + break; + }; + + if (rn == '\'') { + z += io::writeall(sink, strings::toutf8(`'"'"'`))?; + } else if (rn == '\n') { + z += io::writeall(sink, strings::toutf8(`'"\n"'`))?; + } else { + z += io::writeall(sink, utf8::encoderune(rn))?; + }; + }; + + z += io::writeall(sink, ['\''])?; + return z; +}; + +fn is_safe(s: str) bool = { + const iter = strings::iter(s); + for (true) { + const rn = match (strings::next(&iter)) { + case let rn: rune => + yield rn; + case void => + break; + }; + + switch (rn) { + case '@', '%', '+', '=', ':', ',', '.', '/', '-' => + void; + case => + if (!ascii::isalnum(rn) || ascii::isspace(rn) + || rn == '\n') { + return false; + }; + }; + }; + return true; +}; diff --git a/secstore/serialize.ha b/secstore/serialize.ha index 99e6fa7..a9cc4b6 100644 --- a/secstore/serialize.ha +++ b/secstore/serialize.ha @@ -1,14 +1,14 @@ use bytes; -use crypto; use crypto::keystore; +use crypto; use encoding::base64; use errors; use fmt; use fs; +use himitsu::query; use io; use os; use path; -use shlex; use strio; use uuid; @@ -28,11 +28,11 @@ export fn write( // TODO: https://todo.sr.ht/~sircmpwn/hare/619 for (let i = 0z; i < len(ent.pairs); i += 1) { const pair = &ent.pairs[i]; - shlex::quote(sink, pair.key)?; + query::quote(sink, pair.key)?; match (pair.value) { case let val: str => fmt::fprint(sink, "=")?; - shlex::quote(sink, val)?; + query::quote(sink, val)?; case let u: uuid::uuid => fmt::fprint(sink, "!")?; if (private) { @@ -42,7 +42,7 @@ export fn write( let buf = strio::dynamic(); defer io::close(&buf)!; write_private(store, &buf, u)?; - shlex::quote(sink, strio::string(&buf))?; + query::quote(sink, strio::string(&buf))?; }; }; if (i + 1 < len(ent.pairs)) { -- 2.40.1