~sircmpwn/himitsu-devel

himitsu: himitsu::query: rewrite parser to mitigate key name restrictions v1 PROPOSED

Alexey Yerin: 1
 himitsu::query: rewrite parser to mitigate key name restrictions

 1 files changed, 185 insertions(+), 39 deletions(-)
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~sircmpwn/himitsu-devel/patches/33046/mbox | git am -3
Learn more about email & git

[RFC PATCH himitsu] himitsu::query: rewrite parser to mitigate key name restrictions Export this patch

This allows non-ASCII characters, '[]', '()', etc. in keys.
TODO: fix up key serialization for keys that contain a '!'.
---
 himitsu/query/parse.ha | 224 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 185 insertions(+), 39 deletions(-)

diff --git a/himitsu/query/parse.ha b/himitsu/query/parse.ha
index a936042..fe1ec23 100644
--- a/himitsu/query/parse.ha
+++ b/himitsu/query/parse.ha
@@ -1,21 +1,9 @@
use bufio;
use encoding::utf8;
use fmt;
use io;
use regex;
use shlex;
use strings;
use fmt;

// Compiled pattern that key names are checked against; filled in by init and
// released by fini.
let keyre: regex::regex = regex::regex { ... };

// Compiles the key pattern once, before the pattern is first used.
@init fn init() void = {
	// Yes, this is a valid POSIX regular expression
	// NOTE(review): presumably accepts only ASCII letters, '-', '_', '[' and
	// ']' — confirm against the regex module's bracket-expression rules.
	keyre = regex::compile(`^[]A-Za-z-_[]+$`)!;
};

// Releases the compiled key pattern.
@fini fn fini() void = {
	regex::finish(&keyre);
};
use strio;

// A parsed Himitsu query.
export type query = struct {
@@ -37,6 +25,7 @@ export type invalid = !void;
// return value to [[finish]] when they are done with it.
export fn parse(in: io::handle) (query | invalid | io::error) = {
	const data = io::drain(in)?;
	defer free(data);
	const data = match (strings::try_fromutf8(data)) {
	case let data: str =>
		yield data;
@@ -44,44 +33,182 @@ export fn parse(in: io::handle) (query | invalid | io::error) = {
		return invalid;
	};

	const items = match (shlex::split(data)) {
	case let items: []str =>
		yield items;
	case shlex::syntaxerr =>
		return invalid;
	let query = query { ... };
	let iter = strings::iter(data);
	for (true) {
		match (parse_item(&iter, &query, false)?) {
		case void =>
			yield;
		case io::EOF =>
			break;
		};
	};
	defer strings::freeall(items);
	return parse_items(items);
	return query;
};

// Parses a list of key/value pairs which has already been split with shlex (or
// a shell, for example when parsing a query from argv). Each element of items
// is parsed as exactly one pair, so unquoted whitespace inside an element is
// kept as part of the key or value. Elements which are empty after trimming
// are skipped. Returns [[invalid]] if any element is malformed.
export fn parse_items(items: []str) (query | invalid) = {
	let query = query { ... };
	for (let i = 0z; i < len(items); i += 1) {
		const item = strings::trim(items[i]);
		if (len(item) == 0) {
			continue;
		};
		let iter = strings::iter(item);
		// NOTE(review): pairs appended before a failing item are not
		// released on this error path — confirm whether that is intended.
		match (parse_item(&iter, &query, true)?) {
		case void =>
			yield;
		case io::EOF =>
			// The item produced no key at all (for example it only
			// held an empty quoted string), so there is nothing to
			// add. Skip it instead of aborting on a failed type
			// assertion.
			yield;
		};
	};
	return query;
};

// Parses a single "key", "key=value", "key!=value" or "key?=value" pair from
// iter and appends it to q.
//
// split is true when the input has already been split into items (see
// [[parse_items]]); in that mode whitespace may not follow the key in place of
// a value. Returns io::EOF when the input is exhausted before any key was
// read, and invalid when the pair is malformed.
fn parse_item(
	iter: *strings::iterator,
	q: *query,
	split: bool,
) (void | io::EOF | invalid) = {
	// XXX: Should do something about the case where the user specifies both
	// ? and !
	const key = match (parse_value(iter, true, split)?) {
	case let s: str =>
		yield s;
	case io::EOF =>
		return io::EOF;
	};

	// At most one '!' (private) or '?' (optional) marker may follow the
	// key; any other rune is pushed back for the value check below.
	let optional = false, private = false;
	match (strings::next(iter)) {
	case let r: rune =>
		switch (r) {
		case '!' =>
			private = true;
		case '?' =>
			optional = true;
		case =>
			strings::prev(iter);
		};
	case void =>
		yield;
	};

	const value = match (strings::next(iter)) {
	case let r: rune =>
		yield if (r == '=') {
			yield match (parse_value(iter, false, split)) {
			case let s: str =>
				yield s;
			case io::EOF =>
				// parse_value only reports EOF when reading a
				// key; treat it as an empty value regardless.
				yield "";
			case invalid =>
				// The key is owned by us until it is appended.
				free(key);
				return invalid;
			};
		} else if (!split && (r == ' ' || r == '\t')) {
			// Whitespace ends a value-less pair, but only when the
			// input was not pre-split. The parentheses matter: &&
			// binds tighter than ||, so without them a tab would
			// be accepted in split mode as well.
			strings::prev(iter);
			yield "";
		} else {
			free(key);
			return invalid;
		};
	case void =>
		yield "";
	};

	append(q.items, pair {
		key = key,
		value = value,
		private = private,
		optional = optional,
	});
};

// Minimal version of shlex::split: reads one token from iter, resolving
// backslash escapes and single/double quotes, and returns a copy of it.
//
// When key is true the token is a key: it ends, without consuming the rune, at
// an unquoted '!', '?' or '=', and io::EOF is returned if the input ends before
// anything was collected. When key is false the token is a value and those
// three runes are ordinary characters.
//
// When split is true, unquoted spaces and tabs are kept in the token.
// Otherwise they end a value (without being consumed) and are skipped while
// reading a key.
fn parse_value(
	iter: *strings::iterator,
	key: bool,
	split: bool,
) (str | io::EOF | invalid) = {
	const buf = strio::dynamic();
	defer io::close(&buf)!;
	for (true) {
		const r = match (strings::next(iter)) {
		case let r: rune =>
			yield r;
		case void =>
			if (key && len(strio::string(&buf)) == 0) {
				return io::EOF;
			};
			break;
		};
		// Parenthesised on purpose: && binds tighter than ||, so the
		// unparenthesised form would also cut values at '?' and '='.
		if (key && (r == '!' || r == '?' || r == '=')) {
			strings::prev(iter);
			break;
		};

		switch (r) {
		case ' ', '\t' =>
			if (split) {
				strio::appendrune(&buf, r)!;
			} else if (key) {
				// NOTE(review): this skips whitespace wherever it
				// occurs in an unquoted key, not only before it,
				// so "foo bar=baz" yields the key "foobar" —
				// confirm that this is intended.
				for (true) match (strings::next(iter)) {
				case let r: rune =>
					if (r != ' ' && r != '\t') {
						strings::prev(iter);
						break;
					};
				case void =>
					break;
				};
				continue;
			} else {
				strings::prev(iter);
				break;
			};
		case '\\' =>
			scan_backslash(&buf, iter)?;
		case '"' =>
			scan_double(&buf, iter)?;
		case '\'' =>
			scan_single(&buf, iter)?;
		case =>
			strio::appendrune(&buf, r)!;
		};
	};
	return strings::dup(strio::string(&buf));
};

// Reads the remainder of a double-quoted string from iter and writes its
// contents to out. The closing quote is consumed but not written, and a
// backslash is handed to scan_backslash so that the rune after it is taken
// literally. Returns invalid if the input ends before the closing quote.
fn scan_double(out: io::handle, iter: *strings::iterator) (void | invalid) = {
	for (true) {
		const r = match (strings::next(iter)) {
		case let r: rune =>
			yield r;
		case void =>
			return invalid;
		};

		switch (r) {
		case '"' =>
			break;
		case '\\' =>
			scan_backslash(out, iter)?;
		case =>
			strio::appendrune(out, r)!;
		};
	};
};

// Handles the rune following a backslash: whatever it is, it is written to out
// unchanged. Returns invalid if the input ends right after the backslash.
fn scan_backslash(out: io::handle, iter: *strings::iterator) (void | invalid) = {
	match (strings::next(iter)) {
	case let escaped: rune =>
		strio::appendrune(out, escaped)!;
	case void =>
		return invalid;
	};
};

// Reads the remainder of a single-quoted string from iter and writes its
// contents to out verbatim; no escapes are recognised inside single quotes.
// The closing quote is consumed but not written. Returns invalid if the input
// ends before the closing quote.
fn scan_single(out: io::handle, iter: *strings::iterator) (void | invalid) = {
	for (true) {
		const r = match (strings::next(iter)) {
		case let r: rune =>
			yield r;
		case void =>
			return invalid;
		};

		if (r == '\'') {
			break;
		};
		strio::appendrune(out, r)!;
	};
};

// Frees resources associated with this query.
@@ -103,6 +230,14 @@ export fn finish(q: *query) void = {
			("foo", "", false, true),
			("bar", "baz", true, false),
		]),
		(`user[email]=me@test.org 'user=password'!=hunter2`, [
			("user[email]", "me@test.org", false, false),
			("user=password", "hunter2", true, false),
		]),
		(`Êmail²=hunter@example.org 'Späced password'!=nothunter2`, [
			("Êmail²", "hunter@example.org", false, false),
			("Späced password", "nothunter2", true, false),
		]),
	];

	for (let i = 0z; i < len(cases); i += 1) {
@@ -116,8 +251,8 @@ export fn finish(q: *query) void = {

		assert(len(expected) == len(result.items));
		for (let j = 0z; j < len(result.items); j += 1) {
			const got = result.items[i];
			const expect = &expected[i];
			const got = result.items[j];
			const expect = &expected[j];
			assert(got.key == expect.0);
			assert(got.value == expect.1);
			assert(got.private == expect.2);
@@ -125,3 +260,14 @@ export fn finish(q: *query) void = {
		};
	};
};

// A pre-split item keeps its unquoted space as part of the key, and the
// trailing '!' marks the pair as private.
@test fn query_parse_split() void = {
	const parsed = parse_items([`Spaced password!=nothunter2`])!;
	defer finish(&parsed);

	assert(len(parsed.items) == 1);

	const item = parsed.items[0];
	assert(item.key == "Spaced password");
	assert(item.value == "nothunter2");
	assert(item.private);
	assert(!item.optional);
};
-- 
2.36.1