Haelwenn (lanodan) Monnier: 3 patches
- net/uri: Acknowledge the different allowed characters
- net/uri: Fix decoding multi-byte percent-data
- net/uri/+test: Use wanted/got wrapping for all str assertions
8 files changed, 99 insertions(+), 27 deletions(-)
hare/patches: SUCCESS in 1m45s [net/uri: Acknowledge the different allowed characters][0] from [Haelwenn (lanodan) Monnier][1] [0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/37390 [1]: mailto:contact+sr.ht@hacktivis.me ✓ #900137 SUCCESS hare/patches/alpine.yml https://builds.sr.ht/~sircmpwn/job/900137 ✓ #900138 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/900138
Thanks! To git@git.sr.ht:~sircmpwn/hare abaf8ec9..5b0f6d2d master -> master
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~sircmpwn/hare-dev/patches/37390/mbox | git am -3
Learn more about email & git
- path wasn't percent-encoded - query and fragment had allowed characters being percent-encoded Signed-off-by: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me> --- net/uri/+test.ha | 14 ++++++++++++-- net/uri/fmt.ha | 35 +++++++++++++++++++++++++++-------- net/uri/query.ha | 13 +++++++++---- 3 files changed, 48 insertions(+), 14 deletions(-) diff --git a/net/uri/+test.ha b/net/uri/+test.ha index d33ec324..ac024fc8 100644 --- a/net/uri/+test.ha +++ b/net/uri/+test.ha @@ -68,7 +68,17 @@ use net::ip; ... }, )!; - + test_uri_roundtrip( + "https://sr.ht/projects?search=%23risc-v&sort=longest-active#foo", + uri { + scheme = "https", + host = "sr.ht", + path = "/projects", + query = "search=%23risc-v&sort=longest-active", + fragment = "foo", + ... + }, + )!; }; @test fn invalid() void = { @@ -101,7 +111,7 @@ use net::ip; query = "objectClass?one", ... }, - "ldap://[2001:db8::7]/c=GB?objectClass%3Fone", + "ldap://[2001:db8::7]/c=GB?objectClass?one", )!; // https://bugs.chromium.org/p/chromium/issues/detail?id=841105 diff --git a/net/uri/fmt.ha b/net/uri/fmt.ha index c5397872..7eea2d4b 100644 --- a/net/uri/fmt.ha +++ b/net/uri/fmt.ha @@ -7,6 +7,21 @@ use strconv; use strings; use strio; + +// Extract from RFC3986 ABNF +// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" +// reg-name = *( unreserved / pct-encoded / sub-delims ) +// host = IP-literal / IPv4address / reg-name +// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +// query = *( pchar / "/" / "?" ) +// fragment = *( pchar / "/" / "?" ) + +def unres_host: str = "-._~!$&'()*+,;="; +def unres_query_frag: str = "-._~!$&'()*+,;=:@/?"; +def unres_path: str = "-._~!$&'()*+,;=:@/"; + // Writes a formatted [[uri]] to an [[io::handle]]. Returns the number of bytes // written. 
export fn fmt(out: io::handle, u: *const uri) (size | io::error) = { @@ -25,7 +40,12 @@ export fn fmt(out: io::handle, u: *const uri) (size | io::error) = { if (!slashes_w) { n += fmt::fprint(out, "//")?; }; - n += percent_encode(out, host)?; + let unres = if(u.scheme == "file") { + yield unres_path; + } else { + yield unres_host; + }; + n += percent_encode(out, host, unres)?; }; case let addr: ip::addr => if (!slashes_w) { @@ -36,14 +56,14 @@ export fn fmt(out: io::handle, u: *const uri) (size | io::error) = { if (u.port != 0) { n += fmt::fprintf(out, ":{}", u.port)?; }; - n += fmt::fprint(out, u.path)?; + n += percent_encode(out, u.path, unres_path)?; if (len(u.query) > 0) { - n += fmt::fprint(out, "?")?; - n += percent_encode(out, u.query)?; + // Always percent-encoded, see parse and encodequery/decodequery + n += fmt::fprintf(out, "?{}", u.query)?; }; if (len(u.fragment) > 0) { n += fmt::fprint(out, "#")?; - n += percent_encode(out, u.fragment)?; + n += percent_encode(out, u.fragment, unres_query_frag)?; }; return n; @@ -62,7 +82,7 @@ fn fmtaddr(out: io::handle, addr: ip::addr) (size | io::error) = { return n; }; -fn percent_encode(out: io::handle, src: str) (size | io::error) = { +fn percent_encode(out: io::handle, src: str, allowed: str) (size | io::error) = { let iter = strings::iter(src); let n = 0z; for (true) { @@ -72,8 +92,7 @@ fn percent_encode(out: io::handle, src: str) (size | io::error) = { case => break; }; - // unreserved - if (ascii::isalnum(r) || strings::contains("-._~", r)) { + if (ascii::isalnum(r) || strings::contains(allowed, r)) { n += fmt::fprint(out, r)?; } else { const en = utf8::encoderune(r); diff --git a/net/uri/query.ha b/net/uri/query.ha index 774801a3..8c9dd0e4 100644 --- a/net/uri/query.ha +++ b/net/uri/query.ha @@ -50,10 +50,10 @@ export fn encodequery(pairs: [](str, str)) str = { if (i > 0) strio::appendrune(&buf, '&')!; assert(len(pair.0) > 0); - percent_encode(&buf, pair.0)!; + percent_encode(&buf, pair.0, unres_query_frag)!; 
if (len(pair.1) > 0) { strio::appendrune(&buf, '=')!; - percent_encode(&buf, pair.1)!; + percent_encode(&buf, pair.1, unres_query_frag)!; }; }; @@ -61,7 +61,7 @@ export fn encodequery(pairs: [](str, str)) str = { }; @test fn decodequery() void = { - const u = parse("https://sr.ht/projects?search=%23risc-v&sort=longest-active")!; + const u = parse("https://sr.ht/projects?search=%23risc-v&sort=longest-active&quantity=100%25")!; defer finish(&u); const query = decodequery(u.query); @@ -73,15 +73,20 @@ export fn encodequery(pairs: [](str, str)) str = { const pair = query_next(&query)! as (str, str); assert(pair.0 == "sort"); assert(pair.1 == "longest-active"); + + const pair = query_next(&query)! as (str, str); + assert(pair.0 == "quantity"); + assert(pair.1 == "100%"); }; @test fn encodequery() void = { const pairs = [ ("search", "#risc-v"), ("sort", "longest-active"), + ("quantity", "100%") ]; const encoded = encodequery(pairs); defer free(encoded); - assert(encoded == "search=%23risc-v&sort=longest-active"); + assert(encoded == "search=%23risc-v&sort=longest-active&quantity=100%25"); }; -- 2.37.4
Technically it can be non-UTF-8 but hare heavily assumes UTF-8. Signed-off-by: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me> --- net/uri/+test.ha | 10 ++++++++++ net/uri/parse.ha | 26 +++++++++++++++++++++++++- scripts/gen-stdlib | 2 +- stdlib.mk | 4 ++-- 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/net/uri/+test.ha b/net/uri/+test.ha index ac024fc8..4b99c7b0 100644 --- a/net/uri/+test.ha +++ b/net/uri/+test.ha @@ -79,6 +79,16 @@ use net::ip; ... }, )!; + test_uri_roundtrip( + "https://en.wiktionary.org/wiki/%E3%81%8A%E3%81%AF%E3%82%88%E3%81%86#Japanese", + uri { + scheme = "https", + host = "en.wiktionary.org", + path = "/wiki/おはよう", + fragment = "Japanese", + ... + } + )!; }; @test fn invalid() void = { diff --git a/net/uri/parse.ha b/net/uri/parse.ha index e45f70ff..54eac1d8 100644 --- a/net/uri/parse.ha +++ b/net/uri/parse.ha @@ -2,6 +2,7 @@ // (c) 2022 Alexey Yerin <yyp@disroot.org> // (c) 2022 Umar Getagazov <umar@handlerug.me> use ascii; +use encoding::utf8; use io; use net::ip; use strconv; @@ -329,6 +330,7 @@ fn percent_decode_static(out: io::handle, s: str) (void | invalid) = { let iter = strings::iter(s); let tmp = strio::dynamic(); defer io::close(&tmp)!; + let percent_data: []u8 = []; for (true) { match (strings::next(&iter)) { case let r: rune => @@ -342,14 +344,36 @@ fn percent_decode_static(out: io::handle, s: str) (void | invalid) = { match (strconv::stou8b(strio::string(&tmp), strconv::base::HEX)) { case let ord: u8 => - strio::appendrune(out, ord: u32: rune)!; + append(percent_data, ord); case => return invalid; }; } else { + if(len(percent_data) > 0) { + match(strings::fromutf8(percent_data)) { + case let stro: str => + strio::concat(out, stro)!; + case utf8::invalid => + return invalid; + }; + + percent_data = []; + }; + strio::appendrune(out, r)!; }; case void => + if(len(percent_data) > 0) { + match(strings::fromutf8(percent_data)) { + case let stro: str => + strio::concat(out, stro)!; + case utf8::invalid => + 
return invalid; + }; + + percent_data = []; + }; + break; }; }; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib index d2f72a3b..8724fdf4 100755 --- a/scripts/gen-stdlib +++ b/scripts/gen-stdlib @@ -1076,7 +1076,7 @@ net_uri() { +test.ha fi gen_ssa net::uri \ - ascii ip net::ip strconv strings strio + ascii encoding::utf8 ip net::ip strconv strings strio } gensrcs_math_complex() { diff --git a/stdlib.mk b/stdlib.mk index 23e2b4dc..80598d44 100644 --- a/stdlib.mk +++ b/stdlib.mk @@ -1734,7 +1734,7 @@ stdlib_net_uri_any_srcs = \ $(STDLIB)/net/uri/query.ha \ $(STDLIB)/net/uri/uri.ha -$(HARECACHE)/net/uri/net_uri-any.ssa: $(stdlib_net_uri_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_ip_$(PLATFORM)) $(stdlib_net_ip_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) +$(HARECACHE)/net/uri/net_uri-any.ssa: $(stdlib_net_uri_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_ip_$(PLATFORM)) $(stdlib_net_ip_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(HARECACHE)/net/uri @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nnet::uri \ @@ -3964,7 +3964,7 @@ testlib_net_uri_any_srcs = \ $(STDLIB)/net/uri/uri.ha \ $(STDLIB)/net/uri/+test.ha -$(TESTCACHE)/net/uri/net_uri-any.ssa: $(testlib_net_uri_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_ip_$(PLATFORM)) $(testlib_net_ip_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) +$(TESTCACHE)/net/uri/net_uri-any.ssa: $(testlib_net_uri_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_ip_$(PLATFORM)) $(testlib_net_ip_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(TESTCACHE)/net/uri @HARECACHE=$(TESTCACHE) $(HAREC) 
$(TESTHAREFLAGS) -o $@ -Nnet::uri \ -- 2.37.4
Signed-off-by: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me> --- net/uri/+test.ha | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/net/uri/+test.ha b/net/uri/+test.ha index 4b99c7b0..71c3fd3f 100644 --- a/net/uri/+test.ha +++ b/net/uri/+test.ha @@ -145,25 +145,29 @@ fn test_uri(in: str, expected_uri: uri, expected_str: str) (void | invalid) = { const u = parse(in)?; defer finish(&u); - assert(u.scheme == expected_uri.scheme); + assert_str(u.scheme, expected_uri.scheme); match (u.host) { case let s: str => - assert(s == expected_uri.host as str); + assert_str(s, expected_uri.host as str); case let i: ip::addr => assert(ip::equal(i, expected_uri.host as ip::addr)); }; assert(u.port == expected_uri.port); - assert(u.userinfo == expected_uri.userinfo); - assert(u.path == expected_uri.path); - assert(u.query == expected_uri.query); - assert(u.fragment == expected_uri.fragment); + assert_str(u.userinfo, expected_uri.userinfo); + assert_str(u.path, expected_uri.path); + assert_str(u.query, expected_uri.query); + assert_str(u.fragment, expected_uri.fragment); const s = string(&u); defer free(s); - if (s != expected_str) { - fmt::errorfln("=== wanted\n{}", expected_str)!; - fmt::errorfln("=== got\n{}", s)!; + assert_str(s, expected_str); +}; + +fn assert_str(got: str, expected: str) void = { + if(got != expected) {
Small style nit: there should be a space between "if" and "(". -- Thomas
+ fmt::errorfln("=== wanted\n{}", expected)!; + fmt::errorfln("=== got\n{}", got)!; abort(); }; }; -- 2.37.4
builds.sr.ht <builds@sr.ht>
hare/patches: SUCCESS in 1m45s [net/uri: Acknowledge the different allowed characters][0] from [Haelwenn (lanodan) Monnier][1] [0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/37390 [1]: mailto:contact+sr.ht@hacktivis.me ✓ #900137 SUCCESS hare/patches/alpine.yml https://builds.sr.ht/~sircmpwn/job/900137 ✓ #900138 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/900138
Thanks! To git@git.sr.ht:~sircmpwn/hare abaf8ec9..5b0f6d2d master -> master