- the path wasn't percent-encoded when formatting a URI
- characters that are allowed in the query and fragment were needlessly percent-encoded
Signed-off-by: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me>
---
net/uri/+test.ha | 14 ++++++++++++--
net/uri/fmt.ha | 35 +++++++++++++++++++++++++++--------
net/uri/query.ha | 13 +++++++++----
3 files changed, 48 insertions(+), 14 deletions(-)
diff --git a/net/uri/+test.ha b/net/uri/+test.ha
index d33ec324..ac024fc8 100644
--- a/net/uri/+test.ha
+++ b/net/uri/+test.ha
@@ -68,7 +68,17 @@ use net::ip;
...
},
)!;
-
+ test_uri_roundtrip(
+ "https://sr.ht/projects?search=%23risc-v&sort=longest-active#foo",
+ uri {
+ scheme = "https",
+ host = "sr.ht",
+ path = "/projects",
+ query = "search=%23risc-v&sort=longest-active",
+ fragment = "foo",
+ ...
+ },
+ )!;
};
@test fn invalid() void = {
@@ -101,7 +111,7 @@ use net::ip;
query = "objectClass?one",
...
},
- "ldap://[2001:db8::7]/c=GB?objectClass%3Fone",
+ "ldap://[2001:db8::7]/c=GB?objectClass?one",
)!;
// https://bugs.chromium.org/p/chromium/issues/detail?id=841105
diff --git a/net/uri/fmt.ha b/net/uri/fmt.ha
index c5397872..7eea2d4b 100644
--- a/net/uri/fmt.ha
+++ b/net/uri/fmt.ha
@@ -7,6 +7,21 @@ use strconv;
use strings;
use strio;
+
+// Extract from RFC3986 ABNF
+// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+// reg-name = *( unreserved / pct-encoded / sub-delims )
+// host = IP-literal / IPv4address / reg-name
+// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+// query = *( pchar / "/" / "?" )
+// fragment = *( pchar / "/" / "?" )
+
+def unres_host: str = "-._~!$&'()*+,;=";
+def unres_query_frag: str = "-._~!$&'()*+,;=:@/?";
+def unres_path: str = "-._~!$&'()*+,;=:@/";
+
// Writes a formatted [[uri]] to an [[io::handle]]. Returns the number of bytes
// written.
export fn fmt(out: io::handle, u: *const uri) (size | io::error) = {
@@ -25,7 +40,12 @@ export fn fmt(out: io::handle, u: *const uri) (size | io::error) = {
if (!slashes_w) {
n += fmt::fprint(out, "//")?;
};
- n += percent_encode(out, host)?;
+ let unres = if(u.scheme == "file") {
+ yield unres_path;
+ } else {
+ yield unres_host;
+ };
+ n += percent_encode(out, host, unres)?;
};
case let addr: ip::addr =>
if (!slashes_w) {
@@ -36,14 +56,14 @@ export fn fmt(out: io::handle, u: *const uri) (size | io::error) = {
if (u.port != 0) {
n += fmt::fprintf(out, ":{}", u.port)?;
};
- n += fmt::fprint(out, u.path)?;
+ n += percent_encode(out, u.path, unres_path)?;
if (len(u.query) > 0) {
- n += fmt::fprint(out, "?")?;
- n += percent_encode(out, u.query)?;
+ // Always percent-encoded, see parse and encodequery/decodequery
+ n += fmt::fprintf(out, "?{}", u.query)?;
};
if (len(u.fragment) > 0) {
n += fmt::fprint(out, "#")?;
- n += percent_encode(out, u.fragment)?;
+ n += percent_encode(out, u.fragment, unres_query_frag)?;
};
return n;
@@ -62,7 +82,7 @@ fn fmtaddr(out: io::handle, addr: ip::addr) (size | io::error) = {
return n;
};
-fn percent_encode(out: io::handle, src: str) (size | io::error) = {
+fn percent_encode(out: io::handle, src: str, allowed: str) (size | io::error) = {
let iter = strings::iter(src);
let n = 0z;
for (true) {
@@ -72,8 +92,7 @@ fn percent_encode(out: io::handle, src: str) (size | io::error) = {
case =>
break;
};
- // unreserved
- if (ascii::isalnum(r) || strings::contains("-._~", r)) {
+ if (ascii::isalnum(r) || strings::contains(allowed, r)) {
n += fmt::fprint(out, r)?;
} else {
const en = utf8::encoderune(r);
diff --git a/net/uri/query.ha b/net/uri/query.ha
index 774801a3..8c9dd0e4 100644
--- a/net/uri/query.ha
+++ b/net/uri/query.ha
@@ -50,10 +50,10 @@ export fn encodequery(pairs: [](str, str)) str = {
if (i > 0) strio::appendrune(&buf, '&')!;
assert(len(pair.0) > 0);
- percent_encode(&buf, pair.0)!;
+ percent_encode(&buf, pair.0, unres_query_frag)!;
if (len(pair.1) > 0) {
strio::appendrune(&buf, '=')!;
- percent_encode(&buf, pair.1)!;
+ percent_encode(&buf, pair.1, unres_query_frag)!;
};
};
@@ -61,7 +61,7 @@ export fn encodequery(pairs: [](str, str)) str = {
};
@test fn decodequery() void = {
- const u = parse("https://sr.ht/projects?search=%23risc-v&sort=longest-active")!;
+ const u = parse("https://sr.ht/projects?search=%23risc-v&sort=longest-active&quantity=100%25")!;
defer finish(&u);
const query = decodequery(u.query);
@@ -73,15 +73,20 @@ export fn encodequery(pairs: [](str, str)) str = {
const pair = query_next(&query)! as (str, str);
assert(pair.0 == "sort");
assert(pair.1 == "longest-active");
+
+ const pair = query_next(&query)! as (str, str);
+ assert(pair.0 == "quantity");
+ assert(pair.1 == "100%");
};
@test fn encodequery() void = {
const pairs = [
("search", "#risc-v"),
("sort", "longest-active"),
+ ("quantity", "100%")
];
const encoded = encodequery(pairs);
defer free(encoded);
- assert(encoded == "search=%23risc-v&sort=longest-active");
+ assert(encoded == "search=%23risc-v&sort=longest-active&quantity=100%25");
};
--
2.37.4
Technically, percent-encoded data can be non-UTF-8, but Hare heavily assumes UTF-8.
Signed-off-by: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me>
---
net/uri/+test.ha | 10 ++++++++++
net/uri/parse.ha | 26 +++++++++++++++++++++++++-
scripts/gen-stdlib | 2 +-
stdlib.mk | 4 ++--
4 files changed, 38 insertions(+), 4 deletions(-)
diff --git a/net/uri/+test.ha b/net/uri/+test.ha
index ac024fc8..4b99c7b0 100644
--- a/net/uri/+test.ha
+++ b/net/uri/+test.ha
@@ -79,6 +79,16 @@ use net::ip;
...
},
)!;
+ test_uri_roundtrip(
+ "https://en.wiktionary.org/wiki/%E3%81%8A%E3%81%AF%E3%82%88%E3%81%86#Japanese",
+ uri {
+ scheme = "https",
+ host = "en.wiktionary.org",
+ path = "/wiki/おはよう",
+ fragment = "Japanese",
+ ...
+ }
+ )!;
};
@test fn invalid() void = {
diff --git a/net/uri/parse.ha b/net/uri/parse.ha
index e45f70ff..54eac1d8 100644
--- a/net/uri/parse.ha
+++ b/net/uri/parse.ha
@@ -2,6 +2,7 @@
// (c) 2022 Alexey Yerin <yyp@disroot.org>
// (c) 2022 Umar Getagazov <umar@handlerug.me>
use ascii;
+use encoding::utf8;
use io;
use net::ip;
use strconv;
@@ -329,6 +330,7 @@ fn percent_decode_static(out: io::handle, s: str) (void | invalid) = {
let iter = strings::iter(s);
let tmp = strio::dynamic();
defer io::close(&tmp)!;
+ let percent_data: []u8 = [];
for (true) {
match (strings::next(&iter)) {
case let r: rune =>
@@ -342,14 +344,36 @@ fn percent_decode_static(out: io::handle, s: str) (void | invalid) = {
match (strconv::stou8b(strio::string(&tmp),
strconv::base::HEX)) {
case let ord: u8 =>
- strio::appendrune(out, ord: u32: rune)!;
+ append(percent_data, ord);
case =>
return invalid;
};
} else {
+ if(len(percent_data) > 0) {
+ match(strings::fromutf8(percent_data)) {
+ case let stro: str =>
+ strio::concat(out, stro)!;
+ case utf8::invalid =>
+ return invalid;
+ };
+
+ percent_data = [];
+ };
+
strio::appendrune(out, r)!;
};
case void =>
+ if(len(percent_data) > 0) {
+ match(strings::fromutf8(percent_data)) {
+ case let stro: str =>
+ strio::concat(out, stro)!;
+ case utf8::invalid =>
+ return invalid;
+ };
+
+ percent_data = [];
+ };
+
break;
};
};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
index d2f72a3b..8724fdf4 100755
--- a/scripts/gen-stdlib
+++ b/scripts/gen-stdlib
@@ -1076,7 +1076,7 @@ net_uri() {
+test.ha
fi
gen_ssa net::uri \
- ascii ip net::ip strconv strings strio
+ ascii encoding::utf8 ip net::ip strconv strings strio
}
gensrcs_math_complex() {
diff --git a/stdlib.mk b/stdlib.mk
index 23e2b4dc..80598d44 100644
--- a/stdlib.mk
+++ b/stdlib.mk
@@ -1734,7 +1734,7 @@ stdlib_net_uri_any_srcs = \
$(STDLIB)/net/uri/query.ha \
$(STDLIB)/net/uri/uri.ha
-$(HARECACHE)/net/uri/net_uri-any.ssa: $(stdlib_net_uri_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_ip_$(PLATFORM)) $(stdlib_net_ip_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM))
+$(HARECACHE)/net/uri/net_uri-any.ssa: $(stdlib_net_uri_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_ip_$(PLATFORM)) $(stdlib_net_ip_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM))
@printf 'HAREC \t$@\n'
@mkdir -p $(HARECACHE)/net/uri
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nnet::uri \
@@ -3964,7 +3964,7 @@ testlib_net_uri_any_srcs = \
$(STDLIB)/net/uri/uri.ha \
$(STDLIB)/net/uri/+test.ha
-$(TESTCACHE)/net/uri/net_uri-any.ssa: $(testlib_net_uri_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_ip_$(PLATFORM)) $(testlib_net_ip_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM))
+$(TESTCACHE)/net/uri/net_uri-any.ssa: $(testlib_net_uri_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_ip_$(PLATFORM)) $(testlib_net_ip_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM))
@printf 'HAREC \t$@\n'
@mkdir -p $(TESTCACHE)/net/uri
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nnet::uri \
--
2.37.4
Signed-off-by: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me>
---
net/uri/+test.ha | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/net/uri/+test.ha b/net/uri/+test.ha
index 4b99c7b0..71c3fd3f 100644
--- a/net/uri/+test.ha
+++ b/net/uri/+test.ha
@@ -145,25 +145,29 @@ fn test_uri(in: str, expected_uri: uri, expected_str: str) (void | invalid) = {
const u = parse(in)?;
defer finish(&u);
- assert(u.scheme == expected_uri.scheme);
+ assert_str(u.scheme, expected_uri.scheme);
match (u.host) {
case let s: str =>
- assert(s == expected_uri.host as str);
+ assert_str(s, expected_uri.host as str);
case let i: ip::addr =>
assert(ip::equal(i, expected_uri.host as ip::addr));
};
assert(u.port == expected_uri.port);
- assert(u.userinfo == expected_uri.userinfo);
- assert(u.path == expected_uri.path);
- assert(u.query == expected_uri.query);
- assert(u.fragment == expected_uri.fragment);
+ assert_str(u.userinfo, expected_uri.userinfo);
+ assert_str(u.path, expected_uri.path);
+ assert_str(u.query, expected_uri.query);
+ assert_str(u.fragment, expected_uri.fragment);
const s = string(&u);
defer free(s);
- if (s != expected_str) {
- fmt::errorfln("=== wanted\n{}", expected_str)!;
- fmt::errorfln("=== got\n{}", s)!;
+ assert_str(s, expected_str);
+};
+
+fn assert_str(got: str, expected: str) void = {
+ if(got != expected) {
+ fmt::errorfln("=== wanted\n{}", expected)!;
+ fmt::errorfln("=== got\n{}", got)!;
abort();
};
};
--
2.37.4