Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---
encoding/asn1/+test/decoder_test.ha | 331 ++++++++++++
encoding/asn1/decoder.ha | 810 ++++++++++++++++++++++++++++
2 files changed, 1141 insertions(+)
create mode 100644 encoding/asn1/+test/decoder_test.ha
create mode 100644 encoding/asn1/decoder.ha
diff --git a/encoding/asn1/+test/decoder_test.ha b/encoding/asn1/+test/decoder_test.ha
new file mode 100644
index 00000000..2282fc40
--- /dev/null
+++ b/encoding/asn1/+test/decoder_test.ha
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use bufio;
+use bytes;
+use errors;
+use io;
+use memio;
+use os;
+use strings;
+use time::date;
+use types;
+
+
+// XXX: would be nice to just declare this as mem: memio::stream
+// Heap-allocated stream shared by all tests. It is allocated lazily by d() and
+// released by the @fini hook freetdec().
+let mem: nullable *memio::stream = null;
+// NOTE(review): rbuf is not referenced by any of the tests visible here.
+let rbuf: [os::BUFSZ]u8 = [0...];
+
+// Test helper: wraps 'i' in a fixed memio stream and returns a DER decoder that
+// reads from it. The stream is stored in the global 'mem', which is allocated
+// on first use and overwritten on every later call, so decoders created by
+// earlier calls end up reading the most recent input.
+fn d(i: []u8) decoder = {
+	const stream = memio::fixed(i);
+	match (mem) {
+	case null =>
+		const fresh = alloc(stream);
+		mem = fresh;
+		return derdecoder(fresh);
+	case let shared: *memio::stream =>
+		*shared = stream;
+		return derdecoder(shared);
+	};
+};
+
+// Releases the shared test stream at program exit, if one was ever allocated.
+@fini fn freetdec() void = {
+	if (mem == null) {
+		return;
+	};
+	free(mem);
+	mem = null;
+};
+
+// Checks header tag parsing: the class bits, short form tag ids, long form tag
+// ids up to types::U32_MAX, and rejection of a long form tag that does not fit
+// into an u32.
+@test fn parsetag() void = {
+ assert((next(&d([0x02, 0x01]))!).class == class::UNIVERSAL);
+ assert((next(&d([0x02, 0x01]))!).tagid == 0x02);
+ assert((next(&d([0x1e, 0x01]))!).tagid == 0x1e);
+ assert((next(&d([0x1f, 0x7f, 0x01]))!).tagid == 0x7f);
+ assert((next(&d([0x1f, 0x81, 0x00, 0x01]))!).tagid == 0x80);
+
+ assert((next(&d([0x1f, 0x8f, 0xff, 0xff, 0xff, 0x7f, 0x01]))!).tagid
+ == types::U32_MAX);
+ assert(next(&d([0x1f, 0x90, 0x80, 0x80, 0x80, 0x00, 0x01])) is invalid);
+};
+
+// Checks length parsing: short form, long form, and rejection of non-minimal
+// and indefinite length encodings.
+@test fn parselen() void = {
+ assert(dsz(next(&d([0x02, 0x1]))!) == 1);
+ assert(dsz(next(&d([0x02, 0x7f]))!) == 127);
+ assert(dsz(next(&d([0x02, 0x81, 0x80]))!) == 128);
+
+ // must use minimal amount of bytes for length encoding
+ assert(next(&d([0x02, 0x81, 0x01, 0x01])) is invalid);
+ assert(next(&d([0x02, 0x81, 0x7f])) is invalid);
+ assert(next(&d([0x02, 0x82, 0x00, 0xff])) is invalid);
+
+ // indefinite form is not allowed in DER
+ assert(next(&d([0x02, 0x80, 0x01, 0x00, 0x00])) is invalid);
+};
+
+// Reading from an empty input must fail with badformat rather than succeed or
+// abort.
+@test fn emptydata() void = {
+ assert(read_bool(&d([])) is badformat);
+ assert(open_set(&d([])) is badformat);
+};
+
+// Exercises opening and closing of (nested) sequences, including the error
+// cases: opening a sequence where a primitive follows, closing a sequence that
+// has unread data and finishing the decoder while a sequence is still open.
+@test fn seq() void = {
+	let dat: [_]u8 = [
+		0x30, 0x0a, // seq
+		0x01, 0x01, 0xff, // bool true
+		0x30, 0x05, // seq
+		0x30, 0x03, // seq
+		0x01, 0x01, 0x00, // bool false
+	];
+
+	let dc = &d(dat);
+	open_seq(dc)!;
+	assert(read_bool(dc)! == true);
+	open_seq(dc)!;
+	open_seq(dc)!;
+	assert(read_bool(dc)! == false);
+	close_seq(dc)!;
+	close_seq(dc)!;
+	close_seq(dc)!;
+	finish(dc)!;
+
+	// next element is a bool, not a sequence
+	let dc = &d(dat);
+	open_seq(dc)!;
+	assert(open_seq(dc) is invalid);
+
+	// sequence content has not been read
+	let dc = &d(dat);
+	open_seq(dc)!;
+	assert(close_seq(dc) is badformat);
+
+	let dat: [_]u8 = [
+		0x30, 0x07, // seq
+		0x0c, 0x05, 0x65, 0x66, 0x67, 0xc3, 0x96, // utf8 string
+	];
+
+	let dc = &d(dat);
+	open_seq(dc)!;
+	let r = strreader(dc, utag::UTF8_STRING)!;
+	let s = io::drain(&r)!;
+	defer free(s);
+	assert(bytes::equal([0x65, 0x66, 0x67, 0xc3, 0x96], s));
+
+	// string is only partially read when the sequence gets closed
+	let dc = &d(dat);
+	let buf: [4]u8 = [0...];
+	open_seq(dc)!;
+	let r = strreader(dc, utag::UTF8_STRING)!;
+	assert(io::read(&r, buf)! == 3);
+	assert(close_seq(dc) is badformat);
+
+	// check unclosed
+	let dc = &d(dat);
+	open_seq(dc)!;
+	assert(finish(dc) is invalid);
+
+	let dc = &d(dat);
+	open_seq(dc)!;
+	let r = strreader(dc, utag::UTF8_STRING)!;
+	let s = io::drain(&r)!;
+	// the drained buffer is heap allocated and was leaked before
+	defer free(s);
+	assert(finish(dc) is invalid);
+};
+
+// An element whose declared size exceeds the end of the enclosing sequence
+// must be rejected.
+@test fn invalid_seq() void = {
+ let dat: [_]u8 = [
+ 0x30, 0x03, // seq containing data of size 3
+ 0x02, 0x03, 0x01, 0x02, 0x03, // int 0x010203 overflows seq
+ ];
+
+ let dc = &d(dat);
+ open_seq(dc)!;
+
+ let buf: [3]u8 = [0...];
+ assert(read_int(dc, buf) is invalid);
+};
+
+// Reads an IMPLICIT tagged bool via expect_implicit, then checks that the
+// implicit flag does not leak to the following element: reading the next
+// element (a bool) as an integer fails with badformat.
+@test fn read_implicit() void = {
+ let dat: [_]u8 = [
+ 0x30, 0x06, // seq
+ 0x85, 0x01, 0xff, // IMPLICIT bool true
+ 0x01, 0x01, 0x00, // bool false
+ ];
+
+ let dc = &d(dat);
+ open_seq(dc)!;
+ expect_implicit(dc, class::CONTEXT, 5)!;
+ assert(read_bool(dc)! == true);
+ assert(read_u16(dc) is badformat);
+};
+
+// Checks read_bool: the two valid encodings, a wrong data size, a non-canonical
+// true value, and mismatching class, construction bit and tag.
+@test fn read_bool() void = {
+ assert(read_bool(&d([0x01, 0x01, 0xff]))!);
+ assert(read_bool(&d([0x01, 0x01, 0x00]))! == false);
+ assert(read_bool(&d([0x01, 0x02, 0x00, 0x00])) is invalid);
+ // X.690, ch. 11.1
+ assert(read_bool(&d([0x01, 0x01, 0x01])) is invalid);
+
+ // invalid class
+ assert(read_bool(&d([0x81, 0x01, 0x01])) is badformat);
+ // must be primitive
+ assert(read_bool(&d([0x21, 0x01, 0x01])) is invalid);
+ // invalid tag
+ assert(read_bool(&d([0x02, 0x01, 0x01])) is badformat);
+};
+
+// Checks read_null: the only valid encoding is tag 0x05 with an empty body.
+@test fn read_null() void = {
+	read_null(&d([0x05, 0x00]))!;
+	// null must not carry data
+	assert(read_null(&d([0x05, 0x01, 0x00])) is invalid);
+	// invalid class
+	assert(read_null(&d([0x85, 0x00])) is badformat);
+	// invalid tag
+	assert(read_null(&d([0x01, 0x00])) is badformat);
+};
+
+// Checks read_int and the fixed width read_u{8,16,32,64} helpers: raw byte
+// output, rejection of empty and non-minimal encodings, rejection of negative
+// values by the unsigned readers, and the range limit of each width.
+@test fn read_int() void = {
+ let buf: [8]u8 = [0...];
+
+ assert(read_int(&d([0x02, 0x01, 0x01]), buf)! == 1);
+ assert(buf[0] == 0x01);
+ assert(read_int(&d([0x02, 0x01, 0x00]), buf)! == 1);
+ assert(buf[0] == 0x00);
+ assert(read_int(&d([0x02, 0x02, 0x01, 0x02]), buf)! == 2);
+ assert(buf[0] == 0x01);
+ assert(buf[1] == 0x02);
+
+ // must have at least one byte
+ assert(read_int(&d([0x02, 0x00]), buf) is invalid);
+ // non minimal
+ assert(read_int(&d([0x02, 0x02, 0x00, 0x01]), buf) is invalid);
+ assert(read_int(&d([0x02, 0x02, 0xff, 0x81]), buf) is invalid);
+
+ // values with the high bit set need a leading zero byte to be unsigned
+ assert(read_u8(&d([0x02, 0x01, 0x00]))! == 0);
+ assert(read_u8(&d([0x02, 0x01, 0x01]))! == 1);
+ assert(read_u8(&d([0x02, 0x01, 0x7f]))! == 0x7f);
+ assert(read_u8(&d([0x02, 0x01, 0x80])) is invalid);
+ assert(read_u8(&d([0x02, 0x01, 0x81])) is invalid);
+ assert(read_u8(&d([0x02, 0x02, 0x00, 0x80]))! == 0x80);
+ assert(read_u8(&d([0x02, 0x02, 0x00, 0xff]))! == 0xff);
+
+ assert(read_u16(&d([0x02, 0x01, 0x00]))! == 0);
+ assert(read_u16(&d([0x02, 0x02, 0x0f, 0xff]))! == 0xfff);
+ assert(read_u16(&d([0x02, 0x03, 0x00, 0xff, 0xff]))! == 0xffff);
+ assert(read_u16(&d([0x02, 0x03, 0x01, 0xff, 0xff])) is invalid);
+ assert(read_u32(&d([0x02, 0x03, 0x00, 0xff, 0xff]))! == 0xffff);
+
+ let maxu64: [_]u8 = [
+ 0x02, 0x09, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ ];
+ assert(read_u64(&d(maxu64))! == 0xffffffffffffffff);
+ // a non-zero ninth byte no longer fits into an u64
+ maxu64[2] = 0x01;
+ assert(read_u64(&d(maxu64)) is invalid);
+};
+
+// Checks read_bitstr and bitstr_isset: an empty bit string, a full byte, a bit
+// string with unused trailing bits, and rejection of non-zero unused bits.
+@test fn read_bitstr() void = {
+ let buf: [8]u8 = [0...];
+ let bs = read_bitstr(&d([0x03, 0x01, 0x00]), buf)!;
+ assert(len(bs.0) == 0 && bs.1 == 0);
+ assert(bitstr_isset(bs, 0)! == false);
+
+ let bs = read_bitstr(&d([0x03, 0x02, 0x00, 0xff]), buf)!;
+ assert(bytes::equal(bs.0, [0xff]) && bs.1 == 0);
+ assert(bitstr_isset(bs, 0)!);
+ assert(bitstr_isset(bs, 7)!);
+
+ // 0xab 0xc0 with 4 unused bits: positions 0..11 are addressable
+ let bs = read_bitstr(&d([0x03, 0x03, 0x04, 0xab, 0xc0]), buf)!;
+ assert(bytes::equal(bs.0, [0xab, 0xc0]) && bs.1 == 4);
+ assert(bitstr_isset(bs, 0)!);
+ assert(bitstr_isset(bs, 1)! == false);
+ assert(bitstr_isset(bs, 8)!);
+ assert(bitstr_isset(bs, 9)!);
+ assert(!bitstr_isset(bs, 11)!);
+ assert(bitstr_isset(bs, 12) is invalid);
+
+ // unused bits must be zero
+ assert(read_bitstr(&d([0x03, 0x03, 0x04, 0xab, 0xc1]), buf) is invalid);
+ assert(read_bitstr(&d([0x03, 0x03, 0x07, 0xab, 0x40]), buf) is invalid);
+};
+
+// Reads the same encoded object identifier once through read_oid, comparing
+// against the database entry, and once through read_rawoid, comparing the raw
+// bytes.
+@test fn read_oid() void = {
+ assert(read_oid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))! == oid::ID_AT_COMMON_NAME);
+
+ assert(bytes::equal([0x55, 0x04, 0x03],
+ read_rawoid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))!));
+};
+
+// Scratch buffer backing the slices returned by newdatetime.
+let datbuf: [64]u8 = [0...];
+
+// Test helper: builds an encoded element consisting of the tag byte 'tag', a
+// one byte length and the bytes of 's'. The result aliases 'datbuf' and is
+// overwritten by the next call. The length is narrowed to u8 and 's' plus the
+// two header bytes must fit into 'datbuf'.
+fn newdatetime(s: str, tag: utag) []u8 = {
+ let datetime = strings::toutf8(s);
+ let datsz = len(datetime): u8;
+ datbuf[..2] = [tag, datsz];
+ datbuf[2..2 + datsz] = datetime;
+ return datbuf[..2 + datsz];
+};
+
+// Checks read_utctime: the century selection driven by 'maxyear' and rejection
+// of malformed values.
+@test fn read_utctime() void = {
+ let derdatetime = newdatetime("231030133710Z", utag::UTC_TIME);
+ // year 23 <= 46, hence 2023
+ let dt = read_utctime(&d(derdatetime), 2046)!;
+
+ let fbuf: [24]u8 = [0...];
+ assert(date::bsformat(fbuf, date::RFC3339, &dt)!
+ == "2023-10-30T13:37:10+0000");
+
+ // year 23 > 20, hence 1923
+ let dt = read_utctime(&d(derdatetime), 2020)!;
+ assert(date::bsformat(fbuf, date::RFC3339, &dt)!
+ == "1923-10-30T13:37:10+0000");
+
+ // does not end with Z
+ let derdatetime = newdatetime("2310301337100", utag::UTC_TIME);
+ assert(read_utctime(&d(derdatetime), 2020) is error);
+
+ // too short
+ let derdatetime = newdatetime("231030133710", utag::UTC_TIME);
+ assert(read_utctime(&d(derdatetime), 2020) is error);
+
+ // non-digit in the minutes
+ let derdatetime = newdatetime("231030133a10Z", utag::UTC_TIME);
+ assert(read_utctime(&d(derdatetime), 2020) is error);
+
+ // month 13
+ let derdatetime = newdatetime("231330133710Z", utag::UTC_TIME);
+ assert(read_utctime(&d(derdatetime), 2020) is error);
+};
+
+// Checks read_gtime: values with and without fractional seconds, and rejection
+// of values that lack the zone, lack seconds, or carry malformed fractional
+// seconds.
+@test fn read_gtime() void = {
+ let derdatetime = newdatetime("20231030133710Z", utag::GENERALIZED_TIME);
+
+ let dt = read_gtime(&d(derdatetime))!;
+
+ let fbuf: [32]u8 = [0...];
+ assert(date::bsformat(fbuf, date::RFC3339, &dt)!
+ == "2023-10-30T13:37:10+0000");
+
+ let derdatetime = newdatetime("20231030133710.1Z", utag::GENERALIZED_TIME);
+ let dt = read_gtime(&d(derdatetime))!;
+ assert(date::bsformat(fbuf, date::STAMP_NANO, &dt)!
+ == "2023-10-30 13:37:10.100000000");
+
+ // must end with Z
+ let derdatetime = newdatetime("20231030133710", utag::GENERALIZED_TIME);
+ assert(read_gtime(&d(derdatetime)) is error);
+ let derdatetime = newdatetime("202310301337100", utag::GENERALIZED_TIME);
+ assert(read_gtime(&d(derdatetime)) is error);
+
+ // seconds must always be present
+ let derdatetime = newdatetime("202310301337", utag::GENERALIZED_TIME);
+ assert(read_gtime(&d(derdatetime)) is error);
+ let derdatetime = newdatetime("202310301337Z", utag::GENERALIZED_TIME);
+ assert(read_gtime(&d(derdatetime)) is error);
+
+ // fractional seconds must not end with 0 and must be omitted if 0
+ let derdatetime = newdatetime("20231030133710.", utag::GENERALIZED_TIME);
+ assert(read_gtime(&d(derdatetime)) is error);
+
+ let derdatetime = newdatetime("20231030133710.Z", utag::GENERALIZED_TIME);
+ assert(read_gtime(&d(derdatetime)) is error);
+
+ let derdatetime = newdatetime("20231030133710.0", utag::GENERALIZED_TIME);
+ assert(read_gtime(&d(derdatetime)) is error);
+
+ let derdatetime = newdatetime("20231030133710.0Z", utag::GENERALIZED_TIME);
+ assert(read_gtime(&d(derdatetime)) is error);
+
+ let derdatetime = newdatetime("20231030133710.10Z", utag::GENERALIZED_TIME);
+ assert(read_gtime(&d(derdatetime)) is error);
+
+ // TODO midnight is YYYYMMDD000000Z
+};
diff --git a/encoding/asn1/decoder.ha b/encoding/asn1/decoder.ha
new file mode 100644
index 00000000..78c01ca9
--- /dev/null
+++ b/encoding/asn1/decoder.ha
@@ -0,0 +1,810 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use bufio;
+use bytes;
+use errors;
+use io;
+use math::{bit_size_u8};
+use os;
+use strings;
+use time::date;
+use types;
+
+
+// Mask selecting the tag id bits of the first header byte. A masked value equal
+// to TAGMASK signals that the tag id follows in long form.
+def TAGMASK: u8 = 0x1f;
+// Maximum number of constructed values that may be open at the same time.
+def MAX_CONS_DEPTH: size = 32;
+
+// Each DER entry starts with a header that describes the content.
+export type head = struct {
+
+ // Tells whether the data is constructed and encapsulates multiple
+ // other data fields; or primitive and the value follows.
+ cons: bool,
+
+ // Class info
+ class: class,
+
+ // Tag id of the data
+ tagid: u32,
+
+ // Start position in stream
+ start: size,
+
+ // Start position of data in stream
+ data: size,
+
+ // End position in stream
+ end: size,
+
+ // Set if the entry was announced using [[expect_implicit]]. Class and
+ // tag id are not compared for such entries.
+ implicit: bool,
+};
+
+// Stream position directly after the last data byte of 'd'.
+fn head_endpos(d: head) size = {
+	return d.end;
+};
+
+// Total size of the element described by 'd', i.e. header plus data.
+export fn sz(d: head) size = {
+	return d.end - d.start;
+};
+
+// Size of the data of the element described by 'd', without its header.
+export fn dsz(d: head) size = {
+	return d.end - d.data;
+};
+
+// State of a DER decoder. Create one with [[derdecoder]].
+export type decoder = struct {
+ // Source the encoded data is read from.
+ src: io::handle,
+ // Number of bytes consumed from 'src' so far.
+ pos: size,
+ // Headers of the currently open constructed values, innermost last.
+ cstack: [MAX_CONS_DEPTH]head,
+ // Number of used entries in 'cstack'.
+ cstackp: size,
+ // Header that has been peeked but not yet consumed.
+ next: (void | head),
+ // Header of the element that is currently being read.
+ cur: (void | head),
+ // Bytes pushed back by dataunread. They are returned first by the next
+ // dataread.
+ unbuf: [3]u8,
+ // Number of valid bytes in 'unbuf'.
+ unbufn: u8,
+ // Set by [[expect_implicit]]; transferred to the next header that is
+ // fetched and then cleared.
+ implicit: bool,
+};
+
+// Creates a new DER decoder that reads from 'src'. The decoder issues many
+// short reads, so passing a buffered stream is recommended.
+//
+// Each entry must be read to its end before the next one is attended to.
+// [[finish]] must be called at the end to make sure everything has been read.
+export fn derdecoder(src: io::handle) decoder = {
+	// The tagged unions are set explicitly; all remaining fields (position,
+	// stack pointer, pushback buffer, implicit flag) start out zeroed.
+	let dec = decoder {
+		src = src,
+		next = void,
+		cur = void,
+		...
+	};
+	return dec;
+};
+
+// Verifies that decoding is complete: no constructed value is left open, no
+// peeked header is pending and the last element has been read to its end.
+// Returns [[invalid]] otherwise.
+export fn finish(d: *decoder) (void | error) = {
+	if (d.cstackp != 0) {
+		return invalid;
+	};
+	if (d.next is head) {
+		return invalid;
+	};
+
+	match (d.cur) {
+	case let last: head =>
+		if (last.end != d.pos) {
+			return invalid;
+		};
+	case void =>
+		return;
+	};
+};
+
+// Returns the header of the innermost open constructed value, or void if none
+// is open.
+fn curcons(d: *decoder) (void | head) = {
+	if (d.cstackp > 0) {
+		return d.cstack[d.cstackp - 1];
+	};
+	return;
+};
+
+// Peeks the header of the next data field without consuming it. Fails with
+// [[badformat]] if no data follows.
+export fn peek(d: *decoder) (head | error) = {
+	match (trypeek(d)?) {
+	case let upcoming: head =>
+		return upcoming;
+	case io::EOF =>
+		return badformat;
+	};
+};
+
+// Tries to peek the header of the next data field. Returns io::EOF if the
+// current constructed value or the input has no further element. Running out
+// of input while a constructed value is open, or while pushed back bytes are
+// pending, is reported as [[badformat]].
+export fn trypeek(d: *decoder) (head | error | io::EOF) = {
+	if (d.next is head) {
+		// already peeked
+		return d.next: head;
+	};
+
+	if (is_endofcons(d)) {
+		return io::EOF;
+	};
+
+	match (parse_header(d)?) {
+	case let parsed: head =>
+		d.next = parsed;
+		return parsed;
+	case io::EOF =>
+		if (d.cstackp != 0 || d.unbufn > 0) {
+			return badformat;
+		};
+		return io::EOF;
+	};
+};
+
+// Tells whether a constructed value is open and the read position has reached
+// its end.
+fn is_endofcons(d: *decoder) bool = {
+	if (d.cstackp == 0) {
+		return false;
+	};
+	return d.pos == d.cstack[d.cstackp - 1].end;
+};
+
+// Advances to the next data element and returns its header. Fails with
+// [[badformat]] if there is no further element.
+fn next(d: *decoder) (head | error) = {
+	match (trynext(d)?) {
+	case let hdr: head =>
+		return hdr;
+	case io::EOF =>
+		return badformat;
+	};
+};
+
+// Advances to the next data element and makes it the current one. A header
+// that has been peeked before is consumed first; otherwise a new header is
+// parsed from the source. Returns io::EOF if no further element exists. The
+// returned header carries the pending implicit flag, which is cleared
+// afterwards.
+fn trynext(d: *decoder) (head | error | io::EOF) = {
+	let dh = if (d.next is head) {
+		const pending = d.next: head;
+		d.next = void;
+		yield pending;
+	} else {
+		if (is_endofcons(d)) {
+			return io::EOF;
+		};
+		const parsed = match (parse_header(d)?) {
+		case io::EOF =>
+			return io::EOF;
+		case let h: head =>
+			yield h;
+		};
+		yield parsed;
+	};
+
+	d.cur = dh;
+	dh.implicit = d.implicit;
+	d.implicit = false;
+	return dh;
+};
+
+// Parses the header (identifier and length bytes) of the next element at the
+// current position. Returns io::EOF and resets the current element if the
+// source is exhausted before the first header byte. Returns [[invalid]] if the
+// read position already is at the end of the open constructed value, or if the
+// element would exceed it.
+fn parse_header(d: *decoder) (head | error | io::EOF) = {
+	// Elements must not exceed the enclosing constructed value. Without an
+	// open one, the size is only bound by the address space.
+	const consend = match (curcons(d)) {
+	case void =>
+		yield types::SIZE_MAX;
+	case let h: head =>
+		yield h.end;
+	};
+
+	if (d.pos == consend) return invalid;
+
+	const epos = d.pos;
+	const id = match (tryscan_byte(d)?) {
+	case io::EOF =>
+		d.cur = void;
+		return io::EOF;
+	case let id: u8 =>
+		yield id;
+	};
+
+	// The two most significant bits encode the class.
+	const class = ((id & 0xc0) >> 6): class;
+
+	let tagid: u32 = id & TAGMASK;
+	if (tagid == TAGMASK) {
+		tagid = parse_longtag(d, consend - d.pos)?;
+	};
+	const l = parse_len(d, consend - d.pos)?;
+	const hl = d.pos - epos;
+
+	// The header parsers are limited to the bytes left in the constructed
+	// value, hence d.pos <= consend holds and the subtraction is safe.
+	// Comparing the declared length against the remaining space, instead
+	// of computing the end position first, keeps a huge length from
+	// wrapping around and passing the bounds check.
+	if (l > consend - d.pos) return invalid;
+	const end = d.pos + l;
+
+	return head {
+		class = class,
+		// bit 5 marks constructed values
+		cons = ((id >> 5) & 1) == 1,
+		tagid = tagid,
+		start = epos,
+		data = epos + hl,
+		end = end,
+		implicit = d.implicit,
+		...
+	};
+};
+
+// Reads a single byte from the source and advances the position. Returns
+// io::EOF if the source is exhausted.
+fn tryscan_byte(d: *decoder) (u8 | io::EOF | error) = {
+	let b: [1]u8 = [0];
+	if (io::readall(d.src, b)? is io::EOF) {
+		return io::EOF;
+	};
+	d.pos += 1;
+	return b[0];
+};
+
+// Like [[tryscan_byte]], but reports an exhausted source as [[truncated]].
+fn scan_byte(d: *decoder) (u8 | error) = {
+	match (tryscan_byte(d)?) {
+	case let byte: u8 =>
+		return byte;
+	case io::EOF =>
+		return truncated;
+	};
+};
+
+// Reads data of the current entry into 'buf' and advances the position. The
+// entry must have been opened using [[next]] or [[trynext]]. Bytes pushed back
+// by [[dataunread]] are returned first. io::EOF is returned once all data of
+// the entry has been read. If the source ends before the entry does, a wrapped
+// [[truncated]] error is returned. Reading into an empty buffer returns 0.
+fn dataread(d: *decoder, buf: []u8) (size | io::EOF | io::error) = {
+	let cur = match (d.cur) {
+	case void =>
+		abort("primitive must be opened with [[next]] or [[trynext]]");
+	case let dh: head =>
+		yield dh;
+	};
+
+	// d.pos only counts bytes taken from the source. Pushed back bytes
+	// have already been counted and are tracked in d.unbufn.
+	const streamleft = head_endpos(cur) - d.pos;
+	if (streamleft == 0 && d.unbufn == 0) {
+		return io::EOF;
+	};
+
+	let n = 0z;
+	if (d.unbufn > 0) {
+		const take = if (d.unbufn > len(buf)) len(buf): u8 else d.unbufn;
+		buf[..take] = d.unbuf[..take];
+		// Move bytes that did not fit into 'buf' to the front, so that
+		// the next read continues with them.
+		d.unbuf[..d.unbufn - take] = d.unbuf[take..d.unbufn];
+		d.unbufn -= take;
+		n += take;
+	};
+
+	// Never request more from the source than what is left of the entry.
+	const want = if (streamleft < len(buf) - n) streamleft
+		else len(buf) - n;
+	if (want == 0) {
+		// 'buf' is full or the rest of the entry came from the pushback
+		// buffer. Reading zero bytes from the source may be mistaken
+		// for EOF.
+		return n;
+	};
+
+	match (io::read(d.src, buf[n..n + want])?) {
+	case io::EOF =>
+		// there should be data left
+		return wrap_err(truncated);
+	case let sz: size =>
+		d.pos += sz;
+		return n + sz;
+	};
+};
+
+// Pushes 'buf' back, so that the next [[dataread]] returns these bytes first.
+// Used to unread incomplete utf8 runes. The pushback buffer must have enough
+// room left for 'buf'.
+fn dataunread(d: *decoder, buf: []u8) void = {
+	const n = len(buf);
+	assert(n + d.unbufn <= len(d.unbuf));
+
+	const start = d.unbufn;
+	d.unbuf[start..start + n] = buf;
+	d.unbufn += n: u8;
+};
+
+// Tells whether all data of the current entry has been consumed. That is the
+// case if the source position has reached the end of the entry and no pushed
+// back bytes are pending. Returns true if there is no current entry.
+fn dataeof(d: *decoder) bool = {
+	match (d.cur) {
+	case void =>
+		return true;
+	case let h: head =>
+		// d.pos already includes pushed back bytes, since they have
+		// been read from the source before. They must not be added to
+		// the position again.
+		return d.unbufn == 0 && d.pos == head_endpos(h);
+	};
+};
+
+// Parses a tag id stored in long form: a sequence of bytes carrying 7 bits
+// each, most significant part first, where the highest bit of a byte signals
+// that another byte follows. At most 'max' bytes are read. Returns [[invalid]]
+// if the tag id does not fit into an u32, if the first part is zero or if the
+// tag id is not terminated within 'max' bytes.
+fn parse_longtag(p: *decoder, max: size) (u32 | error) = {
+ // XXX: u32 too much?
+ let tag: u32 = 0;
+ let maxbits = size(u32) * 8;
+ let nbits = 0z;
+
+ for (let i = 0z; i < max; i += 1) {
+ let b = scan_byte(p)?;
+ const part = b & 0x7f;
+
+ // The first part only counts with its significant bits, every
+ // following part with all of its 7 bits.
+ nbits += if (tag == 0) bit_size_u8(part) else 7;
+ if (nbits > maxbits) {
+ // overflows u32
+ return invalid;
+ };
+
+ tag = (tag << 7) + part;
+ if (tag == 0) {
+ // first tag part must not be 0
+ return invalid;
+ };
+
+ // last byte of the tag id
+ if ((b >> 7) == 0) {
+ return tag;
+ };
+ };
+ return invalid; // max has been reached
+};
+
+// Parses the length field of a header. At most 'max' bytes are read. The short
+// form stores the length in a single byte with the highest bit cleared. In the
+// long form the lower 7 bits of the first byte tell how many length bytes
+// follow, most significant first. Returns [[invalid]] for a first byte of
+// 0xff, the indefinite form, lengths that do not fit into size, lengths
+// exceeding 'max' bytes and lengths not encoded with the minimal number of
+// bytes.
+fn parse_len(p: *decoder, max: size) (size | error) = {
+ if (max == 0) return invalid;
+
+ const b = scan_byte(p)?;
+ if (b == 0xff) {
+ return invalid;
+ };
+ if (b >> 7 == 0) {
+ // short form
+ return b: size;
+ };
+
+ let l = 0z;
+ const n = b & 0x7f;
+ if (n == 0) {
+ // Indefinite encoding is not supported in DER.
+ return invalid;
+ };
+
+ if (n > size(size)) {
+ // would cause a size overflow
+ return invalid;
+ };
+
+ // the first byte plus n length bytes must fit into max
+ if (n + 1 > max) return invalid;
+
+ for (let i = 0z; i < n; i += 1) {
+ const b = scan_byte(p)?;
+ l = (l << 8) + b;
+ if (l == 0) {
+ // Leading zeroes means minimum number of bytes for
+ // length encoding has not been used.
+ return invalid;
+ };
+ };
+
+ if (l <= 0x7f) {
+ // Could've used short form.
+ return invalid;
+ };
+
+ return l;
+};
+
+// Expects an IMPLICIT defined data field of class 'c' and tag 'tag'. If the
+// next field matches, a read function (read_{*} or {*}reader) must follow,
+// which defines and reads the actual data as it is stored.
+export fn expect_implicit(d: *decoder, c: class, tag: u32) (void | error) = {
+	const upcoming = peek(d)?;
+	expect_tag(upcoming, c, tag)?;
+	d.implicit = true;
+};
+
+// Opens an EXPLICIT encoded field of given class 'c' and 'tag'. The field must
+// be a constructed value. The user must call [[close_explicit]] after the
+// contained data has been read.
+export fn open_explicit(d: *decoder, c: class, tag: u32) (void | error) =
+ open_cons(d, c, tag);
+
+// Closes an EXPLICIT encoded field. [[badformat]] is returned if not all of
+// its data has been read.
+export fn close_explicit(d: *decoder) (void | badformat) = close_cons(d);
+
+
+// Opens a constructed value of given 'class' and 'tagid'. Fails with
+// [[invalid]] if the next element is not a constructed value, with the error
+// of the tag check if it has an unexpected tag, and with [[badformat]] if
+// MAX_CONS_DEPTH constructed values are already open.
+fn open_cons(d: *decoder, class: class, tagid: u32) (void | error) = {
+	const hdr = next(d)?;
+	if (!hdr.cons) {
+		return invalid;
+	};
+	expect_tag(hdr, class, tagid)?;
+
+	if (d.cstackp == len(d.cstack)) {
+		// nested too deep
+		return badformat;
+	};
+
+	d.cstack[d.cstackp] = hdr;
+	d.cstackp += 1;
+};
+
+// Closes the current constructed value. [[badformat]] is returned if not all
+// of its data has been read. Aborts if no constructed value is open.
+fn close_cons(d: *decoder) (void | badformat) = {
+	if (d.implicit) {
+		// a data field marked implicit has not been read
+		return badformat;
+	};
+
+	if (d.cstackp == 0) {
+		abort("No constructed value open");
+	};
+
+	// All data must have been read before closing, including bytes that
+	// have been pushed back.
+	const open = d.cstack[d.cstackp - 1];
+	if (d.pos != open.end || d.unbufn > 0) {
+		return badformat;
+	};
+
+	d.cstackp -= 1;
+};
+
+// Opens a sequence. The next element must be a constructed value with the
+// universal SEQUENCE tag. [[close_seq]] must be called after its content has
+// been read.
+export fn open_seq(d: *decoder) (void | error) =
+ open_cons(d, class::UNIVERSAL, utag::SEQUENCE: u32)?;
+
+// Closes the current sequence. [[badformat]] is returned if not all of its
+// data has been read.
+export fn close_seq(d: *decoder) (void | badformat) = close_cons(d);
+
+// Opens a set. The next element must be a constructed value with the universal
+// SET tag. Though a set must be sorted according to DER, the order will not be
+// validated. [[close_set]] must be called after its content has been read.
+export fn open_set(d: *decoder) (void | error) =
+ open_cons(d, class::UNIVERSAL, utag::SET: u32)?;
+
+// Closes the current set. [[badformat]] is returned if not all of its data has
+// been read.
+export fn close_set(d: *decoder) (void | badformat) = close_cons(d);
+
+// Checks that 'h' describes an element of given 'class' and 'tagid'. Universal
+// sequences and sets must be constructed, otherwise [[invalid]] is returned.
+// Class and tag id are not compared for implicit elements. A mismatching class
+// or tag id yields [[badformat]].
+fn expect_tag(h: head, class: class, tagid: u32) (void | invalid | badformat) = {
+	const mustcons = class == class::UNIVERSAL
+		&& (tagid == utag::SEQUENCE || tagid == utag::SET);
+	if (mustcons && !h.cons) {
+		return invalid;
+	};
+
+	if (h.implicit) {
+		return;
+	};
+
+	if (h.class != class || h.tagid != tagid) {
+		return badformat;
+	};
+};
+
+// Checks that 'dh' describes an element of the universal class with given
+// 'tag'. See expect_tag for the returned errors.
+fn expect_utag(dh: head, tag: utag) (void | invalid | badformat) =
+ expect_tag(dh, class::UNIVERSAL, tag: u32);
+
+// Reads the data of the current entry into 'buf' with a single read and
+// returns the number of bytes read. An entry without data yields 0.
+// [[badformat]] is returned if the entry has data left afterwards, for example
+// because 'buf' is too small. Errors of the underlying read are propagated.
+fn read_bytes(d: *decoder, buf: []u8) (size | error) = {
+	match (dataread(d, buf)?) {
+	case io::EOF =>
+		return 0z;
+	case let n: size =>
+		if (!dataeof(d)) {
+			return badformat;
+		};
+		return n;
+	};
+};
+
+// Like read_bytes, but the data of the entry must fill 'buf' exactly.
+// [[badformat]] is returned otherwise.
+fn read_nbytes(d: *decoder, buf: []u8) (size | error) = {
+	const got = read_bytes(d, buf)?;
+	if (got == len(buf)) {
+		return got;
+	};
+	return badformat;
+};
+
+// Reads a boolean. The value must be encoded in exactly one byte that is
+// either 0x00 (false) or 0xff (true); anything else is [[invalid]].
+export fn read_bool(d: *decoder) (bool | error) = {
+	const hdr = next(d)?;
+	expect_utag(hdr, utag::BOOLEAN)?;
+	if (dsz(hdr) != 1) {
+		return invalid;
+	};
+
+	switch (scan_byte(d)?) {
+	case 0x00 =>
+		return false;
+	case 0xff =>
+		return true;
+	case =>
+		return invalid;
+	};
+};
+
+// Validates the first bytes of an encoded integer. An integer must have at
+// least one byte and must be encoded using the minimal number of bytes
+// possible, as defined in X.690 s8.3.2: the first nine bits must neither be
+// all zero nor all one.
+fn validate_intprefix(i: []u8) (void | error) = {
+	if (len(i) == 0) {
+		return invalid;
+	};
+	if (len(i) == 1) {
+		return;
+	};
+
+	const padzero = i[0] == 0x00 && i[1] >> 7 == 0;
+	const padone = i[0] == 0xff && i[1] >> 7 == 1;
+	if (padzero || padone) {
+		return invalid;
+	};
+};
+
+// Reads an integer into 'buf' and returns its size in bytes. Fails if the
+// integer exceeds the buffer size. The integer is stored in big endian format;
+// negative values are stored as two's complement. The minimum integer size is
+// one byte. 'buf' must not be empty.
+export fn read_int(d: *decoder, buf: []u8) (size | error) = {
+	assert(len(buf) > 0);
+
+	const hdr = next(d)?;
+	expect_utag(hdr, utag::INTEGER)?;
+
+	const nread = read_bytes(d, buf)?;
+	validate_intprefix(buf[..nread])?;
+	return nread;
+};
+
+// Similar to [[read_int]], but fails if it's not an unsigned integer. Will
+// left trim 0 bytes.
+//
+// [[badformat]] is returned if the sign bit is set. A single leading zero byte
+// is removed by shifting the value to the front of 'buf'. Note that the value
+// zero, which is encoded as one zero byte, is thereby returned with a size
+// of 0.
+export fn read_uint(d: *decoder, buf: []u8) (size | error) = {
+ let s = read_int(d, buf)?;
+ if (buf[0] & 0x80 == 0x80) {
+ return badformat;
+ };
+ if (buf[0] == 0) {
+ // drop the leading zero byte
+ buf[..s-1] = buf[1..s];
+ s -= 1;
+ };
+ return s;
+};
+
+// Reads an unsigned integer that must fit into 'x' bytes, with 'x' being at
+// most 8. One more byte is accepted on input, since values with the highest
+// bit set are encoded with a leading zero byte. Negative values and values
+// that do not fit are [[invalid]].
+fn read_ux(d: *decoder, x: u8) (u64 | error) = {
+	assert(x <= 8);
+	let raw: [9]u8 = [0...];
+	const nread = read_int(d, raw[..x+1])?;
+
+	if (raw[0] & 0x80 != 0) {
+		// sign bit is set
+		return invalid;
+	};
+
+	// skip the leading zero byte, if there is one
+	const first = if (raw[0] == 0x00) 1u8 else 0u8;
+	if (nread - first > x) {
+		return invalid;
+	};
+
+	let val = 0u64;
+	for (let i = first; i < nread; i += 1) {
+		val = (val << 8) + raw[i];
+	};
+	return val;
+};
+
+// Reads an integer that is expected to fit into u8. Negative values and values
+// that do not fit are [[invalid]].
+export fn read_u8(d: *decoder) (u8 | error) = read_ux(d, 1)?: u8;
+
+// Reads an integer that is expected to fit into u16. Negative values and values
+// that do not fit are [[invalid]].
+export fn read_u16(d: *decoder) (u16 | error) = read_ux(d, 2)?: u16;
+
+// Reads an integer that is expected to fit into u32. Negative values and values
+// that do not fit are [[invalid]].
+export fn read_u32(d: *decoder) (u32 | error) = read_ux(d, 4)?: u32;
+
+// Reads an integer that is expected to fit into u64. Negative values and values
+// that do not fit are [[invalid]].
+export fn read_u64(d: *decoder) (u64 | error) = read_ux(d, 8)?;
+
+// Reads a bitstring value into 'buf'. The result tuple contains the bitstring,
+// which is a slice of 'buf', and the number of unused bits in the last byte.
+// The [[bitstr_isset]] function may be used to check for set bits.
+//
+// [[invalid]] is returned if the number of unused bits is missing or greater
+// than 7, if unused bits are not zero, or if an empty bitstring declares
+// unused bits.
+export fn read_bitstr(d: *decoder, buf: []u8) (([]u8, u8) | error) = {
+	let dh = next(d)?;
+	expect_utag(dh, utag::BITSTRING)?;
+
+	// The first data byte holds the number of unused bits.
+	let unused: [1]u8 = [0...];
+	match (dataread(d, unused)?) {
+	case io::EOF =>
+		return invalid;
+	case let n: size =>
+		if (n != 1) {
+			return invalid;
+		};
+	};
+	const unused = unused[0];
+	if (unused > 7) {
+		return invalid;
+	};
+
+	const n = read_bytes(d, buf)?;
+	if (n == 0 && unused != 0) {
+		// An empty bitstring has no bits that could be unused, see
+		// X.690 s8.6.2.3
+		return invalid;
+	};
+
+	const mask = (1 << unused) - 1;
+	if (n > 0 && buf[n-1] & mask != 0) {
+		// unused bits must be zero
+		return invalid;
+	};
+	return (buf[..n], unused);
+};
+
+// Checks whether bit at 'pos' is set in given bitstring. 'pos' starts from 0,
+// which is the highest order bit in the first byte.
+//
+// Positions beyond the last byte are reported as not set. [[invalid]] is
+// returned for positions that address one of the unused bits of the last byte.
+export fn bitstr_isset(bitstr: ([]u8, u8), pos: size) (bool | invalid) = {
+ // index of the byte holding the bit
+ const i = pos / 8;
+ if (i >= len(bitstr.0)) {
+ return false;
+ };
+ let b = bitstr.0[i];
+
+ // bit position within the byte, counted from the highest order bit
+ const j = pos - i * 8;
+ if (i == len(bitstr.0) - 1 && j >= (8 - bitstr.1)) {
+ return invalid;
+ };
+ const mask = (1 << (7 - j));
+ return mask & b == mask;
+};
+
+// Returns an [[io::reader]] for the data of the octet string that follows.
+// TODO add limit?
+export fn octetstrreader(d: *decoder) (bytestream | error) = {
+	const hdr = next(d)?;
+	expect_utag(hdr, utag::OCTET_STRING)?;
+	return newbytereader(d);
+};
+
+// Reads an octet string into 'buf' and returns its size. Fails if 'buf' is too
+// small. 'buf' must not be empty.
+export fn read_octetstr(d: *decoder, buf: []u8) (size | error) = {
+	assert(len(buf) > 0);
+
+	const hdr = next(d)?;
+	expect_utag(hdr, utag::OCTET_STRING)?;
+	return read_bytes(d, buf);
+};
+
+// Reads a null entry. A null entry must not carry any data, otherwise
+// [[invalid]] is returned.
+export fn read_null(d: *decoder) (void | error) = {
+	const hdr = next(d)?;
+	expect_utag(hdr, utag::NULL)?;
+	if (dsz(hdr) > 0) {
+		return invalid;
+	};
+};
+
+// An [[io::stream]] that reads the data of the current entry of a decoder.
+export type bytestream = struct {
+ stream: io::stream,
+ // decoder the data is read from
+ d: *decoder,
+};
+
+// Creates a bytestream reading the data of the current entry of 'd'.
+fn newbytereader(d: *decoder) bytestream = bytestream {
+	stream = &bytestream_vtable,
+	d = d,
+	...
+};
+
+// Stream operations of [[bytestream]]. Only reading is provided.
+const bytestream_vtable: io::vtable = io::vtable {
+ reader = &bytestream_reader,
+ ...
+};
+
+// Read implementation of [[bytestream]]: forwards to dataread of the decoder
+// the stream belongs to.
+fn bytestream_reader(s: *io::stream, buf: []u8) (size | io::EOF | io::error) =
+ dataread((s: *bytestream).d, buf);
+
+// Returns an [[io::reader]] that allows reading the raw data of the next entry
+// in its encoded form. The entry must be of class 'c' and have the tag
+// 'tagid'. Note that this reader won't do any kind of validation of the data.
+export fn bytereader(d: *decoder, c: class, tagid: u32) (bytestream | error) = {
+	const hdr = next(d)?;
+	expect_tag(hdr, c, tagid)?;
+	return newbytereader(d);
+};
+
+// Reads an UTC time. Since the stored date only has a two digit year, 'maxyear'
+// is required to define the epoch switch. For example 'maxyear' = 2046 causes
+// all encoded years <= 46 to be after 2000 and all values > 46 will have 1900
+// as the century. 'maxyear' must be greater than 100.
+//
+// The time must be encoded as YYMMDDhhmmssZ. [[invalid]] is returned if it is
+// not.
+export fn read_utctime(d: *decoder, maxyear: u16) (date::date | error) = {
+	assert(maxyear > 100);
+
+	let dh = next(d)?;
+	expect_utag(dh, utag::UTC_TIME)?;
+
+	let time: [13]u8 = [0...];
+	read_nbytes(d, time[..])?;
+
+	if (time[len(time)-1] != 'Z') {
+		return invalid;
+	};
+
+	// The year is converted by hand. Make sure both characters are ASCII
+	// digits, otherwise the subtraction below wraps around and yields a
+	// bogus year.
+	for (let i = 0z; i < 2; i += 1) {
+		if (time[i] < 0x30 || time[i] > 0x39) {
+			return invalid;
+		};
+	};
+
+	let year: u16 = (time[0] - 0x30): u16 * 10 + (time[1] - 0x30): u16;
+	let cent = maxyear - (maxyear % 100);
+	if (year > maxyear % 100) {
+		cent -= 100;
+	};
+
+	let v = date::newvirtual();
+	v.year = (year + cent): int;
+	v.zoff = 0;
+	v.nanosecond = 0;
+
+	// The data is untrusted. Invalid UTF-8 is a decoding error and must
+	// not abort the program.
+	let datestr = match (strings::fromutf8(time[2..])) {
+	case let s: str =>
+		yield s;
+	case =>
+		return invalid;
+	};
+	if (!(date::parse(&v, "%m%d%H%M%S%Z", datestr) is void)) {
+		return invalid;
+	};
+
+	let dt = match (date::realize(v)) {
+	case let dt: date::date =>
+		yield dt;
+	case let e: (date::insufficient | date::invalid) =>
+		return invalid;
+	};
+
+	return dt;
+};
+
+// Reads a generalized datetime. The time must be encoded as YYYYMMDDhhmmss,
+// optionally followed by '.' and fractional seconds without trailing zeroes,
+// and must end with the zone 'Z'. [[invalid]] is returned if it is not.
+export fn read_gtime(d: *decoder) (date::date | error) = {
+	let dh = next(d)?;
+	expect_utag(dh, utag::GENERALIZED_TIME)?;
+
+	// The date begins with the encoded datetime
+	def DATESZ = 14z;
+	// followed by optional fractional seconds separated by '.'
+	def NANOSZ = 10z;
+	def NANOSEPPOS = 14;
+	// and ends with the zone info 'Z'
+	def ZONESZ = 1z;
+
+	let time: [DATESZ + NANOSZ + ZONESZ]u8 = [0...];
+	let n = read_bytes(d, time[..])?;
+
+	// Zone info and seconds must always be present. The length must be
+	// checked first, because n may be 0 and time[n-1] would then be out
+	// of bounds.
+	if (n < DATESZ + ZONESZ || time[n-1] != 'Z') {
+		return invalid;
+	};
+
+	// validate fractional seconds
+	if (n > DATESZ + ZONESZ) {
+		// fractional seconds must not be empty
+		if (time[NANOSEPPOS] != '.' || n == DATESZ + ZONESZ + 1) {
+			return invalid;
+		};
+		// fractional seconds must not end with 0 and must be > 0
+		if (time[n-2] == '0') return invalid;
+	};
+
+	// right pad fractional seconds to make them valid nanoseconds
+	time[n-1..] = ['0'...];
+	time[NANOSEPPOS] = '.';
+
+	// The data is untrusted. Invalid UTF-8 is a decoding error and must
+	// not abort the program.
+	const timestr = match (strings::fromutf8(time)) {
+	case let s: str =>
+		yield s;
+	case =>
+		return invalid;
+	};
+
+	match (date::from_str("%Y%m%d%H%M%S.%N", timestr)) {
+	case let d: date::date =>
+		return d;
+	case let e: date::error =>
+		return invalid;
+	};
+};
+
+// Skips an element of universal type 'tag' and returns the size of the data
+// that has been skipped. Returns an error if the skipped data is invalid, or
+// [[badformat]] if the data is larger than 'max' bytes.
+//
+// Only BOOLEAN, INTEGER, NULL, OCTET_STRING and BITSTRING are supported; any
+// other tag aborts. For BITSTRING 'max' must not exceed os::BUFSZ.
+export fn skip(d: *decoder, tag: utag, max: size) (size | error) = {
+	static let buf: [os::BUFSZ]u8 = [0...];
+	switch (tag) {
+	case utag::BOOLEAN =>
+		read_bool(d)?;
+		return 1z;
+	case utag::INTEGER =>
+		let br = bytereader(d, class::UNIVERSAL, utag::INTEGER)?;
+		// The first chunk is read here, in order to validate the
+		// prefix of the integer.
+		let n = match (io::read(&br, buf)?) {
+		case let n: size =>
+			yield n;
+		case io::EOF =>
+			return invalid;
+		};
+		validate_intprefix(buf[..n])?;
+		if (n > max) {
+			bytes::zero(buf);
+			return badformat;
+		};
+		// The bytes read so far count towards 'max' as well.
+		n += streamskip(&br, max - n, buf)?;
+		return n;
+	case utag::NULL =>
+		read_null(d)?;
+		return 0z;
+	case utag::OCTET_STRING =>
+		let r = octetstrreader(d)?;
+		return streamskip(&r, max, buf)?;
+	case utag::BITSTRING =>
+		assert(max <= len(buf));
+		let buf = buf[..max];
+		let p = read_bitstr(d, buf)?;
+		bytes::zero(p.0);
+		// data bytes plus the byte holding the number of unused bits
+		return len(p.0) + 1;
+	case =>
+		abort("skip for given utag not implemented");
+	};
+};
+
+// Reads 'r' until EOF using 'buf' as scratch space and returns the number of
+// bytes read. [[badformat]] is returned if more than 'max' bytes are read.
+// 'buf' must not be empty and is zeroed before returning.
+fn streamskip(r: io::handle, max: size, buf: []u8) (size | error) = {
+	defer bytes::zero(buf);
+	// Read up to one byte more than allowed per call. That way exceeding
+	// 'max' is detected, while never reading into an empty slice. With
+	// 'max' = 0 an empty slice would make no progress on pending data.
+	let chunk = if (max < len(buf)) buf[..max + 1] else buf[..];
+	let s = 0z;
+	for (true) {
+		match (io::read(r, chunk)?) {
+		case let n: size =>
+			s += n;
+		case io::EOF =>
+			return s;
+		};
+
+		if (s > max) {
+			return badformat;
+		};
+	};
+};
--
2.43.0
encoding/asn1/oiddb/db.txt contains all oids required by the stdlib. It
is in a separate module so that projects may create their own oid list
by appending to the stdlib and generating the db with the genoiddb
command.
This is still not an ideal solution if multiple libraries require their own
oids to be added to the db.
Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---
v3: Rework to allow different oid databases in different modules.
v4: Fix oid reader test. Improve documentation.
cmd/genoiddb/main.ha | 209 ++++++++++++++++++++++++++++
encoding/asn1/+test/decoder_test.ha | 21 ++-
encoding/asn1/oid.ha | 137 ++++++++++++++++++
encoding/asn1/stdoid/db.ha | 127 +++++++++++++++++
encoding/asn1/stdoid/db.txt | 51 +++++++
5 files changed, 538 insertions(+), 7 deletions(-)
create mode 100644 cmd/genoiddb/main.ha
create mode 100644 encoding/asn1/oid.ha
create mode 100644 encoding/asn1/stdoid/db.ha
create mode 100644 encoding/asn1/stdoid/db.txt
diff --git a/cmd/genoiddb/main.ha b/cmd/genoiddb/main.ha
new file mode 100644
index 00000000..5ce36ccb
--- /dev/null
+++ b/cmd/genoiddb/main.ha
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use ascii;
+use bufio;
+use fmt;
+use io;
+use os;
+use strconv;
+use strings;
+use types;
+
+// One record of the oid database as read from the input.
+type entry = struct {
+ // Symbolic name; the first space-separated column of an input line.
+ name: str,
+ // Oid in dotted-decimal notation; the last column of an input line.
+ val: str,
+ // Offset of this entry's length-prefixed DER encoding within the
+ // generated lut. Assigned by write_db.
+ idx: size,
+};
+
+// Reads an oid database in text form from stdin and prints the corresponding
+// Hare source to stdout: the oiddb literal (lut, index, names), the exported
+// 'db' pointer and one exported constant per oid.
+export fn main() void = {
+	let entries = parse_oids();
+	defer free_oids(entries);
+	const count = len(entries);
+
+	fmt::println("// SPDX-License-Identifier: MPL-2.0\n"
+		"// (c) Hare authors <https://harelang.org>\n"
+		"// This is an auto generated file. Do not edit.\n"
+		"\n"
+		"use encoding::asn1;\n")!;
+
+	// The database literal: lut and index first, then the names.
+	fmt::println("const _db = asn1::oiddb {")!;
+	write_db(os::stdout, entries)!;
+	fmt::println("\tnames = [")!;
+	for (let k = 0z; k < count; k += 1) {
+		fmt::printfln("\t\t\"{}\",", entries[k].name)!;
+	};
+	fmt::println("\t],")!;
+	fmt::println("};\n")!;
+
+	fmt::println("export const db = &_db;\n")!;
+
+	// One constant per oid; its value is the position in the database.
+	for (let k = 0z; k < count; k += 1) {
+		fmt::print("export def ")!;
+		write_varname(os::stdout, entries[k].name)!;
+		fmt::printfln(": asn1::oid = {};", k)!;
+	};
+};
+
+// Reads the oid database from stdin, one entry per line. Empty lines and lines
+// starting with '#' are skipped. Of the space-separated columns of a line, the
+// first becomes the name and the last the oid value. The result must be
+// released with free_oids.
+fn parse_oids() []entry = {
+	let scan = bufio::newscanner(os::stdin, types::SIZE_MAX);
+	defer bufio::finish(&scan);
+	let entries: []entry = [];
+
+	for (true) {
+		const line = match (bufio::scan_line(&scan)!) {
+		case let line: const str =>
+			yield line;
+		case io::EOF =>
+			break;
+		};
+
+		if (line == "" || strings::hasprefix(line, '#')) {
+			continue;
+		};
+
+		const cols = strings::split(line, " ");
+		defer free(cols);
+
+		// The columns borrow from the scanner, so keep copies.
+		append(entries, entry {
+			name = strings::dup(cols[0]),
+			val = strings::dup(cols[len(cols) - 1]),
+			...
+		});
+	};
+
+	return entries;
+};
+
+// Frees the name and value strings of every entry and then the slice itself.
+fn free_oids(oids: []entry) void = {
+	for (let k = 0z; k < len(oids); k += 1) {
+		const e = &oids[k];
+		free(e.name);
+		free(e.val);
+	};
+
+	free(oids);
+};
+
+// Writes the 'lut' and 'index' fields of the generated oiddb literal to 'h'.
+//
+// 'lut' receives, for every entry in order, one length byte followed by the
+// DER encoding of the oid. 'index' receives the offset of each entry's length
+// byte within 'lut'. As a side effect that offset is stored in the 'idx' field
+// of the corresponding entry of 'oids'.
+fn write_db(h: io::handle, oids: []entry) (void | io::error) = {
+	fmt::fprint(h, "\tlut = [")?;
+
+	const lutcols = 12z;
+	let idx = 0z;
+
+	for (let i = 0z; i < len(oids); i += 1) {
+		let e = &oids[i];
+		e.idx = idx;
+
+		let der = oidtoder(e.val);
+		// The length prefix is a single byte.
+		assert(len(der) <= 0xff);
+		insert(der[0], len(der): u8);
+		defer free(der);
+
+		for (let j = 0z; j < len(der); j += 1) {
+			fmt::fprint(h, if (idx % lutcols == 0) "\n\t\t" else " ")?;
+			fmt::fprintf(h, "0x{:.2x},", der[j])?;
+			idx += 1;
+		};
+	};
+	fmt::fprintln(h, "\n\t],")?;
+
+	const idxcols = 9z;
+	fmt::fprint(h, "\tindex = [")?;
+	for (let i = 0z; i < len(oids); i += 1) {
+		fmt::fprint(h, if (i % idxcols == 0) "\n\t\t" else " ")?;
+		fmt::fprintf(h, "0x{:.4x},", oids[i].idx)?;
+	};
+	fmt::fprintln(h, "\n\t],")?;
+};
+
+// Converts an oid in dotted-decimal notation into its DER content octets (no
+// tag, no length). The caller must free the returned slice.
+//
+// Aborts if the oid has fewer than two arcs, if the first arc is greater than
+// 2, or if the second arc is 40 or greater.
+fn oidtoder(oid: str) []u8 = {
+	let nums = oidtou64s(oid);
+	defer free(nums);
+
+	assert(len(nums) >= 2, "oid must have at least two arcs");
+	// The first two arcs share a single octet (40 * first + second). Only
+	// 0, 1 and 2 are valid first arcs; larger values would also wrap the
+	// u8 arithmetic below.
+	assert(nums[0] <= 2);
+	assert(nums[1] < 40);
+
+	let der: []u8 = alloc([0...], 1);
+	der[0] = nums[0]: u8 * 40 + nums[1]: u8;
+	let end = 1z;
+
+	for (let i = 2z; i < len(nums); i += 1) {
+		let n = nums[i];
+		if (n == 0) {
+			insert(der[end], 0u8);
+			end = len(der);
+			continue;
+		};
+
+		// Emit base-128 groups starting with the least significant one.
+		// Every group is inserted at 'end', in front of the groups
+		// already written for this arc, so the most significant group
+		// ends up first. All groups but the least significant carry
+		// the continuation bit.
+		let first = true;
+		for (n > 0) {
+			let p: u8 = n: u8 & 0x7f;
+			n >>= 7;
+			if (first) {
+				first = false;
+			} else {
+				p |= 0x80;
+			};
+			insert(der[end], p);
+		};
+
+		end = len(der);
+	};
+
+	return der;
+};
+
+// Splits a dotted-decimal oid at every '.' and parses each part as a u64.
+// Aborts if a part is not a valid number. The caller must free the result.
+fn oidtou64s(oid: str) []u64 = {
+	let arcs: []u64 = [];
+	let tok = strings::tokenize(oid, ".");
+
+	for (true) {
+		const part = match (strings::next_token(&tok)) {
+		case void =>
+			break;
+		case let part: str =>
+			yield part;
+		};
+		append(arcs, strconv::stou64(part)!);
+	};
+
+	return arcs;
+};
+
+// Writes 'name' to 'h' converted to an upper case constant name. '-' becomes
+// '_', lower case letters are upper cased, and a '_' is inserted in front of an
+// upper case letter that directly follows a lower case letter or a digit. For
+// example "sha1WithRSAEncryption" becomes "SHA1_WITH_RSAENCRYPTION". Any
+// character that is not '-', an ASCII digit or an ASCII letter terminates the
+// program with an error message.
+fn write_varname(h: io::handle, name: str) (void | io::error) = {
+ // assume that names are in ascii
+ let i = strings::iter(name);
+ // Whether the previous character was a lower case letter or a digit,
+ // i.e. whether an upper case letter at this point starts a new word.
+ let prevlow = false;
+ for (true) {
+ match (strings::next(&i)) {
+ case void =>
+ break;
+ case let r: rune =>
+ let r = if (r == '-') {
+ prevlow = false;
+ yield '_';
+ } else if (ascii::isdigit(r)) {
+ prevlow = true;
+ yield r;
+ } else if (ascii::isupper(r)) {
+ // Word boundary: separate from the preceding word.
+ if (prevlow) {
+ fmt::fprint(h, "_")?;
+ prevlow = false;
+ };
+ yield r;
+ } else if (ascii::islower(r)) {
+ prevlow = true;
+ yield ascii::toupper(r);
+ } else {
+ fmt::fatalf("Unexpected character in oid name: {}", r);
+ };
+
+ fmt::fprint(h, r)?;
+ };
+ };
+};
+
diff --git a/encoding/asn1/+test/decoder_test.ha b/encoding/asn1/+test/decoder_test.ha
index 2282fc40..cb32d7c3 100644
--- a/encoding/asn1/+test/decoder_test.ha
+++ b/encoding/asn1/+test/decoder_test.ha
@@ -243,13 +243,6 @@ fn d(i: []u8) decoder = {
assert(read_bitstr(&d([0x03, 0x03, 0x07, 0xab, 0x40]), buf) is invalid);
};
-@test fn read_oid() void = {
- assert(read_oid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))! == oid::ID_AT_COMMON_NAME);
-
- assert(bytes::equal([0x55, 0x04, 0x03],
- read_rawoid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))!));
-};
-
let datbuf: [64]u8 = [0...];
fn newdatetime(s: str, tag: utag) []u8 = {
@@ -329,3 +322,17 @@ fn newdatetime(s: str, tag: utag) []u8 = {
// TODO midnight is YYYYMMDD000000Z
};
+
+// Decodes an oid against a small hand-written database and checks the name
+// lookup and the raw (DER) form.
+@test fn read_oid() void = {
+ // Two entries, each stored as a length byte followed by the DER bytes:
+ // one at offset 0 and one at offset 4.
+ let db = oiddb {
+ lut = [0x03, 0x2b, 0x65, 0x70, 0x03, 0x55, 0x04, 0x03],
+ index = [0, 4],
+ names = ["ed25519", "id-at-commonName"],
+ };
+
+ assert(read_oid(&d([0x06, 0x03, 0x55, 0x04, 0x03]), &db)! == 1);
+ assert(stroid(&db, 1) == "id-at-commonName");
+
+ assert(bytes::equal([0x55, 0x04, 0x03],
+ read_rawoid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))!));
+};
diff --git a/encoding/asn1/oid.ha b/encoding/asn1/oid.ha
new file mode 100644
index 00000000..d32c1400
--- /dev/null
+++ b/encoding/asn1/oid.ha
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use bytes;
+use errors;
+use fmt;
+use io;
+use math::{divu};
+use memio;
+use strings;
+
+
+// An oid database that contains a lookup table of known oids in the DER format.
+// A database of oids required by the standard library can be found in
+// [[encoding::asn1::stdoid]].
+//
+// The database can be used with [[oid_from_der]] and [[oid_to_der]] to convert
+// an oid between integer and DER encoding. [[read_oid]] and [[write_oid]] can
+// be used to decode or encode the oid directly from and to DER.
+//
+// If the standard oid database is missing entries for the given use case, an
+// individual database can be generated using the genoiddb command found in
+// cmd/. Take a look at encoding/asn1/stdoid/db.txt for an example database
+// file.
+export type oiddb = struct {
+ // All entries back to back; each entry is one length byte followed by
+ // that many bytes of DER encoded oid.
+ lut: []u8,
+ // For every [[oid]], the offset of its length byte within 'lut'.
+ index: []size,
+ // For every [[oid]], its name.
+ names: []str,
+};
+
+// Numeric id of an oid which is unique within an [[oiddb]]. It is used as the
+// index into the 'index' and 'names' fields of the database.
+export type oid = u32;
+
+// Reads an oid and looks it up in 'db'. Returns [[badformat]] if 'db' has no
+// entry for it.
+export fn read_oid(d: *decoder, db: *oiddb) (oid | error) = {
+	const der = read_rawoid(d)?;
+
+	match (oid_from_der(db, der)) {
+	case void =>
+		return badformat;
+	case let id: oid =>
+		return id;
+	};
+};
+
+// Reads any [[oid]] and returns the DER encoded form. The returned value is
+// borrowed from a static buffer, so it is overwritten by the next call to this
+// function. Returns [[invalid]] if the encoded oid is shorter than two bytes.
+export fn read_rawoid(d: *decoder) ([]u8 | error) = {
+ def OIDBUFSZ: size = 64; // estimated
+ static let oidbuf: [OIDBUFSZ]u8 = [0...];
+
+ const dh = next(d)?;
+ expect_utag(dh, utag::OID)?;
+ if (dsz(dh) < 2) {
+ return invalid;
+ };
+ // NOTE(review): presumably read_bytes reports an error if the oid is
+ // longer than OIDBUFSZ; confirm against read_bytes.
+ const n = read_bytes(d, oidbuf)?;
+ return oidbuf[..n];
+};
+
+// Writes the DER form of 'oid', as stored in the [[oiddb]] 'db', as a
+// primitive with class UNIVERSAL and tag OID.
+export fn write_oid(e: *encoder, db: *oiddb, oid: oid) (void | overflow) = {
+	write_fixedprim(e, class::UNIVERSAL, utag::OID, oid_to_der(db, oid))?;
+};
+
+// Looks up the DER encoded oid 'raw' in 'db' and returns the [[oid]] of the
+// first matching entry, or void otherwise.
+export fn oid_from_der(db: *oiddb, raw: []u8) (void | oid) = {
+	for (let id = 0z; id < len(db.index); id += 1) {
+		if (bytes::equal(raw, oid_to_der(db, id: oid))) {
+			return id: oid;
+		};
+	};
+};
+
+// Borrows the DER representation of the known oid 'o' from 'db'. The entry
+// starts with a length byte; the returned slice is the data following it.
+export fn oid_to_der(db: *oiddb, o: oid) []u8 = {
+	const start = db.index[o] + 1;
+	const n = db.lut[start - 1];
+	return db.lut[start..start + n];
+};
+
+// Looks up a str representation of an oid from the database. The returned
+// string is the entry of 'db.names' at position 'o'.
+export fn stroid(db: *oiddb, o: oid) str = {
+ return db.names[o];
+};
+
+// Returns the dotted-decimal representation of the DER encoded oid 'der', for
+// example "1.2.840.113549.1.1.1". The caller must free the returned value.
+//
+// Returns [[errors::invalid]] if 'der' is empty or ends in the middle of a
+// subidentifier, and [[errors::overflow]] if a subidentifier does not fit into
+// 32 bits.
+export fn strrawoid(der: []u8) (str | io::error) = {
+	if (len(der) < 1) {
+		return errors::invalid;
+	};
+
+	let s = memio::dynamic();
+	let ok = false;
+	defer if (!ok) io::close(&s)!;
+
+	let first = true;
+	// Whether the subidentifier being assembled still awaits more octets.
+	let partial = false;
+	let el = 0u32;
+
+	for (let i = 0z; i < len(der); i += 1) {
+		el += der[i] & 0x7f;
+
+		if (der[i] & 0x80 != 0) {
+			// More octets follow. Make sure the shift cannot push
+			// set bits out of the u32.
+			if (el > 0x01ffffff) {
+				return errors::overflow;
+			};
+			el <<= 7;
+			partial = true;
+			continue;
+		};
+
+		if (first) {
+			// The first subidentifier holds the first two arcs as
+			// 40 * X + Y. X is at most 2, so every value from 80
+			// upwards belongs to X = 2.
+			if (el < 80) {
+				const (a, b) = divu(0, el: uint, 40);
+				fmt::fprintf(&s, "{}.{}", a, b)?;
+			} else {
+				fmt::fprintf(&s, "2.{}", el - 80)?;
+			};
+			first = false;
+		} else {
+			fmt::fprintf(&s, ".{}", el)?;
+		};
+		el = 0;
+		partial = false;
+	};
+
+	if (partial) {
+		// The last octet announced a continuation that never came.
+		return errors::invalid;
+	};
+
+	ok = true;
+	return memio::string(&s)!;
+};
+
+// Checks the dotted-decimal output for an oid that contains multi-byte
+// subidentifiers (840 and 113549).
+@test fn strrawoid() void = {
+ let der: [_]u8 = [0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01];
+ let s = strrawoid(der)!;
+ defer free(s);
+ assert(s == "1.2.840.113549.1.1.1");
+};
+
diff --git a/encoding/asn1/stdoid/db.ha b/encoding/asn1/stdoid/db.ha
new file mode 100644
index 00000000..3bef778c
--- /dev/null
+++ b/encoding/asn1/stdoid/db.ha
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+// This is an auto generated file. Do not edit.
+
+use encoding::asn1;
+
+const _db = asn1::oiddb {
+	lut = [
+		0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x03, 0x2b,
+		0x65, 0x70, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x05,
+		0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0e, 0x09, 0x2a,
+		0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b, 0x09, 0x2a, 0x86, 0x48,
+		0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0c, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7,
+		0x0d, 0x01, 0x01, 0x0d, 0x05, 0x2b, 0x0e, 0x03, 0x02, 0x1a, 0x09, 0x60,
+		0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04, 0x09, 0x60, 0x86, 0x48,
+		0x01, 0x65, 0x03, 0x04, 0x02, 0x01, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65,
+		0x03, 0x04, 0x02, 0x02, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04,
+		0x02, 0x03, 0x07, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x02, 0x01, 0x08, 0x2a,
+		0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07, 0x08, 0x2a, 0x86, 0x48, 0xce,
+		0x3d, 0x03, 0x01, 0x07, 0x05, 0x2b, 0x81, 0x04, 0x00, 0x22, 0x05, 0x2b,
+		0x81, 0x04, 0x00, 0x23, 0x07, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x04, 0x01,
+		0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x04, 0x03, 0x01, 0x08, 0x2a, 0x86,
+		0x48, 0xce, 0x3d, 0x04, 0x03, 0x02, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d,
+		0x04, 0x03, 0x03, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x04, 0x03, 0x04,
+		0x03, 0x55, 0x04, 0x03, 0x03, 0x55, 0x04, 0x04, 0x03, 0x55, 0x04, 0x05,
+		0x03, 0x55, 0x04, 0x06, 0x03, 0x55, 0x04, 0x07, 0x03, 0x55, 0x04, 0x08,
+		0x03, 0x55, 0x04, 0x0a, 0x03, 0x55, 0x04, 0x0b, 0x03, 0x55, 0x04, 0x0c,
+		0x03, 0x55, 0x04, 0x2a, 0x03, 0x55, 0x04, 0x2b, 0x03, 0x55, 0x04, 0x2c,
+		0x03, 0x55, 0x04, 0x2e, 0x03, 0x55, 0x04, 0x41, 0x0a, 0x09, 0x92, 0x26,
+		0x89, 0x93, 0xf2, 0x2c, 0x64, 0x01, 0x19, 0x03, 0x55, 0x1d, 0x0f, 0x03,
+		0x55, 0x1d, 0x11, 0x03, 0x55, 0x1d, 0x13, 0x03, 0x55, 0x1d, 0x25,
+	],
+	index = [
+		0x0000, 0x000a, 0x000e, 0x0018, 0x0022, 0x002c, 0x0036, 0x0040, 0x0046,
+		0x0050, 0x005a, 0x0064, 0x006e, 0x0076, 0x007f, 0x0088, 0x008e, 0x0094,
+		0x009c, 0x00a5, 0x00ae, 0x00b7, 0x00c0, 0x00c4, 0x00c8, 0x00cc, 0x00d0,
+		0x00d4, 0x00d8, 0x00dc, 0x00e0, 0x00e4, 0x00e8, 0x00ec, 0x00f0, 0x00f4,
+		0x00f8, 0x0103, 0x0107, 0x010b, 0x010f,
+	],
+	names = [
+		"rsaEncryption",
+		"ed25519",
+		"sha1WithRSAEncryption",
+		"sha224WithRSAEncryption",
+		"sha256WithRSAEncryption",
+		"sha384WithRSAEncryption",
+		"sha512WithRSAEncryption",
+		"id-sha1",
+		"id-sha224",
+		"id-sha256",
+		"id-sha384",
+		"id-sha512",
+		"id-ecPublicKey",
+		"prime256v1",
+		"ansix9p256r1",
+		"ansix9p384r1",
+		"ansix9p521r1",
+		"ecdsa-with-SHA1",
+		"ecdsa-with-SHA224",
+		"ecdsa-with-SHA256",
+		"ecdsa-with-SHA384",
+		"ecdsa-with-SHA512",
+		"id-at-commonName",
+		"id-at-surname",
+		"id-at-serialNumber",
+		"id-at-countryName",
+		"id-at-localityName",
+		"id-at-stateOrProvinceName",
+		"id-at-organizationName",
+		"id-at-organizationalUnitName",
+		"id-at-title",
+		"id-at-givenName",
+		"id-at-initials",
+		"id-at-generationQualifier",
+		"id-at-dnQualifier",
+		"id-at-pseudonym",
+		"id-domainComponent",
+		"id-ce-keyUsage",
+		"id-ce-subjectAltName",
+		"id-ce-basicConstraints",
+		"id-ce-extKeyUsage",
+	],
+};
+
+export const db = &_db;
+
+export def RSA_ENCRYPTION: asn1::oid = 0;
+export def ED25519: asn1::oid = 1;
+export def SHA1_WITH_RSAENCRYPTION: asn1::oid = 2;
+export def SHA224_WITH_RSAENCRYPTION: asn1::oid = 3;
+export def SHA256_WITH_RSAENCRYPTION: asn1::oid = 4;
+export def SHA384_WITH_RSAENCRYPTION: asn1::oid = 5;
+export def SHA512_WITH_RSAENCRYPTION: asn1::oid = 6;
+export def ID_SHA1: asn1::oid = 7;
+export def ID_SHA224: asn1::oid = 8;
+export def ID_SHA256: asn1::oid = 9;
+export def ID_SHA384: asn1::oid = 10;
+export def ID_SHA512: asn1::oid = 11;
+export def ID_EC_PUBLIC_KEY: asn1::oid = 12;
+export def PRIME256V1: asn1::oid = 13;
+export def ANSIX9P256R1: asn1::oid = 14;
+export def ANSIX9P384R1: asn1::oid = 15;
+export def ANSIX9P521R1: asn1::oid = 16;
+export def ECDSA_WITH_SHA1: asn1::oid = 17;
+export def ECDSA_WITH_SHA224: asn1::oid = 18;
+export def ECDSA_WITH_SHA256: asn1::oid = 19;
+export def ECDSA_WITH_SHA384: asn1::oid = 20;
+export def ECDSA_WITH_SHA512: asn1::oid = 21;
+export def ID_AT_COMMON_NAME: asn1::oid = 22;
+export def ID_AT_SURNAME: asn1::oid = 23;
+export def ID_AT_SERIAL_NUMBER: asn1::oid = 24;
+export def ID_AT_COUNTRY_NAME: asn1::oid = 25;
+export def ID_AT_LOCALITY_NAME: asn1::oid = 26;
+export def ID_AT_STATE_OR_PROVINCE_NAME: asn1::oid = 27;
+export def ID_AT_ORGANIZATION_NAME: asn1::oid = 28;
+export def ID_AT_ORGANIZATIONAL_UNIT_NAME: asn1::oid = 29;
+export def ID_AT_TITLE: asn1::oid = 30;
+export def ID_AT_GIVEN_NAME: asn1::oid = 31;
+export def ID_AT_INITIALS: asn1::oid = 32;
+export def ID_AT_GENERATION_QUALIFIER: asn1::oid = 33;
+export def ID_AT_DN_QUALIFIER: asn1::oid = 34;
+export def ID_AT_PSEUDONYM: asn1::oid = 35;
+export def ID_DOMAIN_COMPONENT: asn1::oid = 36;
+export def ID_CE_KEY_USAGE: asn1::oid = 37;
+export def ID_CE_SUBJECT_ALT_NAME: asn1::oid = 38;
+export def ID_CE_BASIC_CONSTRAINTS: asn1::oid = 39;
+export def ID_CE_EXT_KEY_USAGE: asn1::oid = 40;
diff --git a/encoding/asn1/stdoid/db.txt b/encoding/asn1/stdoid/db.txt
new file mode 100644
index 00000000..de4e42d4
--- /dev/null
+++ b/encoding/asn1/stdoid/db.txt
@@ -0,0 +1,51 @@
+# OIDs that will be translated into db.ha using `genoiddb`
+
+rsaEncryption 1.2.840.113549.1.1.1
+ed25519 1.3.101.112
+
+sha1WithRSAEncryption 1.2.840.113549.1.1.5
+sha224WithRSAEncryption 1.2.840.113549.1.1.14
+sha256WithRSAEncryption 1.2.840.113549.1.1.11
+sha384WithRSAEncryption 1.2.840.113549.1.1.12
+sha512WithRSAEncryption 1.2.840.113549.1.1.13
+
+id-sha1 1.3.14.3.2.26
+id-sha224 2.16.840.1.101.3.4.2.4
+id-sha256 2.16.840.1.101.3.4.2.1
+id-sha384 2.16.840.1.101.3.4.2.2
+id-sha512 2.16.840.1.101.3.4.2.3
+
+id-ecPublicKey 1.2.840.10045.2.1
+prime256v1 1.2.840.10045.3.1.7
+
+ansix9p256r1 1.2.840.10045.3.1.7
+ansix9p384r1 1.3.132.0.34
+ansix9p521r1 1.3.132.0.35
+
+ecdsa-with-SHA1 1.2.840.10045.4.1
+ecdsa-with-SHA224 1.2.840.10045.4.3.1
+ecdsa-with-SHA256 1.2.840.10045.4.3.2
+ecdsa-with-SHA384 1.2.840.10045.4.3.3
+ecdsa-with-SHA512 1.2.840.10045.4.3.4
+
+id-at-commonName 2.5.4.3
+id-at-surname 2.5.4.4
+id-at-serialNumber 2.5.4.5
+id-at-countryName 2.5.4.6
+id-at-localityName 2.5.4.7
+id-at-stateOrProvinceName 2.5.4.8
+id-at-organizationName 2.5.4.10
+id-at-organizationalUnitName 2.5.4.11
+id-at-title 2.5.4.12
+id-at-givenName 2.5.4.42
+id-at-initials 2.5.4.43
+id-at-generationQualifier 2.5.4.44
+id-at-dnQualifier 2.5.4.46
+id-at-pseudonym 2.5.4.65
+
+id-domainComponent 0.9.2342.19200300.100.1.25
+
+id-ce-keyUsage 2.5.29.15
+id-ce-subjectAltName 2.5.29.17
+id-ce-basicConstraints 2.5.29.19
+id-ce-extKeyUsage 2.5.29.37
--
2.43.0
numeric and ia5, which is basically ascii, are required by some
entries in x.509. UniversalString, BMPString and T61 are only provided
for legacy support.
Note that this only supports a subset of T61. There are still some
certificates in the mozilla trust store that have T61 encoded strings.
Though the characters may only be a subset of ASCII.
Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---
encoding/asn1/+test/strings_test.ha | 164 +++++++++
encoding/asn1/charset+test.ha | 154 ++++++++
encoding/asn1/strings.ha | 362 +++++++++++++++++++
encoding/asn1/t61.ha | 534 ++++++++++++++++++++++++++++
4 files changed, 1214 insertions(+)
create mode 100644 encoding/asn1/+test/strings_test.ha
create mode 100644 encoding/asn1/charset+test.ha
create mode 100644 encoding/asn1/strings.ha
create mode 100644 encoding/asn1/t61.ha
diff --git a/encoding/asn1/+test/strings_test.ha b/encoding/asn1/+test/strings_test.ha
new file mode 100644
index 00000000..f338ed26
--- /dev/null
+++ b/encoding/asn1/+test/strings_test.ha
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use bytes;
+use errors;
+use fmt;
+use io;
+use strings;
+
+
+// Asserts that 'f' returns true for exactly the byte values listed in 'chars'
+// and false for every other value from 0 to 255. A mismatching value is
+// printed before the assertion fails.
+fn c_checkrange(chars: []u8, f: *fn (c: u8) bool) void = {
+	for (let code = 0z; code < 256; code += 1) {
+		const c = code: u8;
+		const expected = bytes::contains(chars, c);
+
+		if (f(c) != expected) {
+			fmt::println(code, expected, f(c))!;
+		};
+		assert(f(c) == expected);
+	};
+};
+
+// c_is_num must accept exactly the ten digits and the space character.
+@test fn c_is_num() void = {
+ const chars: [_]u8 = [
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' ',
+ ];
+ c_checkrange(chars, &c_is_num);
+};
+
+// c_is_print must accept exactly the letters, digits, space and punctuation
+// characters listed below.
+@test fn c_is_print() void = {
+ const chars: [_]u8 = [
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' ', '\'',
+ '(', ')', '+', ',', '-', '.', '/', ':', '=', '?',
+ ];
+ c_checkrange(chars, &c_is_print);
+};
+
+// Exercises UTF8String decoding through both read_utf8str and strreader,
+// including multi-byte characters that are cut off by the end of the data or
+// split across two reads.
+@test fn utf8() void = {
+	let buf: [16]u8 = [0...];
+
+	// A single four byte character, complete and truncated.
+	let expected: str = strings::fromutf8([0xf0, 0x90, 0x8d, 0x88])!;
+	assert(read_utf8str(&d([0x0c, 0x04, 0xf0, 0x90, 0x8d, 0x88]), buf)!
+		== expected);
+	assert(read_utf8str(&d([0x0c, 0x03, 0xf0, 0x90, 0x8d]), buf) is invalid);
+
+	bytes::zero(buf);
+	let r = strreader(&d([0x0c, 0x04, 0xf0, 0x90, 0x8d, 0x88]), utag::UTF8_STRING)!;
+	assert(io::read(&r, buf)! == 4);
+	assert(bytes::equal(buf[..4], strings::toutf8(expected)));
+
+	// Two ASCII characters followed by the four byte character.
+	bytes::zero(buf);
+	let expected: str = strings::fromutf8([0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88])!;
+	assert(read_utf8str(&d([0x0c, 0x06, 0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88]), buf)!
+		== expected);
+	assert(read_utf8str(&d([0x0c, 0x05, 0x55, 0x56, 0xf0, 0x90, 0x8d]), buf) is invalid);
+
+	bytes::zero(buf);
+	let r = strreader(&d([0x0c, 0x06, 0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88]), utag::UTF8_STRING)!;
+	assert(io::read(&r, buf)! == 6);
+	assert(bytes::equal(buf[..6], strings::toutf8(expected)));
+
+	let r = strreader(&d([0x0c, 0x05, 0x55, 0x56, 0xf0, 0x90, 0x8d]), utag::UTF8_STRING)!;
+	assert(unwrap_err(io::readall(&r, buf[2..]) as io::error) is invalid);
+
+	// A four byte destination only has room for the two ASCII characters;
+	// the multi-byte character is delivered by the following read.
+	bytes::zero(buf);
+	let r = strreader(&d([0x0c, 0x06, 0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88]), utag::UTF8_STRING)!;
+	assert(io::read(&r, buf[..4])! == 2);
+	assert(io::read(&r, buf[2..])! == 4);
+	assert(bytes::equal(buf[..6], strings::toutf8(expected)));
+
+	bytes::zero(buf);
+	let r = strreader(&d([0x0c, 0x05, 0x55, 0x56, 0xf0, 0x90, 0x8d]), utag::UTF8_STRING)!;
+	assert(io::read(&r, buf[..4])! == 2);
+	assert(unwrap_err(io::readall(&r, buf[2..]) as io::error) is invalid);
+};
+
+// Decodes TeletexString (T.61) data into UTF-8 through strreader and checks
+// the handling of truncated input and of a destination buffer that is too
+// small for a multi-byte character.
+@test fn t61() void = {
+ let input: [_]u8 = [
+ 0x14, 0x29,
+ 0x42, 0xc8, 0x61, 0x72, 0x65, 0x6e, 0x20, 0x76, 0x65, 0x72,
+ 0x7a, 0x65, 0x68, 0x72, 0x65, 0x6e, 0x20, 0x67, 0x65, 0x72,
+ 0x6e, 0x65, 0x20, 0xc8, 0x75, 0x62, 0x65, 0x72, 0x6d, 0xc8,
+ 0x61, 0xfb, 0x69, 0x67, 0x20, 0x48, 0x6f, 0x6e, 0x69, 0x67,
+ 0x0a,
+ ];
+
+ // The same text encoded as UTF-8.
+ const expected: [_]u8 = [
+ 0x42, 0xc3, 0xa4, 0x72, 0x65, 0x6e, 0x20, 0x76, 0x65, 0x72,
+ 0x7a, 0x65, 0x68, 0x72, 0x65, 0x6e, 0x20, 0x67, 0x65, 0x72,
+ 0x6e, 0x65, 0x20, 0xc3, 0xbc, 0x62, 0x65, 0x72, 0x6d, 0xc3,
+ 0xa4, 0xc3, 0x9f, 0x69, 0x67, 0x20, 0x48, 0x6f, 0x6e, 0x69,
+ 0x67, 0x0a,
+ ];
+
+ let dec = d(input);
+ let r = strreader(&dec, utag::TELETEX_STRING)!;
+ let result = io::drain(&r)!;
+ defer free(result);
+ assert(bytes::equal(expected, result));
+ assert(trypeek(&dec) is io::EOF);
+
+ // cut off multibyte char
+ input[1] = 0x2;
+ let r = strreader(&d(input[..4]), utag::TELETEX_STRING)!;
+ assert(unwrap_err(io::drain(&r) as io::error) is invalid);
+
+ // not enough space for multibyte char
+ let buf: [24]u8 = [0...];
+ let in = input[..27];
+ in[1] = (len(in) - 2): u8;
+ let dec = d(in);
+ let r = strreader(&dec, utag::TELETEX_STRING)!;
+ assert(io::read(&r, buf)! == 23);
+ assert(trypeek(&dec) is badformat);
+
+ let r = strreader(&d([
+ 0x14, 0x0f, 0x63, 0x6c, 0xc2, 0x65, 0x73, 0x20, 0x70, 0x75,
+ 0x62, 0x6c, 0x69, 0x71, 0x75, 0x65, 0x73,
+ ]), utag::TELETEX_STRING)!;
+ let b = io::drain(&r)!;
+ defer free(b);
+
+ assert(strings::fromutf8(b)! == "cl\u00e9s publiques");
+};
+
+// Decodes a BMPString (two bytes per character) into UTF-8 through strreader
+// and checks that the decoder has consumed all of its input afterwards.
+@test fn bmp() void = {
+ let input: [_]u8 = [
+ 0x1e, 0x26,
+ 0x00, 0x48, 0x00, 0xe4, 0x00, 0x72, 0x00, 0x65, 0x00, 0x6c,
+ 0x00, 0x61, 0x00, 0x6e, 0x00, 0x67, 0x00, 0x20, 0x00, 0x69,
+ 0x01, 0x61, 0x00, 0x20, 0x00, 0x6e, 0x00, 0x65, 0x00, 0x61,
+ 0x00, 0x74, 0x00, 0x6f, 0x00, 0x20, 0x27, 0x64,
+ ];
+
+ // The same text encoded as UTF-8.
+ const expected: [_]u8 = [
+ 0x48, 0xc3, 0xa4, 0x72, 0x65, 0x6c, 0x61, 0x6e, 0x67, 0x20,
+ 0x69, 0xc5, 0xa1, 0x20, 0x6e, 0x65, 0x61, 0x74, 0x6f, 0x20,
+ 0xe2, 0x9d, 0xa4,
+ ];
+
+ let dec = d(input);
+ let r = strreader(&dec, utag::BMP_STRING)!;
+ let result = io::drain(&r)!;
+ defer free(result);
+ assert(bytes::equal(expected, result));
+ assert(trypeek(&dec) is io::EOF);
+};
diff --git a/encoding/asn1/charset+test.ha b/encoding/asn1/charset+test.ha
new file mode 100644
index 00000000..acf66f9b
--- /dev/null
+++ b/encoding/asn1/charset+test.ha
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use ascii;
+use bytes;
+use fmt;
+use io;
+use memio;
+
+
+// Encodes all characters from 0x00 to 0xff separated by \t. Invalid characters
+// will not be printed. All possible accents follow the table as defined in
+// the two bytes chapter at https://en.wikipedia.org/wiki/T.51/ISO/IEC_6937
+//
+// The output starts with a header row of the hex digits 0 to f, followed by
+// sixteen rows of sixteen cells, followed by one line per accent lead byte
+// (0xc1 to 0xcf without 0xcc).
+fn print_t61_table(dest: io::handle) void = {
+ // Header row: column numbers.
+ for (let i = 0z; i < 16; i +=1 ) {
+ fmt::fprintf(dest, "{:x}\t", i)!;
+ };
+ fmt::fprintln(dest)!;
+
+ // Single byte characters, sixteen per row.
+ for (let i = 0z; i < 256; i += 1) {
+ if (i % 16 == 0) {
+ fmt::fprintln(dest)!;
+ };
+ match (t61_chardecode([i: u8])) {
+ case insufficient =>
+ fmt::fprint(dest, "")!;
+ case invalid =>
+ yield;
+ case let r: rune =>
+ // Characters above 0xa0 and visible ASCII are printed as they
+ // are; everything else is printed as a hex code point.
+ if (i > 0xa0 || (ascii::isprint(r) && !ascii::isspace(r))) {
+ fmt::fprint(dest, r)!;
+ } else {
+ fmt::fprintf(dest, "x{:.4x}", r: u32)!;
+ };
+ };
+
+ // NOTE(review): '%' binds tighter than '+', so this tests
+ // i + (1 % 16) != 0, which is always true: a tab follows every
+ // cell, including the last one of a row. t61_test_table encodes
+ // exactly this output, so changing the condition requires
+ // regenerating the table.
+ if (i + 1 % 16 != 0) {
+ fmt::fprint(dest, "\t")!;
+ };
+ };
+
+ fmt::fprintln(dest)!;
+
+ // Two byte sequences: each accent lead byte combined with the printable
+ // characters from 0x41 to 0x7a.
+ for (let i = 0xc1u8; i < 0xd0; i += 1) {
+ if (i == 0xcc) continue;
+ fmt::fprintf(dest, "{:.2x}\t", i)!;
+ for (let j = 0x41u32; j < 0x7b; j += 1) {
+ if (!ascii::isprint(j: rune)) {
+ continue;
+ };
+ // A lead byte on its own must be reported as incomplete.
+ if (!(t61_chardecode([i: u8]) is insufficient)) {
+ assert(false);
+ };
+ match (t61_chardecode([i: u8, j: u8])) {
+ case let r: rune =>
+ fmt::fprint(dest, r)!;
+ case =>
+ yield;
+ };
+ };
+ fmt::fprintln(dest)!;
+ };
+};
+
+// Renders the full T.61 table into memory and compares it byte for byte with
+// the reference in t61_test_table.
+@test fn t61encode() void = {
+ let table = memio::dynamic();
+ defer io::close(&table)!;
+ print_t61_table(&table);
+ assert(bytes::equal(t61_test_table, memio::buffer(&table)));
+};
+
+// Print this table as UTF-8 to visually check the characters.
+const t61_test_table: [_]u8 = [
+ 0x30, 0x09, 0x31, 0x09, 0x32, 0x09, 0x33, 0x09, 0x34, 0x09, 0x35, 0x09,
+ 0x36, 0x09, 0x37, 0x09, 0x38, 0x09, 0x39, 0x09, 0x61, 0x09, 0x62, 0x09,
+ 0x63, 0x09, 0x64, 0x09, 0x65, 0x09, 0x66, 0x09, 0x0a, 0x0a, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x78, 0x30, 0x30, 0x30,
+ 0x61, 0x09, 0x09, 0x78, 0x30, 0x30, 0x30, 0x63, 0x09, 0x78, 0x30, 0x30,
+ 0x30, 0x64, 0x09, 0x09, 0x09, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x78, 0x30, 0x30, 0x31, 0x61, 0x09, 0x78, 0x30,
+ 0x30, 0x31, 0x62, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x78, 0x30, 0x30,
+ 0x32, 0x30, 0x09, 0x21, 0x09, 0x22, 0x09, 0x09, 0x09, 0x25, 0x09, 0x26,
+ 0x09, 0x27, 0x09, 0x28, 0x09, 0x29, 0x09, 0x2a, 0x09, 0x2b, 0x09, 0x2c,
+ 0x09, 0x2d, 0x09, 0x2e, 0x09, 0x2f, 0x09, 0x0a, 0x30, 0x09, 0x31, 0x09,
+ 0x32, 0x09, 0x33, 0x09, 0x34, 0x09, 0x35, 0x09, 0x36, 0x09, 0x37, 0x09,
+ 0x38, 0x09, 0x39, 0x09, 0x3a, 0x09, 0x3b, 0x09, 0x3c, 0x09, 0x3d, 0x09,
+ 0x3e, 0x09, 0x3f, 0x09, 0x0a, 0x40, 0x09, 0x41, 0x09, 0x42, 0x09, 0x43,
+ 0x09, 0x44, 0x09, 0x45, 0x09, 0x46, 0x09, 0x47, 0x09, 0x48, 0x09, 0x49,
+ 0x09, 0x4a, 0x09, 0x4b, 0x09, 0x4c, 0x09, 0x4d, 0x09, 0x4e, 0x09, 0x4f,
+ 0x09, 0x0a, 0x50, 0x09, 0x51, 0x09, 0x52, 0x09, 0x53, 0x09, 0x54, 0x09,
+ 0x55, 0x09, 0x56, 0x09, 0x57, 0x09, 0x58, 0x09, 0x59, 0x09, 0x5a, 0x09,
+ 0x5b, 0x09, 0x09, 0x5d, 0x09, 0x09, 0x5f, 0x09, 0x0a, 0x09, 0x61, 0x09,
+ 0x62, 0x09, 0x63, 0x09, 0x64, 0x09, 0x65, 0x09, 0x66, 0x09, 0x67, 0x09,
+ 0x68, 0x09, 0x69, 0x09, 0x6a, 0x09, 0x6b, 0x09, 0x6c, 0x09, 0x6d, 0x09,
+ 0x6e, 0x09, 0x6f, 0x09, 0x0a, 0x70, 0x09, 0x71, 0x09, 0x72, 0x09, 0x73,
+ 0x09, 0x74, 0x09, 0x75, 0x09, 0x76, 0x09, 0x77, 0x09, 0x78, 0x09, 0x79,
+ 0x09, 0x7a, 0x09, 0x09, 0x7c, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x78, 0x30, 0x30,
+ 0x38, 0x62, 0x09, 0x78, 0x30, 0x30, 0x38, 0x63, 0x09, 0x09, 0x09, 0x09,
+ 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x78, 0x30, 0x30, 0x39, 0x62, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x78,
+ 0x30, 0x30, 0x61, 0x30, 0x09, 0xc2, 0xa1, 0x09, 0xc2, 0xa2, 0x09, 0xc2,
+ 0xa3, 0x09, 0x24, 0x09, 0xc2, 0xa5, 0x09, 0x23, 0x09, 0xc2, 0xa7, 0x09,
+ 0xc2, 0xa4, 0x09, 0x09, 0x09, 0xc2, 0xab, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x0a, 0xc2, 0xb0, 0x09, 0xc2, 0xb1, 0x09, 0xc2, 0xb2, 0x09, 0xc2, 0xb3,
+ 0x09, 0xc3, 0x97, 0x09, 0xc2, 0xb5, 0x09, 0xc2, 0xb6, 0x09, 0xc2, 0xb7,
+ 0x09, 0xc3, 0xb7, 0x09, 0x09, 0x09, 0xc2, 0xbb, 0x09, 0xc2, 0xbc, 0x09,
+ 0xc2, 0xbd, 0x09, 0xc2, 0xbe, 0x09, 0xc2, 0xbf, 0x09, 0x0a, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0a, 0xe2, 0x84, 0xa6, 0x09,
+ 0xc3, 0x86, 0x09, 0xc3, 0x90, 0x09, 0xc2, 0xaa, 0x09, 0xc4, 0xa6, 0x09,
+ 0x09, 0xc4, 0xb2, 0x09, 0xc4, 0xbf, 0x09, 0xc5, 0x81, 0x09, 0xc3, 0x98,
+ 0x09, 0xc5, 0x92, 0x09, 0xc2, 0xba, 0x09, 0xc3, 0x9e, 0x09, 0xc5, 0xa6,
+ 0x09, 0xc5, 0x8a, 0x09, 0xc5, 0x89, 0x09, 0x0a, 0xc4, 0xb8, 0x09, 0xc3,
+ 0xa6, 0x09, 0xc4, 0x91, 0x09, 0xc3, 0xb0, 0x09, 0xc4, 0xa7, 0x09, 0xc4,
+ 0xb1, 0x09, 0xc4, 0xb3, 0x09, 0xc5, 0x80, 0x09, 0xc5, 0x82, 0x09, 0xc3,
+ 0xb8, 0x09, 0xc5, 0x93, 0x09, 0xc3, 0x9f, 0x09, 0xc3, 0xbe, 0x09, 0xc5,
+ 0xa7, 0x09, 0xc5, 0x8b, 0x09, 0x09, 0x0a, 0x63, 0x31, 0x09, 0xc3, 0x80,
+ 0xc3, 0x88, 0xc3, 0x8c, 0xc3, 0x92, 0xc3, 0x99, 0xc3, 0xa0, 0xc3, 0xa8,
+ 0xc3, 0xac, 0xc3, 0xb2, 0xc3, 0xb9, 0x0a, 0x63, 0x32, 0x09, 0xc3, 0x81,
+ 0xc4, 0x86, 0xc3, 0x89, 0xc3, 0x8d, 0xc4, 0xb9, 0xc5, 0x83, 0xc3, 0x93,
+ 0xc5, 0x94, 0xc5, 0x9a, 0xc3, 0x9a, 0xc3, 0x9d, 0xc5, 0xb9, 0xc3, 0xa1,
+ 0xc4, 0x87, 0xc3, 0xa9, 0xc4, 0xa3, 0xc3, 0xad, 0xc4, 0xba, 0xc5, 0x84,
+ 0xc3, 0xb3, 0xc5, 0x95, 0xc5, 0x9b, 0xc3, 0xba, 0xc3, 0xbd, 0xc5, 0xba,
+ 0x0a, 0x63, 0x33, 0x09, 0xc3, 0x82, 0xc4, 0x88, 0xc3, 0x8a, 0xc4, 0x9c,
+ 0xc4, 0xa4, 0xc3, 0x8e, 0xc4, 0xb4, 0xc3, 0x94, 0xc5, 0x9c, 0xc3, 0x9b,
+ 0xc5, 0xb4, 0xc5, 0xb6, 0xc3, 0xa2, 0xc4, 0x89, 0xc3, 0xaa, 0xc4, 0x9d,
+ 0xc4, 0xa5, 0xc3, 0xae, 0xc4, 0xb5, 0xc3, 0xb4, 0xc5, 0x9d, 0xc3, 0xbb,
+ 0xc5, 0xb5, 0xc5, 0xb7, 0x0a, 0x63, 0x34, 0x09, 0xc3, 0x83, 0xc4, 0xa8,
+ 0xc3, 0x91, 0xc3, 0x95, 0xc5, 0xa8, 0xc3, 0xa3, 0xc4, 0xa9, 0xc3, 0xb1,
+ 0xc3, 0xb5, 0xc5, 0xa9, 0x0a, 0x63, 0x35, 0x09, 0xc4, 0x80, 0xc4, 0x92,
+ 0xc4, 0xaa, 0xc5, 0x8c, 0xc5, 0xaa, 0xc4, 0x81, 0xc4, 0x93, 0xc4, 0xab,
+ 0xc5, 0x8d, 0xc5, 0xab, 0x0a, 0x63, 0x36, 0x09, 0xc4, 0x82, 0xc4, 0x9e,
+ 0xc5, 0xac, 0xc4, 0x83, 0xc4, 0x9f, 0xc5, 0xad, 0x0a, 0x63, 0x37, 0x09,
+ 0xc4, 0x8a, 0xc4, 0x96, 0xc4, 0xa0, 0xc4, 0xb0, 0xc5, 0xbb, 0xc4, 0x8b,
+ 0xc4, 0x97, 0xc4, 0xa1, 0xc5, 0xbc, 0x0a, 0x63, 0x38, 0x09, 0xc3, 0x84,
+ 0xc3, 0x8b, 0xc3, 0x8f, 0xc3, 0x96, 0xc3, 0x9c, 0xc5, 0xb8, 0xc3, 0xa4,
+ 0xc3, 0xab, 0xc3, 0xaf, 0xc3, 0xb6, 0xc3, 0xbc, 0xc3, 0xbf, 0x0a, 0x63,
+ 0x39, 0x09, 0xc3, 0x84, 0xc3, 0x8b, 0xc3, 0x8f, 0xc3, 0x96, 0xc3, 0x9c,
+ 0xc5, 0xb8, 0xc3, 0xa4, 0xc3, 0xab, 0xc3, 0xaf, 0xc3, 0xb6, 0xc3, 0xbc,
+ 0xc3, 0xbf, 0x0a, 0x63, 0x61, 0x09, 0xc3, 0x85, 0xc5, 0xae, 0xc3, 0xa5,
+ 0xc5, 0xaf, 0x0a, 0x63, 0x62, 0x09, 0xc3, 0x87, 0xc4, 0xa2, 0xc4, 0xb6,
+ 0xc4, 0xbb, 0xc5, 0x85, 0xc5, 0x96, 0xc5, 0x9e, 0xc5, 0xa2, 0xc3, 0xa7,
+ 0xc4, 0xb7, 0xc4, 0xbc, 0xc5, 0x86, 0xc5, 0x97, 0xc5, 0x9f, 0xc5, 0xa3,
+ 0x0a, 0x63, 0x64, 0x09, 0xc5, 0x90, 0xc5, 0xb0, 0xc5, 0x91, 0xc5, 0xb1,
+ 0x0a, 0x63, 0x65, 0x09, 0xc4, 0x84, 0xc4, 0x98, 0xc4, 0xae, 0xc5, 0xb2,
+ 0xc4, 0x85, 0xc4, 0x99, 0xc4, 0xaf, 0xc5, 0xb3, 0x0a, 0x63, 0x66, 0x09,
+ 0xc4, 0x8c, 0xc4, 0x8e, 0xc4, 0x9a, 0xc4, 0xbd, 0xc5, 0x87, 0xc5, 0x98,
+ 0xc5, 0xa0, 0xc5, 0xa4, 0xc5, 0xbd, 0xc4, 0x8d, 0xc4, 0x8f, 0xc4, 0x9b,
+ 0xc4, 0xbe, 0xc5, 0x88, 0xc5, 0x99, 0xc5, 0xa1, 0xc5, 0xa5, 0xc5, 0xbe,
+ 0x0a,
+];
+
diff --git a/encoding/asn1/strings.ha b/encoding/asn1/strings.ha
new file mode 100644
index 00000000..a381958b
--- /dev/null
+++ b/encoding/asn1/strings.ha
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use encoding::utf8;
+use endian;
+use errors;
+use io;
+use strings;
+
+
+// Attribute bit in [[cclass]]: the character is allowed in a NumericString.
+def N: u8 = 0o1;
+
+// Attribute bit in [[cclass]]: the character is allowed in a PrintableString.
+def P: u8 = 0o2;
+
+// LUT of bitfields with character attributes. The table has one entry for each
+// of the 128 7-bit character codes; an entry is an OR of the [[N]] and [[P]]
+// bits. Each row holds eight entries and the trailing comment of a row is the
+// octal code of its first entry. Callers must make sure that the index is
+// below 0x80.
+const cclass: [_]u8 = [
+// 0 1 2 3 4 5 6 7
+ 0, 0, 0, 0, 0, 0, 0, 0, // 0
+ 0, 0, 0, 0, 0, 0, 0, 0, // 10
+ 0, 0, 0, 0, 0, 0, 0, 0, // 20
+ 0, 0, 0, 0, 0, 0, 0, 0, // 30
+ N|P, 0, 0, 0, 0, 0, 0, P, // 40
+ P, P, 0, P, P, P, P, P, // 50
+ N|P, N|P, N|P, N|P, N|P, N|P, N|P, N|P, // 60
+ N|P, N|P, P, 0, 0, P, 0, P, // 70
+ 0, P, P, P, P, P, P, P, // 100
+ P, P, P, P, P, P, P, P, // 110
+ P, P, P, P, P, P, P, P, // 120
+ P, P, P, 0, 0, 0, 0, 0, // 130
+ 0, P, P, P, P, P, P, P, // 140
+ P, P, P, P, P, P, P, P, // 150
+ P, P, P, P, P, P, P, P, // 160
+ P, P, P, 0, 0, 0, 0, 0, // 170
+];
+
+// Predicate that tells whether the byte 'c' is allowed in a certain string
+// type. Used by [[char_decoder]] to check every byte it has read.
+type char_validator = fn (c: u8) bool;
+
+// Reports whether 'c' may appear in a NumericString, i.e. whether it is a
+// 7-bit character whose [[cclass]] entry has the N bit set.
+fn c_is_num(c: u8) bool = {
+	if (c >= 0x80) {
+		return false;
+	};
+	return (cclass[c] & N) != 0;
+};
+
+// Reports whether 'c' may appear in a PrintableString, i.e. whether it is a
+// 7-bit character whose [[cclass]] entry has the P bit set.
+fn c_is_print(c: u8) bool = {
+	if (c >= 0x80) {
+		return false;
+	};
+	return (cclass[c] & P) != 0;
+};
+
+// Reports whether 'c' may appear in an IA5String, which is the case for every
+// byte that does not have the high bit set.
+fn c_is_ia5(c: u8) bool = c < 0x80;
+
+// Returns the number of bytes of the biggest complete utf8 chunk at the start
+// of 'buf'. A rune that is cut off at the end of 'buf' is not part of the
+// chunk, so the result may be smaller than len(buf). Returns invalid if the
+// chunk contains invalid utf8, if none of the last (up to four) bytes is
+// accepted by utf8::utf8sz, or if more bytes follow the last such byte than
+// its sequence is long.
+fn validutf8(buf: []u8) (size | invalid) = {
+	if (len(buf) == 0) {
+		return 0z;
+	};
+
+	// Only the last four bytes are inspected to find the start of a
+	// possibly incomplete trailing rune.
+	const min = if (len(buf) < 4) 0z else len(buf) - 4;
+
+	let lastvalid = 0z;
+	let lastsz = 0z;
+	for (let i = min; i < len(buf); i += 1) {
+		match (utf8::utf8sz(buf[i])) {
+		case utf8::invalid =>
+			// not the first byte of a sequence
+			yield;
+		case let s: size =>
+			lastsz = s;
+			lastvalid = i;
+		};
+	};
+
+	if (lastsz == 0) return invalid;
+
+	// The chunk is only cut at the last start byte if its sequence is
+	// really truncated. If there are at least as many bytes as the
+	// sequence needs, everything is validated, so that stray trailing
+	// bytes are reported as invalid instead of being silently held back.
+	const n = if (len(buf) - lastvalid >= lastsz) len(buf) else lastvalid;
+	if (utf8::validate(buf[..n]) is utf8::invalid) {
+		return invalid;
+	};
+
+	return n;
+};
+
+// Checks [[validutf8]] against every prefix of a buffer that holds runes of
+// one, two, three and four bytes, including the complete buffer, and against a
+// buffer whose last rune is broken.
+@test fn validutf8() void = {
+	let b: [_]u8 = [
+		0x55, 0x56, 0xd0, 0x98, 0xe0, 0xa4, 0xb9, 0xf0, 0x90, 0x8d, 0x88,
+	];
+	// runesat[i] is the expected result for the first i bytes of b
+	const runesat: [_]size = [0, 1, 2, 2, 4, 4, 4, 7, 7, 7, 7, 11];
+
+	// i == len(b) is included so that the complete buffer is checked too
+	for (let i = 0z; i <= len(b); i += 1) {
+		assert(validutf8(b[..i])! == runesat[i]);
+	};
+
+	// replace the last byte of the four byte rune with an ascii character
+	b[10] = 0x55;
+	assert(validutf8(b[..10])! == 7);
+	assert(validutf8(b) is invalid);
+};
+
+// An io::stream reader that returns only valid utf8 chunks on read.
+export type utf8stream = struct {
+ // io::stream header. [[utf8stream_reader]] casts the *io::stream it is
+ // called with back to *utf8stream.
+ stream: io::stream,
+ // decoder the string data is read from
+ d: *decoder,
+ // function that converts the string data to utf8, see [[newstrreader]]
+ strdec: *strdecoder,
+};
+
+// vtable of [[utf8stream]]. Only the reader is set explicitly.
+const utf8stream_vtable = io::vtable {
+ reader = &utf8stream_reader,
+ ...
+};
+
+// Reader of [[utf8stream]]. Forwards the read to the string decoder of the
+// stream. 'buf' must be at least 4 bytes long and the decoder must have a
+// current data element, otherwise the program is aborted.
+fn utf8stream_reader(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
+	// at least a rune must fit in buf
+	assert(len(buf) >= 4);
+
+	const us = s: *utf8stream;
+	if (us.d.cur is void) {
+		abort();
+	};
+
+	return us.strdec(us, buf);
+};
+
+// A function that reads string data from the decoder of 's', stores it in
+// 'buf' and returns the number of bytes stored, or io::EOF if there is no
+// more data. One such function exists for each string type supported by
+// [[newstrreader]].
+export type strdecoder = fn(
+ s: *utf8stream,
+ buf: []u8,
+) (size | io::EOF | io::error);
+
+// A [[strdecoder]] that passes the data on as is, without any checks.
+fn no_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
+	return dataread(s.d, buf);
+};
+
+// Reads data into 'buf' and checks every byte that was read with 'v'. Fails
+// with a wrapped invalid error on the first byte that is rejected by 'v'.
+fn char_decoder(
+	s: *utf8stream, buf: []u8,
+	v: *char_validator,
+) (size | io::EOF | io::error) = {
+	const nread = match (dataread(s.d, buf)?) {
+	case io::EOF =>
+		return io::EOF;
+	case let sz: size =>
+		yield sz;
+	};
+
+	for (let i = 0z; i < nread; i += 1) {
+		if (v(buf[i])) {
+			continue;
+		};
+		return wrap_err(invalid);
+	};
+	return nread;
+};
+
+// [[strdecoder]] for NumericString; accepts the bytes allowed by [[c_is_num]].
+fn num_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
+	return char_decoder(s, buf, &c_is_num);
+};
+
+// [[strdecoder]] for PrintableString; accepts the bytes allowed by
+// [[c_is_print]].
+fn print_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
+	return char_decoder(s, buf, &c_is_print);
+};
+
+// [[strdecoder]] for IA5String; accepts the bytes allowed by [[c_is_ia5]].
+fn ia5_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
+	return char_decoder(s, buf, &c_is_ia5);
+};
+
+// [[strdecoder]] for UTF8String. Reads data into 'buf' and returns the number
+// of bytes that form complete and valid utf8. The bytes of a rune that is cut
+// off at the end of the data read are handed back to the decoder with
+// dataunread.
+fn utf8_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
+	const nread = match (dataread(s.d, buf)?) {
+	case io::EOF =>
+		// there must not be any unread bytes left at the end
+		if (s.d.unbufn > 0) return wrap_err(invalid);
+		return io::EOF;
+	case let sz: size =>
+		yield sz;
+	};
+
+	const complete = match (validutf8(buf[..nread])) {
+	case invalid =>
+		return wrap_err(invalid);
+	case let c: size =>
+		yield c;
+	};
+
+	if (complete >= nread) {
+		return nread;
+	};
+
+	if (dataeof(s.d)) {
+		// string ends with incomplete rune
+		return wrap_err(invalid);
+	};
+	dataunread(s.d, buf[complete..nread]);
+	return complete;
+};
+
+// [[strdecoder]] for BMPString. The data is read as a sequence of 16 bit big
+// endian code units and every unit is stored as one utf8 encoded rune in
+// 'buf'. Surrogate code units (0xd800 to 0xdfff) are rejected with a wrapped
+// invalid error, since they would not result in valid utf8. The same error is
+// returned if less than two bytes could be read for a code unit.
+fn bmp_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
+	// TODO disallow control functions (X.690: 8.23.9)
+
+	let n = 0z;
+	let rbuf: [2]u8 = [0...];
+	for (true) {
+		match (dataread(s.d, rbuf)?) {
+		case let sz: size =>
+			if (sz < 2) return wrap_err(invalid);
+		case io::EOF =>
+			return if (n == 0) io::EOF else n;
+		};
+
+		const unit = endian::begetu16(rbuf);
+		if (unit >= 0xd800 && unit <= 0xdfff) {
+			return wrap_err(invalid);
+		};
+
+		let rb = utf8::encoderune(unit: u32: rune);
+		if (len(buf) - n < len(rb)) {
+			// no space left, keep the code unit for the next read
+			dataunread(s.d, rbuf);
+			return n;
+		};
+
+		buf[n..n + len(rb)] = rb;
+		n += len(rb);
+	};
+};
+
+// Universal string is an UTF32BE string. The data is read as a sequence of 32
+// bit big endian code points and every code point is stored as utf8 encoded
+// rune in 'buf'. Values above 0x10ffff and surrogates (0xd800 to 0xdfff) are
+// rejected with a wrapped invalid error, since they can not be represented as
+// valid utf8. The same error is returned if less than four bytes could be
+// read for a code point.
+fn universal_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
+	let n = 0z;
+	let rbuf: [4]u8 = [0...];
+	for (true) {
+		match (dataread(s.d, rbuf)?) {
+		case let sz: size =>
+			if (sz < 4) return wrap_err(invalid);
+		case io::EOF =>
+			return if (n == 0) io::EOF else n;
+		};
+
+		const cp = endian::begetu32(rbuf);
+		if (cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff)) {
+			return wrap_err(invalid);
+		};
+
+		let rb = utf8::encoderune(cp: rune);
+		if (len(buf) - n < len(rb)) {
+			// no space left, keep the code point for the next read
+			dataunread(s.d, rbuf);
+			return n;
+		};
+
+		buf[n..n + len(rb)] = rb;
+		n += len(rb);
+	};
+};
+
+// [[strdecoder]] for TeletexString (T.61). The data is read byte by byte,
+// converted with [[t61_chardecode]] and stored as utf8 in 'buf'. A combining
+// character is kept until the character that follows it has been read. The
+// bytes of a rune that does not fit into the rest of 'buf' are handed back to
+// the decoder with dataunread. Returns a wrapped invalid error if a byte
+// sequence can not be converted or if the data ends after a combining
+// character.
+fn t61_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
+	// holds a combining character and the character that follows it
+	let inbuf: [2]u8 = [0...];
+	let in = inbuf[..0];
+
+	let n = 0z;
+
+	for (true) {
+		let chr: [1]u8 = [0];
+		match (dataread(s.d, chr)?) {
+		case let sz: size =>
+			assert(sz == 1);
+			static append(in, chr[0]);
+		case io::EOF =>
+			if (len(in) > 0) return wrap_err(invalid);
+			if (n > 0) return n;
+			return io::EOF;
+		};
+
+		match (t61_chardecode(in)) {
+		case let r: rune =>
+			let raw = utf8::encoderune(r);
+			const bufremain = len(buf) - n;
+			// a rune that exactly fills the rest of buf still fits
+			if (len(raw) <= bufremain) {
+				buf[n..n + len(raw)] = raw[..];
+				n += len(raw);
+				in = inbuf[..0];
+			} else {
+				dataunread(s.d, in);
+				break;
+			};
+		case insufficient =>
+			// leave combining char in in
+			yield;
+		case invalid =>
+			return wrap_err(invalid);
+		};
+	};
+
+	return n;
+};
+
+// Creates an [[utf8stream]] that reads from 'd' with the string decoder that
+// belongs to the tag 't'. Returns invalid if 't' is none of the supported
+// string tags.
+fn newstrreader(d: *decoder, t: utag) (utf8stream | error) = {
+	const dec: *strdecoder = switch (t) {
+	case utag::NUMERIC_STRING => yield &num_decoder;
+	case utag::PRINTABLE_STRING => yield &print_decoder;
+	case utag::IA5_STRING => yield &ia5_decoder;
+	case utag::UTF8_STRING => yield &utf8_decoder;
+	case utag::TELETEX_STRING => yield &t61_decoder;
+	case utag::BMP_STRING => yield &bmp_decoder;
+	case utag::UNIVERSAL_STRING => yield &universal_decoder;
+	case => return invalid;
+	};
+
+	return utf8stream {
+		stream = &utf8stream_vtable,
+		d = d,
+		strdec = dec,
+		...
+	};
+};
+
+// Reads the header of the next data element, checks that it has the tag 't'
+// and returns an [[utf8stream]] for its content. 't' must be one of:
+// * utag::NUMERIC_STRING
+// * utag::PRINTABLE_STRING
+// * utag::IA5_STRING
+// * utag::UTF8_STRING
+// * utag::TELETEX_STRING
+// * utag::BMP_STRING
+// * utag::UNIVERSAL_STRING
+//
+// If 't' is any other tag, the error of [[newstrreader]] is returned instead
+// of aborting the program.
+export fn strreader(d: *decoder, t: utag) (utf8stream | error) = {
+	let dh = next(d)?;
+	expect_utag(dh, t)?;
+	return newstrreader(d, t)?;
+};
+
+// Reads a printable string into 'buf' and returns the number of bytes read.
+// Returns invalid if a byte is not allowed in a PrintableString.
+export fn read_printstr(d: *decoder, buf: []u8) (size | error) = {
+	const hd = next(d)?;
+	expect_utag(hd, utag::PRINTABLE_STRING)?;
+
+	const nread = read_bytes(d, buf)?;
+	for (let i = 0z; i < nread; i += 1) {
+		if (c_is_print(buf[i])) {
+			continue;
+		};
+		return invalid;
+	};
+	return nread;
+};
+
+// Reads an utf8 string into 'buf' and returns a str that borrows from buf.
+//
+// NOTE(review): every read is done with the rest of 'buf'. The reader of
+// [[utf8stream]] asserts that it is called with at least 4 bytes, so 'buf'
+// presumably needs some spare room beyond the length of the string; confirm
+// how io::read handles a shorter rest.
+export fn read_utf8str(d: *decoder, buf: []u8) (str | error) = {
+	const hd = next(d)?;
+	expect_utag(hd, utag::UTF8_STRING)?;
+
+	let reader = newstrreader(d, utag::UTF8_STRING)!;
+	let total = 0z;
+
+	for (true) {
+		match (io::read(&reader, buf[total..])) {
+		case let sz: size =>
+			total += sz;
+		case io::EOF =>
+			break;
+		case let e: io::error =>
+			return unwrap_err(e);
+		};
+	};
+
+	return strings::fromutf8(buf[..total])!;
+};
+
diff --git a/encoding/asn1/t61.ha b/encoding/asn1/t61.ha
new file mode 100644
index 00000000..d0e14ab8
--- /dev/null
+++ b/encoding/asn1/t61.ha
@@ -0,0 +1,534 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+// Conversion table for the T.61 bytes 0x00 to 0x7f. The table is indexed by
+// the T.61 byte; an entry of 0 marks a byte without a conversion, which
+// [[t61_chardecode]] reports as invalid. Two rows hold the 16 entries of one
+// high nibble, which is given in the trailing comment of the rows.
+//
+// https://en.wikipedia.org/wiki/ITU_T.61
+const t61toascii: [_]u8 = [
+// 0 1 2 3 4 5 6 7
+// 8 9 a b c d e f
+ 0, 0, 0, 0, 0, 0, 0, 0, // 0
+ 0, 0, 0x0a, 0, 0x0c, 0x0d, 0, 0, // 0
+ 0, 0, 0, 0, 0, 0, 0, 0, // 10
+ 0, 0, 0x1a, 0x1b, 0, 0, 0, 0, // 10
+ 0x20, 0x21, 0x22, 0, 0, 0x25, 0x26, 0x27, // 20
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, // 20
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 30
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, // 30
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, // 40
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, // 40
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, // 50
+ 0x58, 0x59, 0x5a, 0x5b, 0, 0x5d, 0, 0x5f, // 50
+ 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, // 60
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, // 60
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, // 70
+ 0x78, 0x79, 0x7a, 0, 0x7c, 0, 0, 0, // 70
+];
+
+// Conversion table for the T.61 bytes 0x80 to 0xff. The table is indexed by
+// the T.61 byte minus 0x80; an entry of '\u0000' marks a byte without a
+// conversion, which [[t61_chardecode]] reports as invalid. The comment above
+// each group of 16 entries is the T.61 byte of the first entry of the group.
+const t61toutf8: [_]rune = [
+ // 0x80
+ '\u0000', '\u0000', '\u0000', '\u0000',
+ '\u0000', '\u0000', '\u0000', '\u0000',
+ '\u0000', '\u0000', '\u0000', '\u008b',
+ '\u008c', '\u0000', '\u0000', '\u0000',
+
+ // 0x90
+ '\u0000', '\u0000', '\u0000', '\u0000',
+ '\u0000', '\u0000', '\u0000', '\u0000',
+ '\u0000', '\u0000', '\u0000', '\u009b',
+ '\u0000', '\u0000', '\u0000', '\u0000',
+
+ // 0xa0
+ '\u00a0', '\u00a1', '\u00a2', '\u00a3',
+ '\u0024', '\u00a5', '\u0023', '\u00a7',
+ '\u00a4', '\u0000', '\u0000', '\u00ab',
+ '\u0000', '\u0000', '\u0000', '\u0000',
+
+ // 0xb0
+ '\u00b0', '\u00b1', '\u00b2', '\u00b3',
+ '\u00d7', '\u00b5', '\u00b6', '\u00b7',
+ '\u00f7', '\u0000', '\u0000', '\u00bb',
+ '\u00bc', '\u00bd', '\u00be', '\u00bf',
+
+ // 0xc0 (0xc1 to 0xcf are handled as combining characters by
+ // t61_chardecode)
+ '\u0000', '\u0300', '\u0301', '\u0302',
+ '\u0303', '\u0304', '\u0306', '\u0307',
+ '\u0308', '\u0308', '\u030a', '\u0327',
+ '\u0332', '\u030b', '\u0328', '\u030c',
+
+ // 0xd0
+ '\u0000', '\u0000', '\u0000', '\u0000',
+ '\u0000', '\u0000', '\u0000', '\u0000',
+ '\u0000', '\u0000', '\u0000', '\u0000',
+ '\u0000', '\u0000', '\u0000', '\u0000',
+
+ // 0xe0
+ '\u2126', '\u00c6', '\u00d0', '\u00aa',
+ '\u0126', '\u0000', '\u0132', '\u013f',
+ '\u0141', '\u00d8', '\u0152', '\u00ba',
+ '\u00de', '\u0166', '\u014a', '\u0149',
+
+ // 0xf0
+ '\u0138', '\u00e6', '\u0111', '\u00f0',
+ '\u0127', '\u0131', '\u0133', '\u0140',
+ '\u0142', '\u00f8', '\u0153', '\u00df',
+ '\u00fe', '\u0167', '\u014b', '\u0000',
+];
+
+// NOTE(review): unfinished stub. For every byte of 'in' the rune is looked up
+// (in t61toutf8 for bytes with the high bit set, in t61toascii otherwise) and
+// then dropped. Nothing is written to 'out' and combining characters are not
+// handled. See [[t61_chardecode]] for the conversion that is used by the
+// string decoder.
+fn decode(out: []u8, in: []u8) void = {
+ for (let i = 0z; i < len(in); i += 1) {
+ const c = in[i];
+ const r: rune = if (c & 0x80 != 0) {
+ // TODO special cases
+ yield t61toutf8[c - 0x80];
+ } else {
+ const c = t61toascii[in[i]];
+ yield c: u32: rune;
+ };
+
+ // write r to out
+ };
+ return;
+};
+
+// Returned by [[t61_chardecode]] if the input is a combining character that
+// needs the character following it in order to be converted.
+export type insufficient = !void;
+
+// Converts the T.61 character 'in' to a rune. 'in' is either a single byte or
+// a combining character followed by the byte it is combined with. Returns
+// [[insufficient]] if 'in' consists of a combining character only and invalid
+// if there is no conversion for 'in'.
+export fn t61_chardecode(in: []u8) (rune | insufficient | invalid) = {
+	if (len(in) == 2) {
+		return t61_combine(in);
+	};
+
+	const chr = in[0];
+
+	if (chr < 0x80) {
+		const ascii = t61toascii[chr];
+		if (ascii == 0) {
+			return invalid;
+		};
+		return ascii: u32: rune;
+	};
+
+	const r = t61toutf8[chr - 0x80];
+	// 0xcc has a table entry, but is not accepted
+	if (r == '\u0000' || chr == 0xcc) {
+		return invalid;
+	};
+
+	// the remaining bytes from 0xc1 to 0xcf are combining characters
+	if (chr > 0xc0 && chr <= 0xcf) {
+		return insufficient;
+	};
+
+	return r;
+};
+
+// Combines the T.61 combining character in[0] with the character in[1] and
+// returns the resulting precomposed rune. Returns invalid if in[0] is not
+// one of the combining characters handled here or if there is no result for
+// the combination with in[1]. 'in' must hold at least two bytes. The comment
+// above each case names the combining rune that t61toutf8 lists for the byte.
+fn t61_combine(in: []u8) (rune | invalid) = {
+ const comb = in[0];
+ const in = in[1];
+ switch (comb) {
+ // U+0300, grave accent
+ case 0xc1 =>
+ switch (in: u32: rune) {
+ case 'A' =>
+ return '\u00c0';
+ case 'E' =>
+ return '\u00c8';
+ case 'I' =>
+ return '\u00cc';
+ case 'O' =>
+ return '\u00d2';
+ case 'U' =>
+ return '\u00d9';
+ case 'a' =>
+ return '\u00e0';
+ case 'e' =>
+ return '\u00e8';
+ case 'i' =>
+ return '\u00ec';
+ case 'o' =>
+ return '\u00f2';
+ case 'u' =>
+ return '\u00f9';
+ case =>
+ return invalid;
+ };
+ // U+0301, acute accent
+ case 0xc2 =>
+ switch (in: u32: rune) {
+ case 'A' =>
+ return '\u00c1';
+ case 'C' =>
+ return '\u0106';
+ case 'E' =>
+ return '\u00c9';
+ case 'I' =>
+ return '\u00cd';
+ case 'L' =>
+ return '\u0139';
+ case 'N' =>
+ return '\u0143';
+ case 'O' =>
+ return '\u00d3';
+ case 'R' =>
+ return '\u0154';
+ case 'S' =>
+ return '\u015a';
+ case 'U' =>
+ return '\u00da';
+ case 'Y' =>
+ return '\u00dd';
+ case 'Z' =>
+ return '\u0179';
+ case 'a' =>
+ return '\u00e1';
+ case 'c' =>
+ return '\u0107';
+ case 'e' =>
+ return '\u00e9';
+ case 'g' =>
+ return '\u0123';
+ case 'i' =>
+ return '\u00ed';
+ case 'l' =>
+ return '\u013a';
+ case 'n' =>
+ return '\u0144';
+ case 'o' =>
+ return '\u00f3';
+ case 'r' =>
+ return '\u0155';
+ case 's' =>
+ return '\u015b';
+ case 'u' =>
+ return '\u00fa';
+ case 'y' =>
+ return '\u00fd';
+ case 'z' =>
+ return '\u017a';
+ case =>
+ return invalid;
+ };
+ // U+0302, circumflex accent
+ case 0xc3 =>
+ switch (in: u32: rune) {
+ case 'A' =>
+ return '\u00c2';
+ case 'C' =>
+ return '\u0108';
+ case 'E' =>
+ return '\u00ca';
+ case 'G' =>
+ return '\u011c';
+ case 'H' =>
+ return '\u0124';
+ case 'I' =>
+ return '\u00ce';
+ case 'J' =>
+ return '\u0134';
+ case 'O' =>
+ return '\u00d4';
+ case 'S' =>
+ return '\u015c';
+ case 'U' =>
+ return '\u00db';
+ case 'W' =>
+ return '\u0174';
+ case 'Y' =>
+ return '\u0176';
+ case 'a' =>
+ return '\u00e2';
+ case 'c' =>
+ return '\u0109';
+ case 'e' =>
+ return '\u00ea';
+ case 'g' =>
+ return '\u011d';
+ case 'h' =>
+ return '\u0125';
+ case 'i' =>
+ return '\u00ee';
+ case 'j' =>
+ return '\u0135';
+ case 'o' =>
+ return '\u00f4';
+ case 's' =>
+ return '\u015d';
+ case 'u' =>
+ return '\u00fb';
+ case 'w' =>
+ return '\u0175';
+ case 'y' =>
+ return '\u0177';
+ case =>
+ return invalid;
+ };
+ // U+0303, tilde
+ case 0xc4 =>
+ switch (in: u32: rune) {
+ case 'A' =>
+ return '\u00c3';
+ case 'I' =>
+ return '\u0128';
+ case 'N' =>
+ return '\u00d1';
+ case 'O' =>
+ return '\u00d5';
+ case 'U' =>
+ return '\u0168';
+ case 'a' =>
+ return '\u00e3';
+ case 'i' =>
+ return '\u0129';
+ case 'n' =>
+ return '\u00f1';
+ case 'o' =>
+ return '\u00f5';
+ case 'u' =>
+ return '\u0169';
+ case =>
+ return invalid;
+ };
+ // U+0304, macron
+ case 0xc5 =>
+ switch (in: u32: rune) {
+ case 'A' =>
+ return '\u0100';
+ case 'E' =>
+ return '\u0112';
+ case 'I' =>
+ return '\u012a';
+ case 'O' =>
+ return '\u014c';
+ case 'U' =>
+ return '\u016a';
+ case 'a' =>
+ return '\u0101';
+ case 'e' =>
+ return '\u0113';
+ case 'i' =>
+ return '\u012b';
+ case 'o' =>
+ return '\u014d';
+ case 'u' =>
+ return '\u016b';
+ case =>
+ return invalid;
+ };
+ // U+0306, breve
+ case 0xc6 =>
+ switch (in: u32: rune) {
+ case 'A' =>
+ return '\u0102';
+ case 'G' =>
+ return '\u011e';
+ case 'U' =>
+ return '\u016c';
+ case 'a' =>
+ return '\u0103';
+ case 'g' =>
+ return '\u011f';
+ case 'u' =>
+ return '\u016d';
+ case =>
+ return invalid;
+ };
+ // U+0307, dot above
+ case 0xc7 =>
+ switch (in: u32: rune) {
+ case 'C' =>
+ return '\u010a';
+ case 'E' =>
+ return '\u0116';
+ case 'G' =>
+ return '\u0120';
+ case 'I' =>
+ return '\u0130';
+ case 'Z' =>
+ return '\u017b';
+ case 'c' =>
+ return '\u010b';
+ case 'e' =>
+ return '\u0117';
+ case 'g' =>
+ return '\u0121';
+ case 'z' =>
+ return '\u017c';
+ case =>
+ return invalid;
+ };
+ // U+0308, diaeresis
+ case 0xc8 =>
+ switch (in: u32: rune) {
+ case 'A' =>
+ return '\u00c4';
+ case 'E' =>
+ return '\u00cb';
+ case 'I' =>
+ return '\u00cf';
+ case 'O' =>
+ return '\u00d6';
+ case 'U' =>
+ return '\u00dc';
+ case 'Y' =>
+ return '\u0178';
+ case 'a' =>
+ return '\u00e4';
+ case 'e' =>
+ return '\u00eb';
+ case 'i' =>
+ return '\u00ef';
+ case 'o' =>
+ return '\u00f6';
+ case 'u' =>
+ return '\u00fc';
+ case 'y' =>
+ return '\u00ff';
+ case =>
+ return invalid;
+ };
+ // U+0308 as well; same results as for 0xc8
+ case 0xc9 =>
+ switch (in: u32: rune) {
+ case 'A' =>
+ return '\u00c4';
+ case 'E' =>
+ return '\u00cb';
+ case 'I' =>
+ return '\u00cf';
+ case 'O' =>
+ return '\u00d6';
+ case 'U' =>
+ return '\u00dc';
+ case 'Y' =>
+ return '\u0178';
+ case 'a' =>
+ return '\u00e4';
+ case 'e' =>
+ return '\u00eb';
+ case 'i' =>
+ return '\u00ef';
+ case 'o' =>
+ return '\u00f6';
+ case 'u' =>
+ return '\u00fc';
+ case 'y' =>
+ return '\u00ff';
+ case =>
+ return invalid;
+ };
+ // U+030a, ring above
+ case 0xca =>
+ switch (in: u32: rune) {
+ case 'A' =>
+ return '\u00c5';
+ case 'U' =>
+ return '\u016e';
+ case 'a' =>
+ return '\u00e5';
+ case 'u' =>
+ return '\u016f';
+ case =>
+ return invalid;
+ };
+ // U+0327, cedilla
+ case 0xcb =>
+ switch (in: u32: rune) {
+ case 'C' =>
+ return '\u00c7';
+ case 'G' =>
+ return '\u0122';
+ case 'K' =>
+ return '\u0136';
+ case 'L' =>
+ return '\u013b';
+ case 'N' =>
+ return '\u0145';
+ case 'R' =>
+ return '\u0156';
+ case 'S' =>
+ return '\u015e';
+ case 'T' =>
+ return '\u0162';
+ case 'c' =>
+ return '\u00e7';
+ case 'k' =>
+ return '\u0137';
+ case 'l' =>
+ return '\u013c';
+ case 'n' =>
+ return '\u0146';
+ case 'r' =>
+ return '\u0157';
+ case 's' =>
+ return '\u015f';
+ case 't' =>
+ return '\u0163';
+ case =>
+ return invalid;
+ };
+ // U+030b, double acute accent
+ case 0xcd =>
+ switch (in: u32: rune) {
+ case 'O' =>
+ return '\u0150';
+ case 'U' =>
+ return '\u0170';
+ case 'o' =>
+ return '\u0151';
+ case 'u' =>
+ return '\u0171';
+ case =>
+ return invalid;
+ };
+ // U+0328, ogonek
+ case 0xce =>
+ switch (in: u32: rune) {
+ case 'A' =>
+ return '\u0104';
+ case 'E' =>
+ return '\u0118';
+ case 'I' =>
+ return '\u012e';
+ case 'U' =>
+ return '\u0172';
+ case 'a' =>
+ return '\u0105';
+ case 'e' =>
+ return '\u0119';
+ case 'i' =>
+ return '\u012f';
+ case 'u' =>
+ return '\u0173';
+ case =>
+ return invalid;
+ };
+ // U+030c, caron
+ case 0xCf =>
+ switch (in: u32: rune) {
+ case 'C' =>
+ return '\u010c';
+ case 'D' =>
+ return '\u010e';
+ case 'E' =>
+ return '\u011a';
+ case 'L' =>
+ return '\u013d';
+ case 'N' =>
+ return '\u0147';
+ case 'R' =>
+ return '\u0158';
+ case 'S' =>
+ return '\u0160';
+ case 'T' =>
+ return '\u0164';
+ case 'Z' =>
+ return '\u017d';
+ case 'c' =>
+ return '\u010d';
+ case 'd' =>
+ return '\u010f';
+ case 'e' =>
+ return '\u011b';
+ case 'l' =>
+ return '\u013e';
+ case 'n' =>
+ return '\u0148';
+ case 'r' =>
+ return '\u0159';
+ case 's' =>
+ return '\u0161';
+ case 't' =>
+ return '\u0165';
+ case 'z' =>
+ return '\u017e';
+ case =>
+ return invalid;
+ };
+ // every other byte, including 0xcc, is not combined
+ case =>
+ return invalid;
+ };
+};
+
--
2.43.0