~sircmpwn/hare-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
9 3

[PATCH hare v4 1/6] asn1: add types and errors

Details
Message ID
<20240207185633.9327-1-apreiml@strohwolke.at>
DKIM signature
pass
Download raw message
Patch: +204 -0
Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---

 encoding/asn1/errors.ha |  68 ++++++++++++++++++++
 encoding/asn1/types.ha  | 136 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 204 insertions(+)
 create mode 100644 encoding/asn1/errors.ha
 create mode 100644 encoding/asn1/types.ha

diff --git a/encoding/asn1/errors.ha b/encoding/asn1/errors.ha
new file mode 100644
index 00000000..f79f5e84
--- /dev/null
+++ b/encoding/asn1/errors.ha
@@ -0,0 +1,68 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use errors;
use io;

// Invalid DER encoding.
export type invalid = !void;

// Unexpected data format.
export type badformat = !void;

// Premature EOF.
export type truncated = !void;

// Data does not fit into the encoder buffer.
export type overflow = !void;

// The errors declared by this module itself, i.e. the members of [[error]]
// that are not an [[io::error]].
type asn1error = !(invalid | badformat | overflow | truncated);

// Any error within the asn1 module.
export type error = !(...io::error | ...asn1error);


// Returns a human-readable description of the [[error]] 'e'. I/O errors are
// described by [[io::strerror]].
export fn strerror(e: error) str = {
	return match (e) {
	case invalid =>
		yield "Data encoding does not follow the DER format";
	case badformat =>
		yield "Unexpected data format";
	case truncated =>
		yield "Premature EOF";
	case overflow =>
		yield "Data does not fit into the encoder buffer";
	case let ioerr: io::error =>
		yield io::strerror(ioerr);
	};
};

// Converts 'e' into an [[io::error]], so that it can be returned by stream
// implementations of this module. I/O errors are passed through unchanged.
// Errors declared by this module are stored within the data area of an
// [[errors::opaque_]] and can be retrieved using [[unwrap_err]].
fn wrap_err(e: error) io::error = {
	match (e) {
	case let e: io::error =>
		return e;
	case let e: asn1error =>
		// Only an asn1error is stored, so the data area is accessed
		// as such. Accessing it as a full [[error]] would touch more
		// bytes than the assertion below accounts for.
		static assert(size(asn1error) <= size(errors::opaque_data));
		let w = errors::opaque_ { strerror = &wrap_strerror, ... };
		let ptr = &w.data: *asn1error;
		*ptr = e;
		return w;
	};
};

// strerror callback of the opaque errors created by [[wrap_err]]. 'err' holds
// the asn1error stored by [[wrap_err]].
fn wrap_strerror(err: *errors::opaque_data) const str = {
	let e = err: *asn1error;
	return strerror(*e);
};

// Unwrap [[io::error]] returned by readers into [[error]].
//
// NOTE(review): every [[errors::opaque_]] is assumed to originate from
// wrap_err; an opaque error created elsewhere would be misinterpreted.
export fn unwrap_err(e: io::error) error = {
	match (e) {
	case let e: errors::opaque_ =>
		// read back what wrap_err stored
		let ptr = &e.data: *asn1error;
		return *ptr;
	case let e: io::error =>
		return e;
	};
};
diff --git a/encoding/asn1/types.ha b/encoding/asn1/types.ha
new file mode 100644
index 00000000..be68a065
--- /dev/null
+++ b/encoding/asn1/types.ha
@@ -0,0 +1,136 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use errors;
use fmt;
use io;
use memio;


// Tag classes as specified in the standard. The encoder stores this value in
// the two most significant bits of the first identifier byte.
export type class = enum u8 {
	UNIVERSAL = 0x0,
	APPLICATION = 0x1,
	CONTEXT = 0x2,
	PRIVATE = 0x3,
};

// Returns the name of the tag class 'c'.
export fn strclass(c: class) str = {
	return switch (c) {
	case class::UNIVERSAL =>
		yield "UNIVERSAL";
	case class::APPLICATION =>
		yield "APPLICATION";
	case class::CONTEXT =>
		yield "CONTEXT_SPECIFIC";
	case class::PRIVATE =>
		yield "PRIVATE";
	};
};

// Universal tags as defined in X.690. Not all are supported by this
// implementation.
export type utag = enum u8 {
	RESERVED = 0x00,
	BOOLEAN = 0x01,
	INTEGER = 0x02,
	BITSTRING = 0x03,
	OCTET_STRING = 0x04,
	NULL = 0x05,
	OID = 0x06,
	OBJECT_DESCRIPTOR = 0x07,
	EXTERNAL = 0x08,
	REAL = 0x09,
	ENUMERATED = 0x0a,
	EMBEDDED_PDV = 0x0b,
	UTF8_STRING = 0x0c,
	RELATIVE_OID = 0x0d,
	TIME = 0x0e,
	RESERVED2 = 0x0f,
	SEQUENCE = 0x10,
	SET = 0x11,
	NUMERIC_STRING = 0x12,
	PRINTABLE_STRING = 0x13,
	TELETEX_STRING = 0x14, // T61String
	VIDEOTEX_STRING = 0x15,
	IA5_STRING = 0x16,
	UTC_TIME = 0x17,
	GENERALIZED_TIME = 0x18,
	GRAPHIC_STRING = 0x19,
	VISIBLE_STRING = 0x1a, // iso646String
	GENERAL_STRING = 0x1b,
	UNIVERSAL_STRING = 0x1c,
	// NOTE(review): the standard appears to assign tag 29 to
	// CHARACTER STRING; confirm whether UNKNOWN is intended here.
	UNKNOWN = 0x1d,
	BMP_STRING = 0x1e,
	DATE = 0x1f,
	TIME_OF_DAY = 0x20,
	DATE_TIME = 0x21,
	DURATION = 0x22,
	OID_IRI = 0x23,
	OID_RELATIVE_IRI = 0x24,
};

// String representation of the tag described by 'dh'.
//
// For the universal class the name of the matching [[utag]] entry is returned,
// or "UNKNOWN" if the tag id has no named entry. For any other class the result
// has the form "[CLASS id]", with the id printed in hexadecimal and the class
// name omitted for context specific tags. In this case the result is stored in
// a statically allocated buffer and will be overwritten on the next call.
export fn strtag(dh: head) str = {
	static let tagstrbuf: [128]u8 = [0...];

	if (dh.class != class::UNIVERSAL) {
		let tagstr = memio::fixed(tagstrbuf);

		fmt::fprint(&tagstr, "[")!;
		if (dh.class != class::CONTEXT) {
			fmt::fprintf(&tagstr, "{} ", strclass(dh.class))!;
		};
		fmt::fprintf(&tagstr, "{:x}]", dh.tagid)!;
		return memio::string(&tagstr)!;
	};

	// universal tags are defined as u8, larger ids can not match any
	if (dh.tagid >> 8 != 0) {
		return "UNKNOWN";
	};

	switch (dh.tagid: u8) {
	case utag::BOOLEAN =>
		return "BOOLEAN";
	case utag::INTEGER =>
		return "INTEGER";
	case utag::BITSTRING =>
		return "BITSTRING";
	case utag::OCTET_STRING =>
		return "OCTET_STRING";
	case utag::NULL =>
		return "NULL";
	case utag::OID =>
		return "OBJECT_IDENTIFIER";
	case utag::OBJECT_DESCRIPTOR =>
		return "OBJECT_DESCRIPTOR";
	case utag::EXTERNAL =>
		return "EXTERNAL";
	case utag::REAL =>
		return "REAL";
	case utag::ENUMERATED =>
		return "ENUMERATED";
	case utag::EMBEDDED_PDV =>
		return "EMBEDDED_PDV";
	case utag::UTF8_STRING =>
		return "UTF8_STRING";
	case utag::RELATIVE_OID =>
		return "RELATIVE_OID";
	case utag::TIME =>
		return "TIME";
	case utag::SEQUENCE =>
		return "SEQUENCE";
	case utag::SET =>
		return "SET";
	case utag::NUMERIC_STRING =>
		return "NUMERIC_STRING";
	case utag::PRINTABLE_STRING =>
		return "PRINTABLE_STRING";
	case utag::TELETEX_STRING =>
		return "TELETEX_STRING";
	case utag::VIDEOTEX_STRING =>
		return "VIDEOTEX_STRING";
	case utag::IA5_STRING =>
		return "IA5_STRING";
	case utag::UTC_TIME =>
		return "UTC_TIME";
	case utag::GENERALIZED_TIME =>
		return "GENERALIZED_TIME";
	case utag::GRAPHIC_STRING =>
		return "GRAPHIC_STRING";
	case utag::VISIBLE_STRING =>
		return "VISIBLE_STRING";
	case utag::GENERAL_STRING =>
		return "GENERAL_STRING";
	case utag::UNIVERSAL_STRING =>
		return "UNIVERSAL_STRING";
	case utag::BMP_STRING =>
		return "BMP_STRING";
	case utag::DATE =>
		return "DATE";
	case utag::TIME_OF_DAY =>
		return "TIME_OF_DAY";
	case utag::DATE_TIME =>
		return "DATE_TIME";
	case utag::DURATION =>
		return "DURATION";
	case utag::OID_IRI =>
		return "OID_IRI";
	case utag::OID_RELATIVE_IRI =>
		return "OID_RELATIVE_IRI";
	case =>
		// RESERVED, RESERVED2, UNKNOWN and ids without an entry
		return "UNKNOWN";
	};
};
-- 
2.43.0

[PATCH hare v4 2/6] asn1: encoder

Details
Message ID
<20240207185633.9327-2-apreiml@strohwolke.at>
In-Reply-To
<20240207185633.9327-1-apreiml@strohwolke.at> (view parent)
DKIM signature
pass
Download raw message
Patch: +569 -0
Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---

v3: the encoder now uses io operations on 'mem' except in encode

 encoding/asn1/+test/encoder_test.ha | 147 ++++++++++
 encoding/asn1/encoder.ha            | 422 ++++++++++++++++++++++++++++
 2 files changed, 569 insertions(+)
 create mode 100644 encoding/asn1/+test/encoder_test.ha
 create mode 100644 encoding/asn1/encoder.ha

diff --git a/encoding/asn1/+test/encoder_test.ha b/encoding/asn1/+test/encoder_test.ha
new file mode 100644
index 00000000..6ac2a21a
--- /dev/null
+++ b/encoding/asn1/+test/encoder_test.ha
@@ -0,0 +1,147 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use bytes;
use errors;
use io;
use memio;
use os;
use strings;
use time::date;
use types;


// Checks the encoding of tag ids that need the multi byte identifier form and
// that the decoder reads back the same tag id.
@test fn write_id() void = {
	let buf = memio::dynamic();
	// the dynamic buffer is heap allocated and must be released
	defer io::close(&buf)!;
	let e = derencoder(&buf);

	// 0x2aa is split into the 7 bit groups 0x05 and 0x2a
	write_fixedprim(&e, class::UNIVERSAL, 0x2aa, [0x00])!;
	encode(&e)!;
	assert(bytes::equal([0x1f, 0x85, 0x2a, 0x01, 0x00],
			memio::buffer(&buf)));

	io::seek(&buf, 0, io::whence::SET)!;
	let d = derdecoder(&buf);
	let h = peek(&d)!;
	assert(h.tagid == 0x2aa);

	let buf = memio::dynamic();
	defer io::close(&buf)!;
	let e = derencoder(&buf);

	// largest tag id that can be represented
	write_fixedprim(&e, class::UNIVERSAL, types::U32_MAX, [0x00])!;
	encode(&e)!;
	assert(bytes::equal([0x1f, 0x8f, 0xff, 0xff, 0xff, 0x7f, 0x01, 0x00],
			memio::buffer(&buf)));

	io::seek(&buf, 0, io::whence::SET)!;
	let d = derdecoder(&buf);
	let h = peek(&d)!;
	assert(h.tagid == types::U32_MAX);
};

// Writes a primitive entry of unknown size using create_prim and checks that
// the size placeholder is minimized by the encoder.
@test fn write_prim() void = {
	let scratch = memio::dynamic();
	defer io::close(&scratch)!;
	let out = memio::dynamic();
	defer io::close(&out)!;

	let e = derencoder(&scratch);
	create_prim(&e, class::UNIVERSAL, utag::INTEGER)!;
	write(&e, [0x01, 0x05, 0x07])!;
	finish_prim(&e);

	const written = encodeto(&e, &out)!;
	assert(written == 5);

	const expected: [_]u8 = [0x02, 0x03, 0x01, 0x05, 0x07];
	assert(bytes::equal(memio::buffer(&out), expected));
};

// Checks the short form and the long form of the length encoding.
@test fn encode_dsz() void = {
	// sizes below 128 use a single byte
	assert(bytes::equal(encode_dsz(0x7f), [0x7f]));

	// larger sizes are prefixed by the amount of size bytes
	assert(bytes::equal(encode_dsz(0x8f), [0x81, 0x8f]));
	assert(bytes::equal(encode_dsz(0xff), [0x81, 0xff]));
	assert(bytes::equal(encode_dsz(0x100), [0x82, 0x01, 0x00]));
};

// Encodes a sequence that contains a boolean and a nested sequence.
@test fn write_seq() void = {
	let scratch = memio::dynamic();
	defer io::close(&scratch)!;
	let out = memio::dynamic();
	defer io::close(&out)!;

	let e = derencoder(&scratch);

	// outer sequence
	create_seq(&e)!;
	write_bool(&e, false)!;

	// inner sequence
	create_seq(&e)!;
	write_int(&e, [0x01, 0x02, 0x03])!;
	finish_seq(&e);

	finish_seq(&e);

	const written = encodeto(&e, &out)!;
	assert(written == 12);

	const expected: [_]u8 = [
		0x30, 0x0a, // seq
		0x01, 0x01, 0x00, // bool
		0x30, 0x05, // seq
		0x02, 0x03, 0x01, 0x02, 0x03, // int
	];
	assert(bytes::equal(memio::buffer(&out), expected));
};

// Checks that 'true' is encoded as 0xff.
@test fn write_bool() void = {
	let out = memio::dynamic();
	defer io::close(&out)!;

	let scratch = memio::dynamic();
	defer io::close(&scratch)!;

	let e = derencoder(&scratch);
	write_bool(&e, true)!;
	encodeto(&e, &out)!;

	const expected: [_]u8 = [0x01, 0x01, 0xff];
	assert(bytes::equal(memio::buffer(&out), expected));
};

// Checks that integers are reduced to their shortest representation before
// they are encoded. Both buffers are reset and a new encoder is created
// before each case.
@test fn write_int() void = {
	let dest = memio::dynamic();
	defer io::close(&dest)!;
	let buf = memio::dynamic();
	defer io::close(&buf)!;

	let enc = derencoder(&buf);

	// positive: leading zeros are dropped, except the one that keeps the
	// sign bit unset
	write_int(&enc, [0x00, 0x00, 0x00, 0x00, 0x80])!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x02, 0x02, 0x00, 0x80]));

	memio::reset(&dest);
	memio::reset(&buf);
	let enc = derencoder(&buf);

	// negative: leading 0xff bytes are dropped as long as the sign bit
	// stays set
	write_int(&enc, [0xff, 0xff, 0xff, 0x80, 0x10])!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x02, 0x02, 0x80, 0x10]));

	memio::reset(&dest);
	memio::reset(&buf);
	let enc = derencoder(&buf);

	// zero is encoded using a single byte
	write_int(&enc, [0x00, 0x00, 0x00])!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x02, 0x01, 0x00]));

	memio::reset(&dest);
	memio::reset(&buf);
	let enc = derencoder(&buf);

	// unsigned with the highest bit set: a zero byte is prepended
	write_uint(&enc, [0x8f, 0x01])!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x02, 0x03, 0x00, 0x8f, 0x01]));
};

diff --git a/encoding/asn1/encoder.ha b/encoding/asn1/encoder.ha
new file mode 100644
index 00000000..372f989a
--- /dev/null
+++ b/encoding/asn1/encoder.ha
@@ -0,0 +1,422 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use bytes;
use endian;
use errors;
use io;
use math::{bit_size_u8,bit_size_u32};
use memio;
use strings;
use time::date;
use types;


// Type used for the size of the data of an entry.
export type datasz = u32; // XXX: might want to use size here
// Functions that store and load a [[datasz]] in big endian byte order.
let szput = &endian::beputu32;
let szget = &endian::begetu32;
// Largest value of [[datasz]].
def DATASZ_MAX = types::U32_MAX;

// The maximum header size possible for u32 tag ids: the first identifier byte,
// up to 5 more bytes for the tag id, the size prefix and the size itself.
export def MAXHEADSZ = 1 + 5 + 1 + size(datasz);

// The maximum header size possible for entries of [[utag]]: one identifier
// byte, the size prefix and the size itself.
export def MAXUTAGHEADSZ = 1 + 1 + size(datasz);

// State of a DER encoder created by [[derencoder]] or [[derencoder_nested]].
export type encoder = struct {
	// Buffer that receives the written entries.
	mem: *memio::stream,
	// Position of 'mem' at the time the encoder was created.
	start: io::off,
	// Amount of bytes written by this encoder, relative to 'start'.
	pos: size,
	// Stack of the open constructed entries. Each element holds the
	// position of the size placeholder (relative to 'start') and the
	// size of the content accumulated so far.
	bt: [MAX_CONS_DEPTH](size, datasz),
	// Amount of used elements in 'bt'.
	btn: size,

	// Value of 'pos' where the data of the current primitive starts.
	cur_dpos: size,
	// A primitive entry has been started and not been finished yet.
	cur_prim: bool,
	// The current primitive has been written with a known size and
	// therefore has no size placeholder.
	cur_fixed: bool,

	// Set by [[derencoder_nested]]: the writer this encoder is nested in.
	parent: nullable *bytewstream,
};

// Creates a DER encoder. Constructed values are opened using the create_*
// functions; primitive values are written using functions whose names start
// with write_ or end with writer. Once all entries have been written, the
// result is produced using [[encode]] or [[encodeto]].
//
// 'mem' buffers the written data until it is encoded. Each entry has a maximum
// overhead of [[MAXUTAGHEADSZ]] if only the functions provided here are used,
// or [[MAXHEADSZ]] if custom tag ids are in use. The encoder does not close
// 'mem' after use; managing its lifetime is the caller's responsibility.
//
// Taking 'mem' as a memio::stream lets the caller decide whether to use a
// statically or a dynamically allocated buffer.
export fn derencoder(mem: *memio::stream) encoder = {
	// entries are written starting at the current position of 'mem'
	const origin = io::tell(mem)!;
	return encoder {
		mem = mem,
		start = origin,
		...
	};
};

// Creates a DER encoder for data that is nested within the entry written by
// 'b'. The nested encoder shares the buffer of the encoder that 'b' belongs
// to and starts at its current position.
export fn derencoder_nested(b: *bytewstream) encoder = {
	const shared = b.e.mem;
	const origin = io::tell(shared)!;
	return encoder {
		mem = shared,
		start = origin,
		parent = b,
		...
	};
};

// Appends 'buf' to the buffer of the encoder and advances its position.
// Returns [[overflow]] if the position would exceed DATASZ_MAX or if the
// buffer is not able to store all of 'buf'.
fn write(e: *encoder, buf: []u8) (void | overflow) = {
	const want = len(buf);
	if (want > (DATASZ_MAX - e.pos)) {
		return overflow;
	};

	const written = match (io::write(e.mem, buf)) {
	case let n: size =>
		yield n;
	case errors::overflow =>
		return overflow;
	case =>
		// writing to mem does not throw any other errors
		abort();
	};

	// short writes happen, if a fixed e.mem reaches its end
	if (written < want) {
		return overflow;
	};
	e.pos += want;
};

// Writes the identifier bytes of an entry: the class 'c', the constructed flag
// 'cons' and the tag id 't'. Tag ids below 31 are stored within the first
// byte. Larger ids are written in chunks of 7 bits, most significant chunk
// first, with the highest bit set on every byte except the last one.
//
// The amount of identifier bytes is added to the size of the enclosing
// constructed entry, if any.
fn write_id(e: *encoder, c: class, t: u32, cons: bool) (void | overflow) = {
	let head: u8 = c << 6;
	if (cons) {
		head |= (1 << 5);
	};

	if (t < 31) {
		bt_add_sz(e, 1);
		return write(e, [head | t: u8]);
	};

	// amount of continuation bytes that precede the last byte of the id
	const bsz = bit_size_u32(t);
	const n = ((bsz + 6) / 7) - 1;

	// The enclosing entry must account for every identifier byte, as it is
	// done for the single byte form above: the leading byte, 'n'
	// continuation bytes and the final byte.
	bt_add_sz(e, n: size + 2);

	write(e, [head | 0x1f])?;
	for (let i = 0z; i < n; i += 1) {
		write(e, [0x80 | (t >> ((n - i) * 7)): u8])?;
	};
	write(e, [t: u8 & 0x7f])?;
};

// Writes a complete primitive entry whose data 'b' is known up front. The
// size is encoded directly, hence no placeholder is required. A primitive
// that is still open is finished first.
fn write_fixedprim(e: *encoder, c: class, t: u32, b: []u8) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};
	e.cur_prim = true;
	e.cur_fixed = true;

	const datalen = len(b);
	write_id(e, c, t, false)?;
	write(e, encode_dsz(datalen))?;
	write(e, b)?;

	// account for size bytes and data in the enclosing entry
	bt_add_dsz(e, datalen: datasz);
};

// Starts a primitive entry of yet unknown size. A placeholder is written for
// the size, which is filled in by [[finish_prim]]. A primitive that is still
// open is finished first.
fn create_prim(e: *encoder, class: class, tag: u32) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};
	e.cur_prim = true;
	e.cur_fixed = false;

	write_id(e, class, tag, false)?;

	// size placeholder: prefix announcing size(datasz) bytes, followed
	// by zeroed size bytes
	let placeholder: [1 + size(datasz)]u8 = [0...];
	placeholder[0] = 0x80 | size(datasz): u8;
	write(e, placeholder)?;

	// the data starts right after the placeholder
	e.cur_dpos = e.pos;
};

// Closes the current primitive entry. If the entry has been started using
// [[create_prim]], the size of the data written since is stored into the size
// placeholder and added to the enclosing constructed entry, if any. Entries
// written with a known size need no further processing.
fn finish_prim(e: *encoder) void = {
	e.cur_prim = false;
	if (e.pos == 0 || e.cur_fixed) {
		return;
	};

	// restore the write position of mem when done
	const pos = io::tell(e.mem)!;
	defer io::seek(e.mem, pos, io::whence::SET)!;

	// write back size to placeholder. The size bytes are the last
	// size(datasz) bytes in front of the data.
	const dszpos = e.start: size + e.cur_dpos - size(datasz);
	const dsz = e.pos - e.cur_dpos;
	let dszbuf: [size(datasz)]u8 = [0...];
	szput(dszbuf, dsz: datasz);

	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
	io::write(e.mem, dszbuf)!;

	bt_add_dsz(e, dsz: datasz);
};

// Pushes a new element onto the backtrack stack. 'pos' is the position of
// the size placeholder of the constructed entry; its size starts at 0.
// Returns [[overflow]] if the stack is exhausted.
//
// NOTE(review): the check below rejects the push when only one free slot is
// left, so the last element of 'bt' is never used; confirm this is intended.
fn push_bt(e: *encoder, pos: size) (void | overflow) = {
	const slot = e.btn;
	if (slot + 1 >= len(e.bt)) {
		return overflow;
	};

	e.bt[slot] = (pos, 0);
	e.btn = slot + 1;
};

// Adds 'sz' to the size of the innermost open constructed entry. Does nothing
// if no constructed entry is open.
fn bt_add_sz(e: *encoder, sz: size) void = {
	if (e.btn == 0) {
		return;
	};
	const top = e.btn - 1;
	e.bt[top].1 += sz: datasz;
};

// Adds the data size 'sz' plus the amount of bytes required to encode 'sz'
// to the size of the innermost open constructed entry, if any.
fn bt_add_dsz(e: *encoder, sz: datasz) void = {
	if (e.btn > 0) {
		bt_add_sz(e, lensz(sz) + sz);
	};
};

// Removes the top element from the backtrack stack and returns it. The freed
// slot is cleared.
fn pop_bt(e: *encoder) (size, datasz) = {
	const top = e.btn - 1;
	const entry = e.bt[top];
	e.bt[top] = (0, 0);
	e.btn = top;
	return entry;
};

// Returns the amount of bytes required to encode the size 'l': a single byte
// for sizes below 128, otherwise one prefix byte plus the bytes of 'l'.
fn lensz(l: datasz) u8 = {
	if (l < 128) {
		return 1;
	};
	return 1 + (bit_size_u32(l) + 7) / 8;
};

// Encodes the data size 'sz' using the least amount of bytes. The result is
// stored in a statically allocated buffer and will be overwritten on the next
// call.
fn encode_dsz(sz: size) []u8 = {
	static let out: [size(datasz) + 1]u8 = [0...];

	// short form: the size itself
	if (sz < 128) {
		out[0] = sz: u8;
		return out[..1];
	};

	// long form: amount of size bytes, followed by the size in big endian
	const total = lensz(sz: datasz);
	out[0] = 0x80 | (total - 1);

	// fill in the size bytes, starting with the least significant one
	let rest = sz;
	for (let idx: size = total - 1; rest > 0; idx -= 1) {
		out[idx] = rest: u8;
		rest >>= 8;
	};

	return out[..total];
};

// Opens an explicit constructed entry of class 'c' and tag id 'tag'. The entry
// must be closed using [[finish_explicit]].
export fn create_explicit(e: *encoder, c: class, tag: u32) (void | overflow) = {
	return create_cons(e, c, tag);
};

// Closes an explicit constructed entry opened by [[create_explicit]].
export fn finish_explicit(e: *encoder) void = {
	finish_cons(e);
};

// Opens a constructed entry. The identifier and a size placeholder are
// written and the position of the size bytes is pushed onto the backtrack
// stack, so that [[finish_cons]] is able to fill them in. A primitive that is
// still open is finished first.
fn create_cons(e: *encoder, class: class, tagid: u32) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};
	write_id(e, class, tagid, true)?;

	// size placeholder: prefix announcing size(datasz) bytes, followed
	// by zeroed size bytes
	let placeholder: [1 + size(datasz)]u8 = [0...];
	placeholder[0] = 0x80 | size(datasz): u8;
	write(e, placeholder)?;

	// remember where the size bytes start
	const dszpos = e.pos - size(datasz);
	push_bt(e, dszpos)?;
};

// Closes the innermost open constructed entry. The accumulated size of its
// content is stored into the size placeholder and, including the bytes
// required to encode the size, added to the enclosing constructed entry, if
// any. A primitive that is still open is finished first.
fn finish_cons(e: *encoder) void = {
	if (e.cur_prim) {
		finish_prim(e);
	};

	let (dszpos, sz) = pop_bt(e);
	let lbuf: [size(datasz)]u8 = [0...];
	szput(lbuf, sz);

	// restore the write position of mem when done
	const pos = io::tell(e.mem)!;
	defer io::seek(e.mem, pos, io::whence::SET)!;

	// dszpos is relative to the start of this encoder within mem
	dszpos += e.start: size;
	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
	io::write(e.mem, lbuf)!;
	bt_add_dsz(e, sz);
};

// Opens a sequence. It must be closed using [[finish_seq]].
export fn create_seq(e: *encoder) (void | overflow) = {
	return create_cons(e, class::UNIVERSAL, utag::SEQUENCE);
};

// Closes a sequence opened by [[create_seq]].
export fn finish_seq(e: *encoder) void = {
	finish_cons(e);
};

// Writes the boolean 'b'. True is stored as 0xff and false as 0x00.
export fn write_bool(e: *encoder, b: bool) (void | overflow) = {
	const content: u8 = if (b) 0xff else 0x00;
	return write_fixedprim(e, class::UNIVERSAL, utag::BOOLEAN, [content]);
};

// Writes a null value, which is an entry without data.
export fn write_null(e: *encoder) (void | overflow) = {
	return write_fixedprim(e, class::UNIVERSAL, utag::NULL, []);
};

// An [[io::stream]] that writes the bytes it receives as data of a primitive
// entry of the encoder 'e'.
export type bytewstream = struct {
	stream: io::stream,
	e: *encoder,
};

// Starts a primitive entry of class 'c' and tag id 'tagid' and returns a
// stream that appends everything written to it to the data of that entry.
fn bytewriter(e: *encoder, c: class, tagid: u32) (bytewstream | overflow) = {
	create_prim(e, c, tagid)?;

	const w = bytewstream {
		stream = &bytewriter_vtable,
		e = e,
		...
	};
	return w;
};

// Stream operations of [[bytewstream]]. Only writing is provided.
const bytewriter_vtable = io::vtable {
	writer = &bytewriter_write,
	...
};

// Write function of [[bytewstream]]. 'buf' is either written completely or
// an [[overflow]] wrapped as [[io::error]] is returned.
fn bytewriter_write(s: *io::stream, buf: const []u8) (size | io::error) = {
	const bw = s: *bytewstream;
	match (write(bw.e, buf)) {
	case void =>
		return len(buf);
	case overflow =>
		return wrap_err(overflow);
	};
};

// Creates an io::writer that adds the written bytes as an OctetString.
export fn octetstrwriter(e: *encoder) (bytewstream | overflow) =
	bytewriter(e, class::UNIVERSAL, utag::OCTET_STRING);

// Writes the integer 'n', which must be given in big endian byte order. The
// highest bit of the first byte is the sign. Redundant leading bytes are
// dropped before the value is written.
export fn write_int(e: *encoder, n: []u8) (void | overflow) = {
	const neg = (n[0] & 0x80) == 0x80;
	// leading bytes of this value carry no information besides the sign
	const pad: u8 = if (neg) 0xff else 0x00;

	// compact according to X.690 Chapt. 8.3.2: skip a leading byte as long
	// as it is a padding byte and the sign bit of the following byte
	// still represents the sign.
	let first = 0z;
	for (first < len(n) - 1; first += 1) {
		const nextneg = (n[first + 1] & 0x80) == 0x80;
		if (n[first] != pad || nextneg != neg) {
			break;
		};
	};

	return write_fixedprim(e, class::UNIVERSAL, utag::INTEGER, n[first..]);
};

// Writes an integer assuming 'n' is unsigned. 'n' is expected in big endian
// byte order.
export fn write_uint(e: *encoder, n: []u8) (void | overflow) = {
	const highbit = (n[0] & 0x80) != 0;
	if (!highbit) {
		// can not be mistaken as negative
		return write_int(e, n);
	};

	// prepend 0 so that the highest valued bit is not interpreted as sign
	create_prim(e, class::UNIVERSAL, utag::INTEGER)?;
	write(e, [0])?;
	write(e, n)?;
	finish_prim(e);
};

// Writes the UTF-8 representation of 's' as Utf8String.
export fn write_utf8str(e: *encoder, s: str) (void | overflow) = {
	const raw = strings::toutf8(s);
	return write_fixedprim(e, class::UNIVERSAL, utag::UTF8_STRING, raw);
};

// Encodes the entries written so far and returns the result as a buffer that
// is borrowed from the 'mem' provided to [[derencoder]].
//
// The encoding is done in place: every size placeholder is replaced by the
// shortest possible size encoding and the following bytes are moved forward
// accordingly. The remainder of the buffer is zeroed. All constructed entries
// must have been finished before calling this function. If the encoder has
// been created by [[derencoder_nested]], the size of the result is added to
// the position of the parent encoder.
export fn encode(e: *encoder) ([]u8 | io::error) = {
	assert(e.btn == 0);
	assert(e.start >= 0);

	if (e.cur_prim) {
		finish_prim(e);
	};

	let n = 0z;
	let buf = memio::buffer(e.mem)[e.start..];

	// iterate entries to copy tag ids and minimize data sizes. 't' is the
	// write index and 'i' is the read index; 't' never exceeds 'i'.
	let t = 0z;
	for (let i = 0z; i < e.pos) { // TODO cast seems off
		// encode id
		const id = buf[i];
		buf[t] = id;
		t += 1;
		i += 1;

		const cons = (id >> 5) & 1 == 1;
		if ((id & 0b11111) == 0b11111) {
			// id spans multiple bytes; the last one has the
			// highest bit unset
			let id: u8 = 0x80;
			for (id & 0x80 == 0x80) {
				id = buf[i];
				buf[t] = id;
				t += 1;
				i += 1;
			};
		};

		// encode dsz
		let dsz: datasz = 0;
		let l = buf[i];
		i += 1;
		if (l < 128) {
			// data size fits in a single byte
			dsz = l;
			buf[t] = l;
			t += 1;
		} else {
			// decode multibyte size and minimize, since not all
			// placeholder bytes may have been used.
			const dn = l & 0x7f;
			for (let j = 0z; j < dn; j += 1) {
				dsz <<= 8;
				dsz |= buf[i];
				i += 1;
			};

			let dszbuf = encode_dsz(dsz);
			buf[t..t + len(dszbuf)] = dszbuf;
			t += len(dszbuf);
		};

		// the content of a constructed entry consists of entries,
		// which are handled by the following iterations
		if (cons) {
			continue;
		};

		// write data of primitive fields
		buf[t..t+dsz] = buf[i..i+dsz];
		t += dsz;
		i += dsz;
	};

	// clear the bytes that are not part of the result anymore
	bytes::zero(buf[t..]);
	match (e.parent) {
	case null =>
		yield;
	case let s: *bytewstream =>
		// the result is part of the data written by the parent
		s.e.pos += t;
	};
	return buf[..t];
};

// Encodes the written data using [[encode]] and writes the result to 'dest'.
// Returns the amount of bytes written.
export fn encodeto(e: *encoder, dest: io::handle) (size | io::error) = {
	const encoded = encode(e)?;
	const n = io::writeall(dest, encoded)?;
	return n;
};
-- 
2.43.0

[PATCH hare v4 3/6] asn1: decoder

Details
Message ID
<20240207185633.9327-3-apreiml@strohwolke.at>
In-Reply-To
<20240207185633.9327-1-apreiml@strohwolke.at> (view parent)
DKIM signature
pass
Download raw message
Patch: +1141 -0
Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---

 encoding/asn1/+test/decoder_test.ha | 331 ++++++++++++
 encoding/asn1/decoder.ha            | 810 ++++++++++++++++++++++++++++
 2 files changed, 1141 insertions(+)
 create mode 100644 encoding/asn1/+test/decoder_test.ha
 create mode 100644 encoding/asn1/decoder.ha

diff --git a/encoding/asn1/+test/decoder_test.ha b/encoding/asn1/+test/decoder_test.ha
new file mode 100644
index 00000000..2282fc40
--- /dev/null
+++ b/encoding/asn1/+test/decoder_test.ha
@@ -0,0 +1,331 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use bytes;
use errors;
use io;
use memio;
use os;
use strings;
use time::date;
use types;


// Stream shared by all decoders created using d(). It is allocated on first
// use and freed by freetdec().
// XXX: would be nice to just declare this as mem: memio::stream
let mem: nullable *memio::stream = null;
// NOTE(review): not referenced by the tests visible here; confirm it is used.
let rbuf: [os::BUFSZ]u8 = [0...];

// Creates a decoder that reads 'i'. All decoders share the stream stored in
// 'mem', hence creating a new decoder invalidates the previous one.
fn d(i: []u8) decoder = {
	const src = memio::fixed(i);
	const stream = match (mem) {
	case let shared: *memio::stream =>
		// reuse the allocation of a previous call
		*shared = src;
		yield shared;
	case null =>
		const fresh = alloc(src);
		mem = fresh;
		yield fresh;
	};
	return derdecoder(stream);
};

// Frees the stream allocated by d(), if any.
@fini fn freetdec() void = {
	match (mem) {
	case let shared: *memio::stream =>
		free(shared);
		mem = null;
	case null =>
		yield;
	};
};

// Checks parsing of the class and of single and multi byte tag ids.
@test fn parsetag() void = {
	assert((next(&d([0x02, 0x01]))!).class == class::UNIVERSAL);
	assert((next(&d([0x02, 0x01]))!).tagid == 0x02);
	assert((next(&d([0x1e, 0x01]))!).tagid == 0x1e);
	// multi byte tag ids
	assert((next(&d([0x1f, 0x7f, 0x01]))!).tagid == 0x7f);
	assert((next(&d([0x1f, 0x81, 0x00, 0x01]))!).tagid == 0x80);

	// largest supported tag id
	assert((next(&d([0x1f, 0x8f, 0xff, 0xff, 0xff, 0x7f, 0x01]))!).tagid
		== types::U32_MAX);
	// one more than the largest supported tag id
	assert(next(&d([0x1f, 0x90, 0x80, 0x80, 0x80, 0x00, 0x01])) is invalid);
};

// Checks parsing of the data size in short and long form.
@test fn parselen() void = {
	assert(dsz(next(&d([0x02, 0x1]))!) == 1);
	assert(dsz(next(&d([0x02, 0x7f]))!) == 127);
	assert(dsz(next(&d([0x02, 0x81, 0x80]))!) == 128);

	// must use minimal amount of bytes for length encoding
	assert(next(&d([0x02, 0x81, 0x01, 0x01])) is invalid);
	assert(next(&d([0x02, 0x81, 0x7f])) is invalid);
	assert(next(&d([0x02, 0x82, 0x00, 0xff])) is invalid);

	// indefinite form is not allowed in DER
	assert(next(&d([0x02, 0x80, 0x01, 0x00, 0x00])) is invalid);
};

// Reading from an empty source must fail with badformat.
@test fn emptydata() void = {
	assert(read_bool(&d([])) is badformat);
	assert(open_set(&d([])) is badformat);
};

// Checks opening, reading and closing of (nested) sequences, including the
// errors raised when a sequence is not read completely.
@test fn seq() void = {
	let dat: [_]u8 = [
		0x30, 0x0a, // seq
		0x01, 0x01, 0xff, // bool true
		0x30, 0x05, // seq
		0x30, 0x03, // seq
		0x01, 0x01, 0x00, // bool false
	];

	// read everything
	let dc = &d(dat);
	open_seq(dc)!;
	assert(read_bool(dc)! == true);
	open_seq(dc)!;
	open_seq(dc)!;
	assert(read_bool(dc)! == false);
	close_seq(dc)!;
	close_seq(dc)!;
	close_seq(dc)!;
	finish(dc)!;

	// the first element of the sequence is not a sequence
	let dc = &d(dat);
	open_seq(dc)!;
	assert(open_seq(dc) is invalid);

	// closing a sequence that has not been read completely
	let dc = &d(dat);
	open_seq(dc)!;
	assert(close_seq(dc) is badformat);

	let dat: [_]u8 = [
		0x30, 0x07, // seq
		0x0c, 0x05, 0x65, 0x66, 0x67, 0xc3, 0x96, // utf8 string
	];

	let dc = &d(dat);
	open_seq(dc)!;
	let r = strreader(dc, utag::UTF8_STRING)!;
	let s = io::drain(&r)!;
	defer free(s);
	assert(bytes::equal([0x65, 0x66, 0x67, 0xc3, 0x96], s));

	// closing a sequence whose string has only been read partially
	let dc = &d(dat);
	let buf: [4]u8 = [0...];
	open_seq(dc)!;
	let r = strreader(dc, utag::UTF8_STRING)!;
	assert(io::read(&r, buf)! == 3);
	assert(close_seq(dc) is badformat);

	// check unclosed
	let dc = &d(dat);
	open_seq(dc)!;
	assert(finish(dc) is invalid);

	let dc = &d(dat);
	open_seq(dc)!;
	let r = strreader(dc, utag::UTF8_STRING)!;
	let s = io::drain(&r)!;
	// the drained buffer is heap allocated and must be released
	defer free(s);
	assert(finish(dc) is invalid);
};

// An entry must not exceed the size of the sequence it is contained in.
@test fn invalid_seq() void = {
	let dat: [_]u8 = [
		0x30, 0x03, // seq containing data of size 3
		0x02, 0x03, 0x01, 0x02, 0x03, // int 0x010203 overflows seq
	];

	let dc = &d(dat);
	open_seq(dc)!;

	let buf: [3]u8 = [0...];
	assert(read_int(dc, buf) is invalid);
};

// Reads an implicitly tagged boolean. The expectation set by expect_implicit
// is checked for the next entry only.
@test fn read_implicit() void = {
	let dat: [_]u8 = [
		0x30, 0x06, // seq
		0x85, 0x01, 0xff, // IMPLICIT bool true
		0x01, 0x01, 0x00, // bool false
	];

	let dc = &d(dat);
	open_seq(dc)!;
	expect_implicit(dc, class::CONTEXT, 5)!;
	assert(read_bool(dc)! == true);
	// the next entry is a boolean, not an integer
	assert(read_u16(dc) is badformat);
};

// Checks reading of booleans and the errors raised on malformed ones.
@test fn read_bool() void = {
	assert(read_bool(&d([0x01, 0x01, 0xff]))!);
	assert(read_bool(&d([0x01, 0x01, 0x00]))! == false);
	// data must be exactly one byte
	assert(read_bool(&d([0x01, 0x02, 0x00, 0x00])) is invalid);
	// X.690, ch. 11.1
	assert(read_bool(&d([0x01, 0x01, 0x01])) is invalid);

	// invalid class
	assert(read_bool(&d([0x81, 0x01, 0x01])) is badformat);
	// must be primitive
	assert(read_bool(&d([0x21, 0x01, 0x01])) is invalid);
	// invalid tag
	assert(read_bool(&d([0x02, 0x01, 0x01])) is badformat);
};

// Checks reading of null values and that malformed ones are rejected.
//
// The failing cases only require that an [[error]] is returned.
// NOTE(review): the original expressions named [[invalid]] but were never
// asserted; the exact error returned by read_null has not been verified.
@test fn read_null() void = {
	read_null(&d([0x05, 0x00]))!;
	// null must not carry data
	assert(read_null(&d([0x05, 0x01, 0x00])) is error);
	// wrong class
	assert(read_null(&d([0x85, 0x00])) is error);
	// wrong tag
	assert(read_null(&d([0x01, 0x00])) is error);
};

// Checks reading of integers into a buffer and into fixed size unsigned
// types.
@test fn read_int() void = {
	let buf: [8]u8 = [0...];

	// read_int returns the amount of bytes stored into buf
	assert(read_int(&d([0x02, 0x01, 0x01]), buf)! == 1);
	assert(buf[0] == 0x01);
	assert(read_int(&d([0x02, 0x01, 0x00]), buf)! == 1);
	assert(buf[0] == 0x00);
	assert(read_int(&d([0x02, 0x02, 0x01, 0x02]), buf)! == 2);
	assert(buf[0] == 0x01);
	assert(buf[1] == 0x02);

	// must have at least one byte
	assert(read_int(&d([0x02, 0x00]), buf) is invalid);
	// non minimal
	assert(read_int(&d([0x02, 0x02, 0x00, 0x01]), buf) is invalid);
	assert(read_int(&d([0x02, 0x02, 0xff, 0x81]), buf) is invalid);

	// negative values are rejected by the unsigned readers; values with
	// the highest bit set require a leading zero byte
	assert(read_u8(&d([0x02, 0x01, 0x00]))! == 0);
	assert(read_u8(&d([0x02, 0x01, 0x01]))! == 1);
	assert(read_u8(&d([0x02, 0x01, 0x7f]))! == 0x7f);
	assert(read_u8(&d([0x02, 0x01, 0x80])) is invalid);
	assert(read_u8(&d([0x02, 0x01, 0x81])) is invalid);
	assert(read_u8(&d([0x02, 0x02, 0x00, 0x80]))! == 0x80);
	assert(read_u8(&d([0x02, 0x02, 0x00, 0xff]))! == 0xff);

	assert(read_u16(&d([0x02, 0x01, 0x00]))! == 0);
	assert(read_u16(&d([0x02, 0x02, 0x0f, 0xff]))! == 0xfff);
	assert(read_u16(&d([0x02, 0x03, 0x00, 0xff, 0xff]))! == 0xffff);
	// does not fit into u16
	assert(read_u16(&d([0x02, 0x03, 0x01, 0xff, 0xff])) is invalid);
	assert(read_u32(&d([0x02, 0x03, 0x00, 0xff, 0xff]))! == 0xffff);

	let maxu64: [_]u8 = [
		0x02, 0x09, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	];
	assert(read_u64(&d(maxu64))! == 0xffffffffffffffff);
	// does not fit into u64
	maxu64[2] = 0x01;
	assert(read_u64(&d(maxu64)) is invalid);
};

// Checks reading of bit strings. The result is a tuple of the bytes and the
// amount of unused bits of the last byte.
@test fn read_bitstr() void = {
	let buf: [8]u8 = [0...];
	// empty bit string
	let bs = read_bitstr(&d([0x03, 0x01, 0x00]), buf)!;
	assert(len(bs.0) == 0 && bs.1 == 0);
	assert(bitstr_isset(bs, 0)! == false);

	let bs = read_bitstr(&d([0x03, 0x02, 0x00, 0xff]), buf)!;
	assert(bytes::equal(bs.0, [0xff]) && bs.1 == 0);
	assert(bitstr_isset(bs, 0)!);
	assert(bitstr_isset(bs, 7)!);

	// 4 unused bits, hence 12 bits in total
	let bs = read_bitstr(&d([0x03, 0x03, 0x04, 0xab, 0xc0]), buf)!;
	assert(bytes::equal(bs.0, [0xab, 0xc0]) && bs.1 == 4);
	assert(bitstr_isset(bs, 0)!);
	assert(bitstr_isset(bs, 1)! == false);
	assert(bitstr_isset(bs, 8)!);
	assert(bitstr_isset(bs, 9)!);
	assert(!bitstr_isset(bs, 11)!);
	assert(bitstr_isset(bs, 12) is invalid);

	// unused bits must be zero
	assert(read_bitstr(&d([0x03, 0x03, 0x04, 0xab, 0xc1]), buf) is invalid);
	assert(read_bitstr(&d([0x03, 0x03, 0x07, 0xab, 0x40]), buf) is invalid);
};

// Checks reading of an object identifier, both as a known oid and as raw
// bytes.
@test fn read_oid() void = {
	assert(read_oid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))! == oid::ID_AT_COMMON_NAME);

	assert(bytes::equal([0x55, 0x04, 0x03],
			read_rawoid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))!));
};

// Buffer that holds the entry created by newdatetime().
let datbuf: [64]u8 = [0...];

// Creates a primitive entry with the tag 'tag' and the bytes of 's' as data.
// The result is stored in 'datbuf' and will be overwritten on the next call.
fn newdatetime(s: str, tag: utag) []u8 = {
	const content = strings::toutf8(s);
	const n = len(content): u8;

	// header: tag followed by the size in short form
	datbuf[..2] = [tag, n];
	datbuf[2..2 + n] = content;
	return datbuf[..2 + n];
};

// Checks reading of UTCTime values. The second parameter of read_utctime
// influences the century that the two digit year is assigned to.
@test fn read_utctime() void = {
	let derdatetime = newdatetime("231030133710Z", utag::UTC_TIME);
	let dt = read_utctime(&d(derdatetime), 2046)!;

	let fbuf: [24]u8 = [0...];
	assert(date::bsformat(fbuf, date::RFC3339, &dt)!
		== "2023-10-30T13:37:10+0000");

	let dt = read_utctime(&d(derdatetime), 2020)!;
	assert(date::bsformat(fbuf, date::RFC3339, &dt)!
		== "1923-10-30T13:37:10+0000");

	// must end with Z
	let derdatetime = newdatetime("2310301337100", utag::UTC_TIME);
	assert(read_utctime(&d(derdatetime), 2020) is error);

	let derdatetime = newdatetime("231030133710", utag::UTC_TIME);
	assert(read_utctime(&d(derdatetime), 2020) is error);

	// must consist of digits only
	let derdatetime = newdatetime("231030133a10Z", utag::UTC_TIME);
	assert(read_utctime(&d(derdatetime), 2020) is error);

	// month out of range
	let derdatetime = newdatetime("231330133710Z", utag::UTC_TIME);
	assert(read_utctime(&d(derdatetime), 2020) is error);
};

// Checks reading of GeneralizedTime values with and without fractional
// seconds.
@test fn read_gtime() void = {
	let derdatetime = newdatetime("20231030133710Z", utag::GENERALIZED_TIME);

	let dt = read_gtime(&d(derdatetime))!;

	let fbuf: [32]u8 = [0...];
	assert(date::bsformat(fbuf, date::RFC3339, &dt)!
		== "2023-10-30T13:37:10+0000");

	// fractional seconds
	let derdatetime = newdatetime("20231030133710.1Z", utag::GENERALIZED_TIME);
	let dt = read_gtime(&d(derdatetime))!;
	assert(date::bsformat(fbuf, date::STAMP_NANO, &dt)!
		== "2023-10-30 13:37:10.100000000");

	// must end with Z
	let derdatetime = newdatetime("20231030133710", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);
	let derdatetime = newdatetime("202310301337100", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	// seconds must always be present
	let derdatetime = newdatetime("202310301337", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);
	let derdatetime = newdatetime("202310301337Z", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	// fractional seconds must not end with 0. must be omitted if 0
	let derdatetime = newdatetime("20231030133710.", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	let derdatetime = newdatetime("20231030133710.Z", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	let derdatetime = newdatetime("20231030133710.0", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	let derdatetime = newdatetime("20231030133710.0Z", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	let derdatetime = newdatetime("20231030133710.10Z", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	// TODO midnight is YYYYMMDD000000Z
};
diff --git a/encoding/asn1/decoder.ha b/encoding/asn1/decoder.ha
new file mode 100644
index 00000000..78c01ca9
--- /dev/null
+++ b/encoding/asn1/decoder.ha
@@ -0,0 +1,810 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use bytes;
use errors;
use io;
use math::{bit_size_u8};
use os;
use strings;
use time::date;
use types;


// Mask of the five lowest bits of a byte.
// NOTE(review): presumably the tag bits of the first identifier byte; the use
// is not visible here.
def TAGMASK: u8 = 0x1f;
// Size of the stacks that track open constructed entries in the encoder and
// the decoder.
def MAX_CONS_DEPTH: size = 32;

// Each DER entry starts with a header that describes the content.
export type head = struct {

	// Tells whether the data is constructed and encapsulates multiple
	// other data fields; or primitive and the value follows.
	cons: bool,

	// Class info
	class: class,

	// Tag id of the data
	tagid: u32,

	// Start position in stream
	start: size,

	// Start position of data in stream
	data: size,

	// End position in stream
	end: size,

	// Copy of the decoder's implicit flag at the time the header was
	// returned as next entry.
	implicit: bool,
};

// Position in the stream where the entry described by 'd' ends.
fn head_endpos(d: head) size = {
	return d.end;
};

// Size of the whole element described by 'd', i.e. the size of the header plus
// the size of the data.
export fn sz(d: head) size = {
	return d.end - d.start;
};

// Size of the data of the element described by 'd', without its header.
export fn dsz(d: head) size = {
	return d.end - d.data;
};

// State of a DER decoder created by [[derdecoder]].
export type decoder = struct {
	// Source of the encoded data.
	src: io::handle,
	// Current position; compared against the end positions of headers.
	pos: size,
	// Stack of the headers of the open constructed entries.
	cstack: [MAX_CONS_DEPTH]head,
	// Amount of used elements in 'cstack'.
	cstackp: size,
	// Header that has been peeked, but not been returned as next entry.
	next: (void | head),
	// Header of the entry that has been returned last.
	cur: (void | head),
	// NOTE(review): presumably bytes that have been read ahead and 'unbufn'
	// is their amount; a non-zero 'unbufn' is treated as unread data.
	unbuf: [3]u8,
	unbufn: u8,
	// Copied into the header of the next entry and reset afterwards.
	implicit: bool,
};

// Creates a new DER decoder that reads from 'src'. The decoder performs many
// short reads, hence a buffered stream is recommended.
//
// Each entry must be read to its end before the next one is attended to.
// [[finish]] must be called at the end to make sure everything has been read.
export fn derdecoder(src: io::handle) decoder = decoder {
	src = src,
	pos = 0,
	cstackp = 0,
	cur = void,
	next = void,
	implicit = false,
	...
};

// Verifies that decoding is complete: no constructed value is left open, no
// peeked header is pending and the last entry has been read to its end.
// Returns [[invalid]] otherwise.
export fn finish(d: *decoder) (void | error) = {
	if (d.cstackp != 0) {
		return invalid;
	};
	if (d.next is head) {
		return invalid;
	};

	match (d.cur) {
	case let last: head =>
		if (d.pos != last.end) {
			return invalid;
		};
	case void =>
		return;
	};
};

// Returns the header of the innermost open constructed value, or void if none
// is open.
fn curcons(d: *decoder) (void | head) = {
	if (d.cstackp > 0) {
		return d.cstack[d.cstackp - 1];
	};
};

// Returns the header of the next data field without consuming it. Fails with
// [[badformat]] if no data follows.
export fn peek(d: *decoder) (head | error) = {
	const res = trypeek(d)?;
	if (res is io::EOF) {
		return badformat;
	};
	return res as head;
};

// Tries to peek the header of the next data field. Returns io::EOF if there is
// none, either because the open constructed value has been read to its end or
// because the source is exhausted.
export fn trypeek(d: *decoder) (head | error | io::EOF) = {
	match (d.next) {
	case let peeked: head =>
		return peeked;
	case void => void;
	};

	if (is_endofcons(d)) {
		return io::EOF;
	};

	const parsed = parse_header(d)?;
	if (parsed is io::EOF) {
		// The source must not end while a constructed value is open
		// or while unread bytes are pending.
		if (d.cstackp != 0 || d.unbufn > 0) {
			return badformat;
		};
		return io::EOF;
	};

	const dh = parsed as head;
	d.next = dh;
	return dh;
};

// Tells whether a constructed value is open and the read position has reached
// its end.
fn is_endofcons(d: *decoder) bool = {
	if (d.cstackp == 0) {
		return false;
	};
	const open = d.cstack[d.cstackp - 1];
	return d.pos == open.end;
};

// Advances to the next data element and returns its header. Fails with
// [[badformat]] if there is none.
fn next(d: *decoder) (head | error) = {
	const res = trynext(d)?;
	if (res is io::EOF) {
		return badformat;
	};
	return res as head;
};

// Advances to the next data element and returns its header, or io::EOF if
// there is none. A header that has been peeked before is reused instead of
// parsing again. The decoder's implicit flag is moved into the returned header
// and cleared.
fn trynext(d: *decoder) (head | error | io::EOF) = {
	let dh = match (d.next) {
	case let peeked: head =>
		d.next = void;
		yield peeked;
	case void =>
		if (is_endofcons(d)) {
			return io::EOF;
		};
		yield match (parse_header(d)?) {
		case io::EOF =>
			return io::EOF;
		case let parsed: head =>
			yield parsed;
		};
	};

	d.cur = dh;
	dh.implicit = d.implicit;
	d.implicit = false;
	return dh;
};

// Parses the header (identifier and length octets) of the entry at the current
// read position. Returns io::EOF if the source ends before the first header
// byte, and [[invalid]] if the header is malformed or the entry would reach
// past the end of the enclosing constructed value.
fn parse_header(d: *decoder) (head | error | io::EOF) = {
	// An entry must not reach past the end of the open constructed value.
	// Outside of a constructed value there is no such limit.
	const consend = match (curcons(d)) {
	case void =>
		yield types::SIZE_MAX;
	case let h: head =>
		yield h.end;
	};

	if (d.pos == consend) return invalid;

	const epos = d.pos;
	const id = match (tryscan_byte(d)?) {
	case io::EOF =>
		d.cur = void;
		return io::EOF;
	case let id: u8 =>
		yield id;
	};

	// The two high bits of the identifier octet hold the class.
	const class = ((id & 0xc0) >> 6): class;

	let tagid: u32 = id & TAGMASK;
	if (tagid == TAGMASK) {
		tagid = parse_longtag(d, consend - d.pos)?;
	};
	const l = parse_len(d, consend - d.pos)?;
	const hl = d.pos - epos;

	// Compare the length against the space that is left instead of
	// computing the end position first: 'epos + hl + l' wraps around for
	// huge length values and would then pass an 'end > consend' check.
	if (l > consend - d.pos) return invalid;
	const end = epos + hl + l;

	return head {
		class = class,
		cons = ((id >> 5) & 1) == 1,
		tagid = tagid,
		start = epos,
		data = epos + hl,
		end = end,
		implicit = d.implicit,
		...
	};
};

// Reads a single byte from the source and advances the read position. Returns
// io::EOF if the source is exhausted.
fn tryscan_byte(d: *decoder) (u8 | io::EOF | error) = {
	let b: [1]u8 = [0];
	if (io::readall(d.src, b)? is io::EOF) {
		return io::EOF;
	};
	d.pos += 1;
	return b[0];
};

// Reads a single byte from the source. Fails with [[truncated]] if the source
// is exhausted.
fn scan_byte(d: *decoder) (u8 | error) = {
	const res = tryscan_byte(d)?;
	if (res is io::EOF) {
		return truncated;
	};
	return res as u8;
};

// Reads data of current entry and advances pointer. Data must have been opened
// using [[next]] or [[trynext]]. EOF is returned on end of data.
//
// Bytes that were handed back with dataunread are returned first, followed by
// a single read from the source. If the source ends although data is left, a
// wrapped [[truncated]] error is returned.
//
// NOTE(review): 'dataleft' includes d.unbufn, so the read from the source may
// be sized up to d.unbufn bytes beyond 'head_endpos(cur) - d.pos'. Also, if
// 'buf' is shorter than d.unbufn, the bytes that stay behind are not moved to
// the front of d.unbuf. Confirm against the callers of dataunread.
fn dataread(d: *decoder, buf: []u8) (size | io::EOF | io::error) = {
	let cur = match (d.cur) {
	case void =>
		abort("primitive must be opened with [[next]] or [[trynext]]");
	case let dh: head =>
		yield dh;
	};

	const dataleft = head_endpos(cur) - d.pos + d.unbufn;
	if (dataleft == 0) {
		return io::EOF;
	};

	// Serve previously unread bytes first.
	let n = 0z;
	if (d.unbufn > 0) {
		const max = if (d.unbufn > len(buf)) len(buf): u8 else d.unbufn;
		buf[..max] = d.unbuf[..max];
		d.unbufn -= max;
		n += max;
	};

	// Never request more than what is left, nor more than fits into buf.
	const max = if (dataleft < len(buf) - n) dataleft else len(buf) - n;

	match (io::read(d.src, buf[n..n + max])?) {
	case io::EOF =>
		// there should be data left
		return wrap_err(truncated);
	case let sz: size =>
		d.pos += sz;
		return n + sz;
	};
};

// Hands bytes back to the decoder, so that the next dataread returns them
// first. Meant for incomplete utf8 runes. The bytes are appended to d.unbuf;
// exceeding its capacity aborts. d.pos is left unchanged.
fn dataunread(d: *decoder, buf: []u8) void = {
	assert(len(buf) + d.unbufn <= len(d.unbuf));

	d.unbuf[d.unbufn..d.unbufn + len(buf)] = buf;
	d.unbufn += len(buf): u8;
};

// Tells whether the data of the current entry has been read completely. Also
// true if no entry is open.
//
// NOTE(review): dataunread leaves d.pos unchanged, so with pending unread
// bytes 'd.pos + d.unbufn' counts them a second time. Confirm that this is
// the intended condition.
fn dataeof(d: *decoder) bool = {
	match (d.cur) {
	case void =>
		return true;
	case let h: head =>
		return d.pos + d.unbufn == head_endpos(h);
	};
};

// Parses a long form tag id, which is stored in the octets that follow the
// identifier octet: seven bits per octet, most significant group first, with
// the high bit set on every octet but the last. At most 'max' octets are
// read. Returns [[invalid]] if the tag does not fit into u32, if it starts
// with a zero group or if it does not end within 'max' octets.
fn parse_longtag(p: *decoder, max: size) (u32 | error) = {
	// XXX: u32 too much?
	let tag: u32 = 0;
	let maxbits = size(u32) * 8;
	let nbits = 0z;

	for (let i = 0z; i < max; i += 1) {
		let b = scan_byte(p)?;
		const part = b & 0x7f;

		// The first group only counts with its significant bits, every
		// following group with all seven.
		nbits += if (tag == 0) bit_size_u8(part) else 7;
		if (nbits > maxbits) {
			// overflows u32
			return invalid;
		};

		tag = (tag << 7) + part;
		if (tag == 0) {
			// first tag part must not be 0
			return invalid;
		};

		// A cleared high bit marks the last octet.
		if ((b >> 7) == 0) {
			return tag;
		};
	};
	return invalid; // max has been reached
};

// Parses the length octets of a header. At most 'max' octets are read. Both
// the short form (one octet, high bit cleared) and the definite long form (the
// first octet holds the number of length octets that follow) are supported.
// Returns [[invalid]] for every encoding that is not the shortest possible
// one, for the indefinite form and for lengths that do not fit into size.
fn parse_len(p: *decoder, max: size) (size | error) = {
	if (max == 0) return invalid;

	const b = scan_byte(p)?;
	if (b == 0xff) {
		return invalid;
	};
	if (b >> 7 == 0) {
		// short form
		return b: size;
	};

	let l = 0z;
	// Long form: the low seven bits hold the number of length octets.
	const n = b & 0x7f;
	if (n == 0) {
		// Indefinite encoding is not supported in DER.
		return invalid;
	};

	if (n > size(size)) {
		// would cause a size overflow
		return invalid;
	};

	// The first octet counts against 'max' as well.
	if (n + 1 > max) return invalid;

	for (let i = 0z; i < n; i += 1) {
		const b = scan_byte(p)?;
		l = (l << 8) + b;
		if (l == 0) {
			// Leading zeroes means minimum number of bytes for
			// length encoding has not been used.
			return invalid;
		};
	};

	if (l <= 0x7f) {
		// Could've used short form.
		return invalid;
	};

	return l;
};

// Expects an IMPLICIT defined data field of class 'c' and tag 'tag'. If the
// next field matches, a read function (read_{*} or {*}reader) must follow that
// defines and reads the actual data as it is stored.
export fn expect_implicit(d: *decoder, c: class, tag: u32) (void | error) = {
	expect_tag(peek(d)?, c, tag)?;
	d.implicit = true;
};

// Opens an EXPLICIT encoded field of class 'c' and tag 'tag'. The user must
// call [[close_explicit]] once the contained data has been read.
export fn open_explicit(d: *decoder, c: class, tag: u32) (void | error) = {
	return open_cons(d, c, tag);
};

// Closes an EXPLICIT encoded field that was opened with [[open_explicit]].
export fn close_explicit(d: *decoder) (void | badformat) = {
	return close_cons(d);
};


// Opens a constructed value of the given 'class' and 'tagid' and pushes it
// onto the stack of open constructed values. Fails if the next entry is not
// constructed, has an unexpected tag or if the stack is full.
fn open_cons(d: *decoder, class: class, tagid: u32) (void | error) = {
	const h = next(d)?;
	if (!h.cons) {
		return invalid;
	};
	expect_tag(h, class, tagid)?;

	if (len(d.cstack) == d.cstackp) {
		return badformat;
	};

	d.cstackp += 1;
	d.cstack[d.cstackp - 1] = h;
};

// Closes the current constructed value. [[badformat]] is returned if not all
// of its data has been read. Aborts if no constructed value is open.
fn close_cons(d: *decoder) (void | badformat) = {
	if (d.implicit) {
		// a data field marked implicit has not been read
		return badformat;
	};

	if (d.cstackp == 0) {
		abort("No constructed value open");
	};

	// All data must have been read before closing.
	const open = d.cstack[d.cstackp - 1];
	if (d.pos != open.end || d.unbufn > 0) {
		return badformat;
	};

	d.cstackp -= 1;
};

// Opens a sequence. It must be closed with [[close_seq]].
export fn open_seq(d: *decoder) (void | error) = {
	return open_cons(d, class::UNIVERSAL, utag::SEQUENCE: u32);
};

// Closes the current sequence. [[badformat]] is returned if not all of its
// data has been read.
export fn close_seq(d: *decoder) (void | badformat) = {
	return close_cons(d);
};

// Opens a set. DER requires the elements of a set to be sorted; that order is
// not validated.
export fn open_set(d: *decoder) (void | error) = {
	return open_cons(d, class::UNIVERSAL, utag::SET: u32);
};

// Closes the current set. [[badformat]] is returned if not all of its data
// has been read.
export fn close_set(d: *decoder) (void | badformat) = {
	return close_cons(d);
};

// Checks the header 'h' against the expected 'class' and 'tagid'. Returns
// [[invalid]] if a universal sequence or set is expected but 'h' is not
// constructed, and [[badformat]] if class or tag id differ. Class and tag id
// are not compared for headers marked as implicit.
fn expect_tag(h: head, class: class, tagid: u32) (void | invalid | badformat) = {
	const wantcons = class == class::UNIVERSAL
		&& (tagid == utag::SEQUENCE || tagid == utag::SET);
	if (wantcons && !h.cons) {
		return invalid;
	};

	if (h.implicit) {
		return;
	};

	if (h.class != class || h.tagid != tagid) {
		return badformat;
	};
};

// Checks the header 'dh' against the universal tag 'tag'. See expect_tag.
fn expect_utag(dh: head, tag: utag) (void | invalid | badformat) = {
	return expect_tag(dh, class::UNIVERSAL, tag: u32);
};

// Reads the data of the current entry into 'buf' and returns the number of
// bytes read, which is zero if no data is left. Fails with [[badformat]] if
// the data has not been read completely afterwards. Errors from the
// underlying read are passed on to the caller.
fn read_bytes(d: *decoder, buf: []u8) (size | error) = {
	match (dataread(d, buf)?) {
	case io::EOF =>
		return 0z;
	case let n: size =>
		if (!dataeof(d)) {
			return badformat;
		};
		return n;
	};
};

// Like read_bytes, but fails with [[badformat]] unless the data fills 'buf'
// exactly.
fn read_nbytes(d: *decoder, buf: []u8) (size | error) = {
	const got = read_bytes(d, buf)?;
	if (got == len(buf)) {
		return got;
	};
	return badformat;
};

// Reads a boolean. The value must be stored in exactly one byte that is
// either 0x00 (false) or 0xff (true); anything else is [[invalid]].
export fn read_bool(d: *decoder) (bool | error) = {
	const h = next(d)?;
	expect_utag(h, utag::BOOLEAN)?;
	if (h.end - h.data != 1) {
		return invalid;
	};

	switch (scan_byte(d)?) {
	case 0x00 =>
		return false;
	case 0xff =>
		return true;
	case =>
		return invalid;
	};
};

// Checks that the big endian integer 'i' is not empty and is encoded using the
// minimal number of bytes possible, as defined in X.690 s8.3.2.
fn validate_intprefix(i: []u8) (void | error) = {
	if (len(i) == 0) {
		return invalid;
	};
	if (len(i) == 1) {
		return;
	};

	// The first byte is redundant if it merely repeats the sign bit of
	// the second one.
	const padded_pos = i[0] == 0x00 && i[1] >> 7 == 0;
	const padded_neg = i[0] == 0xff && i[1] >> 7 == 1;
	if (padded_pos || padded_neg) {
		return invalid;
	};
};

// Reads an integer into 'buf' and returns its size in bytes. Fails if the
// integer does not fit into 'buf'. The integer is stored in big endian format;
// negative values are stored as two's complement. An integer takes at least
// one byte.
export fn read_int(d: *decoder, buf: []u8) (size | error) = {
	assert(len(buf) > 0);

	expect_utag(next(d)?, utag::INTEGER)?;
	const nread = read_bytes(d, buf)?;
	validate_intprefix(buf[..nread])?;
	return nread;
};

// Similar to [[read_int]], but fails with [[badformat]] if the integer is
// negative. A leading zero byte is removed, so the value zero is returned
// with a size of zero.
export fn read_uint(d: *decoder, buf: []u8) (size | error) = {
	const n = read_int(d, buf)?;
	if (buf[0] & 0x80 == 0x80) {
		return badformat;
	};
	if (buf[0] != 0) {
		return n;
	};

	// Drop the leading zero byte by moving the rest to the front.
	buf[..n-1] = buf[1..n];
	return n - 1;
};

// Reads an unsigned integer that is expected to fit into 'x' bytes, where 'x'
// must not exceed 8. Returns [[invalid]] if the integer is negative or needs
// more than 'x' bytes.
fn read_ux(d: *decoder, x: u8) (u64 | error) = {
	assert(x <= 8);
	// One extra byte, since a value whose highest bit is set is stored
	// with a leading zero byte.
	let b: [9]u8 = [0...];
	const n = read_int(d, b[..x+1])?;

	if (b[0] & 0x80 != 0) {
		// sign bit is set
		return invalid;
	};

	// Skip the leading zero byte, if there is one.
	const s = if (b[0] == 0x00) 1u8 else 0u8;
	if (n - s > x) {
		return invalid;
	};

	// Assemble the big endian bytes.
	let r = 0u64;
	for (let i = s; i < n; i += 1) {
		r <<= 8;
		r += b[i];
	};
	return r;
};

// Reads an unsigned integer that must fit into u8.
export fn read_u8(d: *decoder) (u8 | error) = {
	const v = read_ux(d, 1)?;
	return v: u8;
};

// Reads an unsigned integer that must fit into u16.
export fn read_u16(d: *decoder) (u16 | error) = {
	const v = read_ux(d, 2)?;
	return v: u16;
};

// Reads an unsigned integer that must fit into u32.
export fn read_u32(d: *decoder) (u32 | error) = {
	const v = read_ux(d, 4)?;
	return v: u32;
};

// Reads an unsigned integer that must fit into u64.
export fn read_u64(d: *decoder) (u64 | error) = {
	return read_ux(d, 8);
};

// Reads a bitstring value into 'buf'. The result tuple contains the bitstring,
// which is a slice of 'buf', and the number of unused bits in the last byte.
// The [[bitstr_isset]] function may be used to check for set bits.
//
// Returns [[invalid]] if the number of unused bits exceeds 7, if unused bits
// are not zero or if an empty bitstring claims to have unused bits.
export fn read_bitstr(d: *decoder, buf: []u8) (([]u8, u8) | error) = {
	let dh = next(d)?;
	expect_utag(dh, utag::BITSTRING)?;

	// The first data byte holds the number of unused bits.
	let unused: [1]u8 = [0...];
	match (dataread(d, unused)?) {
	case io::EOF =>
		return invalid;
	case let n: size =>
		if (n != 1) {
			return invalid;
		};
	};
	const unused = unused[0];
	if (unused > 7) {
		return invalid;
	};

	const n = read_bytes(d, buf)?;
	if (n == 0 && unused != 0) {
		// X.690 s8.6.2.3: an empty bitstring has no unused bits
		return invalid;
	};

	const mask = (1 << unused) - 1;
	if (n > 0 && buf[n-1] & mask != 0) {
		// unused bits must be zero
		return invalid;
	};
	return (buf[..n], unused);
};

// Checks whether the bit at 'pos' is set in the given bitstring. 'pos' starts
// at 0, which is the highest order bit of the first byte. Positions beyond the
// last byte are reported as not set; positions within the unused bits of the
// last byte are [[invalid]].
export fn bitstr_isset(bitstr: ([]u8, u8), pos: size) (bool | invalid) = {
	const (data, unused) = bitstr;

	const byteidx = pos / 8;
	if (byteidx >= len(data)) {
		return false;
	};
	const b = data[byteidx];

	const bitidx = pos - byteidx * 8;
	if (byteidx == len(data) - 1 && bitidx >= (8 - unused)) {
		return invalid;
	};
	const mask = (1 << (7 - bitidx));
	return mask & b == mask;
};

// Returns an [[io::reader]] for the data of an octet string.
// TODO add limit?
export fn octetstrreader(d: *decoder) (bytestream | error) = {
	expect_utag(next(d)?, utag::OCTET_STRING)?;
	return newbytereader(d);
};

// Reads an octet string into 'buf' and returns its size. Fails if 'buf' is
// too small.
export fn read_octetstr(d: *decoder, buf: []u8) (size | error) = {
	assert(len(buf) > 0);

	expect_utag(next(d)?, utag::OCTET_STRING)?;
	return read_bytes(d, buf);
};

// Reads a null entry. A null entry that carries data is [[invalid]].
export fn read_null(d: *decoder) (void | error) = {
	const h = next(d)?;
	expect_utag(h, utag::NULL)?;
	if (h.end != h.data) {
		return invalid;
	};
};

// An [[io::stream]] that reads the data of the decoder's current entry.
export type bytestream = struct {
	stream: io::stream,
	// Decoder the data is read from.
	d: *decoder,
};

// Creates a bytestream that reads the data of the current entry of 'd'.
fn newbytereader(d: *decoder) bytestream = {
	const bs = bytestream {
		stream = &bytestream_vtable,
		d = d,
		...
	};
	return bs;
};

// Stream operations of a bytestream. Only reading is provided.
const bytestream_vtable: io::vtable = io::vtable {
	reader = &bytestream_reader,
	...
};

// Read operation of a bytestream: reads data of the decoder's current entry.
fn bytestream_reader(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
	const bs = s: *bytestream;
	return dataread(bs.d, buf);
};

// Returns an [[io::reader]] that reads the raw data of the next entry in its
// encoded form. The entry must have class 'c' and tag 'tagid'. Note that this
// reader does not validate the data in any way.
export fn bytereader(d: *decoder, c: class, tagid: u32) (bytestream | error) = {
	expect_tag(next(d)?, c, tagid)?;
	return newbytereader(d);
};

// Reads a UTC time. Since the stored date only has a two digit year, 'maxyear'
// is required to define the epoch switch. For example 'maxyear' = 2046 causes
// all encoded years <= 46 to be after 2000 and all values > 46 will have 1900
// as the century.
//
// Returns [[invalid]] if the data is not a valid date of the form
// YYMMDDhhmmssZ.
export fn read_utctime(d: *decoder, maxyear: u16) (date::date | error) = {
	assert(maxyear > 100);

	let dh = next(d)?;
	expect_utag(dh, utag::UTC_TIME)?;

	let time: [13]u8 = [0...];
	read_nbytes(d, time[..])?;

	if (time[len(time)-1] != 'Z') {
		return invalid;
	};

	// The year is converted by hand, so its digits have to be checked by
	// hand as well. Anything else would wrap around in the subtraction
	// below and produce a bogus year.
	for (let i = 0z; i < 2; i += 1) {
		if (time[i] < 0x30 || time[i] > 0x39) {
			return invalid;
		};
	};

	let year: u16 = (time[0] - 0x30): u16 * 10 + (time[1] - 0x30): u16;
	let cent = maxyear - (maxyear % 100);
	if (year > maxyear % 100) {
		cent -= 100;
	};

	let v = date::newvirtual();
	v.year = (year + cent): int;
	v.zoff = 0;
	v.nanosecond = 0;

	// The data is untrusted, so it may not be valid UTF-8.
	const datestr = match (strings::fromutf8(time[2..])) {
	case let s: str =>
		yield s;
	case =>
		return invalid;
	};
	if (!(date::parse(&v, "%m%d%H%M%S%Z", datestr) is void)) {
		return invalid;
	};

	match (date::realize(v)) {
	case let dt: date::date =>
		return dt;
	case (date::insufficient | date::invalid) =>
		return invalid;
	};
};

// Reads a generalized datetime of the form YYYYMMDDhhmmss[.f]Z, where the
// optional fraction of a second must not be empty and must not end with a
// zero. Returns [[invalid]] for everything else.
export fn read_gtime(d: *decoder) (date::date | error) = {
	let dh = next(d)?;
	expect_utag(dh, utag::GENERALIZED_TIME)?;

	// The date begins with the encoded datetime
	def DATESZ = 14z;
	// followed by optional fractional seconds separated by '.'
	def NANOSZ = 10z;
	def NANOSEPPOS = 14;
	// and ends with the zone info 'Z'
	def ZONESZ = 1z;

	let time: [DATESZ + NANOSZ + ZONESZ]u8 = [0...];
	let n = read_bytes(d, time[..])?;

	// zone info and seconds must always be present. The length has to be
	// checked first: 'n' is zero for an entry without data.
	if (n < DATESZ + ZONESZ || time[n-1] != 'Z') {
		return invalid;
	};

	// validate fractional seconds
	if (n > DATESZ + ZONESZ) {
		// fractional seconds must not be empty
		if (time[NANOSEPPOS] != '.' || n == DATESZ + ZONESZ + 1) {
			return invalid;
		};
		// fractional seconds must not end with 0 and must be > 0
		if (time[n-2] == '0') return invalid;
	};

	// right pad fractional seconds to make them valid nanoseconds
	time[n-1..] = ['0'...];
	time[NANOSEPPOS] = '.';

	// The data is untrusted, so it may not be valid UTF-8.
	const timestr = match (strings::fromutf8(time)) {
	case let s: str =>
		yield s;
	case =>
		return invalid;
	};

	match (date::from_str("%Y%m%d%H%M%S.%N", timestr)) {
	case let d: date::date =>
		return d;
	case date::error =>
		return invalid;
	};
};

// Skips an element and returns the size of the data that has been skipped.
// Returns an error, if the skipped data is invalid.
//
// 'max' is the maximum size of the data of an INTEGER, OCTET_STRING or
// BITSTRING; [[badformat]] is returned if the data is larger. For a BITSTRING
// 'max' must not exceed os::BUFSZ. Aborts for tags other than BOOLEAN,
// INTEGER, NULL, OCTET_STRING and BITSTRING.
export fn skip(d: *decoder, tag: utag, max: size) (size | error) = {
	static let buf: [os::BUFSZ]u8 = [0...];
	switch (tag) {
	case utag::BOOLEAN =>
		read_bool(d)?;
		return 1z;
	case utag::INTEGER =>
		let br = bytereader(d, class::UNIVERSAL, utag::INTEGER)?;
		let n = match (io::read(&br, buf)?) {
		case let n: size =>
			yield n;
		case io::EOF =>
			return invalid;
		};

		// Do not leave the data behind in the static buffer if the
		// integer is rejected.
		match (validate_intprefix(buf[..n])) {
		case void => void;
		case let e: error =>
			bytes::zero(buf[..n]);
			return e;
		};
		// The bytes of the first read count against 'max' as well.
		if (n > max) {
			bytes::zero(buf[..n]);
			return badformat;
		};

		n += streamskip(&br, max - n, buf)?;
		return n;
	case utag::NULL =>
		read_null(d)?;
		return 0z;
	case utag::OCTET_STRING =>
		let r = octetstrreader(d)?;
		return streamskip(&r, max, buf)?;
	case utag::BITSTRING =>
		assert(max <= len(buf));
		let buf = buf[..max];
		let p = read_bitstr(d, buf)?;
		bytes::zero(p.0);
		// account for the byte holding the number of unused bits
		return len(p.0) + 1;
	case =>
		abort("skip for given utag not implemented");
	};
};

// Reads 'r' until EOF using 'buf' as scratch space and returns the number of
// bytes read. Returns [[badformat]] if more than 'max' bytes are read. 'buf'
// is zeroed before returning.
fn streamskip(r: io::handle, max: size, buf: []u8) (size | error) = {
	defer bytes::zero(buf);
	// One byte beyond 'max' is enough to notice oversized data. It also
	// keeps the read buffer from becoming empty if 'max' is zero; reads
	// into an empty buffer cannot make progress.
	let buf = if (max < len(buf)) buf[..max + 1] else buf[..];
	let s = 0z;
	for (true) {
		match (io::read(r, buf)?) {
		case let n: size =>
			s += n;
		case io::EOF =>
			return s;
		};

		if (s > max) {
			return badformat;
		};
	};
};
-- 
2.43.0

[PATCH hare v4 4/6] asn1: oid support

Details
Message ID
<20240207185633.9327-4-apreiml@strohwolke.at>
In-Reply-To
<20240207185633.9327-1-apreiml@strohwolke.at> (view parent)
DKIM signature
pass
Download raw message
Patch: +538 -7
encoding/asn1/stdoid/db.txt contains all oids required by the stdlib. It
is in a separate module so that projects may create their own oid list
by appending to the stdlib list and generating the db with the genoiddb
command.

It is still not an ideal solution if multiple libraries require their
own oids to be added to the db.

Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---

v3: Rework to allow different oid databases in different modules.
v4: Fix oid reader test. Improve documentation.

 cmd/genoiddb/main.ha                | 209 ++++++++++++++++++++++++++++
 encoding/asn1/+test/decoder_test.ha |  21 ++-
 encoding/asn1/oid.ha                | 137 ++++++++++++++++++
 encoding/asn1/stdoid/db.ha          | 127 +++++++++++++++++
 encoding/asn1/stdoid/db.txt         |  51 +++++++
 5 files changed, 538 insertions(+), 7 deletions(-)
 create mode 100644 cmd/genoiddb/main.ha
 create mode 100644 encoding/asn1/oid.ha
 create mode 100644 encoding/asn1/stdoid/db.ha
 create mode 100644 encoding/asn1/stdoid/db.txt

diff --git a/cmd/genoiddb/main.ha b/cmd/genoiddb/main.ha
new file mode 100644
index 00000000..5ce36ccb
--- /dev/null
+++ b/cmd/genoiddb/main.ha
@@ -0,0 +1,209 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use ascii;
use bufio;
use fmt;
use io;
use os;
use strconv;
use strings;
use types;

// A single oid read from the database file.
type entry = struct {
	// Name of the oid, the first field of the line.
	name: str,
	// The oid in dotted notation, the last field of the line.
	val: str,
	// Offset of the oid's length-prefixed DER form within the generated
	// lookup table. Assigned by write_db.
	idx: size,
};

// Reads an oid database from stdin and writes it as hare code to stdout: the
// database itself, a pointer to it and one constant per oid, whose value is
// the position of the oid within the database.
export fn main() void = {
	let oids = parse_oids();
	defer free_oids(oids);

	fmt::println("// SPDX-License-Identifier: MPL-2.0\n"
		"// (c) Hare authors <https://harelang.org>\n"
		"// This is an auto generated file. Do not edit.\n"
		"\n"
		"use encoding::asn1;\n")!;

	// the database
	fmt::println("const _db = asn1::oiddb {")!;
	write_db(os::stdout, oids)!;
	fmt::println("\tnames = [")!;
	for (let n = 0z; n < len(oids); n += 1) {
		const e = &oids[n];
		fmt::printfln("\t\t\"{}\",", e.name)!;
	};
	fmt::println("\t],")!;
	fmt::println("};\n")!;

	fmt::println("export const db = &_db;\n")!;

	// one constant per oid
	for (let id = 0z; id < len(oids); id += 1) {
		fmt::print("export def ")!;
		write_varname(os::stdout, oids[id].name)!;
		fmt::printfln(": asn1::oid = {};", id)!;
	};
};

// Parses the oid database from stdin. Empty lines and lines starting with '#'
// are ignored. Of every other line, the first space separated field is taken
// as the name and the last one as the oid. The result must be freed with
// free_oids.
fn parse_oids() []entry = {
	let sc = bufio::newscanner(os::stdin, types::SIZE_MAX);
	defer bufio::finish(&sc);
	let oids: []entry = [];

	for (true) {
		const line = match (bufio::scan_line(&sc)!) {
		case let l: const str =>
			yield l;
		case io::EOF =>
			break;
		};

		if (line == "" || strings::hasprefix(line, '#')) {
			continue;
		};

		const fields = strings::split(line, " ");
		defer free(fields);

		append(oids, entry {
			name = strings::dup(fields[0]),
			val = strings::dup(fields[len(fields)-1]),
			...
		});
	};

	return oids;
};

// Frees the entries returned by parse_oids along with their strings.
fn free_oids(oids: []entry) void = {
	for (let n = 0z; n < len(oids); n += 1) {
		const e = &oids[n];
		free(e.name);
		free(e.val);
	};

	free(oids);
};

// Writes the 'lut' and 'index' fields of the oid database to 'h'. The lookup
// table holds the DER form of every oid, each preceded by its length in one
// byte. The index holds the offset of every oid within the lookup table; the
// offset is also stored in the 'idx' field of the entry.
fn write_db(h: io::handle, oids: []entry) (void | io::error) = {
	fmt::fprint(h, "\tlut = [")?;

	const maxcols = 12z;
	let idx = 0z;

	for (let i = 0z; i < len(oids); i += 1) {
		let e = &oids[i];
		e.idx = idx;

		let der = oidtoder(e.val);
		// the length must fit into the single prefix byte
		assert(len(der) <= 0xff);
		insert(der[0], len(der): u8);
		defer free(der);

		for (let j = 0z; j < len(der); j += 1) {
			fmt::fprint(h, if (idx % maxcols == 0) "\n\t\t" else " ")?;
			fmt::fprintf(h, "0x{:.2x},", der[j])?;
			idx += 1;
		};
	};
	fmt::fprintln(h, "\n\t],")?;

	const maxcols = 9z;
	fmt::fprint(h, "\tindex = [")?;
	for (let i = 0z; i < len(oids); i += 1) {
		fmt::fprint(h, if (i % maxcols == 0) "\n\t\t" else " ")?;
		fmt::fprintf(h, "0x{:.4x},", oids[i].idx)?;
	};
	fmt::fprintln(h, "\n\t],")?;
};

// Converts an oid in dotted notation into its DER form. The caller must free
// the result.
//
// The first two numbers are combined into a single byte, hence oids are only
// supported if the first number is at most 2 and the second one is below 40.
// Everything else aborts, as does an oid with fewer than two numbers.
fn oidtoder(oid: str) []u8 = {
	let nums = oidtou64s(oid);
	defer free(nums);

	let der: []u8 = alloc([0...], 1);
	assert(len(nums) >= 2);
	// 2 * 40 + 39 still fits into a byte; anything larger would overflow
	assert(nums[0] <= 2);
	assert(nums[1] < 40);
	der[0] = nums[0]: u8 * 40 + nums[1]: u8;
	let end = 1z;

	for (let i = 2z; i < len(nums); i += 1) {
		let n = nums[i];
		if (n == 0) {
			insert(der[end], 0u8);
			end = len(der);
			continue;
		};

		// Split the number into groups of seven bits, lowest group
		// first. Every group is inserted at the same position, so the
		// highest group ends up in front. All groups but the lowest
		// one are marked with the high bit.
		let first = true;
		for (n > 0) {
			let p: u8 = n: u8 & 0x7f;
			n >>= 7;
			if (first) {
				first = false;
			} else {
				p |= 0x80;
			};
			insert(der[end], p);
		};

		end = len(der);
	};

	return der;
};

// Splits an oid in dotted notation into its numbers. Aborts if a part is not
// a valid number. The caller must free the result.
fn oidtou64s(oid: str) []u64 = {
	let parts: []u64 = [];
	let tok = strings::tokenize(oid, ".");

	for (true) {
		const part = match (strings::next_token(&tok)) {
		case void =>
			break;
		case let s: str =>
			yield s;
		};
		append(parts, strconv::stou64(part)!);
	};

	return parts;
};

// Writes the name of the constant for the oid called 'name' to 'h'. Letters
// are written in upper case and '-' is replaced by '_'. In addition, a '_' is
// put in front of an upper case letter that follows a lower case letter or a
// digit. Terminates the program on any other character.
fn write_varname(h: io::handle, name: str) (void | io::error) = {
	// assume that names are in ascii
	let it = strings::iter(name);
	// whether the previous character was a lower case letter or a digit
	let prevlow = false;
	for (true) {
		const r = match (strings::next(&it)) {
		case void =>
			break;
		case let r: rune =>
			yield r;
		};

		if (r == '-') {
			prevlow = false;
			fmt::fprint(h, '_')?;
		} else if (ascii::isdigit(r)) {
			prevlow = true;
			fmt::fprint(h, r)?;
		} else if (ascii::isupper(r)) {
			if (prevlow) {
				fmt::fprint(h, "_")?;
				prevlow = false;
			};
			fmt::fprint(h, r)?;
		} else if (ascii::islower(r)) {
			prevlow = true;
			fmt::fprint(h, ascii::toupper(r))?;
		} else {
			fmt::fatalf("Unexpected character in oid name: {}", r);
		};
	};
};

diff --git a/encoding/asn1/+test/decoder_test.ha b/encoding/asn1/+test/decoder_test.ha
index 2282fc40..cb32d7c3 100644
--- a/encoding/asn1/+test/decoder_test.ha
+++ b/encoding/asn1/+test/decoder_test.ha
@@ -243,13 +243,6 @@ fn d(i: []u8) decoder = {
	assert(read_bitstr(&d([0x03, 0x03, 0x07, 0xab, 0x40]), buf) is invalid);
};

// Reads the oid 2.5.4.3 as a known oid and in its raw DER form.
@test fn read_oid() void = {
	assert(read_oid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))! == oid::ID_AT_COMMON_NAME);

	assert(bytes::equal([0x55, 0x04, 0x03],
			read_rawoid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))!));
};

let datbuf: [64]u8 = [0...];

fn newdatetime(s: str, tag: utag) []u8 = {
@@ -329,3 +322,17 @@ fn newdatetime(s: str, tag: utag) []u8 = {

	// TODO midnight is YYYYMMDD000000Z
};

// Looks up the oid 2.5.4.3 in a small database and reads it in its raw DER
// form.
@test fn read_oid() void = {
	// Two entries, each stored as a length byte followed by the DER
	// form: 1.3.101.112 at offset 0 and 2.5.4.3 at offset 4.
	let db = oiddb {
		lut = [0x03, 0x2b, 0x65, 0x70, 0x03, 0x55, 0x04, 0x03],
		index = [0, 4],
		names = ["ed25519", "id-at-commonName"],
	};

	assert(read_oid(&d([0x06, 0x03, 0x55, 0x04, 0x03]), &db)! == 1);
	assert(stroid(&db, 1) == "id-at-commonName");

	assert(bytes::equal([0x55, 0x04, 0x03],
			read_rawoid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))!));
};
diff --git a/encoding/asn1/oid.ha b/encoding/asn1/oid.ha
new file mode 100644
index 00000000..d32c1400
--- /dev/null
+++ b/encoding/asn1/oid.ha
@@ -0,0 +1,137 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bytes;
use errors;
use fmt;
use io;
use math::{divu};
use memio;
use strings;


// An oid database that contains a lookup table of known oids in the DER format.
// A database of oids required by the standard library can be found in
// [[encoding::asn1::stdoid]].
//
// The database can be used with [[oid_from_der]] and [[oid_to_der]] to convert
// an oid between integer and DER encoding. [[read_oid]] and [[write_oid]] can
// be used to decode or encode the oid directly from and to DER.
//
// If the standard oid database is missing entries for the given use case, an
// individual database can be generated using the genoiddb command found in
// cmd/. Take a look at encoding/asn1/stdoid/db.txt for an example database
// file.
export type oiddb = struct {
	// The DER form of all oids, each preceded by its length in one byte.
	lut: []u8,
	// Offsets into 'lut' of the length byte of every oid, indexed by
	// [[oid]].
	index: []size,
	// Names of the oids, indexed by [[oid]].
	names: []str,
};

// Numeric id of an oid which is unique within an [[oiddb]]. It is the position
// of the oid within the 'index' and 'names' fields of the database.
export type oid = u32;

// Reads an oid and looks it up in 'db'. Returns [[badformat]] if the oid is
// not part of the database.
export fn read_oid(d: *decoder, db: *oiddb) (oid | error) = {
	const der = read_rawoid(d)?;
	const found = oid_from_der(db, der);
	if (found is void) {
		return badformat;
	};
	return found as oid;
};

// Reads any [[oid]] and returns its DER encoded form. The returned value is
// borrowed from a static buffer and stays valid until the next call. Oids of
// less than two bytes are [[invalid]].
export fn read_rawoid(d: *decoder) ([]u8 | error) = {
	def OIDBUFSZ: size = 64; // estimated
	static let oidbuf: [OIDBUFSZ]u8 = [0...];

	const h = next(d)?;
	expect_utag(h, utag::OID)?;
	if (h.end - h.data < 2) {
		return invalid;
	};

	const nread = read_bytes(d, oidbuf)?;
	return oidbuf[..nread];
};

// Writes the given [[oid]] of the [[oiddb]] 'db'.
export fn write_oid(e: *encoder, db: *oiddb, oid: oid) (void | overflow) = {
	write_fixedprim(e, class::UNIVERSAL, utag::OID, oid_to_der(db, oid))?;
};

// Looks up the DER encoded oid 'raw' in 'db' and returns its [[oid]] if found,
// or void otherwise. If 'db' holds the same encoding more than once, the first
// match is returned.
export fn oid_from_der(db: *oiddb, raw: []u8) (void | oid) = {
	for (let id = 0z; id < len(db.index); id += 1) {
		const start = db.index[id] + 1;
		const n = db.lut[start - 1];
		if (bytes::equal(raw, db.lut[start..start + n])) {
			return id: oid;
		};
	};
};

// Borrows the DER representation of a known oid from 'db'.
export fn oid_to_der(db: *oiddb, o: oid) []u8 = {
	// the entry starts with its length, followed by the DER form
	const lenpos = db.index[o];
	const start = lenpos + 1;
	return db.lut[start..start + db.lut[lenpos]];
};

// Returns the name of a known oid. The name is borrowed from 'db'.
export fn stroid(db: *oiddb, o: oid) str = db.names[o];

// Returns the oid given in its DER form in dotted notation. The caller must
// free the returned value.
//
// Fails with [[errors::invalid]] if 'der' is empty, ends in the middle of a
// number or contains a number padded with leading zero groups, and with
// [[errors::overflow]] if a number does not fit into u32.
export fn strrawoid(der: []u8) (str | io::error) = {
	let s = memio::dynamic();
	let ok = false;
	defer if (!ok) io::close(&s)!;

	if (len(der) < 1) {
		return errors::invalid;
	};

	// Largest value that can take another seven bits without overflowing
	def SHIFTMAX: u32 = 0x1ffffff;

	let first = true;
	let el = 0u32;
	for (let i = 0z; i < len(der); i += 1) {
		if (el == 0 && der[i] == 0x80) {
			// X.690 s8.19.2: numbers use the fewest bytes possible
			return errors::invalid;
		};
		if (el > SHIFTMAX) {
			return errors::overflow;
		};
		el = (el << 7) + (der[i] & 0x7f);

		if (der[i] & 0x80 != 0) {
			// more bytes of this number follow
			continue;
		};

		if (first) {
			// X.690 s8.19.4: the first two numbers are stored as
			// one, 'a * 40 + b'. 'a' is at most 2, and only then
			// 'b' may be 40 or more.
			const a: u32 = if (el < 80) el / 40 else 2;
			const b = el - a * 40;
			fmt::fprintf(&s, "{}.{}", a, b)?;
			first = false;
		} else {
			fmt::fprintf(&s, ".{}", el)?;
		};
		el = 0;
	};

	if (der[len(der) - 1] & 0x80 != 0) {
		// the last number is incomplete
		return errors::invalid;
	};

	ok = true;
	return memio::string(&s)!;
};

// Converts the DER form of rsaEncryption to dotted notation.
@test fn strrawoid() void = {
	let der: [_]u8 = [0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01];
	let s = strrawoid(der)!;
	defer free(s);
	assert(s == "1.2.840.113549.1.1.1");
};

diff --git a/encoding/asn1/stdoid/db.ha b/encoding/asn1/stdoid/db.ha
new file mode 100644
index 00000000..3bef778c
--- /dev/null
+++ b/encoding/asn1/stdoid/db.ha
@@ -0,0 +1,127 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>
// This is an auto generated file. Do not edit.

use encoding::asn1;

// The oid database of the standard library, generated from db.txt.
//
// NOTE(review): id-at-generationQualifier used to carry the encoding of
// id-at-initials (2.5.4.43). It has been corrected to 2.5.4.44 here;
// presumably db.txt needs the same correction, otherwise regenerating this
// file brings the duplicate back.
const _db = asn1::oiddb {
	lut = [
		0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x03, 0x2b,
		0x65, 0x70, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x05,
		0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0e, 0x09, 0x2a,
		0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b, 0x09, 0x2a, 0x86, 0x48,
		0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0c, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7,
		0x0d, 0x01, 0x01, 0x0d, 0x05, 0x2b, 0x0e, 0x03, 0x02, 0x1a, 0x09, 0x60,
		0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04, 0x09, 0x60, 0x86, 0x48,
		0x01, 0x65, 0x03, 0x04, 0x02, 0x01, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65,
		0x03, 0x04, 0x02, 0x02, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04,
		0x02, 0x03, 0x07, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x02, 0x01, 0x08, 0x2a,
		0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07, 0x08, 0x2a, 0x86, 0x48, 0xce,
		0x3d, 0x03, 0x01, 0x07, 0x05, 0x2b, 0x81, 0x04, 0x00, 0x22, 0x05, 0x2b,
		0x81, 0x04, 0x00, 0x23, 0x07, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x04, 0x01,
		0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x04, 0x03, 0x01, 0x08, 0x2a, 0x86,
		0x48, 0xce, 0x3d, 0x04, 0x03, 0x02, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d,
		0x04, 0x03, 0x03, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x04, 0x03, 0x04,
		0x03, 0x55, 0x04, 0x03, 0x03, 0x55, 0x04, 0x04, 0x03, 0x55, 0x04, 0x05,
		0x03, 0x55, 0x04, 0x06, 0x03, 0x55, 0x04, 0x07, 0x03, 0x55, 0x04, 0x08,
		0x03, 0x55, 0x04, 0x0a, 0x03, 0x55, 0x04, 0x0b, 0x03, 0x55, 0x04, 0x0c,
		0x03, 0x55, 0x04, 0x2a, 0x03, 0x55, 0x04, 0x2b, 0x03, 0x55, 0x04, 0x2c,
		0x03, 0x55, 0x04, 0x2e, 0x03, 0x55, 0x04, 0x41, 0x0a, 0x09, 0x92, 0x26,
		0x89, 0x93, 0xf2, 0x2c, 0x64, 0x01, 0x19, 0x03, 0x55, 0x1d, 0x0f, 0x03,
		0x55, 0x1d, 0x11, 0x03, 0x55, 0x1d, 0x13, 0x03, 0x55, 0x1d, 0x25,
	],
	index = [
		0x0000, 0x000a, 0x000e, 0x0018, 0x0022, 0x002c, 0x0036, 0x0040, 0x0046,
		0x0050, 0x005a, 0x0064, 0x006e, 0x0076, 0x007f, 0x0088, 0x008e, 0x0094,
		0x009c, 0x00a5, 0x00ae, 0x00b7, 0x00c0, 0x00c4, 0x00c8, 0x00cc, 0x00d0,
		0x00d4, 0x00d8, 0x00dc, 0x00e0, 0x00e4, 0x00e8, 0x00ec, 0x00f0, 0x00f4,
		0x00f8, 0x0103, 0x0107, 0x010b, 0x010f,
	],
	names = [
		"rsaEncryption",
		"ed25519",
		"sha1WithRSAEncryption",
		"sha224WithRSAEncryption",
		"sha256WithRSAEncryption",
		"sha384WithRSAEncryption",
		"sha512WithRSAEncryption",
		"id-sha1",
		"id-sha224",
		"id-sha256",
		"id-sha384",
		"id-sha512",
		"id-ecPublicKey",
		"prime256v1",
		"ansix9p256r1",
		"ansix9p384r1",
		"ansix9p521r1",
		"ecdsa-with-SHA1",
		"ecdsa-with-SHA224",
		"ecdsa-with-SHA256",
		"ecdsa-with-SHA384",
		"ecdsa-with-SHA512",
		"id-at-commonName",
		"id-at-surname",
		"id-at-serialNumber",
		"id-at-countryName",
		"id-at-localityName",
		"id-at-stateOrProvinceName",
		"id-at-organizationName",
		"id-at-organizationalUnitName",
		"id-at-title",
		"id-at-givenName",
		"id-at-initials",
		"id-at-generationQualifier",
		"id-at-dnQualifier",
		"id-at-pseudonym",
		"id-domainComponent",
		"id-ce-keyUsage",
		"id-ce-subjectAltName",
		"id-ce-basicConstraints",
		"id-ce-extKeyUsage",
	],
};

// The oid database of the standard library.
export const db = &_db;

// The oids of the database. The value of each constant is the position of the
// oid within the 'index' and 'names' fields of the database.
//
// NOTE(review): PRIME256V1 and ANSIX9P256R1 have the same encoding in the
// lookup table. Since oid_from_der returns the first match, looking up that
// encoding yields PRIME256V1 and never ANSIX9P256R1.
export def RSA_ENCRYPTION: asn1::oid = 0;
export def ED25519: asn1::oid = 1;
export def SHA1_WITH_RSAENCRYPTION: asn1::oid = 2;
export def SHA224_WITH_RSAENCRYPTION: asn1::oid = 3;
export def SHA256_WITH_RSAENCRYPTION: asn1::oid = 4;
export def SHA384_WITH_RSAENCRYPTION: asn1::oid = 5;
export def SHA512_WITH_RSAENCRYPTION: asn1::oid = 6;
export def ID_SHA1: asn1::oid = 7;
export def ID_SHA224: asn1::oid = 8;
export def ID_SHA256: asn1::oid = 9;
export def ID_SHA384: asn1::oid = 10;
export def ID_SHA512: asn1::oid = 11;
export def ID_EC_PUBLIC_KEY: asn1::oid = 12;
export def PRIME256V1: asn1::oid = 13;
export def ANSIX9P256R1: asn1::oid = 14;
export def ANSIX9P384R1: asn1::oid = 15;
export def ANSIX9P521R1: asn1::oid = 16;
export def ECDSA_WITH_SHA1: asn1::oid = 17;
export def ECDSA_WITH_SHA224: asn1::oid = 18;
export def ECDSA_WITH_SHA256: asn1::oid = 19;
export def ECDSA_WITH_SHA384: asn1::oid = 20;
export def ECDSA_WITH_SHA512: asn1::oid = 21;
export def ID_AT_COMMON_NAME: asn1::oid = 22;
export def ID_AT_SURNAME: asn1::oid = 23;
export def ID_AT_SERIAL_NUMBER: asn1::oid = 24;
export def ID_AT_COUNTRY_NAME: asn1::oid = 25;
export def ID_AT_LOCALITY_NAME: asn1::oid = 26;
export def ID_AT_STATE_OR_PROVINCE_NAME: asn1::oid = 27;
export def ID_AT_ORGANIZATION_NAME: asn1::oid = 28;
export def ID_AT_ORGANIZATIONAL_UNIT_NAME: asn1::oid = 29;
export def ID_AT_TITLE: asn1::oid = 30;
export def ID_AT_GIVEN_NAME: asn1::oid = 31;
export def ID_AT_INITIALS: asn1::oid = 32;
export def ID_AT_GENERATION_QUALIFIER: asn1::oid = 33;
export def ID_AT_DN_QUALIFIER: asn1::oid = 34;
export def ID_AT_PSEUDONYM: asn1::oid = 35;
export def ID_DOMAIN_COMPONENT: asn1::oid = 36;
export def ID_CE_KEY_USAGE: asn1::oid = 37;
export def ID_CE_SUBJECT_ALT_NAME: asn1::oid = 38;
export def ID_CE_BASIC_CONSTRAINTS: asn1::oid = 39;
export def ID_CE_EXT_KEY_USAGE: asn1::oid = 40;
diff --git a/encoding/asn1/stdoid/db.txt b/encoding/asn1/stdoid/db.txt
new file mode 100644
index 00000000..de4e42d4
--- /dev/null
+++ b/encoding/asn1/stdoid/db.txt
@@ -0,0 +1,51 @@
# OIDs that will be translated into db.ha using `genoiddb`

rsaEncryption                 1.2.840.113549.1.1.1
ed25519                       1.3.101.112

sha1WithRSAEncryption         1.2.840.113549.1.1.5
sha224WithRSAEncryption       1.2.840.113549.1.1.14
sha256WithRSAEncryption       1.2.840.113549.1.1.11
sha384WithRSAEncryption       1.2.840.113549.1.1.12
sha512WithRSAEncryption       1.2.840.113549.1.1.13

id-sha1                       1.3.14.3.2.26
id-sha224                     2.16.840.1.101.3.4.2.4
id-sha256                     2.16.840.1.101.3.4.2.1
id-sha384                     2.16.840.1.101.3.4.2.2
id-sha512                     2.16.840.1.101.3.4.2.3

id-ecPublicKey                1.2.840.10045.2.1
prime256v1                    1.2.840.10045.3.1.7

ansix9p256r1                  1.2.840.10045.3.1.7
ansix9p384r1                  1.3.132.0.34
ansix9p521r1                  1.3.132.0.35

ecdsa-with-SHA1               1.2.840.10045.4.1
ecdsa-with-SHA224             1.2.840.10045.4.3.1
ecdsa-with-SHA256             1.2.840.10045.4.3.2
ecdsa-with-SHA384             1.2.840.10045.4.3.3
ecdsa-with-SHA512             1.2.840.10045.4.3.4

id-at-commonName              2.5.4.3
id-at-surname                 2.5.4.4
id-at-serialNumber            2.5.4.5
id-at-countryName             2.5.4.6
id-at-localityName            2.5.4.7
id-at-stateOrProvinceName     2.5.4.8
id-at-organizationName        2.5.4.10
id-at-organizationalUnitName  2.5.4.11
id-at-title                   2.5.4.12
id-at-givenName               2.5.4.42
id-at-initials                2.5.4.43
id-at-generationQualifier     2.5.4.44
id-at-dnQualifier             2.5.4.46
id-at-pseudonym               2.5.4.65

id-domainComponent            0.9.2342.19200300.100.1.25

id-ce-keyUsage                2.5.29.15
id-ce-subjectAltName          2.5.29.17
id-ce-basicConstraints        2.5.29.19
id-ce-extKeyUsage             2.5.29.37
-- 
2.43.0

[PATCH hare v4 5/6] asn1: add decode support for non utf8 string types

Details
Message ID
<20240207185633.9327-5-apreiml@strohwolke.at>
In-Reply-To
<20240207185633.9327-1-apreiml@strohwolke.at> (view parent)
DKIM signature
pass
Download raw message
Patch: +1214 -0
NumericString and IA5String (the latter is basically ASCII) are required by
some entries in X.509. UniversalString, BMPString and T61String are only
provided for legacy support.

Note that only a subset of T61 is supported. There are still some
certificates in the Mozilla trust store that contain T61-encoded strings,
though the characters they use may be limited to a subset of ASCII.

Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---
 encoding/asn1/+test/strings_test.ha | 164 +++++++++
 encoding/asn1/charset+test.ha       | 154 ++++++++
 encoding/asn1/strings.ha            | 362 +++++++++++++++++++
 encoding/asn1/t61.ha                | 534 ++++++++++++++++++++++++++++
 4 files changed, 1214 insertions(+)
 create mode 100644 encoding/asn1/+test/strings_test.ha
 create mode 100644 encoding/asn1/charset+test.ha
 create mode 100644 encoding/asn1/strings.ha
 create mode 100644 encoding/asn1/t61.ha

diff --git a/encoding/asn1/+test/strings_test.ha b/encoding/asn1/+test/strings_test.ha
new file mode 100644
index 00000000..f338ed26
--- /dev/null
+++ b/encoding/asn1/+test/strings_test.ha
@@ -0,0 +1,164 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bytes;
use errors;
use fmt;
use io;
use strings;


// Asserts that 'f' accepts exactly the bytes listed in 'chars': 'f' is called
// for every value from 0 to 255 and must return true if and only if the value
// occurs in 'chars'. A mismatch is printed before the assertion fails.
fn c_checkrange(chars: []u8, f: *fn (c: u8) bool) void = {
	for (let code = 0z; code < 256; code += 1) {
		const c = code: u8;

		let want = false;
		for (let k = 0z; !want && k < len(chars); k += 1) {
			want = chars[k] == c;
		};

		const got = f(c);
		if (got != want) {
			fmt::println(code, want, got)!;
		};
		assert(got == want);
	};
};

// c_is_num must accept the digits and the space character, nothing else.
@test fn c_is_num() void = {
	const allowed: [_]u8 = [
		' ',
		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	];
	c_checkrange(allowed, &c_is_num);
};

// c_is_print must accept letters, digits, space and the listed punctuation
// characters, nothing else.
@test fn c_is_print() void = {
	const allowed: [_]u8 = [
		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
		' ', '\'', '(', ')', '+', ',', '-', '.', '/', ':', '=', '?',
	];
	c_checkrange(allowed, &c_is_print);
};

// Tests decoding of UTF8String values, both with read_utf8str and with the
// stream returned by strreader. Every case is run with a complete string and
// with a string whose last rune is cut off, which must fail with 'invalid'.
@test fn utf8() void = {
	let buf: [16]u8 = [0...];

	// a single four byte rune
	let expected: str = strings::fromutf8([0xf0, 0x90, 0x8d, 0x88])!;
	assert(read_utf8str(&d([0x0c, 0x04, 0xf0, 0x90, 0x8d, 0x88]), buf)!
		== expected);
	assert(read_utf8str(&d([0x0c, 0x03, 0xf0, 0x90, 0x8d]), buf) is invalid);

	bytes::zero(buf);
	let r = strreader(&d([0x0c, 0x04, 0xf0, 0x90, 0x8d, 0x88]), utag::UTF8_STRING)!;
	assert(io::read(&r, buf)! == 4);
	assert(bytes::equal(buf[..4], strings::toutf8(expected)));

	// two ASCII characters followed by a four byte rune
	bytes::zero(buf);
	let expected: str = strings::fromutf8([0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88])!;
	assert(read_utf8str(&d([0x0c, 0x06, 0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88]), buf)!
		== expected);
	assert(read_utf8str(&d([0x0c, 0x05, 0x55, 0x56, 0xf0, 0x90, 0x8d]), buf) is invalid);

	bytes::zero(buf);
	let r = strreader(&d([0x0c, 0x06, 0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88]), utag::UTF8_STRING)!;
	assert(io::read(&r, buf)! == 6);
	assert(bytes::equal(buf[..6], strings::toutf8(expected)));

	let r = strreader(&d([0x0c, 0x05, 0x55, 0x56, 0xf0, 0x90, 0x8d]), utag::UTF8_STRING)!;
	assert(unwrap_err(io::readall(&r, buf[2..]) as io::error) is invalid);

	// The four byte buffer of the first read has no room for the rune after
	// the two ASCII characters, so the rune is delivered by the second read.
	bytes::zero(buf);
	let r = strreader(&d([0x0c, 0x06, 0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88]), utag::UTF8_STRING)!;
	assert(io::read(&r, buf[..4])! == 2);
	assert(io::read(&r, buf[2..])! == 4);
	assert(bytes::equal(buf[..6], strings::toutf8(expected)));

	bytes::zero(buf);
	let r = strreader(&d([0x0c, 0x05, 0x55, 0x56, 0xf0, 0x90, 0x8d]), utag::UTF8_STRING)!;
	assert(io::read(&r, buf[..4])! == 2);
	assert(unwrap_err(io::readall(&r, buf[2..]) as io::error) is invalid);
};

// Tests decoding of TeletexString (T.61) values into UTF-8 using strreader.
@test fn t61() void = {
	// Two header bytes (0x14 and the content length 0x29), followed by
	// the T.61 encoded text.
	let input: [_]u8 = [
		0x14, 0x29,
		0x42, 0xc8, 0x61, 0x72, 0x65, 0x6e, 0x20, 0x76, 0x65, 0x72,
		0x7a, 0x65, 0x68, 0x72, 0x65, 0x6e, 0x20, 0x67, 0x65, 0x72,
		0x6e, 0x65, 0x20, 0xc8, 0x75, 0x62, 0x65, 0x72, 0x6d, 0xc8,
		0x61, 0xfb, 0x69, 0x67, 0x20, 0x48, 0x6f, 0x6e, 0x69, 0x67,
		0x0a,
	];

	// The same text encoded as UTF-8.
	const expected: [_]u8 = [
		0x42, 0xc3, 0xa4, 0x72, 0x65, 0x6e, 0x20, 0x76, 0x65, 0x72,
		0x7a, 0x65, 0x68, 0x72, 0x65, 0x6e, 0x20, 0x67, 0x65, 0x72,
		0x6e, 0x65, 0x20, 0xc3, 0xbc, 0x62, 0x65, 0x72, 0x6d, 0xc3,
		0xa4, 0xc3, 0x9f, 0x69, 0x67, 0x20, 0x48, 0x6f, 0x6e, 0x69,
		0x67, 0x0a,
	];

	let dec = d(input);
	let r = strreader(&dec, utag::TELETEX_STRING)!;
	let result = io::drain(&r)!;
	defer free(result);
	assert(bytes::equal(expected, result));
	assert(trypeek(&dec) is io::EOF);

	// cut off multibyte char
	// The content length is changed to 2, so that the content (0x42, 0xc8)
	// ends with a combining character that lacks its base character.
	input[1] = 0x2;
	let r = strreader(&d(input[..4]), utag::TELETEX_STRING)!;
	assert(unwrap_err(io::drain(&r) as io::error) is invalid);

	// not enough space for multibyte char
	// 'in' is a slice of 'input', hence the length byte of 'input' is
	// modified again. The first 23 content bytes decode to 23 bytes of
	// UTF-8; the following character (0xc8, 0x75) requires two bytes, but
	// only one byte is left in 'buf'.
	let buf: [24]u8 = [0...];
	let in = input[..27];
	in[1] = (len(in) - 2): u8;
	let dec = d(in);
	let r = strreader(&dec, utag::TELETEX_STRING)!;
	assert(io::read(&r, buf)! == 23);
	// NOTE(review): presumably badformat is reported because the string
	// has not been read completely; confirm against trypeek.
	assert(trypeek(&dec) is badformat);

	// combining character 0xc2 followed by 'e'
	let r = strreader(&d([
		0x14, 0x0f, 0x63, 0x6c, 0xc2, 0x65, 0x73, 0x20, 0x70, 0x75,
		0x62, 0x6c, 0x69, 0x71, 0x75, 0x65, 0x73,
	]), utag::TELETEX_STRING)!;
	let b = io::drain(&r)!;
	defer free(b);

	assert(strings::fromutf8(b)! == "cl\u00e9s publiques");
};

// Tests decoding of a BMPString (two bytes per character, big endian) into
// UTF-8 using strreader.
@test fn bmp() void = {
	// two header bytes, followed by 19 characters of two bytes each
	let encoded: [_]u8 = [
		0x1e, 0x26,
		0x00, 0x48, 0x00, 0xe4, 0x00, 0x72, 0x00, 0x65, 0x00, 0x6c,
		0x00, 0x61, 0x00, 0x6e, 0x00, 0x67, 0x00, 0x20, 0x00, 0x69,
		0x01, 0x61, 0x00, 0x20, 0x00, 0x6e, 0x00, 0x65, 0x00, 0x61,
		0x00, 0x74, 0x00, 0x6f, 0x00, 0x20, 0x27, 0x64,
	];

	// the same text as UTF-8
	const want: [_]u8 = [
		0x48, 0xc3, 0xa4, 0x72, 0x65, 0x6c, 0x61, 0x6e, 0x67, 0x20,
		0x69, 0xc5, 0xa1, 0x20, 0x6e, 0x65, 0x61, 0x74, 0x6f, 0x20,
		0xe2, 0x9d, 0xa4,
	];

	let dec = d(encoded);
	let reader = strreader(&dec, utag::BMP_STRING)!;
	let got = io::drain(&reader)!;
	defer free(got);

	assert(bytes::equal(want, got));
	assert(trypeek(&dec) is io::EOF);
};
diff --git a/encoding/asn1/charset+test.ha b/encoding/asn1/charset+test.ha
new file mode 100644
index 00000000..acf66f9b
--- /dev/null
+++ b/encoding/asn1/charset+test.ha
@@ -0,0 +1,154 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use ascii;
use bytes;
use fmt;
use io;
use memio;


// Writes a textual table of the results of [[t61_chardecode]] to 'dest'.
//
// The first part consists of a header line with the hex digits 0 to f,
// followed by the decoded characters of all bytes from 0x00 to 0xff, 16 per
// row. Every cell is terminated by a \t. Cells of bytes that do not decode to
// a rune on their own are left empty.
//
// The second part prints one line for each combining byte from 0xc1 to 0xcf,
// except 0xcc, containing all runes that result from combining it with one of
// the bytes 0x41 to 0x7a. All possible accents follow the table as defined in
// the two bytes chapter at https://en.wikipedia.org/wiki/T.51/ISO/IEC_6937
fn print_t61_table(dest: io::handle) void = {
	for (let i = 0z; i < 16; i += 1) {
		fmt::fprintf(dest, "{:x}\t", i)!;
	};
	fmt::fprintln(dest)!;

	for (let i = 0z; i < 256; i += 1) {
		if (i % 16 == 0) {
			fmt::fprintln(dest)!;
		};
		match (t61_chardecode([i: u8])) {
		case insufficient =>
			yield;
		case invalid =>
			yield;
		case let r: rune =>
			if (i > 0xa0 || (ascii::isprint(r) && !ascii::isspace(r))) {
				fmt::fprint(dest, r)!;
			} else {
				fmt::fprintf(dest, "x{:.4x}", r: u32)!;
			};
		};

		// Every cell is terminated by a tab, including the last cell of
		// a row. This used to be guarded by `i + 1 % 16 != 0`, which
		// is evaluated as `i + (1 % 16) != 0` and hence was always
		// true. The expected output in t61_test_table contains the
		// trailing tab, so it is written unconditionally.
		fmt::fprint(dest, "\t")!;
	};

	fmt::fprintln(dest)!;

	for (let i = 0xc1u8; i < 0xd0; i += 1) {
		if (i == 0xcc) continue;
		fmt::fprintf(dest, "{:.2x}\t", i)!;

		// a combining byte on its own requires another byte
		assert(t61_chardecode([i]) is insufficient);

		for (let j = 0x41u32; j < 0x7b; j += 1) {
			if (!ascii::isprint(j: rune)) {
				continue;
			};
			match (t61_chardecode([i, j: u8])) {
			case let r: rune =>
				fmt::fprint(dest, r)!;
			case =>
				yield;
			};
		};
		fmt::fprintln(dest)!;
	};
};

// Compares the output of print_t61_table with t61_test_table.
@test fn t61encode() void = {
	let out = memio::dynamic();
	defer io::close(&out)!;

	print_t61_table(&out);

	const got = memio::buffer(&out);
	assert(bytes::equal(t61_test_table, got));
};

// Expected output of print_t61_table. Print this table as UTF-8 to visually
// check the characters.
const t61_test_table: [_]u8 = [
	0x30, 0x09, 0x31, 0x09, 0x32, 0x09, 0x33, 0x09, 0x34, 0x09, 0x35, 0x09,
	0x36, 0x09, 0x37, 0x09, 0x38, 0x09, 0x39, 0x09, 0x61, 0x09, 0x62, 0x09,
	0x63, 0x09, 0x64, 0x09, 0x65, 0x09, 0x66, 0x09, 0x0a, 0x0a, 0x09, 0x09,
	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x78, 0x30, 0x30, 0x30,
	0x61, 0x09, 0x09, 0x78, 0x30, 0x30, 0x30, 0x63, 0x09, 0x78, 0x30, 0x30,
	0x30, 0x64, 0x09, 0x09, 0x09, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
	0x09, 0x09, 0x09, 0x09, 0x78, 0x30, 0x30, 0x31, 0x61, 0x09, 0x78, 0x30,
	0x30, 0x31, 0x62, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x78, 0x30, 0x30,
	0x32, 0x30, 0x09, 0x21, 0x09, 0x22, 0x09, 0x09, 0x09, 0x25, 0x09, 0x26,
	0x09, 0x27, 0x09, 0x28, 0x09, 0x29, 0x09, 0x2a, 0x09, 0x2b, 0x09, 0x2c,
	0x09, 0x2d, 0x09, 0x2e, 0x09, 0x2f, 0x09, 0x0a, 0x30, 0x09, 0x31, 0x09,
	0x32, 0x09, 0x33, 0x09, 0x34, 0x09, 0x35, 0x09, 0x36, 0x09, 0x37, 0x09,
	0x38, 0x09, 0x39, 0x09, 0x3a, 0x09, 0x3b, 0x09, 0x3c, 0x09, 0x3d, 0x09,
	0x3e, 0x09, 0x3f, 0x09, 0x0a, 0x40, 0x09, 0x41, 0x09, 0x42, 0x09, 0x43,
	0x09, 0x44, 0x09, 0x45, 0x09, 0x46, 0x09, 0x47, 0x09, 0x48, 0x09, 0x49,
	0x09, 0x4a, 0x09, 0x4b, 0x09, 0x4c, 0x09, 0x4d, 0x09, 0x4e, 0x09, 0x4f,
	0x09, 0x0a, 0x50, 0x09, 0x51, 0x09, 0x52, 0x09, 0x53, 0x09, 0x54, 0x09,
	0x55, 0x09, 0x56, 0x09, 0x57, 0x09, 0x58, 0x09, 0x59, 0x09, 0x5a, 0x09,
	0x5b, 0x09, 0x09, 0x5d, 0x09, 0x09, 0x5f, 0x09, 0x0a, 0x09, 0x61, 0x09,
	0x62, 0x09, 0x63, 0x09, 0x64, 0x09, 0x65, 0x09, 0x66, 0x09, 0x67, 0x09,
	0x68, 0x09, 0x69, 0x09, 0x6a, 0x09, 0x6b, 0x09, 0x6c, 0x09, 0x6d, 0x09,
	0x6e, 0x09, 0x6f, 0x09, 0x0a, 0x70, 0x09, 0x71, 0x09, 0x72, 0x09, 0x73,
	0x09, 0x74, 0x09, 0x75, 0x09, 0x76, 0x09, 0x77, 0x09, 0x78, 0x09, 0x79,
	0x09, 0x7a, 0x09, 0x09, 0x7c, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x09, 0x09,
	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x78, 0x30, 0x30,
	0x38, 0x62, 0x09, 0x78, 0x30, 0x30, 0x38, 0x63, 0x09, 0x09, 0x09, 0x09,
	0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
	0x78, 0x30, 0x30, 0x39, 0x62, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x78,
	0x30, 0x30, 0x61, 0x30, 0x09, 0xc2, 0xa1, 0x09, 0xc2, 0xa2, 0x09, 0xc2,
	0xa3, 0x09, 0x24, 0x09, 0xc2, 0xa5, 0x09, 0x23, 0x09, 0xc2, 0xa7, 0x09,
	0xc2, 0xa4, 0x09, 0x09, 0x09, 0xc2, 0xab, 0x09, 0x09, 0x09, 0x09, 0x09,
	0x0a, 0xc2, 0xb0, 0x09, 0xc2, 0xb1, 0x09, 0xc2, 0xb2, 0x09, 0xc2, 0xb3,
	0x09, 0xc3, 0x97, 0x09, 0xc2, 0xb5, 0x09, 0xc2, 0xb6, 0x09, 0xc2, 0xb7,
	0x09, 0xc3, 0xb7, 0x09, 0x09, 0x09, 0xc2, 0xbb, 0x09, 0xc2, 0xbc, 0x09,
	0xc2, 0xbd, 0x09, 0xc2, 0xbe, 0x09, 0xc2, 0xbf, 0x09, 0x0a, 0x09, 0x09,
	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
	0x09, 0x09, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0a, 0xe2, 0x84, 0xa6, 0x09,
	0xc3, 0x86, 0x09, 0xc3, 0x90, 0x09, 0xc2, 0xaa, 0x09, 0xc4, 0xa6, 0x09,
	0x09, 0xc4, 0xb2, 0x09, 0xc4, 0xbf, 0x09, 0xc5, 0x81, 0x09, 0xc3, 0x98,
	0x09, 0xc5, 0x92, 0x09, 0xc2, 0xba, 0x09, 0xc3, 0x9e, 0x09, 0xc5, 0xa6,
	0x09, 0xc5, 0x8a, 0x09, 0xc5, 0x89, 0x09, 0x0a, 0xc4, 0xb8, 0x09, 0xc3,
	0xa6, 0x09, 0xc4, 0x91, 0x09, 0xc3, 0xb0, 0x09, 0xc4, 0xa7, 0x09, 0xc4,
	0xb1, 0x09, 0xc4, 0xb3, 0x09, 0xc5, 0x80, 0x09, 0xc5, 0x82, 0x09, 0xc3,
	0xb8, 0x09, 0xc5, 0x93, 0x09, 0xc3, 0x9f, 0x09, 0xc3, 0xbe, 0x09, 0xc5,
	0xa7, 0x09, 0xc5, 0x8b, 0x09, 0x09, 0x0a, 0x63, 0x31, 0x09, 0xc3, 0x80,
	0xc3, 0x88, 0xc3, 0x8c, 0xc3, 0x92, 0xc3, 0x99, 0xc3, 0xa0, 0xc3, 0xa8,
	0xc3, 0xac, 0xc3, 0xb2, 0xc3, 0xb9, 0x0a, 0x63, 0x32, 0x09, 0xc3, 0x81,
	0xc4, 0x86, 0xc3, 0x89, 0xc3, 0x8d, 0xc4, 0xb9, 0xc5, 0x83, 0xc3, 0x93,
	0xc5, 0x94, 0xc5, 0x9a, 0xc3, 0x9a, 0xc3, 0x9d, 0xc5, 0xb9, 0xc3, 0xa1,
	0xc4, 0x87, 0xc3, 0xa9, 0xc4, 0xa3, 0xc3, 0xad, 0xc4, 0xba, 0xc5, 0x84,
	0xc3, 0xb3, 0xc5, 0x95, 0xc5, 0x9b, 0xc3, 0xba, 0xc3, 0xbd, 0xc5, 0xba,
	0x0a, 0x63, 0x33, 0x09, 0xc3, 0x82, 0xc4, 0x88, 0xc3, 0x8a, 0xc4, 0x9c,
	0xc4, 0xa4, 0xc3, 0x8e, 0xc4, 0xb4, 0xc3, 0x94, 0xc5, 0x9c, 0xc3, 0x9b,
	0xc5, 0xb4, 0xc5, 0xb6, 0xc3, 0xa2, 0xc4, 0x89, 0xc3, 0xaa, 0xc4, 0x9d,
	0xc4, 0xa5, 0xc3, 0xae, 0xc4, 0xb5, 0xc3, 0xb4, 0xc5, 0x9d, 0xc3, 0xbb,
	0xc5, 0xb5, 0xc5, 0xb7, 0x0a, 0x63, 0x34, 0x09, 0xc3, 0x83, 0xc4, 0xa8,
	0xc3, 0x91, 0xc3, 0x95, 0xc5, 0xa8, 0xc3, 0xa3, 0xc4, 0xa9, 0xc3, 0xb1,
	0xc3, 0xb5, 0xc5, 0xa9, 0x0a, 0x63, 0x35, 0x09, 0xc4, 0x80, 0xc4, 0x92,
	0xc4, 0xaa, 0xc5, 0x8c, 0xc5, 0xaa, 0xc4, 0x81, 0xc4, 0x93, 0xc4, 0xab,
	0xc5, 0x8d, 0xc5, 0xab, 0x0a, 0x63, 0x36, 0x09, 0xc4, 0x82, 0xc4, 0x9e,
	0xc5, 0xac, 0xc4, 0x83, 0xc4, 0x9f, 0xc5, 0xad, 0x0a, 0x63, 0x37, 0x09,
	0xc4, 0x8a, 0xc4, 0x96, 0xc4, 0xa0, 0xc4, 0xb0, 0xc5, 0xbb, 0xc4, 0x8b,
	0xc4, 0x97, 0xc4, 0xa1, 0xc5, 0xbc, 0x0a, 0x63, 0x38, 0x09, 0xc3, 0x84,
	0xc3, 0x8b, 0xc3, 0x8f, 0xc3, 0x96, 0xc3, 0x9c, 0xc5, 0xb8, 0xc3, 0xa4,
	0xc3, 0xab, 0xc3, 0xaf, 0xc3, 0xb6, 0xc3, 0xbc, 0xc3, 0xbf, 0x0a, 0x63,
	0x39, 0x09, 0xc3, 0x84, 0xc3, 0x8b, 0xc3, 0x8f, 0xc3, 0x96, 0xc3, 0x9c,
	0xc5, 0xb8, 0xc3, 0xa4, 0xc3, 0xab, 0xc3, 0xaf, 0xc3, 0xb6, 0xc3, 0xbc,
	0xc3, 0xbf, 0x0a, 0x63, 0x61, 0x09, 0xc3, 0x85, 0xc5, 0xae, 0xc3, 0xa5,
	0xc5, 0xaf, 0x0a, 0x63, 0x62, 0x09, 0xc3, 0x87, 0xc4, 0xa2, 0xc4, 0xb6,
	0xc4, 0xbb, 0xc5, 0x85, 0xc5, 0x96, 0xc5, 0x9e, 0xc5, 0xa2, 0xc3, 0xa7,
	0xc4, 0xb7, 0xc4, 0xbc, 0xc5, 0x86, 0xc5, 0x97, 0xc5, 0x9f, 0xc5, 0xa3,
	0x0a, 0x63, 0x64, 0x09, 0xc5, 0x90, 0xc5, 0xb0, 0xc5, 0x91, 0xc5, 0xb1,
	0x0a, 0x63, 0x65, 0x09, 0xc4, 0x84, 0xc4, 0x98, 0xc4, 0xae, 0xc5, 0xb2,
	0xc4, 0x85, 0xc4, 0x99, 0xc4, 0xaf, 0xc5, 0xb3, 0x0a, 0x63, 0x66, 0x09,
	0xc4, 0x8c, 0xc4, 0x8e, 0xc4, 0x9a, 0xc4, 0xbd, 0xc5, 0x87, 0xc5, 0x98,
	0xc5, 0xa0, 0xc5, 0xa4, 0xc5, 0xbd, 0xc4, 0x8d, 0xc4, 0x8f, 0xc4, 0x9b,
	0xc4, 0xbe, 0xc5, 0x88, 0xc5, 0x99, 0xc5, 0xa1, 0xc5, 0xa5, 0xc5, 0xbe,
	0x0a,
];

diff --git a/encoding/asn1/strings.ha b/encoding/asn1/strings.ha
new file mode 100644
index 00000000..a381958b
--- /dev/null
+++ b/encoding/asn1/strings.ha
@@ -0,0 +1,362 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use encoding::utf8;
use endian;
use errors;
use io;
use strings;


// Attribute bit of [[cclass]]: the character is allowed in a NumericString.
def N: u8 = 0o1;

// Attribute bit of [[cclass]]: the character is allowed in a PrintableString.
def P: u8 = 0o2;

// LUT of bitfields with character attributes. The table is indexed by the
// character code and covers the range from 0 to 0o177 (0x7f); users must check
// that a character is within this range before the lookup. Each row holds
// eight entries, the trailing comment is the index of the first entry of the
// row in octal notation.
const cclass: [_]u8 = [
//	 0	 1	 2	 3	 4	 5	 6	 7
	0,	0,	0,	0,	0,	0,	0,	0,	// 0
	0,	0,	0,	0,	0,	0,	0,	0,	// 10
	0,	0,	0,	0,	0,	0,	0,	0,	// 20
	0,	0,	0,	0,	0,	0,	0,	0,	// 30
	N|P,	0,	0,	0,	0,	0,	0,	P,	// 40
	P,	P,	0,	P,	P,	P,	P,	P,	// 50
	N|P,	N|P,	N|P,	N|P,	N|P,	N|P,	N|P,	N|P,	// 60
	N|P,	N|P,	P,	0,	0,	P,	0,	P,	// 70
	0,	P,	P,	P,	P,	P,	P,	P,	// 100
	P,	P,	P,	P,	P,	P,	P,	P,	// 110
	P,	P,	P,	P,	P,	P,	P,	P,	// 120
	P,	P,	P,	0,	0,	0,	0,	0,	// 130
	0,	P,	P,	P,	P,	P,	P,	P,	// 140
	P,	P,	P,	P,	P,	P,	P,	P,	// 150
	P,	P,	P,	P,	P,	P,	P,	P,	// 160
	P,	P,	P,	0,	0,	0,	0,	0,	// 170
];

// A function that reports whether the byte 'c' is allowed in a certain string
// type. Used by [[char_decoder]] to check every byte that has been read.
type char_validator = fn (c: u8) bool;

// Reports whether 'c' is allowed in a NumericString, that is, whether 'c' is
// within the range of [[cclass]] and its entry has the N attribute set.
fn c_is_num(c: u8) bool = {
	if (c >= 0x80) {
		return false;
	};
	return (cclass[c] & N) != 0;
};

// Reports whether 'c' is allowed in a PrintableString, that is, whether 'c' is
// within the range of [[cclass]] and its entry has the P attribute set.
fn c_is_print(c: u8) bool = {
	if (c >= 0x80) {
		return false;
	};
	return (cclass[c] & P) != 0;
};

// Reports whether 'c' is allowed in an IA5String. Every byte that does not
// have its most significant bit set is accepted.
fn c_is_ia5(c: u8) bool = {
	return c < 0x80;
};

// Returns the number of bytes of the biggest complete utf8 chunk at the start
// of 'buf'. The result is smaller than len(buf) if 'buf' ends in the middle of
// a multi-byte sequence; the remaining bytes may be completed by data that
// follows. Returns invalid, if the biggest complete chunk contains invalid
// utf8 characters, or if the end of 'buf' can not be the start of a valid
// sequence.
fn validutf8(buf: []u8) (size | invalid) = {
	if (len(buf) == 0) {
		return 0z;
	};

	// A sequence is at most four bytes long, so the first byte of the last
	// sequence must be one of the last four bytes.
	const min = if (len(buf) < 4) 0z else len(buf) - 4;

	// Position of the last byte that utf8sz accepts as first byte of a
	// sequence, and the sequence length it announces.
	let lastvalid = 0z;
	let lastsz = 0z;
	for (let i = min; i < len(buf); i += 1) {
		match (utf8::utf8sz(buf[i])) {
		case utf8::invalid =>
			yield;
		case let s: size =>
			lastsz = s;
			lastvalid = i;
		};
	};

	if (lastsz == 0) return invalid;

	const tail = len(buf) - lastvalid;
	if (tail > lastsz) {
		// The last sequence is followed by bytes that do not start a
		// new sequence. No further data can turn this into valid
		// utf8. Reporting it as incomplete tail instead would cause
		// the same bytes to be unread and examined again.
		return invalid;
	};

	// Exclude the last sequence if it is still incomplete.
	const n = if (tail == lastsz) len(buf) else lastvalid;
	if (utf8::validate(buf[..n]) is utf8::invalid) {
		return invalid;
	};

	return n;
};

// Checks validutf8 against every prefix of a buffer that contains runes of
// one, two, three and four bytes, including the complete buffer.
@test fn validutf8() void = {
	let b: [_]u8 = [
		0x55, 0x56, 0xd0, 0x98, 0xe0, 0xa4, 0xb9, 0xf0, 0x90, 0x8d, 0x88
	];
	// runesat[i] is the expected result for the first i bytes of 'b'
	const runesat: [_]size = [0, 1, 2, 2, 4, 4, 4, 7, 7, 7, 7, 11];

	for (let i = 0z; i <= len(b); i += 1) {
		assert(validutf8(b[..i])! == runesat[i]);
	};

	// Replacing the last byte of the four byte rune makes the rune
	// invalid as soon as it is examined completely.
	b[10] = 0x55;
	assert(validutf8(b[..10])! == 7);
	assert(validutf8(b) is invalid);
};

// An io::stream reader that returns only valid utf8 chunks on read.
export type utf8stream = struct {
	stream: io::stream,
	// The decoder that the string data is read from.
	d: *decoder,
	// The function that reads from 'd' and writes the resulting data into
	// the buffer passed to read.
	strdec: *strdecoder,
};

// The io::vtable of [[utf8stream]]. Only reading is implemented.
const utf8stream_vtable = io::vtable {
	reader = &utf8stream_reader,
	...
};

// The read function of [[utf8stream]]. The read is forwarded to the 'strdec'
// function of the stream, whose result is returned as is. 'buf' must provide
// space for at least four bytes. Aborts if 'cur' of the decoder is void.
fn utf8stream_reader(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
	// at least a rune must fit in buf
	assert(len(buf) >= 4);
	let s = s: *utf8stream;
	if (s.d.cur is void) {
		abort();
	};

	return s.strdec(s, buf);
};

// A function that reads string data from the decoder of 's' and stores the
// result in 'buf'. Returns the number of bytes written to 'buf', or io::EOF.
// One such function exists for each string type supported by
// [[newstrreader]].
export type strdecoder = fn(
	s: *utf8stream,
	buf: []u8,
) (size | io::EOF | io::error);

// A [[strdecoder]] that passes the data on without any check or conversion.
fn no_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	return dataread(s.d, buf);
};

// Reads data into 'buf' and checks every byte that has been read using 'v'.
// Returns invalid (wrapped into an io::error) if 'v' rejects a byte.
fn char_decoder(
	s: *utf8stream, buf: []u8,
	v: *char_validator,
) (size | io::EOF | io::error) = {
	const got = match (dataread(s.d, buf)?) {
	case io::EOF =>
		return io::EOF;
	case let sz: size =>
		yield sz;
	};

	const data = buf[..got];
	for (let i = 0z; i < len(data); i += 1) {
		if (!v(data[i])) {
			return wrap_err(invalid);
		};
	};
	return got;
};

// The [[strdecoder]] for NumericString; validates using [[c_is_num]].
fn num_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	return char_decoder(s, buf, &c_is_num);
};

// The [[strdecoder]] for PrintableString; validates using [[c_is_print]].
fn print_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	return char_decoder(s, buf, &c_is_print);
};

// The [[strdecoder]] for IA5String; validates using [[c_is_ia5]].
fn ia5_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	return char_decoder(s, buf, &c_is_ia5);
};

// The [[strdecoder]] for UTF8String. Reads data into 'buf' and returns the
// size of the biggest chunk of complete utf8 sequences. The bytes of an
// incomplete sequence at the end of the data are handed back to the decoder
// using dataunread. Returns invalid (wrapped into an io::error) if the data
// is not valid utf8 or if the string ends with an incomplete sequence.
fn utf8_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	const n = match (dataread(s.d, buf)?) {
	case io::EOF =>
		// NOTE(review): presumably unbufn counts the bytes handed back
		// by dataunread, i.e. an incomplete rune is left; confirm.
		if (s.d.unbufn > 0) {
			return wrap_err(invalid);
		};
		return io::EOF;
	case let sz: size =>
		yield sz;
	};

	const complete = match (validutf8(buf[..n])) {
	case invalid =>
		return wrap_err(invalid);
	case let sz: size =>
		yield sz;
	};

	if (complete >= n) {
		return n;
	};

	if (dataeof(s.d)) {
		// string ends with incomplete rune
		return wrap_err(invalid);
	};
	dataunread(s.d, buf[complete..n]);
	return complete;
};

// The [[strdecoder]] for BMPString. The string is read as a sequence of 16 bit
// big endian values, each of which is converted into a rune and written to
// 'buf' as utf8. Values of the surrogate range (0xd800 to 0xdfff) are not
// valid code points and can not be encoded as utf8; they are rejected as
// invalid, as is a string that ends within a 16 bit value.
fn bmp_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	// TODO disallow control functions (X.690: 8.23.9)

	let n = 0z;
	let rbuf: [2]u8 = [0...];
	for (true) {
		match (dataread(s.d, rbuf)?) {
		case let sz: size =>
			if (sz < 2) return wrap_err(invalid);
		case io::EOF =>
			return if (n == 0) io::EOF else n;
		};

		const unit = endian::begetu16(rbuf);
		if (unit >= 0xd800 && unit <= 0xdfff) {
			return wrap_err(invalid);
		};

		let rb = utf8::encoderune(unit: rune);
		if (len(buf) - n < len(rb)) {
			// No space left for this rune, hand it back so that it
			// is returned by the next read.
			dataunread(s.d, rbuf);
			return n;
		};

		buf[n..n + len(rb)] = rb;
		n += len(rb);
	};
};

// The [[strdecoder]] for UniversalString, which is an UTF32BE string. The
// string is read as a sequence of 32 bit big endian values, each of which is
// converted into a rune and written to 'buf' as utf8. Values that are not
// valid code points, i.e. values bigger than 0x10ffff or within the surrogate
// range (0xd800 to 0xdfff), are rejected as invalid, as is a string that ends
// within a 32 bit value.
fn universal_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	let n = 0z;
	let rbuf: [4]u8 = [0...];
	for (true) {
		match (dataread(s.d, rbuf)?) {
		case let sz: size =>
			if (sz < 4) return wrap_err(invalid);
		case io::EOF =>
			return if (n == 0) io::EOF else n;
		};

		const cp = endian::begetu32(rbuf);
		if (cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff)) {
			return wrap_err(invalid);
		};

		let rb = utf8::encoderune(cp: rune);
		if (len(buf) - n < len(rb)) {
			// No space left for this rune, hand it back so that it
			// is returned by the next read.
			dataunread(s.d, rbuf);
			return n;
		};

		buf[n..n + len(rb)] = rb;
		n += len(rb);
	};
};

// The [[strdecoder]] for TeletexString (T.61). The string is read byte by
// byte; every character is decoded using [[t61_chardecode]] and written to
// 'buf' as utf8. A character consists of two bytes if the first one is a
// combining character. A character that does not fit into the remaining space
// of 'buf' is handed back to the decoder and returned by the next read.
// Returns invalid (wrapped into an io::error) if a character can not be
// decoded or if the string ends after a combining character.
fn t61_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	// bytes of the character that is currently being decoded
	let inbuf: [2]u8 = [0...];
	let in = inbuf[..0];

	let n = 0z;

	for (true) {
		let chr: [1]u8 = [0];
		match (dataread(s.d, chr)?) {
		case let sz: size =>
			assert(sz == 1);
			static append(in, chr[0]);
		case io::EOF =>
			// string ends after a combining character
			if (len(in) > 0) return wrap_err(invalid);
			if (n > 0) return n;
			return io::EOF;
		};

		match (t61_chardecode(in)) {
		case let r: rune =>
			let raw = utf8::encoderune(r);
			const bufremain = len(buf) - n;
			// A rune that exactly fills the remaining space fits as
			// well.
			if (len(raw) <= bufremain) {
				buf[n..n + len(raw)] = raw[..];
				n += len(raw);
				in = inbuf[..0];
			} else {
				dataunread(s.d, in);
				break;
			};
		case insufficient =>
			// leave combining char in in
			yield;
		case invalid =>
			return wrap_err(invalid);
		};
	};

	return n;
};

// Creates an [[utf8stream]] that reads from 'd' using the [[strdecoder]] that
// belongs to the string type 't'. Returns invalid if 't' is not one of the
// supported string types.
fn newstrreader(d: *decoder, t: utag) (utf8stream | error) = {
	const decodefn: *strdecoder = switch (t) {
	case utag::UTF8_STRING =>
		yield &utf8_decoder;
	case utag::NUMERIC_STRING =>
		yield &num_decoder;
	case utag::PRINTABLE_STRING =>
		yield &print_decoder;
	case utag::IA5_STRING =>
		yield &ia5_decoder;
	case utag::TELETEX_STRING =>
		yield &t61_decoder;
	case utag::BMP_STRING =>
		yield &bmp_decoder;
	case utag::UNIVERSAL_STRING =>
		yield &universal_decoder;
	case =>
		return invalid;
	};

	return utf8stream {
		stream = &utf8stream_vtable,
		d = d,
		strdec = decodefn,
		...
	};
};

// Reads the header of the next element from 'd', checks that its tag is 't'
// and returns an [[utf8stream]] that provides the string as utf8. 't' must be
// one of the following tags, the function aborts otherwise:
//   * utag::NUMERIC_STRING
//   * utag::PRINTABLE_STRING
//   * utag::IA5_STRING
//   * utag::UTF8_STRING
//   * utag::TELETEX_STRING
//   * utag::BMP_STRING
//   * utag::UNIVERSAL_STRING
export fn strreader(d: *decoder, t: utag) (utf8stream | error) = {
	const header = next(d)?;
	expect_utag(header, t)?;

	const reader = newstrreader(d, t)!;
	return reader;
};

// Reads a PrintableString into 'buf' and returns the number of bytes read.
// Returns invalid if the string contains a character that is not allowed in
// a PrintableString.
export fn read_printstr(d: *decoder, buf: []u8) (size | error) = {
	const header = next(d)?;
	expect_utag(header, utag::PRINTABLE_STRING)?;

	const n = read_bytes(d, buf)?;
	const data = buf[..n];

	for (let i = 0z; i < len(data); i += 1) {
		if (c_is_print(data[i])) {
			continue;
		};
		return invalid;
	};
	return n;
};

// Reads an utf8 string into 'buf' and returns a str that borrows from buf.
// Returns invalid if the data is not valid utf8.
//
// NOTE(review): the unused rest of 'buf' is passed to io::read on every
// iteration, including the last one that is expected to yield io::EOF. The
// stream's read function asserts that its buffer has at least four bytes
// (see utf8stream_reader), so it looks like this function aborts unless 'buf'
// is at least four bytes bigger than the string. Confirm, and decide which
// error should be returned for a string that does not fit.
export fn read_utf8str(d: *decoder, buf: []u8) (str | error) = {
	let dh = next(d)?;
	expect_utag(dh, utag::UTF8_STRING)?;

	// can not fail, UTF8_STRING is a supported string type
	let r = newstrreader(d, utag::UTF8_STRING)!;
	let n = 0z;

	// read until the end of the string, appending to buf
	for (true) {
		n += match (io::read(&r, buf[n..])) {
		case let sz: size =>
			yield sz;
		case io::EOF =>
			break;
		case let e: io::error =>
			// return the asn1 error instead of the io::error wrapper
			return unwrap_err(e);
		};
	};

	// the stream returns valid utf8 only
	return strings::fromutf8(buf[..n])!;
};

diff --git a/encoding/asn1/t61.ha b/encoding/asn1/t61.ha
new file mode 100644
index 00000000..d0e14ab8
--- /dev/null
+++ b/encoding/asn1/t61.ha
@@ -0,0 +1,534 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

// Maps the T.61 bytes from 0x00 to 0x7f to ASCII. A value of 0 marks a byte
// that has no mapping; such bytes are reported as invalid by
// [[t61_chardecode]]. Every row holds eight entries, the trailing comment is
// the index of the first entry of the 16 entries block in hex notation.
//
// https://en.wikipedia.org/wiki/ITU_T.61
const t61toascii: [_]u8 = [
//	 0	 1	 2	 3	 4	 5	 6	 7
//	 8	 9	 a	 b	 c	 d	 e	 f
	0,	0,	0,	0,	0,	0,	0,	0,	// 0
	0,	0,	0x0a,	0,	0x0c,	0x0d,	0,	0,	// 0
	0,	0,	0,	0,	0,	0,	0,	0,	// 10
	0,	0,	0x1a,	0x1b,	0,	0,	0,	0,	// 10
	0x20,	0x21,	0x22,	0,	0,	0x25,	0x26,	0x27,	// 20
	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,	// 20
	0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,	// 30
	0x38,	0x39,	0x3a,	0x3b,	0x3c,	0x3d,	0x3e,	0x3f,	// 30
	0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,	// 40
	0x48,	0x49,	0x4a,	0x4b,	0x4c,	0x4d,	0x4e,	0x4f,	// 40
	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,	// 50
	0x58,	0x59,	0x5a,	0x5b,	0,	0x5d,	0,	0x5f,	// 50
	0,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,	// 60
	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,	// 60
	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,	// 70
	0x78,	0x79,	0x7a,	0,	0x7c,	0,	0,	0,	// 70
];

// Maps the T.61 bytes from 0x80 to 0xff to runes; the table is indexed by the
// byte value minus 0x80. '\u0000' marks a byte that has no mapping; such bytes
// are reported as invalid by [[t61_chardecode]]. The entries of 0xc1 to 0xcf
// are combining characters, which [[t61_chardecode]] does not return on their
// own: they are combined with the byte that follows, see [[t61_combine]].
const t61toutf8: [_]rune = [
	// 0x80
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u008b',
	'\u008c', '\u0000', '\u0000', '\u0000',

	// 0x90
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u009b',
	'\u0000', '\u0000', '\u0000', '\u0000',

	// 0xa0
	'\u00a0', '\u00a1', '\u00a2', '\u00a3',
	'\u0024', '\u00a5', '\u0023', '\u00a7',
	'\u00a4', '\u0000', '\u0000', '\u00ab',
	'\u0000', '\u0000', '\u0000', '\u0000',

	// 0xb0
	'\u00b0', '\u00b1', '\u00b2', '\u00b3',
	'\u00d7', '\u00b5', '\u00b6', '\u00b7',
	'\u00f7', '\u0000', '\u0000', '\u00bb',
	'\u00bc', '\u00bd', '\u00be', '\u00bf',

	// 0xc0
	'\u0000', '\u0300', '\u0301', '\u0302',
	'\u0303', '\u0304', '\u0306', '\u0307',
	'\u0308', '\u0308', '\u030a', '\u0327',
	'\u0332', '\u030b', '\u0328', '\u030c',

	// 0xd0
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',

	// 0xe0
	'\u2126', '\u00c6', '\u00d0', '\u00aa',
	'\u0126', '\u0000', '\u0132', '\u013f',
	'\u0141', '\u00d8', '\u0152', '\u00ba',
	'\u00de', '\u0166', '\u014a', '\u0149',

	// 0xf0
	'\u0138', '\u00e6', '\u0111', '\u00f0',
	'\u0127', '\u0131', '\u0133', '\u0140',
	'\u0142', '\u00f8', '\u0153', '\u00df',
	'\u00fe', '\u0167', '\u014b', '\u0000',
];

// NOTE(review): unfinished stub. The rune of every byte of 'in' is looked up,
// but the result is discarded and nothing is ever written to 'out'. Combining
// characters are not handled either. This looks like a leftover that has been
// superseded by [[t61_chardecode]]; confirm that it is unused and remove it.
fn decode(out: []u8, in: []u8) void = {
	for (let i = 0z; i < len(in); i += 1) {
		const c = in[i];
		const r: rune = if (c & 0x80 != 0) {
			// TODO special cases
			yield t61toutf8[c - 0x80];
		} else {
			const c = t61toascii[in[i]];
			yield c: u32: rune;
		};

		// write r to out
	};
	return;
};

// Returned by [[t61_chardecode]] if the input consists of a combining
// character only. The byte that follows is required to decode the character.
export type insufficient = !void;

// Decodes a single T.61 character into a rune. 'in' must consist of either one
// byte, or of two bytes if the first byte is a combining character (0xc1 to
// 0xcf, except 0xcc). Returns [[insufficient]] if 'in' consists of a combining
// character only, and invalid if the input does not map to a rune.
export fn t61_chardecode(in: []u8) (rune | insufficient | invalid) = {
	if (len(in) == 2) {
		// combining character followed by its base character
		return t61_combine(in);
	};

	const b = in[0];

	if (b < 0x80) {
		const ascii = t61toascii[b];
		if (ascii == 0) {
			return invalid;
		};
		return ascii: u32: rune;
	};

	const r = t61toutf8[b - 0x80];
	// 0xcc has an entry in the table, but is not supported
	if (r == '\u0000' || b == 0xcc) {
		return invalid;
	};

	if (b >= 0xc1 && b <= 0xcf) {
		return insufficient;
	};

	return r;
};

// Searches 'from' for 'base' and returns the rune that is stored at the same
// index of 'to'. Returns invalid if 'from' does not contain 'base'. 'from' and
// 'to' must have the same length.
fn t61_precomposed(base: u8, from: []u8, to: []rune) (rune | invalid) = {
	assert(len(from) == len(to));
	for (let i = 0z; i < len(from); i += 1) {
		if (from[i] == base) {
			return to[i];
		};
	};
	return invalid;
};

// Returns the rune that results from applying the combining character in[0]
// to the base character in[1]. Returns invalid if in[0] is not a supported
// combining character (0xc1 to 0xcf, except 0xcc), or if there is no mapping
// for the combination. 'in' must consist of at least two bytes.
//
// Each case lists the supported base characters and, at the same position of
// the second list, the resulting runes.
fn t61_combine(in: []u8) (rune | invalid) = {
	const comb = in[0];
	const base = in[1];

	switch (comb) {
	case 0xc1 =>
		return t61_precomposed(base, [
			'A', 'E', 'I', 'O', 'U',
			'a', 'e', 'i', 'o', 'u',
		], [
			'\u00c0', '\u00c8', '\u00cc', '\u00d2', '\u00d9',
			'\u00e0', '\u00e8', '\u00ec', '\u00f2', '\u00f9',
		]);
	case 0xc2 =>
		return t61_precomposed(base, [
			'A', 'C', 'E', 'I', 'L', 'N',
			'O', 'R', 'S', 'U', 'Y', 'Z',
			'a', 'c', 'e', 'g', 'i', 'l', 'n',
			'o', 'r', 's', 'u', 'y', 'z',
		], [
			'\u00c1', '\u0106', '\u00c9', '\u00cd', '\u0139', '\u0143',
			'\u00d3', '\u0154', '\u015a', '\u00da', '\u00dd', '\u0179',
			'\u00e1', '\u0107', '\u00e9', '\u0123', '\u00ed', '\u013a', '\u0144',
			'\u00f3', '\u0155', '\u015b', '\u00fa', '\u00fd', '\u017a',
		]);
	case 0xc3 =>
		return t61_precomposed(base, [
			'A', 'C', 'E', 'G', 'H', 'I',
			'J', 'O', 'S', 'U', 'W', 'Y',
			'a', 'c', 'e', 'g', 'h', 'i',
			'j', 'o', 's', 'u', 'w', 'y',
		], [
			'\u00c2', '\u0108', '\u00ca', '\u011c', '\u0124', '\u00ce',
			'\u0134', '\u00d4', '\u015c', '\u00db', '\u0174', '\u0176',
			'\u00e2', '\u0109', '\u00ea', '\u011d', '\u0125', '\u00ee',
			'\u0135', '\u00f4', '\u015d', '\u00fb', '\u0175', '\u0177',
		]);
	case 0xc4 =>
		return t61_precomposed(base, [
			'A', 'I', 'N', 'O', 'U',
			'a', 'i', 'n', 'o', 'u',
		], [
			'\u00c3', '\u0128', '\u00d1', '\u00d5', '\u0168',
			'\u00e3', '\u0129', '\u00f1', '\u00f5', '\u0169',
		]);
	case 0xc5 =>
		return t61_precomposed(base, [
			'A', 'E', 'I', 'O', 'U',
			'a', 'e', 'i', 'o', 'u',
		], [
			'\u0100', '\u0112', '\u012a', '\u014c', '\u016a',
			'\u0101', '\u0113', '\u012b', '\u014d', '\u016b',
		]);
	case 0xc6 =>
		return t61_precomposed(base, [
			'A', 'G', 'U',
			'a', 'g', 'u',
		], [
			'\u0102', '\u011e', '\u016c',
			'\u0103', '\u011f', '\u016d',
		]);
	case 0xc7 =>
		return t61_precomposed(base, [
			'C', 'E', 'G', 'I', 'Z',
			'c', 'e', 'g', 'z',
		], [
			'\u010a', '\u0116', '\u0120', '\u0130', '\u017b',
			'\u010b', '\u0117', '\u0121', '\u017c',
		]);
	case 0xc8, 0xc9 =>
		// both bytes yield the same runes
		return t61_precomposed(base, [
			'A', 'E', 'I', 'O', 'U', 'Y',
			'a', 'e', 'i', 'o', 'u', 'y',
		], [
			'\u00c4', '\u00cb', '\u00cf', '\u00d6', '\u00dc', '\u0178',
			'\u00e4', '\u00eb', '\u00ef', '\u00f6', '\u00fc', '\u00ff',
		]);
	case 0xca =>
		return t61_precomposed(base, [
			'A', 'U',
			'a', 'u',
		], [
			'\u00c5', '\u016e',
			'\u00e5', '\u016f',
		]);
	case 0xcb =>
		return t61_precomposed(base, [
			'C', 'G', 'K', 'L', 'N', 'R', 'S', 'T',
			'c', 'k', 'l', 'n', 'r', 's', 't',
		], [
			'\u00c7', '\u0122', '\u0136', '\u013b',
			'\u0145', '\u0156', '\u015e', '\u0162',
			'\u00e7', '\u0137', '\u013c', '\u0146',
			'\u0157', '\u015f', '\u0163',
		]);
	case 0xcd =>
		return t61_precomposed(base, [
			'O', 'U',
			'o', 'u',
		], [
			'\u0150', '\u0170',
			'\u0151', '\u0171',
		]);
	case 0xce =>
		return t61_precomposed(base, [
			'A', 'E', 'I', 'U',
			'a', 'e', 'i', 'u',
		], [
			'\u0104', '\u0118', '\u012e', '\u0172',
			'\u0105', '\u0119', '\u012f', '\u0173',
		]);
	case 0xcf =>
		return t61_precomposed(base, [
			'C', 'D', 'E', 'L', 'N', 'R', 'S', 'T', 'Z',
			'c', 'd', 'e', 'l', 'n', 'r', 's', 't', 'z',
		], [
			'\u010c', '\u010e', '\u011a', '\u013d', '\u0147',
			'\u0158', '\u0160', '\u0164', '\u017d',
			'\u010d', '\u010f', '\u011b', '\u013e', '\u0148',
			'\u0159', '\u0161', '\u0165', '\u017e',
		]);
	case =>
		return invalid;
	};
};

-- 
2.43.0

[PATCH hare v4 6/6] asn1: add readme

Details
Message ID
<20240207185633.9327-6-apreiml@strohwolke.at>
In-Reply-To
<20240207185633.9327-1-apreiml@strohwolke.at> (view parent)
DKIM signature
pass
Download raw message
Patch: +6 -0
Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---

v4: mention oid support

 encoding/asn1/README | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 encoding/asn1/README

diff --git a/encoding/asn1/README b/encoding/asn1/README
new file mode 100644
index 00000000..80fbf801
--- /dev/null
+++ b/encoding/asn1/README
@@ -0,0 +1,6 @@
This module provides functions to decode and encode data in the Distinguished
Encoding Rules (DER) format, as defined in the ITU-T X.690 standard.

See [[newencoder]] and [[newdecoder]] for how to encode or decode values.

This module also provides tools to work with OIDs. See [[oiddb]] for more info.
-- 
2.43.0

[hare/patches] build failed

builds.sr.ht <builds@sr.ht>
Details
Message ID
<CYZ2T06RC0EX.2RWYPUEONCGP5@fra02>
In-Reply-To
<20240207185633.9327-6-apreiml@strohwolke.at> (view parent)
DKIM signature
missing
Download raw message
hare/patches: FAILED in 55s

[asn1: add types and errors][0] v4 from [Armin Preiml][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/49246
[1]: apreiml@strohwolke.at

✗ #1145380 FAILED  hare/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/1145380
✗ #1145382 FAILED  hare/patches/openbsd.yml https://builds.sr.ht/~sircmpwn/job/1145382
✓ #1145381 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1145381

Re: [PATCH hare v4 6/6] asn1: add readme

Details
Message ID
<CYZMDYNJJ3HU.2SZS3OB84TPPG@taiga>
In-Reply-To
<20240207185633.9327-6-apreiml@strohwolke.at> (view parent)
DKIM signature
pass
Download raw message
This looks fantastic, great work. I'm going to make some minor
improvements, mostly to style and doc strings, and then push this.

Should we make plans to fuzz test this at some point?

Re: [PATCH hare v4 6/6] asn1: add readme

Details
Message ID
<CYZN3SAUPQF8.3MXMAYOFRCUOA@taiga>
In-Reply-To
<20240207185633.9327-6-apreiml@strohwolke.at> (view parent)
DKIM signature
pass
Download raw message
Great work. Thanks!

To git@git.sr.ht:~sircmpwn/hare
   d0c057db..6f3e0d3b  master -> master

Re: [PATCH hare v4 6/6] asn1: add readme

Details
Message ID
<CYZN9HWB9HA3.19IC5EMGYXT6T@strohwolke.at>
In-Reply-To
<CYZMDYNJJ3HU.2SZS3OB84TPPG@taiga> (view parent)
DKIM signature
pass
Download raw message
On Thu Feb 8, 2024 at 11:22 AM CET, Drew DeVault wrote:
> I'm going to make some minor
> improvements, mostly to style and doc strings, and then push this.

Thanks for your support with this!

> Should we make plans to fuzz test this at some point?

Sure, would be neat. Though it will not be a priority for me anytime
soon.
Reply to thread Export thread (mbox)