~sircmpwn/hare-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
6 2

[PATCH hare v3 1/6] asn1: add types and errors

Details
Message ID
<20240205145140.361304-1-apreiml@strohwolke.at>
DKIM signature
pass
Download raw message
Patch: +204 -0
Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---
 encoding/asn1/errors.ha |  68 ++++++++++++++++++++
 encoding/asn1/types.ha  | 136 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 204 insertions(+)
 create mode 100644 encoding/asn1/errors.ha
 create mode 100644 encoding/asn1/types.ha

diff --git a/encoding/asn1/errors.ha b/encoding/asn1/errors.ha
new file mode 100644
index 00000000..f79f5e84
--- /dev/null
+++ b/encoding/asn1/errors.ha
@@ -0,0 +1,68 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use errors;
use io;

// Invalid DER encoding.
export type invalid = !void;

// Unexpected data format.
export type badformat = !void;

// Premature EOF.
export type truncated = !void;

// Data does not fit into the encoder buffer.
export type overflow = !void;

// The errors that are specific to this module, i.e. [[error]] without the
// [[io::error]] members.
type asn1error = !(invalid | badformat | overflow | truncated);

// Any error within the asn1 module.
export type error = !(...io::error | ...asn1error);


// Returns a human-readable description of an [[error]]. I/O errors are
// described by [[io::strerror]].
export fn strerror(e: error) str = {
	return match (e) {
	case invalid =>
		yield "Data encoding does not follow the DER format";
	case badformat =>
		yield "Unexpected data format";
	case truncated =>
		yield "Premature EOF";
	case overflow =>
		yield "Data does not fit into the encoder buffer";
	case let ioerr: io::error =>
		yield io::strerror(ioerr);
	};
};

// Converts an [[error]] into an [[io::error]], so that it can be returned from
// an io stream callback. I/O errors are passed through unchanged; errors
// specific to this module are stored inside an [[errors::opaque_]].
fn wrap_err(e: error) io::error = {
	match (e) {
	case let e: io::error =>
		return e;
	case let e: asn1error =>
		// The value is stored as an asn1error, the same type the
		// static assert is checked against. Storing it through a
		// pointer to the (larger) error type would access memory
		// beyond the end of the opaque data area.
		static assert(size(asn1error) <= size(errors::opaque_data));
		let w = errors::opaque_ { strerror = &wrap_strerror, ... };
		let ptr = &w.data: *asn1error;
		*ptr = e;
		return w;
	};
};

// The strerror callback of errors wrapped by [[wrap_err]]. 'err' must point to
// the data area of such an error.
fn wrap_strerror(err: *errors::opaque_data) const str = {
	let e = err: *asn1error;
	return strerror(*e);
};

// Unwraps an [[io::error]] returned by readers of this module into an
// [[error]]. Opaque errors that were not created by this module, and all other
// I/O errors, are returned unchanged.
export fn unwrap_err(e: io::error) error = {
	match (e) {
	case let e: errors::opaque_ =>
		// Opaque errors may originate from any module. Only the ones
		// carrying our strerror callback hold an asn1error in their
		// data area; reinterpreting foreign data would be unsound.
		if (e.strerror != &wrap_strerror) {
			return e;
		};
		let ptr = &e.data: *asn1error;
		return *ptr;
	case let e: io::error =>
		return e;
	};
};
diff --git a/encoding/asn1/types.ha b/encoding/asn1/types.ha
new file mode 100644
index 00000000..be68a065
--- /dev/null
+++ b/encoding/asn1/types.ha
@@ -0,0 +1,136 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use errors;
use fmt;
use io;
use memio;


// Tag classes specified in the standard. The encoder stores the class in the
// two most significant bits of the first identifier byte.
export type class = enum u8 {
	UNIVERSAL = 0x0,
	APPLICATION = 0x1,
	CONTEXT = 0x2,
	PRIVATE = 0x3,
};

// Returns the name of the class 'c' as a statically allocated string.
export fn strclass(c: class) str = {
	return switch (c) {
	case class::UNIVERSAL =>
		yield "UNIVERSAL";
	case class::APPLICATION =>
		yield "APPLICATION";
	case class::CONTEXT =>
		yield "CONTEXT_SPECIFIC";
	case class::PRIVATE =>
		yield "PRIVATE";
	};
};

// Universal tags as defined in X.690. Not all are supported by this
// implementation.
export type utag = enum u8 {
	RESERVED = 0x00,
	BOOLEAN = 0x01,
	INTEGER = 0x02,
	BITSTRING = 0x03,
	OCTET_STRING = 0x04,
	NULL = 0x05,
	OID = 0x06,
	OBJECT_DESCRIPTOR = 0x07,
	EXTERNAL = 0x08,
	REAL = 0x09,
	ENUMERATED = 0x0a,
	EMBEDDED_PDV = 0x0b,
	UTF8_STRING = 0x0c,
	RELATIVE_OID = 0x0d,
	TIME = 0x0e,
	RESERVED2 = 0x0f,
	SEQUENCE = 0x10,
	SET = 0x11,
	NUMERIC_STRING = 0x12,
	PRINTABLE_STRING = 0x13,
	TELETEX_STRING = 0x14, // T61String
	VIDEOTEX_STRING = 0x15,
	IA5_STRING = 0x16,
	UTC_TIME = 0x17,
	GENERALIZED_TIME = 0x18,
	GRAPHIC_STRING = 0x19,
	VISIBLE_STRING = 0x1a, // iso646String
	GENERAL_STRING = 0x1b,
	UNIVERSAL_STRING = 0x1c,
	UNKNOWN = 0x1d,
	BMP_STRING = 0x1e,
	DATE = 0x1f,
	TIME_OF_DAY = 0x20,
	DATE_TIME = 0x21,
	DURATION = 0x22,
	OID_IRI = 0x23,
	OID_RELATIVE_IRI = 0x24,
};

// Returns a string representation of the tag of 'dh'.
//
// Universal tags are returned as statically allocated names; "UNKNOWN" is
// returned for universal tag ids that have no name in [[utag]]. Tags of any
// other class are formatted as "[CLASS id]" with the id in hexadecimal (the
// class name is omitted for context specific tags). In that case the result is
// stored in a static buffer which is overwritten by the next call.
export fn strtag(dh: head) str = {
	static let tagstrbuf: [128]u8 = [0...];

	if (dh.class != class::UNIVERSAL) {
		let tagstr = memio::fixed(tagstrbuf);

		fmt::fprint(&tagstr, "[")!;
		if (dh.class != class::CONTEXT) {
			fmt::fprintf(&tagstr, "{} ", strclass(dh.class))!;
		};
		fmt::fprintf(&tagstr, "{:x}]", dh.tagid)!;
		return memio::string(&tagstr)!;
	};

	// universal tags fit into a single byte
	if (dh.tagid >> 8 != 0) {
		return "UNKNOWN";
	};

	switch (dh.tagid: u8) {
	case utag::BOOLEAN =>
		return "BOOLEAN";
	case utag::INTEGER =>
		return "INTEGER";
	case utag::BITSTRING =>
		return "BITSTRING";
	case utag::OCTET_STRING =>
		return "OCTET_STRING";
	case utag::NULL =>
		return "NULL";
	case utag::OID =>
		return "OBJECT_IDENTIFIER";
	case utag::OBJECT_DESCRIPTOR =>
		return "OBJECT_DESCRIPTOR";
	case utag::EXTERNAL =>
		return "EXTERNAL";
	case utag::REAL =>
		return "REAL";
	case utag::ENUMERATED =>
		return "ENUMERATED";
	case utag::EMBEDDED_PDV =>
		return "EMBEDDED_PDV";
	case utag::UTF8_STRING =>
		return "UTF8_STRING";
	case utag::RELATIVE_OID =>
		return "RELATIVE_OID";
	case utag::TIME =>
		return "TIME";
	case utag::SEQUENCE =>
		return "SEQUENCE";
	case utag::SET =>
		return "SET";
	case utag::NUMERIC_STRING =>
		return "NUMERIC_STRING";
	case utag::PRINTABLE_STRING =>
		return "PRINTABLE_STRING";
	case utag::TELETEX_STRING =>
		return "TELETEX_STRING";
	case utag::VIDEOTEX_STRING =>
		return "VIDEOTEX_STRING";
	case utag::IA5_STRING =>
		return "IA5_STRING";
	case utag::UTC_TIME =>
		return "UTC_TIME";
	case utag::GENERALIZED_TIME =>
		return "GENERALIZED_TIME";
	case utag::GRAPHIC_STRING =>
		return "GRAPHIC_STRING";
	case utag::VISIBLE_STRING =>
		return "VISIBLE_STRING";
	case utag::GENERAL_STRING =>
		return "GENERAL_STRING";
	case utag::UNIVERSAL_STRING =>
		return "UNIVERSAL_STRING";
	case utag::BMP_STRING =>
		return "BMP_STRING";
	case utag::DATE =>
		return "DATE";
	case utag::TIME_OF_DAY =>
		return "TIME_OF_DAY";
	case utag::DATE_TIME =>
		return "DATE_TIME";
	case utag::DURATION =>
		return "DURATION";
	case utag::OID_IRI =>
		return "OID_IRI";
	case utag::OID_RELATIVE_IRI =>
		return "OID_RELATIVE_IRI";
	case =>
		return "UNKNOWN";
	};
};
-- 
2.43.0

[PATCH hare v3 2/6] asn1: encoder

Details
Message ID
<20240205145140.361304-2-apreiml@strohwolke.at>
In-Reply-To
<20240205145140.361304-1-apreiml@strohwolke.at> (view parent)
DKIM signature
pass
Download raw message
Patch: +569 -0
Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---

v3: the encoder now uses io operations on 'mem' except in encode

 encoding/asn1/+test/encoder_test.ha | 147 ++++++++++
 encoding/asn1/encoder.ha            | 422 ++++++++++++++++++++++++++++
 2 files changed, 569 insertions(+)
 create mode 100644 encoding/asn1/+test/encoder_test.ha
 create mode 100644 encoding/asn1/encoder.ha

diff --git a/encoding/asn1/+test/encoder_test.ha b/encoding/asn1/+test/encoder_test.ha
new file mode 100644
index 00000000..6ac2a21a
--- /dev/null
+++ b/encoding/asn1/+test/encoder_test.ha
@@ -0,0 +1,147 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use bytes;
use errors;
use io;
use memio;
use os;
use strings;
use time::date;
use types;


// Multi-byte tag ids are written in the long form and read back unchanged by
// the decoder.
@test fn write_id() void = {
	let buf = memio::dynamic();
	// release the buffer like the other tests do
	defer io::close(&buf)!;
	let e = derencoder(&buf);

	write_fixedprim(&e, class::UNIVERSAL, 0x2aa, [0x00])!;
	encode(&e)!;
	assert(bytes::equal([0x1f, 0x85, 0x2a, 0x01, 0x00],
			memio::buffer(&buf)));

	io::seek(&buf, 0, io::whence::SET)!;
	let d = derdecoder(&buf);
	let h = peek(&d)!;
	assert(h.tagid == 0x2aa);

	// largest possible tag id
	let buf = memio::dynamic();
	defer io::close(&buf)!;
	let e = derencoder(&buf);

	write_fixedprim(&e, class::UNIVERSAL, types::U32_MAX, [0x00])!;
	encode(&e)!;
	assert(bytes::equal([0x1f, 0x8f, 0xff, 0xff, 0xff, 0x7f, 0x01, 0x00],
			memio::buffer(&buf)));

	io::seek(&buf, 0, io::whence::SET)!;
	let d = derdecoder(&buf);
	let h = peek(&d)!;
	assert(h.tagid == types::U32_MAX);
};

// A primitive created with create_prim gets its size placeholder replaced by
// the minimal length encoding.
@test fn write_prim() void = {
	let buf = memio::dynamic();
	defer io::close(&buf)!;
	let dest = memio::dynamic();
	defer io::close(&dest)!;

	let enc = derencoder(&buf);

	create_prim(&enc, class::UNIVERSAL, utag::INTEGER)!;
	write(&enc, [0x01, 0x05, 0x07])!;
	finish_prim(&enc);

	// 1 byte id + 1 byte size + 3 bytes data
	assert(encodeto(&enc, &dest)! == 5);

	assert(bytes::equal(memio::buffer(&dest), [
		0x02, 0x03, 0x01, 0x05, 0x07
	]));
};

// Sizes below 128 use the single byte form, larger ones the multi-byte form
// with a leading byte count.
@test fn encode_dsz() void = {
	assert(bytes::equal([0x7f], encode_dsz(0x7f)));
	assert(bytes::equal([0x81, 0x8f], encode_dsz(0x8f)));
	assert(bytes::equal([0x81, 0xff], encode_dsz(0xff)));
	assert(bytes::equal([0x82, 0x01, 0x00], encode_dsz(0x100)));
};

// Nested sequences get the accumulated size of their content as length.
@test fn write_seq() void = {
	let buf = memio::dynamic();
	defer io::close(&buf)!;
	let dest = memio::dynamic();
	defer io::close(&dest)!;

	let enc = derencoder(&buf);

	create_seq(&enc)!;
	write_bool(&enc, false)!;
	create_seq(&enc)!;
	write_int(&enc, [0x01, 0x02, 0x03])!;
	finish_seq(&enc);
	finish_seq(&enc);
	assert(encodeto(&enc, &dest)! == 12);

	assert(bytes::equal(memio::buffer(&dest), [
		0x30, 0x0a, // seq
		0x01, 0x01, 0x00, // bool
		0x30, 0x05, // seq
		0x02, 0x03, 0x01, 0x02, 0x03, // int
	]));
};

// true is encoded as a single 0xff content byte.
@test fn write_bool() void = {
	let dest = memio::dynamic();
	defer io::close(&dest)!;

	let buf = memio::dynamic();
	defer io::close(&buf)!;
	let enc = derencoder(&buf);

	write_bool(&enc, true)!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x01, 0x01, 0xff]));
};

// Integers are compacted to their minimal form; write_uint prepends a zero
// byte if the highest bit is set.
@test fn write_int() void = {
	let dest = memio::dynamic();
	defer io::close(&dest)!;
	let buf = memio::dynamic();
	defer io::close(&buf)!;

	let enc = derencoder(&buf);

	// leading zero bytes are stripped, except the one guarding the sign bit
	write_int(&enc, [0x00, 0x00, 0x00, 0x00, 0x80])!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x02, 0x02, 0x00, 0x80]));

	memio::reset(&dest);
	memio::reset(&buf);
	let enc = derencoder(&buf);

	// redundant 0xff bytes of a negative number are stripped
	write_int(&enc, [0xff, 0xff, 0xff, 0x80, 0x10])!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x02, 0x02, 0x80, 0x10]));

	memio::reset(&dest);
	memio::reset(&buf);
	let enc = derencoder(&buf);

	// zero keeps a single content byte
	write_int(&enc, [0x00, 0x00, 0x00])!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x02, 0x01, 0x00]));

	memio::reset(&dest);
	memio::reset(&buf);
	let enc = derencoder(&buf);

	// unsigned value with the highest bit set gets a leading zero byte
	write_uint(&enc, [0x8f, 0x01])!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x02, 0x03, 0x00, 0x8f, 0x01]));
};

diff --git a/encoding/asn1/encoder.ha b/encoding/asn1/encoder.ha
new file mode 100644
index 00000000..372f989a
--- /dev/null
+++ b/encoding/asn1/encoder.ha
@@ -0,0 +1,422 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use bytes;
use endian;
use errors;
use io;
use math::{bit_size_u8,bit_size_u32};
use memio;
use strings;
use time::date;
use types;


// Type used for the size of the data of an entry.
export type datasz = u32; // XXX: might want to use size here
// Functions to store and load a datasz in big endian byte order. They must
// match the type of datasz.
let szput = &endian::beputu32;
let szget = &endian::begetu32;
// Largest value of datasz.
def DATASZ_MAX = types::U32_MAX;

// The maximum header size possible for u32 tag ids: one leading identifier
// byte, up to five bytes for the tag id, one byte holding the length of the
// size and the size itself.
export def MAXHEADSZ = 1 + 5 + 1 + size(datasz);

// The maximum header size possible for entries of [[utag]]: one identifier
// byte, one byte holding the length of the size and the size itself.
export def MAXUTAGHEADSZ = 1 + 1 + size(datasz);

// State of a DER encoder. Created by [[derencoder]] or [[derencoder_nested]].
export type encoder = struct {
	// Stream that buffers the written entries until they are encoded.
	mem: *memio::stream,
	// Offset within 'mem' at the time the encoder was created.
	start: io::off,
	// Number of bytes written by this encoder so far.
	pos: size,
	// Stack of open constructed entries. Each item holds the position of
	// the size placeholder (relative to 'start') and the size accumulated
	// for the content so far.
	bt: [MAX_CONS_DEPTH](size, datasz),
	// Number of items in use in 'bt'.
	btn: size,

	// Value of 'pos' at which the data of the current primitive starts.
	cur_dpos: size,
	// Whether a primitive has been started and not yet finished.
	cur_prim: bool,
	// Whether the current primitive was written with its final size, so
	// that there is no placeholder to update.
	cur_fixed: bool,

	// Writer of the enclosing entry, if created by [[derencoder_nested]].
	parent: nullable *bytewstream,
};

// Creates a DER encoder. create_* functions are used to create constructed
// values. Functions to write primitive values start with write_ or end with
// writer. After the entries have been written, the result is encoded using
// [[encode]] or [[encodeto]].
//
// 'mem' is required to buffer the written data before encoding it. Each entry
// will have a maximum overhead of [[MAXUTAGHEADSZ]], if entries are written
// using only the functions provided here; or [[MAXHEADSZ]], if custom tag ids
// are in use. The encoder doesn't close 'mem' after use, hence it's the
// caller's responsibility to manage its lifetime.
//
// 'mem' as memio::stream allows the caller to decide whether to use a static or
// a dynamically allocated buffer.
export fn derencoder(mem: *memio::stream) encoder = encoder {
	mem = mem,
	start = io::tell(mem)!,
	...
};

// Creates a DER encoder that is nested within another DER entry and hence can
// use the buffer of the parent. The nested encoder starts at the current
// position of the parent's buffer.
export fn derencoder_nested(b: *bytewstream) encoder = encoder {
	mem = b.e.mem,
	start = io::tell(b.e.mem)!,
	parent = b,
	...
};

// Appends 'buf' to the buffer of the encoder and advances e.pos. Returns
// [[overflow]] if the total size would exceed DATASZ_MAX or if the buffer
// cannot hold the data.
fn write(e: *encoder, buf: []u8) (void | overflow) = {
	if (len(buf) > (DATASZ_MAX - e.pos)) return overflow;

	match (io::write(e.mem, buf)) {
	case let n: size =>
		if (n < len(buf)) {
			// short writes happen, if a fixed e.mem reaches its end
			return overflow;
		};
	case errors::overflow =>
		return overflow;
	case =>
		// writing to mem does not throw any other errors
		abort();
	};
	e.pos += len(buf);
};

// Writes the identifier bytes of an entry with class 'c' and tag id 't'. 'cons'
// tells whether the entry is constructed. The size of the identifier is added
// to the innermost open constructed entry, if any.
//
// Tag ids below 31 are stored within the first byte. Larger ids set the tag
// bits of the first byte to 0x1f and follow in groups of 7 bits, most
// significant group first, with the highest bit set on all but the last byte.
fn write_id(e: *encoder, c: class, t: u32, cons: bool) (void | overflow) = {
	let head: u8 = c << 6;
	if (cons) {
		head |= (1 << 5);
	};

	if (t < 31) {
		bt_add_sz(e, 1);
		return write(e, [head | t: u8]);
	};

	write(e, [head | 0x1f])?;

	// n is the number of 7 bit groups that precede the last one
	const bsz = bit_size_u32(t);
	const n = ((bsz + 6) / 7) - 1;
	for (let i = 0z; i < n; i += 1) {
		write(e, [0x80 | (t >> ((n - i) * 7)): u8])?;
	};
	write(e, [t: u8 & 0x7f])?;

	// Account for the leading byte, the n continuation bytes and the last
	// byte, as it is done for the short form above. Otherwise the size of
	// the enclosing constructed entry would be too small.
	bt_add_sz(e, n: size + 2);
};

// Writes a complete primitive entry with the content 'b'. Since the size is
// known in advance, it is written in its final form and no placeholder is
// required. A primitive that is still open is finished first.
fn write_fixedprim(e: *encoder, c: class, t: u32, b: []u8) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};

	e.cur_prim = true;
	e.cur_fixed = true;
	write_id(e, c, t, false)?;

	write(e, encode_dsz(len(b)))?;
	write(e, b)?;

	// add size bytes and content to the enclosing constructed entry
	bt_add_dsz(e, len(b): datasz);
};

// Starts a primitive entry of yet unknown size. The content is appended using
// [[write]]; [[finish_prim]] stores the final size into the placeholder. A
// primitive that is still open is finished first.
fn create_prim(e: *encoder, class: class, tag: u32) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};

	e.cur_prim = true;
	e.cur_fixed = false;

	write_id(e, class, tag, false)?;

	// write size placeholder
	const placehsz = 0x80 | size(datasz): u8;
	let lbuf: [1 + size(datasz)]u8 = [placehsz, 0...];
	write(e, lbuf)?;

	// the data starts right after the placeholder
	e.cur_dpos = e.pos;
};

// Finishes the current primitive entry. For primitives created by
// [[create_prim]] the size of the written data is stored into the size
// placeholder and added to the enclosing constructed entry.
fn finish_prim(e: *encoder) void = {
	e.cur_prim = false;
	// nothing to update if nothing was written or the size is final
	if (e.pos == 0 || e.cur_fixed) {
		return;
	};

	// restore the write position of mem at the end
	const pos = io::tell(e.mem)!;
	defer io::seek(e.mem, pos, io::whence::SET)!;

	// write back size to placeholder
	const dszpos = e.start: size + e.cur_dpos - size(datasz);
	const dsz = e.pos - e.cur_dpos;
	let dszbuf: [size(datasz)]u8 = [0...];
	szput(dszbuf, dsz: datasz);

	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
	io::write(e.mem, dszbuf)!;

	bt_add_dsz(e, dsz: datasz);
};

// Pushes a new item with an empty size onto the backtrack stack. 'pos' is the
// position of the size placeholder of the constructed entry being opened.
// Returns [[overflow]] if the stack is full.
fn push_bt(e: *encoder, pos: size) (void | overflow) = {
	// e.btn is the index of the next free slot, so the stack is only full
	// once it reaches len(e.bt).
	if (e.btn >= len(e.bt)) return overflow;

	e.bt[e.btn] = (pos, 0);
	e.btn += 1;
};

// Adds 'sz' to the size accumulated for the innermost open constructed entry.
// Does nothing if no constructed entry is open.
fn bt_add_sz(e: *encoder, sz: size) void = {
	if (e.btn == 0) {
		return;
	};
	const top = e.btn - 1;
	e.bt[top].1 += sz: datasz;
};

// Adds the data size 'sz' plus the number of bytes required to encode 'sz' to
// the size accumulated for the innermost open constructed entry.
fn bt_add_dsz(e: *encoder, sz: datasz) void = {
	if (e.btn != 0) {
		bt_add_sz(e, lensz(sz) + sz);
	};
};

// Removes the topmost item from the backtrack stack and returns it. The freed
// slot is reset to zero.
fn pop_bt(e: *encoder) (size, datasz) = {
	e.btn -= 1;
	const top = e.bt[e.btn];
	e.bt[e.btn] = (0, 0);
	return top;
};

// Returns the number of bytes required to encode the data size 'l': one byte
// for sizes below 128, otherwise one byte plus the bytes of the value itself.
fn lensz(l: datasz) u8 = {
	if (l < 128) {
		return 1;
	};
	return 1 + (bit_size_u32(l) + 7) / 8;
};

// Encodes the data size 'sz' using the minimal number of bytes. The result is
// borrowed from a static buffer and gets overwritten by the next call.
fn encode_dsz(sz: size) []u8 = {
	static let buf: [size(datasz) + 1]u8 = [0...];
	if (sz < 128) {
		// short form: the size itself
		buf[0] = sz: u8;
		return buf[..1];
	};

	// long form: the number of size bytes with the highest bit set,
	// followed by the size in big endian order
	let n = lensz(sz: datasz);
	buf[0] = (n - 1) | 0x80;
	// fill from the least significant byte backwards
	for (let i: size = n - 1; sz > 0; i -= 1) {
		buf[i] = sz: u8;
		sz >>= 8;
	};

	return buf[..n];
};

// Creates an explicit constructed entry with class 'c' and tag id 'tag'.
// [[finish_explicit]] must be called to close the entry. Returns [[overflow]]
// if the header does not fit into the buffer or too many entries are nested.
export fn create_explicit(e: *encoder, c: class, tag: u32) (void | overflow) =
	create_cons(e, c, tag);

// Finishes an explicit constructed entry opened by [[create_explicit]].
export fn finish_explicit(e: *encoder) void = finish_cons(e);

// Opens a constructed entry: writes the identifier followed by a placeholder
// for the size and remembers the position of the placeholder on the backtrack
// stack. A primitive that is still open is finished first.
fn create_cons(e: *encoder, class: class, tagid: u32) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};
	write_id(e, class, tagid, true)?;

	// long form size of maximum length, zeroed until finish_cons
	let placeholder: [1 + size(datasz)]u8 = [0...];
	placeholder[0] = 0x80 | size(datasz): u8;
	write(e, placeholder)?;

	// position of the size bytes that follow the leading placeholder byte
	push_bt(e, e.pos - size(datasz))?;
};

// Closes the innermost open constructed entry: stores the accumulated content
// size into its size placeholder and adds it to the enclosing constructed
// entry, if any. A primitive that is still open is finished first.
fn finish_cons(e: *encoder) void = {
	if (e.cur_prim) {
		finish_prim(e);
	};

	let (dszpos, sz) = pop_bt(e);
	let lbuf: [size(datasz)]u8 = [0...];
	szput(lbuf, sz);

	// restore the write position of mem at the end
	const pos = io::tell(e.mem)!;
	defer io::seek(e.mem, pos, io::whence::SET)!;

	// dszpos is relative to the start of this encoder
	dszpos += e.start: size;
	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
	io::write(e.mem, lbuf)!;
	bt_add_dsz(e, sz);
};

// Opens a sequence. It must be closed using [[finish_seq]].
export fn create_seq(e: *encoder) (void | overflow) =
	create_cons(e, class::UNIVERSAL, utag::SEQUENCE);

// Finishes a sequence opened by [[create_seq]].
export fn finish_seq(e: *encoder) void = finish_cons(e);

// Writes the boolean 'b'. true is stored as 0xff and false as 0x00.
export fn write_bool(e: *encoder, b: bool) (void | overflow) = {
	const content: u8 = if (b) 0xff else 0x00;
	return write_fixedprim(e, class::UNIVERSAL, utag::BOOLEAN, [content]);
};

// Writes a null value, i.e. an entry without content.
export fn write_null(e: *encoder) (void | overflow) = {
	return write_fixedprim(e, class::UNIVERSAL, utag::NULL, []);
};

// An [[io::stream]] that appends the written bytes to the current primitive
// entry of an [[encoder]].
export type bytewstream = struct {
	stream: io::stream,
	// The encoder the bytes are written to.
	e: *encoder,
};

// Starts a primitive entry with class 'c' and tag id 'tagid' and returns a
// stream that writes the content of the entry.
fn bytewriter(e: *encoder, c: class, tagid: u32) (bytewstream | overflow) = {
	create_prim(e, c, tagid)?;
	return bytewstream {
		stream = &bytewriter_vtable,
		e = e,
		...
	};
};

// Stream operations of [[bytewstream]]; only writing is supported.
const bytewriter_vtable = io::vtable {
	writer = &bytewriter_write,
	...
};

// Writer of [[bytewstream]]. Appends 'buf' to the encoder and reports an
// [[overflow]] as wrapped [[io::error]].
fn bytewriter_write(s: *io::stream, buf: const []u8) (size | io::error) = {
	const bw = s: *bytewstream;
	match (write(bw.e, buf)) {
	case overflow =>
		return wrap_err(overflow);
	case void =>
		return len(buf);
	};
};

// Creates an io::writer that adds the written bytes as OctetString.
export fn octetstrwriter(e: *encoder) (bytewstream | overflow) =
	bytewriter(e, class::UNIVERSAL, utag::OCTET_STRING);

// Writes an integer. 'n' must be stored in big endian order. The highest bit of
// the first byte marks the sign. Redundant leading bytes are omitted. 'n' must
// not be empty.
export fn write_int(e: *encoder, n: []u8) (void | overflow) = {
	const neg = n[0] & 0x80 == 0x80;

	// compact according to X.690 Chapt. 8.3.2
	let i = 0z;
	for (i < len(n) - 1; i += 1) {
		// a leading 0xff may only be dropped if the next byte keeps
		// the number negative
		if (neg && (n[i] != 0xff || n[i+1] & 0x80 != 0x80)) {
			break;
		};

		// a leading 0x00 may only be dropped if the next byte keeps
		// the number positive
		if (!neg && (n[i] != 0x00 || n[i+1] & 0x80 == 0x80)) {
			break;
		};
	};

	write_fixedprim(e, class::UNIVERSAL, utag::INTEGER, n[i..])?;
};

// Writes an integer assuming 'n' is unsigned. 'n' must be stored in big endian
// order and must not be empty.
export fn write_uint(e: *encoder, n: []u8) (void | overflow) = {
	// without the highest bit set, signed and unsigned encoding are equal
	if (n[0] & 0x80 == 0) {
		return write_int(e, n);
	};

	// prepend 0 so that the highest valued bit is not interpreted as sign
	create_prim(e, class::UNIVERSAL, utag::INTEGER)?;
	write(e, [0])?;
	write(e, n)?;
	finish_prim(e);
};

// Writes 's' as Utf8String. Returns [[overflow]] if the entry does not fit
// into the buffer of the encoder.
export fn write_utf8str(e: *encoder, s: str) (void | overflow) =
	write_fixedprim(e, class::UNIVERSAL, utag::UTF8_STRING,
		strings::toutf8(s))?;

// Encodes currently written data in given memio stream and returns the buffer
// containing the result borrowed from 'mem' provided for [[derencoder]].
//
// All constructed entries must have been finished. The data is compacted in
// place: size placeholders are replaced by the minimal size encoding and the
// remainder of the buffer is zeroed.
export fn encode(e: *encoder) ([]u8 | io::error) = {
	assert(e.btn == 0);
	assert(e.start >= 0);

	if (e.cur_prim) {
		finish_prim(e);
	};

	// NOTE(review): 'n' is not used within this function.
	let n = 0z;
	let buf = memio::buffer(e.mem)[e.start..];

	// iterate entries to minify tag ids and data sizes. 't' is the write
	// index and 'i' is the read index.
	let t = 0z;
	for (let i = 0z; i < e.pos) { // TODO cast seems off
		// encode id
		const id = buf[i];
		buf[t] = id;
		t += 1;
		i += 1;

		// bit 5 of the first identifier byte marks constructed entries
		const cons = (id >> 5) & 1 == 1;
		if ((id & 0b11111) == 0b11111) {
			// id spans multiple bytes
			// copy until a byte without the continuation bit
			let id: u8 = 0x80;
			for (id & 0x80 == 0x80) {
				id = buf[i];
				buf[t] = id;
				t += 1;
				i += 1;
			};
		};

		// encode dsz
		let dsz: datasz = 0;
		let l = buf[i];
		i += 1;
		if (l < 128) {
			// data size fits in a single byte
			dsz = l;
			buf[t] = l;
			t += 1;
		} else {
			// decode multibyte size and minimize, since not all
			// placeholder bytes may have been used.
			const dn = l & 0x7f;
			for (let j = 0z; j < dn; j += 1) {
				dsz <<= 8;
				dsz |= buf[i];
				i += 1;
			};

			let dszbuf = encode_dsz(dsz);
			buf[t..t + len(dszbuf)] = dszbuf;
			t += len(dszbuf);
		};

		// the content of a constructed entry consists of further
		// entries, which are processed by the following iterations
		if (cons) {
			continue;
		};

		// write data of primitive fields
		buf[t..t+dsz] = buf[i..i+dsz];
		t += dsz;
		i += dsz;
	};

	// clear the bytes that are no longer in use after compaction
	bytes::zero(buf[t..]);
	match (e.parent) {
	case null =>
		yield;
	case let s: *bytewstream =>
		// account the encoded data to the enclosing encoder
		s.e.pos += t;
	};
	return buf[..t];
};

// Encodes the written data like [[encode]] and writes the result to 'dest'.
// Returns the number of bytes written.
export fn encodeto(e: *encoder, dest: io::handle) (size | io::error) = {
	const encoded = encode(e)?;
	const written = io::writeall(dest, encoded)?;
	return written;
};
-- 
2.43.0

[PATCH hare v3 3/6] asn1: decoder

Details
Message ID
<20240205145140.361304-3-apreiml@strohwolke.at>
In-Reply-To
<20240205145140.361304-1-apreiml@strohwolke.at> (view parent)
DKIM signature
pass
Download raw message
Patch: +1141 -0
Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---
 encoding/asn1/+test/decoder_test.ha | 331 ++++++++++++
 encoding/asn1/decoder.ha            | 810 ++++++++++++++++++++++++++++
 2 files changed, 1141 insertions(+)
 create mode 100644 encoding/asn1/+test/decoder_test.ha
 create mode 100644 encoding/asn1/decoder.ha

diff --git a/encoding/asn1/+test/decoder_test.ha b/encoding/asn1/+test/decoder_test.ha
new file mode 100644
index 00000000..2282fc40
--- /dev/null
+++ b/encoding/asn1/+test/decoder_test.ha
@@ -0,0 +1,331 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use bytes;
use errors;
use io;
use memio;
use os;
use strings;
use time::date;
use types;


// Stream shared by all decoders created using d(). It is allocated on first
// use and freed by freetdec.
// XXX: would be nice to just declare this as mem: memio::stream
let mem: nullable *memio::stream = null;
let rbuf: [os::BUFSZ]u8 = [0...];

// Creates a decoder that reads from 'i'. The underlying stream is shared, hence
// a decoder is only valid until the next call.
fn d(i: []u8) decoder = {
	let buf = memio::fixed(i);
	let h = match (mem) {
	case null =>
		let h = alloc(buf);
		mem = h;
		yield h;
	case let m: *memio::stream =>
		// reuse the allocation for the new input
		*m = buf;
		yield m;
	};
	return derdecoder(h);
};

// Releases the stream shared by the test decoders, if one was allocated.
@fini fn freetdec() void = {
	match (mem) {
	case let shared: *memio::stream =>
		free(shared);
		mem = null;
	case null =>
		yield;
	};
};

// Parsing of class and tag id, including multi-byte tag ids.
@test fn parsetag() void = {
	assert((next(&d([0x02, 0x01]))!).class == class::UNIVERSAL);
	assert((next(&d([0x02, 0x01]))!).tagid == 0x02);
	assert((next(&d([0x1e, 0x01]))!).tagid == 0x1e);
	assert((next(&d([0x1f, 0x7f, 0x01]))!).tagid == 0x7f);
	assert((next(&d([0x1f, 0x81, 0x00, 0x01]))!).tagid == 0x80);

	assert((next(&d([0x1f, 0x8f, 0xff, 0xff, 0xff, 0x7f, 0x01]))!).tagid
		== types::U32_MAX);
	// tag id exceeds u32
	assert(next(&d([0x1f, 0x90, 0x80, 0x80, 0x80, 0x00, 0x01])) is invalid);
};

// Parsing of the data size in short and long form.
@test fn parselen() void = {
	assert(dsz(next(&d([0x02, 0x1]))!) == 1);
	assert(dsz(next(&d([0x02, 0x7f]))!) == 127);
	assert(dsz(next(&d([0x02, 0x81, 0x80]))!) == 128);

	// must use minimal amount of bytes for length encoding
	assert(next(&d([0x02, 0x81, 0x01, 0x01])) is invalid);
	assert(next(&d([0x02, 0x81, 0x7f])) is invalid);
	assert(next(&d([0x02, 0x82, 0x00, 0xff])) is invalid);

	// indefinite form is not allowed in DER
	assert(next(&d([0x02, 0x80, 0x01, 0x00, 0x00])) is invalid);
};

// Reading from empty input fails with badformat.
@test fn emptydata() void = {
	assert(read_bool(&d([])) is badformat);
	assert(open_set(&d([])) is badformat);
};

// Opening and closing of sequences, including the detection of sequences that
// are closed or finished before they were read to the end.
@test fn seq() void = {
	let dat: [_]u8 = [
		0x30, 0x0a, // seq
		0x01, 0x01, 0xff, // bool true
		0x30, 0x05, // seq
		0x30, 0x03, // seq
		0x01, 0x01, 0x00, // bool false
	];

	let dc = &d(dat);
	open_seq(dc)!;
	assert(read_bool(dc)! == true);
	open_seq(dc)!;
	open_seq(dc)!;
	assert(read_bool(dc)! == false);
	close_seq(dc)!;
	close_seq(dc)!;
	close_seq(dc)!;
	finish(dc)!;

	// the next entry is a bool, not a sequence
	let dc = &d(dat);
	open_seq(dc)!;
	assert(open_seq(dc) is invalid);

	// content of the sequence has not been read
	let dc = &d(dat);
	open_seq(dc)!;
	assert(close_seq(dc) is badformat);

	let dat: [_]u8 = [
		0x30, 0x07, // seq
		0x0c, 0x05, 0x65, 0x66, 0x67, 0xc3, 0x96, // utf8 string
	];

	let dc = &d(dat);
	open_seq(dc)!;
	let r = strreader(dc, utag::UTF8_STRING)!;
	let s = io::drain(&r)!;
	defer free(s);
	assert(bytes::equal([0x65, 0x66, 0x67, 0xc3, 0x96], s));

	// string is only partially read
	let dc = &d(dat);
	let buf: [4]u8 = [0...];
	open_seq(dc)!;
	let r = strreader(dc, utag::UTF8_STRING)!;
	assert(io::read(&r, buf)! == 3);
	assert(close_seq(dc) is badformat);

	// check unclosed
	let dc = &d(dat);
	open_seq(dc)!;
	assert(finish(dc) is invalid);

	let dc = &d(dat);
	open_seq(dc)!;
	let r = strreader(dc, utag::UTF8_STRING)!;
	let s = io::drain(&r)!;
	// io::drain allocates; free the result like the first one above
	defer free(s);
	assert(finish(dc) is invalid);
};

// An entry must not exceed the size of the enclosing sequence.
@test fn invalid_seq() void = {
	let dat: [_]u8 = [
		0x30, 0x03, // seq containing data of size 3
		0x02, 0x03, 0x01, 0x02, 0x03, // int 0x010203 overflows seq
	];

	let dc = &d(dat);
	open_seq(dc)!;

	let buf: [3]u8 = [0...];
	assert(read_int(dc, buf) is invalid);
};

// expect_implicit allows the next entry to be read with a context specific
// tag; it does not affect the entries after that.
@test fn read_implicit() void = {
	let dat: [_]u8 = [
		0x30, 0x06, // seq
		0x85, 0x01, 0xff, // IMPLICIT bool true
		0x01, 0x01, 0x00, // bool false
	];

	let dc = &d(dat);
	open_seq(dc)!;
	expect_implicit(dc, class::CONTEXT, 5)!;
	assert(read_bool(dc)! == true);
	// the next entry is a bool, not an integer
	assert(read_u16(dc) is badformat);
};

// Decoding of booleans and rejection of malformed ones.
@test fn read_bool() void = {
	assert(read_bool(&d([0x01, 0x01, 0xff]))!);
	assert(read_bool(&d([0x01, 0x01, 0x00]))! == false);
	// content must be exactly one byte
	assert(read_bool(&d([0x01, 0x02, 0x00, 0x00])) is invalid);
	// X.690, ch. 11.1
	assert(read_bool(&d([0x01, 0x01, 0x01])) is invalid);

	// invalid class
	assert(read_bool(&d([0x81, 0x01, 0x01])) is badformat);
	// must be primitive
	assert(read_bool(&d([0x21, 0x01, 0x01])) is invalid);
	// invalid tag
	assert(read_bool(&d([0x02, 0x01, 0x01])) is badformat);
};

// Decoding of null values and rejection of malformed ones.
@test fn read_null() void = {
	read_null(&d([0x05, 0x00]))!;

	// The results must be asserted; a bare 'is' expression is evaluated
	// and discarded without checking anything.
	// NOTE(review): the original expressions tested for 'invalid'. The
	// read_bool test expects 'badformat' for a wrong class or tag, so
	// only the error in general is asserted here. Narrow it down once the
	// error kinds returned by read_null are confirmed.

	// null must not have content
	assert(read_null(&d([0x05, 0x01, 0x00])) is error);
	// wrong class
	assert(read_null(&d([0x85, 0x00])) is error);
	// wrong tag
	assert(read_null(&d([0x01, 0x00])) is error);
};

// Decoding of integers into a buffer and into fixed size unsigned types.
@test fn read_int() void = {
	let buf: [8]u8 = [0...];

	assert(read_int(&d([0x02, 0x01, 0x01]), buf)! == 1);
	assert(buf[0] == 0x01);
	assert(read_int(&d([0x02, 0x01, 0x00]), buf)! == 1);
	assert(buf[0] == 0x00);
	assert(read_int(&d([0x02, 0x02, 0x01, 0x02]), buf)! == 2);
	assert(buf[0] == 0x01);
	assert(buf[1] == 0x02);

	// must have at least one byte
	assert(read_int(&d([0x02, 0x00]), buf) is invalid);
	// non minimal
	assert(read_int(&d([0x02, 0x02, 0x00, 0x01]), buf) is invalid);
	assert(read_int(&d([0x02, 0x02, 0xff, 0x81]), buf) is invalid);

	// negative values are rejected; values with the highest bit set
	// require a leading zero byte
	assert(read_u8(&d([0x02, 0x01, 0x00]))! == 0);
	assert(read_u8(&d([0x02, 0x01, 0x01]))! == 1);
	assert(read_u8(&d([0x02, 0x01, 0x7f]))! == 0x7f);
	assert(read_u8(&d([0x02, 0x01, 0x80])) is invalid);
	assert(read_u8(&d([0x02, 0x01, 0x81])) is invalid);
	assert(read_u8(&d([0x02, 0x02, 0x00, 0x80]))! == 0x80);
	assert(read_u8(&d([0x02, 0x02, 0x00, 0xff]))! == 0xff);

	assert(read_u16(&d([0x02, 0x01, 0x00]))! == 0);
	assert(read_u16(&d([0x02, 0x02, 0x0f, 0xff]))! == 0xfff);
	assert(read_u16(&d([0x02, 0x03, 0x00, 0xff, 0xff]))! == 0xffff);
	// value does not fit into u16
	assert(read_u16(&d([0x02, 0x03, 0x01, 0xff, 0xff])) is invalid);
	assert(read_u32(&d([0x02, 0x03, 0x00, 0xff, 0xff]))! == 0xffff);

	let maxu64: [_]u8 = [
		0x02, 0x09, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	];
	assert(read_u64(&d(maxu64))! == 0xffffffffffffffff);
	// value does not fit into u64
	maxu64[2] = 0x01;
	assert(read_u64(&d(maxu64)) is invalid);
};

// Decoding of bit strings. The first content byte holds the number of unused
// bits of the last byte.
@test fn read_bitstr() void = {
	let buf: [8]u8 = [0...];
	let bs = read_bitstr(&d([0x03, 0x01, 0x00]), buf)!;
	assert(len(bs.0) == 0 && bs.1 == 0);
	assert(bitstr_isset(bs, 0)! == false);

	let bs = read_bitstr(&d([0x03, 0x02, 0x00, 0xff]), buf)!;
	assert(bytes::equal(bs.0, [0xff]) && bs.1 == 0);
	assert(bitstr_isset(bs, 0)!);
	assert(bitstr_isset(bs, 7)!);

	let bs = read_bitstr(&d([0x03, 0x03, 0x04, 0xab, 0xc0]), buf)!;
	assert(bytes::equal(bs.0, [0xab, 0xc0]) && bs.1 == 4);
	assert(bitstr_isset(bs, 0)!);
	assert(bitstr_isset(bs, 1)! == false);
	assert(bitstr_isset(bs, 8)!);
	assert(bitstr_isset(bs, 9)!);
	assert(!bitstr_isset(bs, 11)!);
	// index is beyond the last used bit
	assert(bitstr_isset(bs, 12) is invalid);

	// unused bits must be zero
	assert(read_bitstr(&d([0x03, 0x03, 0x04, 0xab, 0xc1]), buf) is invalid);
	assert(read_bitstr(&d([0x03, 0x03, 0x07, 0xab, 0x40]), buf) is invalid);
};

// Decoding of object identifiers as known oid and as raw bytes.
@test fn read_oid() void = {
	assert(read_oid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))! == oid::ID_AT_COMMON_NAME);

	assert(bytes::equal([0x55, 0x04, 0x03],
			read_rawoid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))!));
};

// Buffer holding the entry created by newdatetime.
let datbuf: [64]u8 = [0...];

// Creates a primitive entry with the universal tag 'tag' and 's' as content.
// The result is borrowed from datbuf and valid until the next call. 's' must
// fit into datbuf together with the two header bytes.
fn newdatetime(s: str, tag: utag) []u8 = {
	let datetime = strings::toutf8(s);
	let datsz = len(datetime): u8;
	// identifier byte followed by the size in short form
	datbuf[..2] = [tag, datsz];
	datbuf[2..2 + datsz] = datetime;
	return datbuf[..2 + datsz];
};

// Decoding of UTCTime. The second parameter of read_utctime affects the
// century chosen for the two digit year.
@test fn read_utctime() void = {
	let derdatetime = newdatetime("231030133710Z", utag::UTC_TIME);
	let dt = read_utctime(&d(derdatetime), 2046)!;

	let fbuf: [24]u8 = [0...];
	assert(date::bsformat(fbuf, date::RFC3339, &dt)!
		== "2023-10-30T13:37:10+0000");

	let dt = read_utctime(&d(derdatetime), 2020)!;
	assert(date::bsformat(fbuf, date::RFC3339, &dt)!
		== "1923-10-30T13:37:10+0000");

	// must end with Z
	let derdatetime = newdatetime("2310301337100", utag::UTC_TIME);
	assert(read_utctime(&d(derdatetime), 2020) is error);

	let derdatetime = newdatetime("231030133710", utag::UTC_TIME);
	assert(read_utctime(&d(derdatetime), 2020) is error);

	// only digits are allowed
	let derdatetime = newdatetime("231030133a10Z", utag::UTC_TIME);
	assert(read_utctime(&d(derdatetime), 2020) is error);

	// invalid month
	let derdatetime = newdatetime("231330133710Z", utag::UTC_TIME);
	assert(read_utctime(&d(derdatetime), 2020) is error);
};

// Decoding of GeneralizedTime with and without fractional seconds.
@test fn read_gtime() void = {
	let derdatetime = newdatetime("20231030133710Z", utag::GENERALIZED_TIME);

	let dt = read_gtime(&d(derdatetime))!;

	let fbuf: [32]u8 = [0...];
	assert(date::bsformat(fbuf, date::RFC3339, &dt)!
		== "2023-10-30T13:37:10+0000");

	let derdatetime = newdatetime("20231030133710.1Z", utag::GENERALIZED_TIME);
	let dt = read_gtime(&d(derdatetime))!;
	assert(date::bsformat(fbuf, date::STAMP_NANO, &dt)!
		== "2023-10-30 13:37:10.100000000");

	// must end with Z
	let derdatetime = newdatetime("20231030133710", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);
	let derdatetime = newdatetime("202310301337100", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	// seconds must always be present
	let derdatetime = newdatetime("202310301337", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);
	let derdatetime = newdatetime("202310301337Z", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	// fractional seconds must not end with 0 and must be omitted if 0
	let derdatetime = newdatetime("20231030133710.", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	let derdatetime = newdatetime("20231030133710.Z", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	let derdatetime = newdatetime("20231030133710.0", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	let derdatetime = newdatetime("20231030133710.0Z", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	let derdatetime = newdatetime("20231030133710.10Z", utag::GENERALIZED_TIME);
	assert(read_gtime(&d(derdatetime)) is error);

	// TODO midnight is YYYYMMDD000000Z
};
diff --git a/encoding/asn1/decoder.ha b/encoding/asn1/decoder.ha
new file mode 100644
index 00000000..78c01ca9
--- /dev/null
+++ b/encoding/asn1/decoder.ha
@@ -0,0 +1,810 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use bytes;
use errors;
use io;
use math::{bit_size_u8};
use os;
use strings;
use time::date;
use types;


// Mask of the tag bits within the first identifier byte.
def TAGMASK: u8 = 0x1f;
// Maximum nesting depth of constructed entries; the size of the stacks of
// [[encoder]] and [[decoder]].
def MAX_CONS_DEPTH: size = 32;

// Each DER entry starts with a header that describes the content.
export type head = struct {

	// Tells whether the data is constructed and encapsulates multiple
	// other data fields; or primitive and the value follows.
	cons: bool,

	// Class info
	class: class,

	// Tag id of the data
	tagid: u32,

	// Start position in stream
	start: size,

	// Start position of data in stream
	data: size,

	// End position in stream
	end: size,

	// Copied from the implicit flag of the decoder when the header is
	// returned as the next entry.
	implicit: bool,
};

// Returns the end position of the entry within the stream.
fn head_endpos(d: head) size = d.end;

// Size of the entry described by 'd' (header size + data size).
export fn sz(d: head) size = d.end - d.start;

// Size of the encoded data of the entry described by 'd', without the header.
export fn dsz(d: head) size = d.end - d.data;

// State of a DER decoder. Created by [[derdecoder]].
export type decoder = struct {
	// Source the encoded data is read from.
	src: io::handle,
	// Current position within the stream; compared against the end
	// positions of entries.
	pos: size,
	// Stack holding the headers of the open constructed entries.
	cstack: [MAX_CONS_DEPTH]head,
	// Number of items in use in 'cstack'.
	cstackp: size,
	// Header that has been peeked, but not consumed yet.
	next: (void | head),
	// Header of the entry that has been returned last.
	cur: (void | head),
	// NOTE(review): presumably bytes that were read ahead and pushed back;
	// a non-zero 'unbufn' is treated as unread data on EOF. Confirm against
	// the parts of the decoder not shown here.
	unbuf: [3]u8,
	unbufn: u8,
	// Copied into the header of the next entry and reset afterwards.
	implicit: bool,
};

// Creates a new DER decoder that reads from 'src'. The decoder will do a lot of
// short reads, hence a buffered stream is recommended.
//
// Each entry must be read to the end, before the next one is attended to.
// [[finish]] must be called at the end to make sure everything is read.
export fn derdecoder(src: io::handle) decoder = {
	return decoder {
		src = src,
		pos = 0,
		cstackp = 0,
		cur = void,
		next = void,
		implicit = false,
		...
	};
};

// Verifies that decoding is complete. Returns [[invalid]] if a constructed
// value is still open, if a peeked header has not been consumed, or if the
// element read last has not been read up to its end.
export fn finish(d: *decoder) (void | error) = {
	if (d.cstackp != 0) {
		return invalid;
	};
	if (d.next is head) {
		return invalid;
	};

	match (d.cur) {
	case let last: head =>
		if (last.end != d.pos) {
			return invalid;
		};
	case void =>
		return;
	};
};

// Returns the header of the innermost open constructed value, or void if no
// constructed value is open.
fn curcons(d: *decoder) (void | head) = {
	if (d.cstackp > 0) {
		return d.cstack[d.cstackp - 1];
	};
};

// Returns the header of the next data field without consuming it. Fails with
// [[badformat]] if there is no further data field.
export fn peek(d: *decoder) (head | error) = {
	const res = trypeek(d)?;
	if (res is io::EOF) {
		return badformat;
	};
	return res as head;
};

// Like [[peek]], but returns io::EOF if no further data field exists. A header
// that has been peeked is cached until it is consumed.
export fn trypeek(d: *decoder) (head | error | io::EOF) = {
	if (d.next is head) {
		return d.next as head;
	};

	if (is_endofcons(d)) {
		return io::EOF;
	};

	const res = parse_header(d)?;
	if (res is head) {
		const h = res as head;
		d.next = h;
		return h;
	};

	// The stream ended. That is only fine outside of constructed values
	// and when no unread bytes are pending.
	if (d.cstackp != 0 || d.unbufn > 0) {
		return badformat;
	};
	return io::EOF;
};

// Tells whether a constructed value is open and the read position has reached
// its end.
fn is_endofcons(d: *decoder) bool = {
	const open = curcons(d);
	return open is head && d.pos == head_endpos(open as head);
};

// Consumes and returns the header of the next data element. Fails with
// [[badformat]] if there is none.
fn next(d: *decoder) (head | error) = {
	const res = trynext(d)?;
	if (res is io::EOF) {
		return badformat;
	};
	return res as head;
};

// Consumes the header of the next data element and makes that element the
// current one. A previously peeked header is used if available. Returns
// io::EOF if the open constructed value or the stream has ended. A pending
// implicit marker of the decoder is moved to the returned header.
fn trynext(d: *decoder) (head | error | io::EOF) = {
	let h = if (d.next is head) {
		const peeked = d.next as head;
		d.next = void;
		yield peeked;
	} else {
		if (is_endofcons(d)) {
			return io::EOF;
		};
		yield match (parse_header(d)?) {
		case io::EOF =>
			return io::EOF;
		case let parsed: head =>
			yield parsed;
		};
	};

	d.cur = h;
	h.implicit = d.implicit;
	d.implicit = false;
	return h;
};

// Parses the header (identifier and length octets) of the element at the
// current position. Returns io::EOF if the stream ends before the first header
// byte, and [[invalid]] if the header is malformed or the element would
// exceed the open constructed value.
fn parse_header(d: *decoder) (head | error | io::EOF) = {
	// Position that must not be exceeded by this element.
	const consend = match (curcons(d)) {
	case void =>
		yield types::SIZE_MAX;
	case let h: head =>
		yield h.end;
	};

	if (d.pos == consend) return invalid;

	const epos = d.pos;
	const id = match (tryscan_byte(d)?) {
	case io::EOF =>
		d.cur = void;
		return io::EOF;
	case let id: u8 =>
		yield id;
	};

	// The two high bits of the identifier octet hold the class.
	const class = ((id & 0xc0) >> 6): class;

	let tagid: u32 = id & TAGMASK;
	if (tagid == TAGMASK) {
		tagid = parse_longtag(d, consend - d.pos)?;
	};
	const l = parse_len(d, consend - d.pos)?;
	const hl = d.pos - epos;

	// Compare the length against the space that is left instead of
	// computing the end position first. A huge encoded length would
	// otherwise wrap around and pass the bounds check.
	if (l > consend - d.pos) return invalid;
	const end = epos + hl + l;

	return head {
		class = class,
		// Bit 6 of the identifier octet marks constructed values.
		cons = ((id >> 5) & 1) == 1,
		tagid = tagid,
		start = epos,
		data = epos + hl,
		end = end,
		implicit = d.implicit,
		...
	};
};

// Reads a single byte from the source and advances the position. Returns
// io::EOF if the source has ended.
fn tryscan_byte(d: *decoder) (u8 | io::EOF | error) = {
	let b: [1]u8 = [0];
	if (io::readall(d.src, b)? is io::EOF) {
		return io::EOF;
	};
	d.pos += 1;
	return b[0];
};

// Reads a single byte from the source. Fails with [[truncated]] if the source
// has ended.
fn scan_byte(d: *decoder) (u8 | error) = {
	const res = tryscan_byte(d)?;
	if (res is io::EOF) {
		return truncated;
	};
	return res as u8;
};

// Reads data of the current entry into 'buf' and advances the position. The
// entry must have been opened using [[next]] or [[trynext]]. Bytes handed
// back with dataunread are returned first. io::EOF is returned when the end
// of the data is reached. A source that ends before the entry does is
// reported as a wrapped [[truncated]] error.
fn dataread(d: *decoder, buf: []u8) (size | io::EOF | io::error) = {
	let cur = match (d.cur) {
	case void =>
		abort("primitive must be opened with [[next]] or [[trynext]]");
	case let dh: head =>
		yield dh;
	};

	// Bytes of the entry that are still in the source. Unread bytes are
	// not part of it, since 'pos' has already been advanced past them.
	const streamleft = head_endpos(cur) - d.pos;
	if (streamleft + d.unbufn == 0) {
		return io::EOF;
	};

	let n = 0z;
	if (d.unbufn > 0) {
		const max = if (d.unbufn > len(buf)) len(buf): u8 else d.unbufn;
		buf[..max] = d.unbuf[..max];
		// Keep what did not fit at the front of the unread buffer.
		d.unbuf[..d.unbufn - max] = d.unbuf[max..d.unbufn];
		d.unbufn -= max;
		n += max;
	};

	// Never read past the end of the entry.
	const space = len(buf) - n;
	const want = if (streamleft < space) streamleft else space;
	if (want == 0) {
		return n;
	};

	match (io::read(d.src, buf[n..n + want])?) {
	case io::EOF =>
		// there should be data left
		return wrap_err(truncated);
	case let sz: size =>
		d.pos += sz;
		return n + sz;
	};
};

// Hands bytes back to the decoder, e.g. an incomplete utf8 rune. They are
// appended to the unread buffer, which must have enough room left.
fn dataunread(d: *decoder, buf: []u8) void = {
	const have = d.unbufn: size;
	const total = have + len(buf);
	assert(total <= len(d.unbuf));

	d.unbuf[have..total] = buf;
	d.unbufn = total: u8;
};

// Tells whether all data of the current entry has been read. Unread bytes
// count as not consumed. Without a current entry the result is true.
fn dataeof(d: *decoder) bool = {
	if (d.cur is void) {
		return true;
	};
	const h = d.cur as head;
	return head_endpos(h) == d.pos + d.unbufn;
};

// Parses a tag id in the long form, reading at most 'max' bytes. Every byte
// contributes its low seven bits, most significant group first; a cleared
// high bit marks the last byte. Returns [[invalid]] if the tag does not fit
// into an u32, if it starts with a zero group, or if it is not terminated
// within 'max' bytes.
fn parse_longtag(p: *decoder, max: size) (u32 | error) = {
	// XXX: u32 too much?
	let tag: u32 = 0;
	let maxbits = size(u32) * 8;
	let nbits = 0z;

	for (let i = 0z; i < max; i += 1) {
		let b = scan_byte(p)?;
		const part = b & 0x7f;

		// The first group only counts its significant bits, every
		// following group counts in full.
		nbits += if (tag == 0) bit_size_u8(part) else 7;
		if (nbits > maxbits) {
			// overflows u32
			return invalid;
		};

		tag = (tag << 7) + part;
		if (tag == 0) {
			// first tag part must not be 0
			return invalid;
		};

		if ((b >> 7) == 0) {
			return tag;
		};
	};
	return invalid; // max has been reached
};

// Parses the length octets of a header, reading at most 'max' bytes. Only
// the minimal definite encoding is accepted; everything else results in
// [[invalid]].
fn parse_len(p: *decoder, max: size) (size | error) = {
	if (max == 0) {
		return invalid;
	};

	const first = scan_byte(p)?;
	if (first == 0xff) {
		return invalid;
	};
	if (first & 0x80 == 0) {
		// short form
		return first: size;
	};

	// Long form: the low seven bits hold the number of length bytes.
	const nbytes = first & 0x7f;
	if (nbytes == 0) {
		// Indefinite encoding is not supported in DER.
		return invalid;
	};
	if (nbytes > size(size)) {
		// would cause a size overflow
		return invalid;
	};
	if (nbytes + 1 > max) {
		return invalid;
	};

	let length = 0z;
	for (let i = 0z; i < nbytes; i += 1) {
		const octet = scan_byte(p)?;
		length = (length << 8) + octet;
		if (length == 0) {
			// Leading zeroes means minimum number of bytes for
			// length encoding has not been used.
			return invalid;
		};
	};

	if (length <= 0x7f) {
		// Could've used short form.
		return invalid;
	};
	return length;
};

// Expects the next data field to be IMPLICIT tagged with class 'c' and tag
// 'tag'. On success a read function (read_{*} or {*}reader) must follow, which
// defines and reads the data the way it is actually stored.
export fn expect_implicit(d: *decoder, c: class, tag: u32) (void | error) = {
	const upcoming = peek(d)?;
	expect_tag(upcoming, c, tag)?;
	d.implicit = true;
};

// Opens an EXPLICIT tagged field of class 'c' and tag 'tag'. Once the
// contained data has been read, [[close_explicit]] must be called.
export fn open_explicit(d: *decoder, c: class, tag: u32) (void | error) = {
	return open_cons(d, c, tag);
};

// Closes a field opened with [[open_explicit]].
export fn close_explicit(d: *decoder) (void | badformat) = {
	return close_cons(d);
};


// Consumes the next header and opens it as constructed value of the given
// 'class' and 'tagid'. Fails if the element is not constructed, if the tag
// does not match, or if too many constructed values are already open.
fn open_cons(d: *decoder, class: class, tagid: u32) (void | error) = {
	const h = next(d)?;
	if (!h.cons) {
		return invalid;
	};
	expect_tag(h, class, tagid)?;

	if (d.cstackp == len(d.cstack)) {
		return badformat;
	};
	d.cstackp += 1;
	d.cstack[d.cstackp - 1] = h;
};

// Closes the innermost open constructed value. [[badformat]] is returned if
// not all of its data has been read. Aborts if no constructed value is open.
fn close_cons(d: *decoder) (void | badformat) = {
	if (d.implicit) {
		// a datafield marked implicit has not been read
		return badformat;
	};

	const open = match (curcons(d)) {
	case void =>
		abort("No constructed value open");
	case let h: head =>
		yield h;
	};

	// All data must have been read before closing the seq
	if (d.pos != head_endpos(open) || d.unbufn > 0) {
		return badformat;
	};

	d.cstackp -= 1;
};

// Opens a sequence. It must be closed using [[close_seq]].
export fn open_seq(d: *decoder) (void | error) = {
	return open_cons(d, class::UNIVERSAL, utag::SEQUENCE: u32);
};

// Closes the current sequence. Returns [[badformat]] if not all of its data
// has been read.
export fn close_seq(d: *decoder) (void | badformat) = {
	return close_cons(d);
};

// Opens a set. DER requires the elements of a set to be sorted; the order is
// not validated.
export fn open_set(d: *decoder) (void | error) = {
	return open_cons(d, class::UNIVERSAL, utag::SET: u32);
};

// Closes the current set. Returns [[badformat]] if not all of its data has
// been read.
export fn close_set(d: *decoder) (void | badformat) = {
	return close_cons(d);
};

// Checks header 'h' against the expected 'class' and 'tagid'. A universal
// sequence or set that is not constructed is [[invalid]]. The class and tag
// comparison is left out for headers marked implicit; otherwise a mismatch
// yields [[badformat]].
fn expect_tag(h: head, class: class, tagid: u32) (void | invalid | badformat) = {
	const wantcons = class == class::UNIVERSAL
		&& (tagid == utag::SEQUENCE || tagid == utag::SET);
	if (wantcons && !h.cons) {
		return invalid;
	};

	if (!h.implicit && (h.class != class || h.tagid != tagid)) {
		return badformat;
	};
};

// Like [[expect_tag]] for a tag of the universal class.
fn expect_utag(dh: head, tag: utag) (void | invalid | badformat) = {
	return expect_tag(dh, class::UNIVERSAL, tag: u32);
};

// Reads all data of the current entry into 'buf' and returns the number of
// bytes read, which is 0 for an entry without data. Fails with [[badformat]]
// if the data does not fit into 'buf'. I/O errors of the underlying read are
// passed on to the caller.
fn read_bytes(d: *decoder, buf: []u8) (size | error) = {
	match (dataread(d, buf)?) {
	case io::EOF =>
		return 0z;
	case let n: size =>
		if (!dataeof(d)) {
			return badformat;
		};
		return n;
	};
};

// Like [[read_bytes]], but fails with [[badformat]] unless the data fills
// 'buf' exactly.
fn read_nbytes(d: *decoder, buf: []u8) (size | error) = {
	const got = read_bytes(d, buf)?;
	if (got == len(buf)) {
		return got;
	};
	return badformat;
};

// Reads a boolean. The value must be encoded in exactly one byte, which is
// 0xff for true and 0x00 for false; anything else is [[invalid]].
export fn read_bool(d: *decoder) (bool | error) = {
	const h = next(d)?;
	expect_utag(h, utag::BOOLEAN)?;
	if (dsz(h) != 1) {
		return invalid;
	};

	switch (scan_byte(d)?) {
	case 0xff =>
		return true;
	case 0x00 =>
		return false;
	case =>
		return invalid;
	};
};

// Checks that the integer 'i' is not empty and is encoded using the minimal
// number of bytes possible, as defined in X.690 s8.3.2: the first nine bits
// must be neither all zero nor all one.
fn validate_intprefix(i: []u8) (void | error) = {
	if (len(i) == 0) {
		return invalid;
	};
	if (len(i) == 1) {
		return;
	};

	const msb = i[1] >> 7;
	const padzero = i[0] == 0x00 && msb == 0;
	const padones = i[0] == 0xff && msb == 1;
	if (padzero || padones) {
		return invalid;
	};
};

// Reads an integer into 'buf' and returns its size in bytes, which is at
// least one. Fails if the integer does not fit into 'buf'. The integer is
// stored in big endian format, negative values as twos complement.
export fn read_int(d: *decoder, buf: []u8) (size | error) = {
	assert(len(buf) > 0);

	const h = next(d)?;
	expect_utag(h, utag::INTEGER)?;

	const nread = read_bytes(d, buf)?;
	validate_intprefix(buf[..nread])?;
	return nread;
};

// Like [[read_int]], but fails with [[badformat]] if the integer is negative.
// A leading zero byte is removed from the result.
export fn read_uint(d: *decoder, buf: []u8) (size | error) = {
	const n = read_int(d, buf)?;
	if (buf[0] & 0x80 != 0) {
		return badformat;
	};
	if (buf[0] != 0) {
		return n;
	};

	// drop the leading zero byte
	buf[..n-1] = buf[1..n];
	return n - 1;
};

// Reads a non-negative integer of at most 'x' significant bytes. Returns
// [[invalid]] if the sign bit is set or the value needs more than 'x' bytes.
fn read_ux(d: *decoder, x: u8) (u64 | error) = {
	assert(x <= 8);

	// One extra byte for a leading zero that keeps the sign bit clear.
	let raw: [9]u8 = [0...];
	const n = read_int(d, raw[..x+1])?;

	if (raw[0] >> 7 != 0) {
		// sign bit is set
		return invalid;
	};

	const lead = if (raw[0] == 0x00) 1u8 else 0u8;
	if (n - lead > x) {
		return invalid;
	};

	let val = 0u64;
	for (let i = lead; i < n; i += 1) {
		val = (val << 8) + raw[i];
	};
	return val;
};

// Reads a non-negative integer that must fit into an u8.
export fn read_u8(d: *decoder) (u8 | error) = {
	const v = read_ux(d, 1)?;
	return v: u8;
};

// Reads a non-negative integer that must fit into an u16.
export fn read_u16(d: *decoder) (u16 | error) = {
	const v = read_ux(d, 2)?;
	return v: u16;
};

// Reads a non-negative integer that must fit into an u32.
export fn read_u32(d: *decoder) (u32 | error) = {
	const v = read_ux(d, 4)?;
	return v: u32;
};

// Reads a non-negative integer that must fit into an u64.
export fn read_u64(d: *decoder) (u64 | error) = {
	return read_ux(d, 8);
};

// Reads a bitstring value into 'buf'. The result tuple contains the bitstring,
// borrowed from 'buf', and the number of unused bits in the last byte. The
// [[bitstr_isset]] function may be used to check for set bits.
//
// Returns [[invalid]] if the unused bits count is missing or greater than 7,
// if it is not zero for an empty bitstring, or if any unused bit is set.
export fn read_bitstr(d: *decoder, buf: []u8) (([]u8, u8) | error) = {
	let dh = next(d)?;
	expect_utag(dh, utag::BITSTRING)?;

	// The first data byte holds the number of unused bits.
	let unused: [1]u8 = [0...];
	match (dataread(d, unused)?) {
	case io::EOF =>
		return invalid;
	case let n: size =>
		if (n != 1) {
			return invalid;
		};
	};
	const unused = unused[0];
	if (unused > 7) {
		return invalid;
	};

	const n = read_bytes(d, buf)?;
	if (n == 0 && unused != 0) {
		// an empty bitstring has no bits that could be unused
		// (X.690 s8.6.2.3)
		return invalid;
	};

	const mask = (1 << unused) - 1;
	if (n > 0 && buf[n-1] & mask != 0) {
		// unused bits must be zero
		return invalid;
	};
	return (buf[..n], unused);
};

// Tells whether the bit at 'pos' is set in the given bitstring. Position 0 is
// the highest order bit of the first byte. Positions beyond the last byte
// are reported as not set; positions within the unused bits of the last byte
// are [[invalid]].
export fn bitstr_isset(bitstr: ([]u8, u8), pos: size) (bool | invalid) = {
	const (data, unused) = bitstr;

	const byteidx = pos / 8;
	if (byteidx >= len(data)) {
		return false;
	};
	const b = data[byteidx];

	const bitidx = pos % 8;
	if (byteidx + 1 == len(data) && bitidx >= (8 - unused)) {
		return invalid;
	};

	const mask = (1 << (7 - bitidx));
	return mask & b == mask;
};

// Consumes the header of an octet string and returns a reader for its data.
// TODO add limit?
export fn octetstrreader(d: *decoder) (bytestream | error) = {
	const h = next(d)?;
	expect_utag(h, utag::OCTET_STRING)?;
	return newbytereader(d);
};

// Reads an octet string into 'buf' and returns its size. Fails if 'buf' is
// too small.
export fn read_octetstr(d: *decoder, buf: []u8) (size | error) = {
	assert(len(buf) > 0);

	const h = next(d)?;
	expect_utag(h, utag::OCTET_STRING)?;
	const nread = read_bytes(d, buf)?;
	return nread;
};

// Reads a null entry. It is [[invalid]] unless it carries no data.
export fn read_null(d: *decoder) (void | error) = {
	const h = next(d)?;
	expect_utag(h, utag::NULL)?;
	if (dsz(h) == 0) {
		return;
	};
	return invalid;
};

// A stream that reads the data of the current entry of a decoder.
export type bytestream = struct {
	stream: io::stream,
	// Decoder the data is read from.
	d: *decoder,
};

// Creates a [[bytestream]] that reads from the current entry of 'd'.
fn newbytereader(d: *decoder) bytestream = bytestream {
	stream = &bytestream_vtable,
	d = d,
	...
};

// Stream operations of [[bytestream]]; only reading is provided.
const bytestream_vtable: io::vtable = io::vtable {
	reader = &bytestream_reader,
	...
};

// Read function of [[bytestream]]. Forwards to the decoder of the stream.
fn bytestream_reader(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
	const bs = s: *bytestream;
	return dataread(bs.d, buf);
};

// Consumes the header of an entry with class 'c' and tag 'tagid' and returns
// a reader for its data in encoded form. The data itself is not validated in
// any way.
export fn bytereader(d: *decoder, c: class, tagid: u32) (bytestream | error) = {
	const h = next(d)?;
	expect_tag(h, c, tagid)?;
	return newbytereader(d);
};

// Reads an UTC time. Since the stored date only has a two digit year, 'maxyear'
// is required to define the epoch switch. For example 'maxyear' = 2046 causes
// all encoded years <= 46 to be after 2000 and all values > 46 will have 1900
// as the century.
//
// The time must be stored as 13 bytes that end with 'Z'; anything that does
// not form a valid date is [[invalid]].
export fn read_utctime(d: *decoder, maxyear: u16) (date::date | error) = {
	assert(maxyear > 100);

	let dh = next(d)?;
	expect_utag(dh, utag::UTC_TIME)?;

	let time: [13]u8 = [0...];
	read_nbytes(d, time[..])?;

	if (time[len(time)-1] != 'Z') {
		return invalid;
	};

	// The year is converted by hand below, so make sure that it really
	// consists of two ASCII digits (0x30 to 0x39).
	if (time[0] < 0x30 || time[0] > 0x39
			|| time[1] < 0x30 || time[1] > 0x39) {
		return invalid;
	};

	let year: u16 = (time[0] - 0x30): u16 * 10 + (time[1] - 0x30): u16;
	let cent = maxyear - (maxyear % 100);
	if (year > maxyear % 100) {
		cent -= 100;
	};

	let v = date::newvirtual();
	v.year = (year + cent): int;
	v.zoff = 0;
	v.nanosecond = 0;

	// The bytes are untrusted input and may not be valid UTF-8.
	const datestr = match (strings::fromutf8(time[2..])) {
	case let s: str =>
		yield s;
	case =>
		return invalid;
	};
	if (!(date::parse(&v, "%m%d%H%M%S%Z", datestr) is void)) {
		return invalid;
	};

	let dt = match (date::realize(v)) {
	case let dt: date::date =>
		yield dt;
	case let e: (date::insufficient | date::invalid) =>
		return invalid;
	};

	return dt;
};

// Reads a generalized datetime. The time must contain the seconds and end
// with the zone info 'Z'. Fractional seconds are optional, but must neither
// be empty nor end with '0'. Anything else is [[invalid]].
export fn read_gtime(d: *decoder) (date::date | error) = {
	let dh = next(d)?;
	expect_utag(dh, utag::GENERALIZED_TIME)?;

	// The date begins with the encoded datetime
	def DATESZ = 14z;
	// followed by optional fractional seconds separated by '.'
	def NANOSZ = 10z;
	def NANOSEPPOS = 14;
	// and ends with the zone info 'Z'
	def ZONESZ = 1z;

	let time: [DATESZ + NANOSZ + ZONESZ]u8 = [0...];
	let n = read_bytes(d, time[..])?;

	// zone info and seconds must always be present. The size is checked
	// first, so that time[n-1] is never evaluated for an empty value.
	if (n < DATESZ + ZONESZ || time[n-1] != 'Z') {
		return invalid;
	};

	// validate fractional seconds
	if (n > DATESZ + ZONESZ) {
		// fractional seconds must not be empty
		if (time[NANOSEPPOS] != '.' || n == DATESZ + ZONESZ + 1) {
			return invalid;
		};
		// fractional seconds must not end with 0 and must be > 0
		if (time[n-2] == '0') return invalid;
	};

	// right pad fractional seconds to make them valid nanoseconds
	time[n-1..] = ['0'...];
	time[NANOSEPPOS] = '.';

	// The bytes are untrusted input and may not be valid UTF-8.
	const datestr = match (strings::fromutf8(time)) {
	case let s: str =>
		yield s;
	case =>
		return invalid;
	};

	match (date::from_str("%Y%m%d%H%M%S.%N", datestr)) {
	case let d: date::date =>
		return d;
	case let e: date::error =>
		return invalid;
	};
};

// Skips an element of type 'tag' and returns the size of the data that has
// been skipped. Returns an error, if the skipped data is invalid.
//
// 'max' limits the amount of data that is skipped for integers and octet
// strings. For bitstrings it must not exceed the size of the internal buffer.
// Only boolean, integer, null, octet string and bitstring are supported; any
// other tag causes an abort.
export fn skip(d: *decoder, tag: utag, max: size) (size | error) = {
	// Scratch space for the skipped data.
	static let buf: [os::BUFSZ]u8 = [0...];
	switch (tag) {
	case utag::BOOLEAN =>
		read_bool(d)?;
		return 1z;
	case utag::INTEGER =>
		let br = bytereader(d, class::UNIVERSAL, utag::INTEGER)?;
		let n = match (io::read(&br, buf)?) {
		case let n: size =>
			yield n;
		case io::EOF =>
			return invalid;
		};
		// The first chunk is sufficient to check the encoding.
		validate_intprefix(buf[..n])?;
		n += streamskip(&br, max, buf)?;
		return n;
	case utag::NULL =>
		read_null(d)?;
		return 0z;
	case utag::OCTET_STRING =>
		let r = octetstrreader(d)?;
		return streamskip(&r, max, buf)?;
	case utag::BITSTRING =>
		assert(max <= len(buf));
		let buf = buf[..max];
		let p = read_bitstr(d, buf)?;
		bytes::zero(p.0);
		// plus the byte holding the number of unused bits
		return len(p.0) + 1;
	case =>
		abort("skip for given utag not implemented");
	};
};

// Reads 'r' until EOF using 'buf' as scratch space and returns the number of
// bytes read. Fails with [[badformat]] as soon as more than 'max' bytes have
// been read. 'buf' is zeroed on return.
fn streamskip(r: io::handle, max: size, buf: []u8) (size | error) = {
	defer bytes::zero(buf);

	let chunk = if (len(buf) > max) buf[..max] else buf[..];
	let total = 0z;
	for (true) {
		const n = match (io::read(r, chunk)?) {
		case io::EOF =>
			return total;
		case let n: size =>
			yield n;
		};

		total += n;
		if (total > max) {
			return badformat;
		};
	};
};
-- 
2.43.0

[PATCH hare v3 4/6] asn1: oid support

Details
Message ID
<20240205145140.361304-4-apreiml@strohwolke.at>
In-Reply-To
<20240205145140.361304-1-apreiml@strohwolke.at> (view parent)
DKIM signature
pass
Download raw message
Patch: +534 -7
encoding/asn1/stdoid/db.txt contains all oids required by the stdlib. It
is in a separate module. Project specific oid databases can be created
with cmd/genoiddb.

Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---

v3: Rework to allow different oid databases in different modules.

 cmd/genoiddb/main.ha                | 209 ++++++++++++++++++++++++++++
 encoding/asn1/+test/decoder_test.ha |   7 -
 encoding/asn1/oid.ha                | 147 +++++++++++++++++++
 encoding/asn1/stdoid/db.ha          | 127 +++++++++++++++++
 encoding/asn1/stdoid/db.txt         |  51 +++++++
 5 files changed, 534 insertions(+), 7 deletions(-)
 create mode 100644 cmd/genoiddb/main.ha
 create mode 100644 encoding/asn1/oid.ha
 create mode 100644 encoding/asn1/stdoid/db.ha
 create mode 100644 encoding/asn1/stdoid/db.txt

diff --git a/cmd/genoiddb/main.ha b/cmd/genoiddb/main.ha
new file mode 100644
index 00000000..5ce36ccb
--- /dev/null
+++ b/cmd/genoiddb/main.ha
@@ -0,0 +1,209 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use ascii;
use bufio;
use fmt;
use io;
use os;
use strconv;
use strings;
use types;

// An oid read from the database file.
type entry = struct {
	// Name of the oid, the first column of a database line.
	name: str,
	// The oid in dot notation, the last column of a database line.
	val: str,
	// Offset of the entry within the generated lookup table.
	idx: size,
};

// Reads an oid database from stdin and prints it as hare code to stdout: the
// database itself, followed by one constant per oid.
export fn main() void = {
	const entries = parse_oids();
	defer free_oids(entries);

	fmt::println("// SPDX-License-Identifier: MPL-2.0\n"
		"// (c) Hare authors <https://harelang.org>\n"
		"// This is an auto generated file. Do not edit.\n"
		"\n"
		"use encoding::asn1;\n")!;

	// the database
	fmt::println("const _db = asn1::oiddb {")!;
	write_db(os::stdout, entries)!;
	fmt::println("\tnames = [")!;
	for (let n = 0z; n < len(entries); n += 1) {
		const name = entries[n].name;
		fmt::printfln("\t\t\"{}\",", name)!;
	};
	fmt::println("\t],")!;
	fmt::println("};\n")!;

	fmt::println("export const db = &_db;\n")!;

	// one constant per oid, its value being the position in the database
	for (let n = 0z; n < len(entries); n += 1) {
		fmt::print("export def ")!;
		write_varname(os::stdout, entries[n].name)!;
		fmt::printfln(": asn1::oid = {};", n)!;
	};
};

// Reads the database entries from stdin. Empty lines and lines starting with
// '#' are skipped. The first space separated column of a line is the name, the
// last one the oid. The result must be freed using [[free_oids]].
fn parse_oids() []entry = {
	let sc = bufio::newscanner(os::stdin, types::SIZE_MAX);
	defer bufio::finish(&sc);
	let db: []entry = [];

	for (true) {
		const line = match (bufio::scan_line(&sc)!) {
		case let line: const str =>
			yield line;
		case io::EOF =>
			break;
		};

		const ignore = line == "" || strings::hasprefix(line, '#');
		if (ignore) {
			continue;
		};

		const cols = strings::split(line, " ");
		defer free(cols);

		append(db, entry {
			name = strings::dup(cols[0]),
			val = strings::dup(cols[len(cols)-1]),
			...
		});
	};

	return db;
};

// Frees the entries returned by [[parse_oids]] including their strings.
fn free_oids(oids: []entry) void = {
	for (let left = len(oids); left > 0; left -= 1) {
		const e = &oids[left - 1];
		free(e.name);
		free(e.val);
	};
	free(oids);
};

// Writes the 'lut' and 'index' fields of the database to 'h'. The lookup table
// holds all oids back to back, each as one length byte followed by the DER
// encoded oid. The index holds the offset of every entry within the lookup
// table; it is also stored in the 'idx' field of the entries.
fn write_db(h: io::handle, oids: []entry) (void | io::error) = {
	fmt::fprint(h, "\tlut = [")?;

	const maxcols = 12z;
	let idx = 0z;

	for (let i = 0z; i < len(oids); i += 1) {
		let e = &oids[i];
		e.idx = idx;

		let der = oidtoder(e.val);
		// the length must fit into the length byte
		assert(len(der) <= 0xff);
		insert(der[0], len(der): u8);
		defer free(der);

		for (let j = 0z; j < len(der); j += 1) {
			fmt::fprint(h, if (idx % maxcols == 0) "\n\t\t" else " ")?;
			fmt::fprintf(h, "0x{:.2x},", der[j])?;
			idx += 1;
		};
	};
	fmt::fprintln(h, "\n\t],")?;

	const maxcols = 9z;
	fmt::fprint(h, "\tindex = [")?;
	for (let i = 0z; i < len(oids); i += 1) {
		fmt::fprint(h, if (i % maxcols == 0) "\n\t\t" else " ")?;
		fmt::fprintf(h, "0x{:.4x},", oids[i].idx)?;
	};
	fmt::fprintln(h, "\n\t],")?;
};

// Converts an oid in dot notation into its DER encoded form. The caller must
// free the result. Aborts if the oid has less than two arcs, if the first arc
// is greater than 2 or if the second arc is 40 or greater.
fn oidtoder(oid: str) []u8 = {
	let nums = oidtou64s(oid);
	defer free(nums);

	let der: []u8 = alloc([0...], 1);
	// The first two arcs share the first byte. With these limits their
	// combination does not overflow an u8.
	assert(len(nums) >= 2);
	assert(nums[0] <= 2);
	assert(nums[1] < 40);
	der[0] = nums[0]: u8 * 40 + nums[1]: u8;
	let end = 1z;

	for (let i = 2z; i < len(nums); i += 1) {
		let n = nums[i];
		if (n == 0) {
			insert(der[end], 0u8);
			end = len(der);
			continue;
		};

		// The groups of seven bits are produced lowest first. Each is
		// inserted at the same position, which leaves them in big
		// endian order. All but the lowest get the continuation bit.
		let first = true;
		for (n > 0) {
			let p: u8 = n: u8 & 0x7f;
			n >>= 7;
			if (first) {
				first = false;
			} else {
				p |= 0x80;
			};
			insert(der[end], p);
		};

		end = len(der);
	};

	return der;
};

// Splits an oid in dot notation into its arcs. The caller must free the
// result. Aborts if an arc is not a valid number.
fn oidtou64s(oid: str) []u64 = {
	let arcs: []u64 = [];
	let tok = strings::tokenize(oid, ".");

	for (true) {
		const part = match (strings::next_token(&tok)) {
		case void =>
			break;
		case let s: str =>
			yield s;
		};
		append(arcs, strconv::stou64(part)!);
	};

	return arcs;
};

// Writes the name of an oid to 'h' in the form that is used for constants:
// letters become upper case, '-' becomes '_', and a '_' is put in front of an
// upper case letter that follows a lower case letter or a digit. Any other
// character is a fatal error.
fn write_varname(h: io::handle, name: str) (void | io::error) = {
	// assume that names are in ascii
	let it = strings::iter(name);
	let afterlow = false;
	for (true) {
		const r = match (strings::next(&it)) {
		case void =>
			break;
		case let r: rune =>
			yield r;
		};

		if (r == '-') {
			afterlow = false;
			fmt::fprint(h, '_')?;
		} else if (ascii::isdigit(r)) {
			afterlow = true;
			fmt::fprint(h, r)?;
		} else if (ascii::isupper(r)) {
			if (afterlow) {
				fmt::fprint(h, "_")?;
				afterlow = false;
			};
			fmt::fprint(h, r)?;
		} else if (ascii::islower(r)) {
			afterlow = true;
			fmt::fprint(h, ascii::toupper(r))?;
		} else {
			fmt::fatalf("Unexpected character in oid name: {}", r);
		};
	};
};

diff --git a/encoding/asn1/+test/decoder_test.ha b/encoding/asn1/+test/decoder_test.ha
index 2282fc40..a6184bde 100644
--- a/encoding/asn1/+test/decoder_test.ha
+++ b/encoding/asn1/+test/decoder_test.ha
@@ -243,13 +243,6 @@ fn d(i: []u8) decoder = {
	assert(read_bitstr(&d([0x03, 0x03, 0x07, 0xab, 0x40]), buf) is invalid);
};

@test fn read_oid() void = {
	assert(read_oid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))! == oid::ID_AT_COMMON_NAME);

	assert(bytes::equal([0x55, 0x04, 0x03],
			read_rawoid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))!));
};

let datbuf: [64]u8 = [0...];

fn newdatetime(s: str, tag: utag) []u8 = {
diff --git a/encoding/asn1/oid.ha b/encoding/asn1/oid.ha
new file mode 100644
index 00000000..414fa4ec
--- /dev/null
+++ b/encoding/asn1/oid.ha
@@ -0,0 +1,147 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bytes;
use errors;
use fmt;
use io;
use math::{divu};
use memio;
use strings;


// An oid database that contains a lookup table of known oids in the DER format.
// A database of oids required by the standard library can be found in
// [[encoding::asn1::stdoid]].
//
// If the standard oid database is missing entries for the given use case, an
// individual database can be generated using the genoiddb command found in
// cmd/. Take a look at encoding/asn1/stdoid/db.txt for an example database
// file.
export type oiddb = struct {
	// All oids back to back, each as one length byte followed by that
	// many bytes of the DER encoded oid.
	lut: []u8,
	// Offset of the length byte of every oid within 'lut'.
	index: []size,
	// Name of every oid.
	names: []str,
};

// Numeric id of an oid which is unique within an [[oiddb]]. It is used as
// position within the 'index' and 'names' tables of the database.
export type oid = u32;

// Reads an oid and looks it up in 'db'. Returns [[badformat]] if 'db' does not
// contain the oid.
export fn read_oid(d: *decoder, db: *oiddb) (oid | error) = {
	const der = read_rawoid(d)?;
	const found = oid_from_der(db, der);
	if (found is void) {
		return badformat;
	};
	return found as oid;
};

// Reads any [[oid]] and returns the DER encoded form. The returned value is
// borrowed from a static buffer and is overwritten by the next call.
//
// Returns [[invalid]] if the oid is empty or ends in the middle of a
// sub-identifier, and [[badformat]] if it does not fit into the buffer.
export fn read_rawoid(d: *decoder) ([]u8 | error) = {
	def OIDBUFSZ: size = 64; // estimated
	static let oidbuf: [OIDBUFSZ]u8 = [0...];

	const dh = next(d)?;
	expect_utag(dh, utag::OID)?;
	// The first two arcs share one sub-identifier, hence an oid such as
	// 2.5 is encoded using a single byte.
	if (dsz(dh) < 1) {
		return invalid;
	};
	const n = read_bytes(d, oidbuf)?;
	if (n == 0 || oidbuf[n-1] & 0x80 != 0) {
		// the last byte of a sub-identifier has the high bit cleared
		return invalid;
	};
	return oidbuf[..n];
};

// Reads a known oid through a small hand written database, and as raw oid.
@test fn read_oid() void = {
	let testdb = oiddb {
		lut = [0x03, 0x2b, 0x65, 0x70, 0x03, 0x55, 0x04, 0x03],
		index = [0, 4],
		names = ["ed25519", "id-at-commonName"],
	};

	const id = read_oid(&d([0x06, 0x03, 0x55, 0x04, 0x03]), &testdb)!;
	assert(id == 1);
	assert(stroid(&testdb, 1) == "id-at-commonName");

	const raw = read_rawoid(&d([0x06, 0x03, 0x55, 0x04, 0x03]))!;
	assert(bytes::equal([0x55, 0x04, 0x03], raw));
};

// Writes 'oid', which must be an id of the [[oiddb]] 'db', in its DER encoded
// form.
export fn write_oid(e: *encoder, db: *oiddb, oid: oid) (void | overflow) = {
	const der = oid_to_der(db, oid);
	return write_fixedprim(e, class::UNIVERSAL, utag::OID, der);
};

// Searches 'db' for the DER encoded oid 'raw'. Returns the [[oid]] of the
// first matching entry, or void if there is none.
export fn oid_from_der(db: *oiddb, raw: []u8) (void | oid) = {
	for (let id = 0z; id < len(db.index); id += 1) {
		// an entry starts with its length byte
		const start = db.index[id] + 1;
		const end = start + db.lut[start - 1];
		if (bytes::equal(raw, db.lut[start..end])) {
			return id: oid;
		};
	};
};

// Returns the DER encoded form of the oid 'o' of 'db'. The result is borrowed
// from 'db'.
export fn oid_to_der(db: *oiddb, o: oid) []u8 = {
	// an entry starts with its length byte
	const start = db.index[o] + 1;
	const end = start + db.lut[start - 1];
	return db.lut[start..end];
};

// Returns the name of the oid 'o' as stored in 'db'. The result is borrowed
// from 'db'.
export fn stroid(db: *oiddb, o: oid) str = {
	const name = db.names[o];
	return name;
};

// Returns the dot id as string. The caller must free returned value.
//
// This function fails with [[errors::invalid]] if 'der' is empty or ends in
// the middle of a sub-identifier, and with [[errors::overflow]] if a
// sub-identifier does not fit into 32 bits.
export fn strrawoid(der: []u8) (str | io::error) = {
	let s = memio::dynamic();
	let ok = false;
	defer if (!ok) io::close(&s)!;

	if (len(der) < 1) {
		return errors::invalid;
	};

	let first = true;
	let el = 0u32;

	for (let i = 0z; i < len(der); i += 1) {
		// The next seven bits must still fit into 'el'.
		if (el >> 25 != 0) {
			return errors::overflow;
		};
		el = (el << 7) | (der[i] & 0x7f);

		if (der[i] & 0x80 != 0) {
			// more bytes of this sub-identifier follow
			continue;
		};

		if (first) {
			// The first sub-identifier holds the first two arcs
			// as a * 40 + b. Since 'a' is at most 2, everything
			// from 80 upwards belongs to the arc 2.
			let a = el / 40;
			let b = el % 40;
			if (a > 2) {
				a = 2;
				b = el - 80;
			};
			fmt::fprintf(&s, "{}.{}", a, b)?;
			first = false;
		} else {
			fmt::fprintf(&s, ".{}", el)?;
		};
		el = 0;
	};

	if (der[len(der) - 1] & 0x80 != 0) {
		// the last sub-identifier is not terminated
		return errors::invalid;
	};

	ok = true;
	return memio::string(&s)!;
};

// Converts the DER encoded oid of rsaEncryption into its dot notation.
@test fn strrawoid() void = {
	const want = "1.2.840.113549.1.1.1";
	let encoded: [_]u8 = [
		0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01,
	];

	const got = strrawoid(encoded)!;
	defer free(got);
	assert(got == want);
};

diff --git a/encoding/asn1/stdoid/db.ha b/encoding/asn1/stdoid/db.ha
new file mode 100644
index 00000000..3bef778c
--- /dev/null
+++ b/encoding/asn1/stdoid/db.ha
@@ -0,0 +1,127 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>
// This is an auto generated file. Do not edit.

use encoding::asn1;

// The oid database generated from db.txt. Every entry of 'lut' consists of a
// length byte followed by the DER encoded oid; 'index' holds the offset of
// every entry within 'lut'.
const _db = asn1::oiddb {
	lut = [
		0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x03, 0x2b,
		0x65, 0x70, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x05,
		0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0e, 0x09, 0x2a,
		0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b, 0x09, 0x2a, 0x86, 0x48,
		0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0c, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7,
		0x0d, 0x01, 0x01, 0x0d, 0x05, 0x2b, 0x0e, 0x03, 0x02, 0x1a, 0x09, 0x60,
		0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04, 0x09, 0x60, 0x86, 0x48,
		0x01, 0x65, 0x03, 0x04, 0x02, 0x01, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65,
		0x03, 0x04, 0x02, 0x02, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04,
		0x02, 0x03, 0x07, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x02, 0x01, 0x08, 0x2a,
		0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07, 0x08, 0x2a, 0x86, 0x48, 0xce,
		0x3d, 0x03, 0x01, 0x07, 0x05, 0x2b, 0x81, 0x04, 0x00, 0x22, 0x05, 0x2b,
		0x81, 0x04, 0x00, 0x23, 0x07, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x04, 0x01,
		0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x04, 0x03, 0x01, 0x08, 0x2a, 0x86,
		0x48, 0xce, 0x3d, 0x04, 0x03, 0x02, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d,
		0x04, 0x03, 0x03, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x04, 0x03, 0x04,
		0x03, 0x55, 0x04, 0x03, 0x03, 0x55, 0x04, 0x04, 0x03, 0x55, 0x04, 0x05,
		0x03, 0x55, 0x04, 0x06, 0x03, 0x55, 0x04, 0x07, 0x03, 0x55, 0x04, 0x08,
		0x03, 0x55, 0x04, 0x0a, 0x03, 0x55, 0x04, 0x0b, 0x03, 0x55, 0x04, 0x0c,
		0x03, 0x55, 0x04, 0x2a, 0x03, 0x55, 0x04, 0x2b, 0x03, 0x55, 0x04, 0x2c,
		0x03, 0x55, 0x04, 0x2e, 0x03, 0x55, 0x04, 0x41, 0x0a, 0x09, 0x92, 0x26,
		0x89, 0x93, 0xf2, 0x2c, 0x64, 0x01, 0x19, 0x03, 0x55, 0x1d, 0x0f, 0x03,
		0x55, 0x1d, 0x11, 0x03, 0x55, 0x1d, 0x13, 0x03, 0x55, 0x1d, 0x25,
	],
	index = [
		0x0000, 0x000a, 0x000e, 0x0018, 0x0022, 0x002c, 0x0036, 0x0040, 0x0046,
		0x0050, 0x005a, 0x0064, 0x006e, 0x0076, 0x007f, 0x0088, 0x008e, 0x0094,
		0x009c, 0x00a5, 0x00ae, 0x00b7, 0x00c0, 0x00c4, 0x00c8, 0x00cc, 0x00d0,
		0x00d4, 0x00d8, 0x00dc, 0x00e0, 0x00e4, 0x00e8, 0x00ec, 0x00f0, 0x00f4,
		0x00f8, 0x0103, 0x0107, 0x010b, 0x010f,
	],
	names = [
		"rsaEncryption",
		"ed25519",
		"sha1WithRSAEncryption",
		"sha224WithRSAEncryption",
		"sha256WithRSAEncryption",
		"sha384WithRSAEncryption",
		"sha512WithRSAEncryption",
		"id-sha1",
		"id-sha224",
		"id-sha256",
		"id-sha384",
		"id-sha512",
		"id-ecPublicKey",
		"prime256v1",
		"ansix9p256r1",
		"ansix9p384r1",
		"ansix9p521r1",
		"ecdsa-with-SHA1",
		"ecdsa-with-SHA224",
		"ecdsa-with-SHA256",
		"ecdsa-with-SHA384",
		"ecdsa-with-SHA512",
		"id-at-commonName",
		"id-at-surname",
		"id-at-serialNumber",
		"id-at-countryName",
		"id-at-localityName",
		"id-at-stateOrProvinceName",
		"id-at-organizationName",
		"id-at-organizationalUnitName",
		"id-at-title",
		"id-at-givenName",
		"id-at-initials",
		"id-at-generationQualifier",
		"id-at-dnQualifier",
		"id-at-pseudonym",
		"id-domainComponent",
		"id-ce-keyUsage",
		"id-ce-subjectAltName",
		"id-ce-basicConstraints",
		"id-ce-extKeyUsage",
	],
};

// The oid database of this module.
export const db = &_db;

// The ids of the oids within [[db]]. The value of each constant is the
// position of its entry in the 'index' and 'names' tables of the database.
export def RSA_ENCRYPTION: asn1::oid = 0;
export def ED25519: asn1::oid = 1;
export def SHA1_WITH_RSAENCRYPTION: asn1::oid = 2;
export def SHA224_WITH_RSAENCRYPTION: asn1::oid = 3;
export def SHA256_WITH_RSAENCRYPTION: asn1::oid = 4;
export def SHA384_WITH_RSAENCRYPTION: asn1::oid = 5;
export def SHA512_WITH_RSAENCRYPTION: asn1::oid = 6;
export def ID_SHA1: asn1::oid = 7;
export def ID_SHA224: asn1::oid = 8;
export def ID_SHA256: asn1::oid = 9;
export def ID_SHA384: asn1::oid = 10;
export def ID_SHA512: asn1::oid = 11;
export def ID_EC_PUBLIC_KEY: asn1::oid = 12;
export def PRIME256V1: asn1::oid = 13;
export def ANSIX9P256R1: asn1::oid = 14;
export def ANSIX9P384R1: asn1::oid = 15;
export def ANSIX9P521R1: asn1::oid = 16;
export def ECDSA_WITH_SHA1: asn1::oid = 17;
export def ECDSA_WITH_SHA224: asn1::oid = 18;
export def ECDSA_WITH_SHA256: asn1::oid = 19;
export def ECDSA_WITH_SHA384: asn1::oid = 20;
export def ECDSA_WITH_SHA512: asn1::oid = 21;
export def ID_AT_COMMON_NAME: asn1::oid = 22;
export def ID_AT_SURNAME: asn1::oid = 23;
export def ID_AT_SERIAL_NUMBER: asn1::oid = 24;
export def ID_AT_COUNTRY_NAME: asn1::oid = 25;
export def ID_AT_LOCALITY_NAME: asn1::oid = 26;
export def ID_AT_STATE_OR_PROVINCE_NAME: asn1::oid = 27;
export def ID_AT_ORGANIZATION_NAME: asn1::oid = 28;
export def ID_AT_ORGANIZATIONAL_UNIT_NAME: asn1::oid = 29;
export def ID_AT_TITLE: asn1::oid = 30;
export def ID_AT_GIVEN_NAME: asn1::oid = 31;
export def ID_AT_INITIALS: asn1::oid = 32;
export def ID_AT_GENERATION_QUALIFIER: asn1::oid = 33;
export def ID_AT_DN_QUALIFIER: asn1::oid = 34;
export def ID_AT_PSEUDONYM: asn1::oid = 35;
export def ID_DOMAIN_COMPONENT: asn1::oid = 36;
export def ID_CE_KEY_USAGE: asn1::oid = 37;
export def ID_CE_SUBJECT_ALT_NAME: asn1::oid = 38;
export def ID_CE_BASIC_CONSTRAINTS: asn1::oid = 39;
export def ID_CE_EXT_KEY_USAGE: asn1::oid = 40;
diff --git a/encoding/asn1/stdoid/db.txt b/encoding/asn1/stdoid/db.txt
new file mode 100644
index 00000000..de4e42d4
--- /dev/null
+++ b/encoding/asn1/stdoid/db.txt
@@ -0,0 +1,51 @@
# OIDs that will be translated into db.ha using `genoiddb`

rsaEncryption                 1.2.840.113549.1.1.1
ed25519                       1.3.101.112

sha1WithRSAEncryption         1.2.840.113549.1.1.5
sha224WithRSAEncryption       1.2.840.113549.1.1.14
sha256WithRSAEncryption       1.2.840.113549.1.1.11
sha384WithRSAEncryption       1.2.840.113549.1.1.12
sha512WithRSAEncryption       1.2.840.113549.1.1.13

id-sha1                       1.3.14.3.2.26
id-sha224                     2.16.840.1.101.3.4.2.4
id-sha256                     2.16.840.1.101.3.4.2.1
id-sha384                     2.16.840.1.101.3.4.2.2
id-sha512                     2.16.840.1.101.3.4.2.3

id-ecPublicKey                1.2.840.10045.2.1
prime256v1                    1.2.840.10045.3.1.7

ansix9p256r1                  1.2.840.10045.3.1.7
ansix9p384r1                  1.3.132.0.34
ansix9p521r1                  1.3.132.0.35

ecdsa-with-SHA1               1.2.840.10045.4.1
ecdsa-with-SHA224             1.2.840.10045.4.3.1
ecdsa-with-SHA256             1.2.840.10045.4.3.2
ecdsa-with-SHA384             1.2.840.10045.4.3.3
ecdsa-with-SHA512             1.2.840.10045.4.3.4

id-at-commonName              2.5.4.3
id-at-surname                 2.5.4.4
id-at-serialNumber            2.5.4.5
id-at-countryName             2.5.4.6
id-at-localityName            2.5.4.7
id-at-stateOrProvinceName     2.5.4.8
id-at-organizationName        2.5.4.10
id-at-organizationalUnitName  2.5.4.11
id-at-title                   2.5.4.12
id-at-givenName               2.5.4.42
id-at-initials                2.5.4.43
id-at-generationQualifier     2.5.4.44
id-at-dnQualifier             2.5.4.46
id-at-pseudonym               2.5.4.65

id-domainComponent            0.9.2342.19200300.100.1.25

id-ce-keyUsage                2.5.29.15
id-ce-subjectAltName          2.5.29.17
id-ce-basicConstraints        2.5.29.19
id-ce-extKeyUsage             2.5.29.37
-- 
2.43.0

[PATCH hare v3 5/6] asn1: add decode support for non utf8 string types

Details
Message ID
<20240205145140.361304-5-apreiml@strohwolke.at>
In-Reply-To
<20240205145140.361304-1-apreiml@strohwolke.at> (view parent)
DKIM signature
pass
Download raw message
Patch: +1214 -0
numeric and ia5, which is basically ascii, are required by some
entries in x.509. UniversalString, BMPString and T61 are only provided
for legacy support.

Note that this only supports a subset of T61. There are still some
certificates in the mozilla trust store that have T61 encoded strings,
though the characters they use may be limited to a subset of ASCII.

Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---
 encoding/asn1/+test/strings_test.ha | 164 +++++++++
 encoding/asn1/charset+test.ha       | 154 ++++++++
 encoding/asn1/strings.ha            | 362 +++++++++++++++++++
 encoding/asn1/t61.ha                | 534 ++++++++++++++++++++++++++++
 4 files changed, 1214 insertions(+)
 create mode 100644 encoding/asn1/+test/strings_test.ha
 create mode 100644 encoding/asn1/charset+test.ha
 create mode 100644 encoding/asn1/strings.ha
 create mode 100644 encoding/asn1/t61.ha

diff --git a/encoding/asn1/+test/strings_test.ha b/encoding/asn1/+test/strings_test.ha
new file mode 100644
index 00000000..f338ed26
--- /dev/null
+++ b/encoding/asn1/+test/strings_test.ha
@@ -0,0 +1,164 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bytes;
use errors;
use fmt;
use io;
use strings;


// Asserts that 'f' accepts exactly the byte values listed in 'chars' and
// rejects every other value from 0 to 255. A mismatch is printed (value,
// expected, actual) before the assertion fails.
fn c_checkrange(chars: []u8, f: *fn (c: u8) bool) void = {
	for (let i = 0z; i < 256; i += 1) {
		const c = i: u8;

		let listed = false;
		for (let j = 0z; !listed && j < len(chars); j += 1) {
			listed = chars[j] == c;
		};

		if (f(c) != listed) {
			fmt::println(i, listed, f(c))!;
		};
		assert(f(c) == listed);
	};
};

// c_is_num must accept the ten digits and the space character only.
@test fn c_is_num() void = {
	const allowed: [_]u8 = [
		' ',
		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	];
	c_checkrange(allowed, &c_is_num);
};

// c_is_print must accept letters, digits, space and the punctuation listed
// below, and nothing else.
@test fn c_is_print() void = {
	const allowed: [_]u8 = [
		// punctuation and space
		' ', '\'', '(', ')', '+', ',', '-', '.', '/', ':', '=', '?',
		// digits
		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
		// upper case letters
		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
		// lower case letters
		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
	];
	c_checkrange(allowed, &c_is_print);
};

// Exercises UTF8String decoding through [[read_utf8str]] and through the
// stream returned by [[strreader]], including strings that end in the middle
// of a multi-byte rune and reads that split a string across several calls.
@test fn utf8() void = {
	let buf: [16]u8 = [0...];

	// a single four byte rune, complete and truncated
	let expected: str = strings::fromutf8([0xf0, 0x90, 0x8d, 0x88])!;
	assert(read_utf8str(&d([0x0c, 0x04, 0xf0, 0x90, 0x8d, 0x88]), buf)!
		== expected);
	assert(read_utf8str(&d([0x0c, 0x03, 0xf0, 0x90, 0x8d]), buf) is invalid);

	bytes::zero(buf);
	let r = strreader(&d([0x0c, 0x04, 0xf0, 0x90, 0x8d, 0x88]), utag::UTF8_STRING)!;
	assert(io::read(&r, buf)! == 4);
	assert(bytes::equal(buf[..4], strings::toutf8(expected)));

	// two ASCII characters followed by the four byte rune
	bytes::zero(buf);
	let expected: str = strings::fromutf8([0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88])!;
	assert(read_utf8str(&d([0x0c, 0x06, 0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88]), buf)!
		== expected);
	assert(read_utf8str(&d([0x0c, 0x05, 0x55, 0x56, 0xf0, 0x90, 0x8d]), buf) is invalid);

	bytes::zero(buf);
	let r = strreader(&d([0x0c, 0x06, 0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88]), utag::UTF8_STRING)!;
	assert(io::read(&r, buf)! == 6);
	assert(bytes::equal(buf[..6], strings::toutf8(expected)));

	let r = strreader(&d([0x0c, 0x05, 0x55, 0x56, 0xf0, 0x90, 0x8d]), utag::UTF8_STRING)!;
	assert(unwrap_err(io::readall(&r, buf[2..]) as io::error) is invalid);

	// a four byte destination holds the two ASCII characters but not the
	// rune that follows, so the first read must stop after two bytes
	bytes::zero(buf);
	let r = strreader(&d([0x0c, 0x06, 0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88]), utag::UTF8_STRING)!;
	assert(io::read(&r, buf[..4])! == 2);
	assert(io::read(&r, buf[2..])! == 4);
	assert(bytes::equal(buf[..6], strings::toutf8(expected)));

	// same split read, but the trailing rune is truncated
	bytes::zero(buf);
	let r = strreader(&d([0x0c, 0x05, 0x55, 0x56, 0xf0, 0x90, 0x8d]), utag::UTF8_STRING)!;
	assert(io::read(&r, buf[..4])! == 2);
	assert(unwrap_err(io::readall(&r, buf[2..]) as io::error) is invalid);
};

// Exercises TeletexString (T.61) decoding through [[strreader]]: a complete
// string with combining characters, a string that ends after a combining
// character, and a destination buffer that is too small for the next rune.
@test fn t61() void = {
	// tag 0x14, length 0x29, followed by the T.61 encoded text
	let input: [_]u8 = [
		0x14, 0x29,
		0x42, 0xc8, 0x61, 0x72, 0x65, 0x6e, 0x20, 0x76, 0x65, 0x72,
		0x7a, 0x65, 0x68, 0x72, 0x65, 0x6e, 0x20, 0x67, 0x65, 0x72,
		0x6e, 0x65, 0x20, 0xc8, 0x75, 0x62, 0x65, 0x72, 0x6d, 0xc8,
		0x61, 0xfb, 0x69, 0x67, 0x20, 0x48, 0x6f, 0x6e, 0x69, 0x67,
		0x0a,
	];

	// the same text as UTF-8
	const expected: [_]u8 = [
		0x42, 0xc3, 0xa4, 0x72, 0x65, 0x6e, 0x20, 0x76, 0x65, 0x72,
		0x7a, 0x65, 0x68, 0x72, 0x65, 0x6e, 0x20, 0x67, 0x65, 0x72,
		0x6e, 0x65, 0x20, 0xc3, 0xbc, 0x62, 0x65, 0x72, 0x6d, 0xc3,
		0xa4, 0xc3, 0x9f, 0x69, 0x67, 0x20, 0x48, 0x6f, 0x6e, 0x69,
		0x67, 0x0a,
	];

	let dec = d(input);
	let r = strreader(&dec, utag::TELETEX_STRING)!;
	let result = io::drain(&r)!;
	defer free(result);
	assert(bytes::equal(expected, result));
	assert(trypeek(&dec) is io::EOF);

	// cut off multibyte char
	// (length 2: the data ends right after the combining byte 0xc8)
	input[1] = 0x2;
	let r = strreader(&d(input[..4]), utag::TELETEX_STRING)!;
	assert(unwrap_err(io::drain(&r) as io::error) is invalid);

	// not enough space for multibyte char
	// (the 25 input bytes decode to 25 bytes of UTF-8, the last rune
	// being two bytes long; only 23 bytes fit in front of it)
	let buf: [24]u8 = [0...];
	let in = input[..27];
	in[1] = (len(in) - 2): u8;
	let dec = d(in);
	let r = strreader(&dec, utag::TELETEX_STRING)!;
	assert(io::read(&r, buf)! == 23);
	assert(trypeek(&dec) is badformat);

	// acute accent (0xc2) combined with 'e'
	let r = strreader(&d([
		0x14, 0x0f, 0x63, 0x6c, 0xc2, 0x65, 0x73, 0x20, 0x70, 0x75,
		0x62, 0x6c, 0x69, 0x71, 0x75, 0x65, 0x73,
	]), utag::TELETEX_STRING)!;
	let b = io::drain(&r)!;
	defer free(b);

	assert(strings::fromutf8(b)! == "cl\u00e9s publiques");
};

// Decodes a BMPString containing one, two and three byte UTF-8 runes through
// [[strreader]] and compares the result with the expected UTF-8 bytes.
@test fn bmp() void = {
	// tag 0x1e, length 0x26, followed by big-endian 16 bit code units
	let input: [_]u8 = [
		0x1e, 0x26,
		0x00, 0x48, 0x00, 0xe4, 0x00, 0x72, 0x00, 0x65, 0x00, 0x6c,
		0x00, 0x61, 0x00, 0x6e, 0x00, 0x67, 0x00, 0x20, 0x00, 0x69,
		0x01, 0x61, 0x00, 0x20, 0x00, 0x6e, 0x00, 0x65, 0x00, 0x61,
		0x00, 0x74, 0x00, 0x6f, 0x00, 0x20, 0x27, 0x64,
	];

	// the same text as UTF-8
	const expected: [_]u8 = [
		0x48, 0xc3, 0xa4, 0x72, 0x65, 0x6c, 0x61, 0x6e, 0x67, 0x20,
		0x69, 0xc5, 0xa1, 0x20, 0x6e, 0x65, 0x61, 0x74, 0x6f, 0x20,
		0xe2, 0x9d, 0xa4,
	];

	let dec = d(input);
	let r = strreader(&dec, utag::BMP_STRING)!;
	let result = io::drain(&r)!;
	defer free(result);
	assert(bytes::equal(expected, result));
	// the whole element must have been consumed
	assert(trypeek(&dec) is io::EOF);
};
diff --git a/encoding/asn1/charset+test.ha b/encoding/asn1/charset+test.ha
new file mode 100644
index 00000000..acf66f9b
--- /dev/null
+++ b/encoding/asn1/charset+test.ha
@@ -0,0 +1,154 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use ascii;
use bytes;
use fmt;
use io;
use memio;


// Writes a table of all single byte T.61 characters from 0x00 to 0xff to
// 'dest', sixteen cells per row, every cell followed by \t. Invalid characters
// will not be printed. The table is followed by one line per combining
// character (0xc1 to 0xcf, without 0xcc) listing all accented letters it can
// form, as defined in the two bytes chapter at
// https://en.wikipedia.org/wiki/T.51/ISO/IEC_6937
fn print_t61_table(dest: io::handle) void = {
	// header row with the column indices
	for (let i = 0z; i < 16; i += 1) {
		fmt::fprintf(dest, "{:x}\t", i)!;
	};
	fmt::fprintln(dest)!;

	for (let i = 0z; i < 256; i += 1) {
		if (i % 16 == 0) {
			fmt::fprintln(dest)!;
		};
		match (t61_chardecode([i: u8])) {
		case insufficient =>
			// combining characters leave their cell empty
			yield;
		case invalid =>
			yield;
		case let r: rune =>
			if (i > 0xa0 || (ascii::isprint(r) && !ascii::isspace(r))) {
				fmt::fprint(dest, r)!;
			} else {
				fmt::fprintf(dest, "x{:.4x}", r: u32)!;
			};
		};

		// Every cell, including the last one of a row, is terminated
		// by a tab. This used to be guarded by 'i + 1 % 16 != 0', which
		// is always true because % binds tighter than +. The trailing
		// tab is part of t61_test_table and therefore kept.
		fmt::fprint(dest, "\t")!;
	};

	fmt::fprintln(dest)!;

	for (let i = 0xc1u8; i < 0xd0; i += 1) {
		if (i == 0xcc) continue;

		// a combining character on its own must ask for more input
		assert(t61_chardecode([i]) is insufficient);

		fmt::fprintf(dest, "{:.2x}\t", i)!;
		for (let j = 0x41u32; j < 0x7b; j += 1) {
			if (!ascii::isprint(j: rune)) {
				continue;
			};
			match (t61_chardecode([i, j: u8])) {
			case let r: rune =>
				fmt::fprint(dest, r)!;
			case =>
				// no such combination
				yield;
			};
		};
		fmt::fprintln(dest)!;
	};
};

// Renders the T.61 table into memory and compares it byte by byte with the
// reference output stored in t61_test_table.
@test fn t61encode() void = {
	let sink = memio::dynamic();
	defer io::close(&sink)!;

	print_t61_table(&sink);

	const rendered = memio::buffer(&sink);
	assert(bytes::equal(t61_test_table, rendered));
};

// Print this table as UTF-8 to visually check the characters.
const t61_test_table: [_]u8 = [
	0x30, 0x09, 0x31, 0x09, 0x32, 0x09, 0x33, 0x09, 0x34, 0x09, 0x35, 0x09,
	0x36, 0x09, 0x37, 0x09, 0x38, 0x09, 0x39, 0x09, 0x61, 0x09, 0x62, 0x09,
	0x63, 0x09, 0x64, 0x09, 0x65, 0x09, 0x66, 0x09, 0x0a, 0x0a, 0x09, 0x09,
	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x78, 0x30, 0x30, 0x30,
	0x61, 0x09, 0x09, 0x78, 0x30, 0x30, 0x30, 0x63, 0x09, 0x78, 0x30, 0x30,
	0x30, 0x64, 0x09, 0x09, 0x09, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
	0x09, 0x09, 0x09, 0x09, 0x78, 0x30, 0x30, 0x31, 0x61, 0x09, 0x78, 0x30,
	0x30, 0x31, 0x62, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x78, 0x30, 0x30,
	0x32, 0x30, 0x09, 0x21, 0x09, 0x22, 0x09, 0x09, 0x09, 0x25, 0x09, 0x26,
	0x09, 0x27, 0x09, 0x28, 0x09, 0x29, 0x09, 0x2a, 0x09, 0x2b, 0x09, 0x2c,
	0x09, 0x2d, 0x09, 0x2e, 0x09, 0x2f, 0x09, 0x0a, 0x30, 0x09, 0x31, 0x09,
	0x32, 0x09, 0x33, 0x09, 0x34, 0x09, 0x35, 0x09, 0x36, 0x09, 0x37, 0x09,
	0x38, 0x09, 0x39, 0x09, 0x3a, 0x09, 0x3b, 0x09, 0x3c, 0x09, 0x3d, 0x09,
	0x3e, 0x09, 0x3f, 0x09, 0x0a, 0x40, 0x09, 0x41, 0x09, 0x42, 0x09, 0x43,
	0x09, 0x44, 0x09, 0x45, 0x09, 0x46, 0x09, 0x47, 0x09, 0x48, 0x09, 0x49,
	0x09, 0x4a, 0x09, 0x4b, 0x09, 0x4c, 0x09, 0x4d, 0x09, 0x4e, 0x09, 0x4f,
	0x09, 0x0a, 0x50, 0x09, 0x51, 0x09, 0x52, 0x09, 0x53, 0x09, 0x54, 0x09,
	0x55, 0x09, 0x56, 0x09, 0x57, 0x09, 0x58, 0x09, 0x59, 0x09, 0x5a, 0x09,
	0x5b, 0x09, 0x09, 0x5d, 0x09, 0x09, 0x5f, 0x09, 0x0a, 0x09, 0x61, 0x09,
	0x62, 0x09, 0x63, 0x09, 0x64, 0x09, 0x65, 0x09, 0x66, 0x09, 0x67, 0x09,
	0x68, 0x09, 0x69, 0x09, 0x6a, 0x09, 0x6b, 0x09, 0x6c, 0x09, 0x6d, 0x09,
	0x6e, 0x09, 0x6f, 0x09, 0x0a, 0x70, 0x09, 0x71, 0x09, 0x72, 0x09, 0x73,
	0x09, 0x74, 0x09, 0x75, 0x09, 0x76, 0x09, 0x77, 0x09, 0x78, 0x09, 0x79,
	0x09, 0x7a, 0x09, 0x09, 0x7c, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x09, 0x09,
	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x78, 0x30, 0x30,
	0x38, 0x62, 0x09, 0x78, 0x30, 0x30, 0x38, 0x63, 0x09, 0x09, 0x09, 0x09,
	0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
	0x78, 0x30, 0x30, 0x39, 0x62, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x78,
	0x30, 0x30, 0x61, 0x30, 0x09, 0xc2, 0xa1, 0x09, 0xc2, 0xa2, 0x09, 0xc2,
	0xa3, 0x09, 0x24, 0x09, 0xc2, 0xa5, 0x09, 0x23, 0x09, 0xc2, 0xa7, 0x09,
	0xc2, 0xa4, 0x09, 0x09, 0x09, 0xc2, 0xab, 0x09, 0x09, 0x09, 0x09, 0x09,
	0x0a, 0xc2, 0xb0, 0x09, 0xc2, 0xb1, 0x09, 0xc2, 0xb2, 0x09, 0xc2, 0xb3,
	0x09, 0xc3, 0x97, 0x09, 0xc2, 0xb5, 0x09, 0xc2, 0xb6, 0x09, 0xc2, 0xb7,
	0x09, 0xc3, 0xb7, 0x09, 0x09, 0x09, 0xc2, 0xbb, 0x09, 0xc2, 0xbc, 0x09,
	0xc2, 0xbd, 0x09, 0xc2, 0xbe, 0x09, 0xc2, 0xbf, 0x09, 0x0a, 0x09, 0x09,
	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
	0x09, 0x09, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0a, 0xe2, 0x84, 0xa6, 0x09,
	0xc3, 0x86, 0x09, 0xc3, 0x90, 0x09, 0xc2, 0xaa, 0x09, 0xc4, 0xa6, 0x09,
	0x09, 0xc4, 0xb2, 0x09, 0xc4, 0xbf, 0x09, 0xc5, 0x81, 0x09, 0xc3, 0x98,
	0x09, 0xc5, 0x92, 0x09, 0xc2, 0xba, 0x09, 0xc3, 0x9e, 0x09, 0xc5, 0xa6,
	0x09, 0xc5, 0x8a, 0x09, 0xc5, 0x89, 0x09, 0x0a, 0xc4, 0xb8, 0x09, 0xc3,
	0xa6, 0x09, 0xc4, 0x91, 0x09, 0xc3, 0xb0, 0x09, 0xc4, 0xa7, 0x09, 0xc4,
	0xb1, 0x09, 0xc4, 0xb3, 0x09, 0xc5, 0x80, 0x09, 0xc5, 0x82, 0x09, 0xc3,
	0xb8, 0x09, 0xc5, 0x93, 0x09, 0xc3, 0x9f, 0x09, 0xc3, 0xbe, 0x09, 0xc5,
	0xa7, 0x09, 0xc5, 0x8b, 0x09, 0x09, 0x0a, 0x63, 0x31, 0x09, 0xc3, 0x80,
	0xc3, 0x88, 0xc3, 0x8c, 0xc3, 0x92, 0xc3, 0x99, 0xc3, 0xa0, 0xc3, 0xa8,
	0xc3, 0xac, 0xc3, 0xb2, 0xc3, 0xb9, 0x0a, 0x63, 0x32, 0x09, 0xc3, 0x81,
	0xc4, 0x86, 0xc3, 0x89, 0xc3, 0x8d, 0xc4, 0xb9, 0xc5, 0x83, 0xc3, 0x93,
	0xc5, 0x94, 0xc5, 0x9a, 0xc3, 0x9a, 0xc3, 0x9d, 0xc5, 0xb9, 0xc3, 0xa1,
	0xc4, 0x87, 0xc3, 0xa9, 0xc4, 0xa3, 0xc3, 0xad, 0xc4, 0xba, 0xc5, 0x84,
	0xc3, 0xb3, 0xc5, 0x95, 0xc5, 0x9b, 0xc3, 0xba, 0xc3, 0xbd, 0xc5, 0xba,
	0x0a, 0x63, 0x33, 0x09, 0xc3, 0x82, 0xc4, 0x88, 0xc3, 0x8a, 0xc4, 0x9c,
	0xc4, 0xa4, 0xc3, 0x8e, 0xc4, 0xb4, 0xc3, 0x94, 0xc5, 0x9c, 0xc3, 0x9b,
	0xc5, 0xb4, 0xc5, 0xb6, 0xc3, 0xa2, 0xc4, 0x89, 0xc3, 0xaa, 0xc4, 0x9d,
	0xc4, 0xa5, 0xc3, 0xae, 0xc4, 0xb5, 0xc3, 0xb4, 0xc5, 0x9d, 0xc3, 0xbb,
	0xc5, 0xb5, 0xc5, 0xb7, 0x0a, 0x63, 0x34, 0x09, 0xc3, 0x83, 0xc4, 0xa8,
	0xc3, 0x91, 0xc3, 0x95, 0xc5, 0xa8, 0xc3, 0xa3, 0xc4, 0xa9, 0xc3, 0xb1,
	0xc3, 0xb5, 0xc5, 0xa9, 0x0a, 0x63, 0x35, 0x09, 0xc4, 0x80, 0xc4, 0x92,
	0xc4, 0xaa, 0xc5, 0x8c, 0xc5, 0xaa, 0xc4, 0x81, 0xc4, 0x93, 0xc4, 0xab,
	0xc5, 0x8d, 0xc5, 0xab, 0x0a, 0x63, 0x36, 0x09, 0xc4, 0x82, 0xc4, 0x9e,
	0xc5, 0xac, 0xc4, 0x83, 0xc4, 0x9f, 0xc5, 0xad, 0x0a, 0x63, 0x37, 0x09,
	0xc4, 0x8a, 0xc4, 0x96, 0xc4, 0xa0, 0xc4, 0xb0, 0xc5, 0xbb, 0xc4, 0x8b,
	0xc4, 0x97, 0xc4, 0xa1, 0xc5, 0xbc, 0x0a, 0x63, 0x38, 0x09, 0xc3, 0x84,
	0xc3, 0x8b, 0xc3, 0x8f, 0xc3, 0x96, 0xc3, 0x9c, 0xc5, 0xb8, 0xc3, 0xa4,
	0xc3, 0xab, 0xc3, 0xaf, 0xc3, 0xb6, 0xc3, 0xbc, 0xc3, 0xbf, 0x0a, 0x63,
	0x39, 0x09, 0xc3, 0x84, 0xc3, 0x8b, 0xc3, 0x8f, 0xc3, 0x96, 0xc3, 0x9c,
	0xc5, 0xb8, 0xc3, 0xa4, 0xc3, 0xab, 0xc3, 0xaf, 0xc3, 0xb6, 0xc3, 0xbc,
	0xc3, 0xbf, 0x0a, 0x63, 0x61, 0x09, 0xc3, 0x85, 0xc5, 0xae, 0xc3, 0xa5,
	0xc5, 0xaf, 0x0a, 0x63, 0x62, 0x09, 0xc3, 0x87, 0xc4, 0xa2, 0xc4, 0xb6,
	0xc4, 0xbb, 0xc5, 0x85, 0xc5, 0x96, 0xc5, 0x9e, 0xc5, 0xa2, 0xc3, 0xa7,
	0xc4, 0xb7, 0xc4, 0xbc, 0xc5, 0x86, 0xc5, 0x97, 0xc5, 0x9f, 0xc5, 0xa3,
	0x0a, 0x63, 0x64, 0x09, 0xc5, 0x90, 0xc5, 0xb0, 0xc5, 0x91, 0xc5, 0xb1,
	0x0a, 0x63, 0x65, 0x09, 0xc4, 0x84, 0xc4, 0x98, 0xc4, 0xae, 0xc5, 0xb2,
	0xc4, 0x85, 0xc4, 0x99, 0xc4, 0xaf, 0xc5, 0xb3, 0x0a, 0x63, 0x66, 0x09,
	0xc4, 0x8c, 0xc4, 0x8e, 0xc4, 0x9a, 0xc4, 0xbd, 0xc5, 0x87, 0xc5, 0x98,
	0xc5, 0xa0, 0xc5, 0xa4, 0xc5, 0xbd, 0xc4, 0x8d, 0xc4, 0x8f, 0xc4, 0x9b,
	0xc4, 0xbe, 0xc5, 0x88, 0xc5, 0x99, 0xc5, 0xa1, 0xc5, 0xa5, 0xc5, 0xbe,
	0x0a,
];

diff --git a/encoding/asn1/strings.ha b/encoding/asn1/strings.ha
new file mode 100644
index 00000000..a381958b
--- /dev/null
+++ b/encoding/asn1/strings.ha
@@ -0,0 +1,362 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use encoding::utf8;
use endian;
use errors;
use io;
use strings;


// Character class flag: the character is allowed in a NumericString.
def N: u8 = 0o1;

// Character class flag: the character is allowed in a PrintableString.
def P: u8 = 0o2;

// Lookup table of character class bitfields (see N and P), indexed by the
// 7-bit character code. The row labels on the right are in octal.
const cclass: [_]u8 = [
//	 0	 1	 2	 3	 4	 5	 6	 7
	0,	0,	0,	0,	0,	0,	0,	0,	// 0
	0,	0,	0,	0,	0,	0,	0,	0,	// 10
	0,	0,	0,	0,	0,	0,	0,	0,	// 20
	0,	0,	0,	0,	0,	0,	0,	0,	// 30
	N|P,	0,	0,	0,	0,	0,	0,	P,	// 40
	P,	P,	0,	P,	P,	P,	P,	P,	// 50
	N|P,	N|P,	N|P,	N|P,	N|P,	N|P,	N|P,	N|P,	// 60
	N|P,	N|P,	P,	0,	0,	P,	0,	P,	// 70
	0,	P,	P,	P,	P,	P,	P,	P,	// 100
	P,	P,	P,	P,	P,	P,	P,	P,	// 110
	P,	P,	P,	P,	P,	P,	P,	P,	// 120
	P,	P,	P,	0,	0,	0,	0,	0,	// 130
	0,	P,	P,	P,	P,	P,	P,	P,	// 140
	P,	P,	P,	P,	P,	P,	P,	P,	// 150
	P,	P,	P,	P,	P,	P,	P,	P,	// 160
	P,	P,	P,	0,	0,	0,	0,	0,	// 170
];

// Predicate that tells whether the byte 'c' is allowed in a string type.
type char_validator = fn (c: u8) bool;

// Reports whether 'c' may appear in a NumericString, i.e. whether it is a
// 7-bit character flagged with N in cclass.
fn c_is_num(c: u8) bool = {
	if (c >= 0x80) {
		return false;
	};
	return (cclass[c] & N) != 0;
};

// Reports whether 'c' may appear in a PrintableString, i.e. whether it is a
// 7-bit character flagged with P in cclass.
fn c_is_print(c: u8) bool = {
	if (c >= 0x80) {
		return false;
	};
	return (cclass[c] & P) != 0;
};

// Reports whether 'c' may appear in an IA5String: any 7-bit value is accepted.
fn c_is_ia5(c: u8) bool = {
	return c < 0x80;
};

// Returns the number of bytes of the biggest complete utf8 chunk at the start
// of 'buf'. A rune that is cut off at the end of 'buf' is not part of the
// chunk. Returns invalid, if the biggest complete chunk contains invalid utf8
// characters or if 'buf' ends with continuation bytes that do not belong to
// any rune.
fn validutf8(buf: []u8) (size | invalid) = {
	if (len(buf) == 0) {
		return 0z;
	};

	// A rune is at most four bytes long, so the first byte of the last
	// (possibly incomplete) rune is one of the last four bytes.
	const min = if (len(buf) < 4) 0z else len(buf) - 4;

	let lastvalid = 0z;
	let lastsz = 0z;
	for (let i = min; i < len(buf); i += 1) {
		match (utf8::utf8sz(buf[i])) {
		case utf8::invalid =>
			// continuation byte or invalid first byte
			yield;
		case let s: size =>
			lastsz = s;
			lastvalid = i;
		};
	};

	// no rune starts within the last four bytes
	if (lastsz == 0) return invalid;

	// Only a tail that is shorter than the length announced by its first
	// byte is an incomplete rune. A tail that is longer ends with a stray
	// continuation byte; validate the whole buffer in that case so that it
	// is reported as invalid instead of being deferred as incomplete.
	const n = if (len(buf) - lastvalid >= lastsz) len(buf) else lastvalid;
	if (utf8::validate(buf[..n]) is utf8::invalid) {
		return invalid;
	};

	return n;
};

// Checks validutf8 against every prefix of a buffer made of one, two, three
// and four byte runes, and against a buffer with a broken last rune.
@test fn validutf8() void = {
	let b: [_]u8 = [
		0x55, 0x56, 0xd0, 0x98, 0xe0, 0xa4, 0xb9, 0xf0, 0x90, 0x8d, 0x88
	];
	// runesat[i] is the expected result for the first i bytes of 'b'
	const runesat: [_]size = [0, 1, 2, 2, 4, 4, 4, 7, 7, 7, 7, 11];

	// '<=' so that the complete buffer is checked as well
	for (let i = 0z; i <= len(b); i += 1) {
		assert(validutf8(b[..i])! == runesat[i]);
	};

	// replace the last continuation byte of the four byte rune
	b[10] = 0x55;
	assert(validutf8(b[..10])! == 7);
	assert(validutf8(b) is invalid);
};

// An io::stream reader that returns only valid utf8 chunks on read.
export type utf8stream = struct {
	stream: io::stream,
	// Decoder the raw string data is read from.
	d: *decoder,
	// Converts the raw data of the string type at hand into utf8.
	strdec: *strdecoder,
};

// Stream operations of [[utf8stream]]. Only reading is provided.
const utf8stream_vtable = io::vtable {
	reader = &utf8stream_reader,
	...
};

// Reader of [[utf8stream]]: hands 'buf' to the string decoder of the stream
// and returns its result. 'buf' must provide space for at least one rune
// (four bytes). Aborts if the decoder has no current element.
fn utf8stream_reader(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
	// at least a rune must fit in buf
	assert(len(buf) >= 4);

	const us = s: *utf8stream;
	if (us.d.cur is void) {
		abort();
	};

	return us.strdec(us, buf);
};

// Function that reads string data from the decoder of 's' and stores it as
// utf8 in 'buf'. Returns the number of bytes stored in 'buf'.
export type strdecoder = fn(
	s: *utf8stream,
	buf: []u8,
) (size | io::EOF | io::error);

// String decoder that passes the data through without any checks.
fn no_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	return dataread(s.d, buf);
};

// Reads string data into 'buf' and checks every byte read with 'v'. Fails
// with a wrapped [[invalid]] as soon as a byte is rejected.
fn char_decoder(
	s: *utf8stream, buf: []u8,
	v: *char_validator,
) (size | io::EOF | io::error) = {
	const got = match (dataread(s.d, buf)?) {
	case io::EOF =>
		return io::EOF;
	case let sz: size =>
		yield sz;
	};

	const chunk = buf[..got];
	for (let i = 0z; i < len(chunk); i += 1) {
		if (!v(chunk[i])) {
			return wrap_err(invalid);
		};
	};
	return got;
};

// String decoder for NumericString.
fn num_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	return char_decoder(s, buf, &c_is_num);
};

// String decoder for PrintableString.
fn print_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	return char_decoder(s, buf, &c_is_print);
};

// String decoder for IA5String.
fn ia5_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	return char_decoder(s, buf, &c_is_ia5);
};

// String decoder for UTF8String. Reads data into 'buf' and returns the length
// of the part that consists of complete, valid runes. The bytes of a rune
// that is cut off at the end of the read are handed back to the decoder with
// dataunread. Fails with a wrapped [[invalid]] on invalid utf8 or if the
// string ends within a rune.
fn utf8_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	const got = match (dataread(s.d, buf)?) {
	case io::EOF =>
		// bytes handed back earlier never got completed
		if (s.d.unbufn > 0) {
			return wrap_err(invalid);
		};
		return io::EOF;
	case let sz: size =>
		yield sz;
	};

	const complete = match (validutf8(buf[..got])) {
	case invalid =>
		return wrap_err(invalid);
	case let sz: size =>
		yield sz;
	};

	if (complete >= got) {
		return got;
	};

	if (dataeof(s.d)) {
		// string ends with incomplete rune
		return wrap_err(invalid);
	};

	// keep the incomplete rune for the next read
	dataunread(s.d, buf[complete..got]);
	return complete;
};

// String decoder for BMPString. A bmp string is a sequence of big-endian
// 16 bit code units, each of which encodes one code point of the basic
// multilingual plane. Code units of the surrogate range (0xd800 to 0xdfff)
// are no code points and can not be encoded as utf8; they are rejected with
// a wrapped [[invalid]], as is a string that ends within a code unit.
//
// Returns the number of utf8 bytes stored in 'buf'. If the next rune does not
// fit into 'buf', its code unit is handed back to the decoder and the bytes
// decoded so far are returned.
fn bmp_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	// TODO disallow control functions (X.690: 8.23.9)

	let n = 0z;
	let rbuf: [2]u8 = [0...];
	for (true) {
		match (dataread(s.d, rbuf)?) {
		case let sz: size =>
			if (sz < 2) return wrap_err(invalid);
		case io::EOF =>
			return if (n == 0) io::EOF else n;
		};

		const unit = endian::begetu16(rbuf);
		if (unit >= 0xd800 && unit <= 0xdfff) {
			return wrap_err(invalid);
		};

		let rb = utf8::encoderune(unit: rune);
		if (len(buf) - n < len(rb)) {
			dataunread(s.d, rbuf);
			return n;
		};

		buf[n..n + len(rb)] = rb;
		n += len(rb);
	};
};

// String decoder for UniversalString. Universal string is an UTF32BE string,
// i.e. every rune is stored as a big-endian 32 bit value. Values that are no
// Unicode code points (above 0x10ffff or within the surrogate range 0xd800 to
// 0xdfff) are rejected with a wrapped [[invalid]], as is a string that ends
// within a 32 bit value.
//
// Returns the number of utf8 bytes stored in 'buf'. If the next rune does not
// fit into 'buf', its bytes are handed back to the decoder and the bytes
// decoded so far are returned.
fn universal_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	let n = 0z;
	let rbuf: [4]u8 = [0...];
	for (true) {
		match (dataread(s.d, rbuf)?) {
		case let sz: size =>
			if (sz < 4) return wrap_err(invalid);
		case io::EOF =>
			return if (n == 0) io::EOF else n;
		};

		const cp = endian::begetu32(rbuf);
		if (cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff)) {
			return wrap_err(invalid);
		};

		let rb = utf8::encoderune(cp: rune);
		if (len(buf) - n < len(rb)) {
			dataunread(s.d, rbuf);
			return n;
		};

		buf[n..n + len(rb)] = rb;
		n += len(rb);
	};
};

// String decoder for TeletexString (T.61). Reads the data byte by byte,
// decodes every character with [[t61_chardecode]] and stores it as utf8 in
// 'buf'. A combining character is kept until the character it combines with
// has been read. Fails with a wrapped [[invalid]] on characters that can not
// be decoded or if the string ends after a combining character.
//
// Returns the number of utf8 bytes stored in 'buf'. If the next rune does not
// fit into 'buf', its bytes are handed back to the decoder and the bytes
// decoded so far are returned.
fn t61_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = {
	let inbuf: [2]u8 = [0...];
	let in = inbuf[..0];

	let n = 0z;

	for (true) {
		let chr: [1]u8 = [0];
		match (dataread(s.d, chr)?) {
		case let sz: size =>
			assert(sz == 1);
			static append(in, chr[0]);
		case io::EOF =>
			// a combining character without its base character
			if (len(in) > 0) return wrap_err(invalid);
			if (n > 0) return n;
			return io::EOF;
		};

		match (t61_chardecode(in)) {
		case let r: rune =>
			let raw = utf8::encoderune(r);
			const bufremain = len(buf) - n;
			// '<=': a rune that fills the remaining space exactly
			// does fit
			if (len(raw) <= bufremain) {
				buf[n..n + len(raw)] = raw[..];
				n += len(raw);
				in = inbuf[..0];
			} else {
				dataunread(s.d, in);
				break;
			};
		case insufficient =>
			// leave combining char in in
			yield;
		case invalid =>
			return wrap_err(invalid);
		};
	};

	return n;
};

// Creates an [[utf8stream]] that reads from 'd' using the string decoder that
// belongs to the tag 't'. Returns [[invalid]] if 't' is not one of the
// supported string tags. The header of the element is not read here.
fn newstrreader(d: *decoder, t: utag) (utf8stream | error) = {
	const decodefn: *strdecoder = switch (t) {
	case utag::UTF8_STRING =>
		yield &utf8_decoder;
	case utag::NUMERIC_STRING =>
		yield &num_decoder;
	case utag::PRINTABLE_STRING =>
		yield &print_decoder;
	case utag::IA5_STRING =>
		yield &ia5_decoder;
	case utag::TELETEX_STRING =>
		yield &t61_decoder;
	case utag::BMP_STRING =>
		yield &bmp_decoder;
	case utag::UNIVERSAL_STRING =>
		yield &universal_decoder;
	case =>
		return invalid;
	};

	return utf8stream {
		stream = &utf8stream_vtable,
		d = d,
		strdec = decodefn,
		...
	};
};

// Reads the header of the next element, checks that it is tagged with 't' and
// returns an [[utf8stream]] that provides its content as utf8. 't' must be
// one of the supported string tags, the program aborts otherwise:
//   * utag::NUMERIC_STRING
//   * utag::PRINTABLE_STRING
//   * utag::IA5_STRING
//   * utag::UTF8_STRING
//   * utag::TELETEX_STRING
//   * utag::BMP_STRING
//   * utag::UNIVERSAL_STRING
export fn strreader(d: *decoder, t: utag) (utf8stream | error) = {
	const hdr = next(d)?;
	expect_utag(hdr, t)?;

	const reader = newstrreader(d, t)!;
	return reader;
};

// Reads a PrintableString into 'buf' and returns its length. Returns
// [[invalid]] if the string contains a character that is not allowed in a
// PrintableString.
export fn read_printstr(d: *decoder, buf: []u8) (size | error) = {
	const hdr = next(d)?;
	expect_utag(hdr, utag::PRINTABLE_STRING)?;

	const n = read_bytes(d, buf)?;
	const data = buf[..n];
	for (let i = 0z; i < len(data); i += 1) {
		if (c_is_print(data[i])) {
			continue;
		};
		return invalid;
	};
	return n;
};

// Reads an utf8 string into 'buf' and returns a str that borrows from buf.
// Returns [[invalid]] if the data is not valid utf8.
//
// NOTE(review): every iteration passes the unused rest of 'buf' to the stream
// reader, which asserts that it gets at least four bytes. It looks like a
// string that fills 'buf' up to less than four bytes before its end aborts
// instead of returning - confirm and size 'buf' generously until then.
export fn read_utf8str(d: *decoder, buf: []u8) (str | error) = {
	let dh = next(d)?;
	expect_utag(dh, utag::UTF8_STRING)?;

	// the tag is known to be supported, so this can not fail
	let r = newstrreader(d, utag::UTF8_STRING)!;
	let n = 0z;

	// read until the end of the string, appending to buf
	for (true) {
		n += match (io::read(&r, buf[n..])) {
		case let sz: size =>
			yield sz;
		case io::EOF =>
			break;
		case let e: io::error =>
			// hand the asn1 error carried by the io::error back
			return unwrap_err(e);
		};
	};

	// the reader only returns valid utf8
	return strings::fromutf8(buf[..n])!;
};

diff --git a/encoding/asn1/t61.ha b/encoding/asn1/t61.ha
new file mode 100644
index 00000000..d0e14ab8
--- /dev/null
+++ b/encoding/asn1/t61.ha
@@ -0,0 +1,534 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

// https://en.wikipedia.org/wiki/ITU_T.61
const t61toascii: [_]u8 = [
//	 0	 1	 2	 3	 4	 5	 6	 7
//	 8	 9	 a	 b	 c	 d	 e	 f
	0,	0,	0,	0,	0,	0,	0,	0,	// 0
	0,	0,	0x0a,	0,	0x0c,	0x0d,	0,	0,	// 0
	0,	0,	0,	0,	0,	0,	0,	0,	// 10
	0,	0,	0x1a,	0x1b,	0,	0,	0,	0,	// 10
	0x20,	0x21,	0x22,	0,	0,	0x25,	0x26,	0x27,	// 20
	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,	// 20
	0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,	// 30
	0x38,	0x39,	0x3a,	0x3b,	0x3c,	0x3d,	0x3e,	0x3f,	// 30
	0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,	// 40
	0x48,	0x49,	0x4a,	0x4b,	0x4c,	0x4d,	0x4e,	0x4f,	// 40
	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,	// 50
	0x58,	0x59,	0x5a,	0x5b,	0,	0x5d,	0,	0x5f,	// 50
	0,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,	// 60
	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,	// 60
	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,	// 70
	0x78,	0x79,	0x7a,	0,	0x7c,	0,	0,	0,	// 70
];

const t61toutf8: [_]rune = [
	// 0x80
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u008b',
	'\u008c', '\u0000', '\u0000', '\u0000',

	// 0x90
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u009b',
	'\u0000', '\u0000', '\u0000', '\u0000',

	// 0xa0
	'\u00a0', '\u00a1', '\u00a2', '\u00a3',
	'\u0024', '\u00a5', '\u0023', '\u00a7',
	'\u00a4', '\u0000', '\u0000', '\u00ab',
	'\u0000', '\u0000', '\u0000', '\u0000',

	// 0xb0
	'\u00b0', '\u00b1', '\u00b2', '\u00b3',
	'\u00d7', '\u00b5', '\u00b6', '\u00b7',
	'\u00f7', '\u0000', '\u0000', '\u00bb',
	'\u00bc', '\u00bd', '\u00be', '\u00bf',

	// 0xc0
	'\u0000', '\u0300', '\u0301', '\u0302',
	'\u0303', '\u0304', '\u0306', '\u0307',
	'\u0308', '\u0308', '\u030a', '\u0327',
	'\u0332', '\u030b', '\u0328', '\u030c',

	// 0xd0
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',

	// 0xe0
	'\u2126', '\u00c6', '\u00d0', '\u00aa',
	'\u0126', '\u0000', '\u0132', '\u013f',
	'\u0141', '\u00d8', '\u0152', '\u00ba',
	'\u00de', '\u0166', '\u014a', '\u0149',

	// 0xf0
	'\u0138', '\u00e6', '\u0111', '\u00f0',
	'\u0127', '\u0131', '\u0133', '\u0140',
	'\u0142', '\u00f8', '\u0153', '\u00df',
	'\u00fe', '\u0167', '\u014b', '\u0000',
];

// NOTE(review): unfinished stub. Every byte of 'in' is mapped to a rune using
// t61toascii or t61toutf8, but the rune is discarded and nothing is written
// to 'out'. Combining characters get no special treatment either.
fn decode(out: []u8, in: []u8) void = {
	for (let i = 0z; i < len(in); i += 1) {
		const c = in[i];
		const r: rune = if (c & 0x80 != 0) {
			// TODO special cases
			// upper half: the table starts at 0x80
			yield t61toutf8[c - 0x80];
		} else {
			const c = t61toascii[in[i]];
			yield c: u32: rune;
		};

		// write r to out
	};
	return;
};

// Returned by [[t61_chardecode]] if the input is a combining character that
// needs a following character in order to be decoded.
export type insufficient = !void;

// Decodes a single T.61 character. 'in' is either one char or two if first is
// a combining character. Returns [[insufficient]] if 'in' is empty or consists
// of a combining character only, and [[invalid]] if the input is not part of
// the supported subset of T.61.
export fn t61_chardecode(in: []u8) (rune | insufficient | invalid) = {
	// nothing to decode yet; avoids an out of bounds access below
	if (len(in) == 0) {
		return insufficient;
	};

	if (len(in) == 2) {
		return t61_combine(in);
	};

	const in = in[0];

	// lower half
	if (in & 0x80 == 0) {
		const r = t61toascii[in];
		return if (r == 0) invalid else r: u32: rune;
	};

	// upper half: the table starts at 0x80
	const c = t61toutf8[in - 0x80];
	if (c == '\u0000') {
		return invalid;
	};

	// 0xcc is within the range of the combining characters, but no
	// combinations are provided for it
	if (in == 0xcc) {
		return invalid;
	};
	if (in > 0xc0 && in <= 0xcf) {
		return insufficient;
	};

	return c;
};

// Returns the precomposed rune for a combining character in[0] followed by
// the letter in[1]. 'in' must hold two bytes. Returns [[invalid]] if in[0] is
// no supported combining character or if there is no such combination.
fn t61_combine(in: []u8) (rune | invalid) = {
	const comb = in[0];
	const base = in[1]: u32: rune;
	switch (comb) {
	case 0xc1 =>
		switch (base) {
		case 'A' =>
			return '\u00c0';
		case 'E' =>
			return '\u00c8';
		case 'I' =>
			return '\u00cc';
		case 'O' =>
			return '\u00d2';
		case 'U' =>
			return '\u00d9';
		case 'a' =>
			return '\u00e0';
		case 'e' =>
			return '\u00e8';
		case 'i' =>
			return '\u00ec';
		case 'o' =>
			return '\u00f2';
		case 'u' =>
			return '\u00f9';
		case =>
			return invalid;
		};
	case 0xc2 =>
		switch (base) {
		case 'A' =>
			return '\u00c1';
		case 'C' =>
			return '\u0106';
		case 'E' =>
			return '\u00c9';
		case 'I' =>
			return '\u00cd';
		case 'L' =>
			return '\u0139';
		case 'N' =>
			return '\u0143';
		case 'O' =>
			return '\u00d3';
		case 'R' =>
			return '\u0154';
		case 'S' =>
			return '\u015a';
		case 'U' =>
			return '\u00da';
		case 'Y' =>
			return '\u00dd';
		case 'Z' =>
			return '\u0179';
		case 'a' =>
			return '\u00e1';
		case 'c' =>
			return '\u0107';
		case 'e' =>
			return '\u00e9';
		case 'g' =>
			return '\u0123';
		case 'i' =>
			return '\u00ed';
		case 'l' =>
			return '\u013a';
		case 'n' =>
			return '\u0144';
		case 'o' =>
			return '\u00f3';
		case 'r' =>
			return '\u0155';
		case 's' =>
			return '\u015b';
		case 'u' =>
			return '\u00fa';
		case 'y' =>
			return '\u00fd';
		case 'z' =>
			return '\u017a';
		case =>
			return invalid;
		};
	case 0xc3 =>
		switch (base) {
		case 'A' =>
			return '\u00c2';
		case 'C' =>
			return '\u0108';
		case 'E' =>
			return '\u00ca';
		case 'G' =>
			return '\u011c';
		case 'H' =>
			return '\u0124';
		case 'I' =>
			return '\u00ce';
		case 'J' =>
			return '\u0134';
		case 'O' =>
			return '\u00d4';
		case 'S' =>
			return '\u015c';
		case 'U' =>
			return '\u00db';
		case 'W' =>
			return '\u0174';
		case 'Y' =>
			return '\u0176';
		case 'a' =>
			return '\u00e2';
		case 'c' =>
			return '\u0109';
		case 'e' =>
			return '\u00ea';
		case 'g' =>
			return '\u011d';
		case 'h' =>
			return '\u0125';
		case 'i' =>
			return '\u00ee';
		case 'j' =>
			return '\u0135';
		case 'o' =>
			return '\u00f4';
		case 's' =>
			return '\u015d';
		case 'u' =>
			return '\u00fb';
		case 'w' =>
			return '\u0175';
		case 'y' =>
			return '\u0177';
		case =>
			return invalid;
		};
	case 0xc4 =>
		switch (base) {
		case 'A' =>
			return '\u00c3';
		case 'I' =>
			return '\u0128';
		case 'N' =>
			return '\u00d1';
		case 'O' =>
			return '\u00d5';
		case 'U' =>
			return '\u0168';
		case 'a' =>
			return '\u00e3';
		case 'i' =>
			return '\u0129';
		case 'n' =>
			return '\u00f1';
		case 'o' =>
			return '\u00f5';
		case 'u' =>
			return '\u0169';
		case =>
			return invalid;
		};
	case 0xc5 =>
		switch (base) {
		case 'A' =>
			return '\u0100';
		case 'E' =>
			return '\u0112';
		case 'I' =>
			return '\u012a';
		case 'O' =>
			return '\u014c';
		case 'U' =>
			return '\u016a';
		case 'a' =>
			return '\u0101';
		case 'e' =>
			return '\u0113';
		case 'i' =>
			return '\u012b';
		case 'o' =>
			return '\u014d';
		case 'u' =>
			return '\u016b';
		case =>
			return invalid;
		};
	case 0xc6 =>
		switch (base) {
		case 'A' =>
			return '\u0102';
		case 'G' =>
			return '\u011e';
		case 'U' =>
			return '\u016c';
		case 'a' =>
			return '\u0103';
		case 'g' =>
			return '\u011f';
		case 'u' =>
			return '\u016d';
		case =>
			return invalid;
		};
	case 0xc7 =>
		switch (base) {
		case 'C' =>
			return '\u010a';
		case 'E' =>
			return '\u0116';
		case 'G' =>
			return '\u0120';
		case 'I' =>
			return '\u0130';
		case 'Z' =>
			return '\u017b';
		case 'c' =>
			return '\u010b';
		case 'e' =>
			return '\u0117';
		case 'g' =>
			return '\u0121';
		case 'z' =>
			return '\u017c';
		case =>
			return invalid;
		};
	case 0xc8, 0xc9 =>
		// both bytes yield the same set of combinations
		switch (base) {
		case 'A' =>
			return '\u00c4';
		case 'E' =>
			return '\u00cb';
		case 'I' =>
			return '\u00cf';
		case 'O' =>
			return '\u00d6';
		case 'U' =>
			return '\u00dc';
		case 'Y' =>
			return '\u0178';
		case 'a' =>
			return '\u00e4';
		case 'e' =>
			return '\u00eb';
		case 'i' =>
			return '\u00ef';
		case 'o' =>
			return '\u00f6';
		case 'u' =>
			return '\u00fc';
		case 'y' =>
			return '\u00ff';
		case =>
			return invalid;
		};
	case 0xca =>
		switch (base) {
		case 'A' =>
			return '\u00c5';
		case 'U' =>
			return '\u016e';
		case 'a' =>
			return '\u00e5';
		case 'u' =>
			return '\u016f';
		case =>
			return invalid;
		};
	case 0xcb =>
		switch (base) {
		case 'C' =>
			return '\u00c7';
		case 'G' =>
			return '\u0122';
		case 'K' =>
			return '\u0136';
		case 'L' =>
			return '\u013b';
		case 'N' =>
			return '\u0145';
		case 'R' =>
			return '\u0156';
		case 'S' =>
			return '\u015e';
		case 'T' =>
			return '\u0162';
		case 'c' =>
			return '\u00e7';
		case 'k' =>
			return '\u0137';
		case 'l' =>
			return '\u013c';
		case 'n' =>
			return '\u0146';
		case 'r' =>
			return '\u0157';
		case 's' =>
			return '\u015f';
		case 't' =>
			return '\u0163';
		case =>
			return invalid;
		};
	case 0xcd =>
		switch (base) {
		case 'O' =>
			return '\u0150';
		case 'U' =>
			return '\u0170';
		case 'o' =>
			return '\u0151';
		case 'u' =>
			return '\u0171';
		case =>
			return invalid;
		};
	case 0xce =>
		switch (base) {
		case 'A' =>
			return '\u0104';
		case 'E' =>
			return '\u0118';
		case 'I' =>
			return '\u012e';
		case 'U' =>
			return '\u0172';
		case 'a' =>
			return '\u0105';
		case 'e' =>
			return '\u0119';
		case 'i' =>
			return '\u012f';
		case 'u' =>
			return '\u0173';
		case =>
			return invalid;
		};
	case 0xcf =>
		switch (base) {
		case 'C' =>
			return '\u010c';
		case 'D' =>
			return '\u010e';
		case 'E' =>
			return '\u011a';
		case 'L' =>
			return '\u013d';
		case 'N' =>
			return '\u0147';
		case 'R' =>
			return '\u0158';
		case 'S' =>
			return '\u0160';
		case 'T' =>
			return '\u0164';
		case 'Z' =>
			return '\u017d';
		case 'c' =>
			return '\u010d';
		case 'd' =>
			return '\u010f';
		case 'e' =>
			return '\u011b';
		case 'l' =>
			return '\u013e';
		case 'n' =>
			return '\u0148';
		case 'r' =>
			return '\u0159';
		case 's' =>
			return '\u0161';
		case 't' =>
			return '\u0165';
		case 'z' =>
			return '\u017e';
		case =>
			return invalid;
		};
	case =>
		return invalid;
	};
};

-- 
2.43.0

[PATCH hare v3 6/6] asn1: add readme

Details
Message ID
<20240205145140.361304-6-apreiml@strohwolke.at>
In-Reply-To
<20240205145140.361304-1-apreiml@strohwolke.at> (view parent)
DKIM signature
pass
Download raw message
Patch: +4 -0
Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
---
 encoding/asn1/README | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 encoding/asn1/README

diff --git a/encoding/asn1/README b/encoding/asn1/README
new file mode 100644
index 00000000..ff3c9749
--- /dev/null
+++ b/encoding/asn1/README
@@ -0,0 +1,4 @@
This module provides functions to encode and decode data in the distinguished
encoding rules (DER) format, a subset of the basic encoding rules (BER) defined
in the ITU-T X.690 standard.

See [[newencoder]] and [[newdecoder]] for how to encode or decode values.
-- 
2.43.0

[hare/patches] build failed

builds.sr.ht <builds@sr.ht>
Details
Message ID
<CYX8CNKKQHFD.2E05TWKWX7B9M@fra02>
In-Reply-To
<20240205145140.361304-6-apreiml@strohwolke.at> (view parent)
DKIM signature
missing
Download raw message
hare/patches: FAILED in 1m24s

[asn1: add types and errors][0] v3 from [Armin Preiml][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/49201
[1]: apreiml@strohwolke.at

✗ #1143729 FAILED  hare/patches/openbsd.yml https://builds.sr.ht/~sircmpwn/job/1143729
✗ #1143727 FAILED  hare/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/1143727
✓ #1143728 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1143728
Reply to thread Export thread (mbox)