~sircmpwn/hare-dev

hare: strconv: implement ftosf. v3 SUPERSEDED

Joe Finney: 1
 strconv: implement ftosf.

 7 files changed, 1377 insertions(+), 694 deletions(-)
#1060797 alpine.yml success
#1060798 freebsd.yml success
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~sircmpwn/hare-dev/patches/44928/mbox | git am -3
Learn more about email & git

[PATCH hare v3] strconv: implement ftosf. Export this patch

For void precision, uses Ryu algorithm. For uint precision, uses a
multiprecision implementation heavily based on golang's strconv.

The rounding and encoding functions are highly nontrivial, largely
because I decided to add a SHOW_POINT flag.

Signed-off-by: Joe Finney <me@spxtr.net>
---
1. Is this interface okay? I don't like that every function is
duplicated for f64 and f32, only to be recombined in the generic
function. It doesn't seem necessary. Why not just one ftosf (and
ftos) that takes a (f32 | f64), like floatingtos?
2. The code is significantly more complicated than I would prefer. I
think this is largely because of SHOW_POINT and G. I'll look for ways
to simplify, but we can also consider relaxing the requirements in
some way.

 stdlib.mk                      |   7 +-
 strconv/+test/ftos_test.ha     | 230 ++++++++
 strconv/ftos.ha                | 964 ++++++++++-----------------------
 strconv/ftos_multiprecision.ha | 310 +++++++++++
 strconv/ftos_ryu.ha            | 508 +++++++++++++++++
 strconv/ftostof+test.ha        |  30 -
 strconv/numeric.ha             |  22 +
 7 files changed, 1377 insertions(+), 694 deletions(-)
 create mode 100644 strconv/+test/ftos_test.ha
 create mode 100644 strconv/ftos_multiprecision.ha
 create mode 100644 strconv/ftos_ryu.ha
 delete mode 100644 strconv/ftostof+test.ha

diff --git a/stdlib.mk b/stdlib.mk
index f4157560..69871b4d 100644
--- a/stdlib.mk
+++ b/stdlib.mk
@@ -2090,6 +2090,8 @@ stdlib_strconv_any_srcs = \
	$(STDLIB)/strconv/stoi.ha \
	$(STDLIB)/strconv/numeric.ha \
	$(STDLIB)/strconv/ftos.ha \
	$(STDLIB)/strconv/ftos_ryu.ha \
	$(STDLIB)/strconv/ftos_multiprecision.ha \
	$(STDLIB)/strconv/stof.ha \
	$(STDLIB)/strconv/stof_data.ha

@@ -4615,10 +4617,13 @@ testlib_strconv_any_srcs = \
	$(STDLIB)/strconv/stoi.ha \
	$(STDLIB)/strconv/numeric.ha \
	$(STDLIB)/strconv/ftos.ha \
	$(STDLIB)/strconv/ftos_ryu.ha \
	$(STDLIB)/strconv/ftos_multiprecision.ha \
	$(STDLIB)/strconv/stof.ha \
	$(STDLIB)/strconv/stof_data.ha \
	$(STDLIB)/strconv/+test/stou_test.ha \
	$(STDLIB)/strconv/+test/stoi_test.ha
	$(STDLIB)/strconv/+test/stoi_test.ha \
	$(STDLIB)/strconv/+test/ftos_test.ha

$(TESTCACHE)/strconv/strconv-any.ssa: $(testlib_strconv_any_srcs) $(testlib_rt) $(testlib_types_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_ascii_$(PLATFORM)) $(testlib_math_$(PLATFORM)) $(testlib_bytes_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM))
	@printf 'HAREC \t$@\n'
diff --git a/strconv/+test/ftos_test.ha b/strconv/+test/ftos_test.ha
new file mode 100644
index 00000000..fff7e7ef
--- /dev/null
+++ b/strconv/+test/ftos_test.ha
@@ -0,0 +1,230 @@
// License: MPL-2.0
use math;

@test fn ftosf() void = {
	// These tests should pass for both f32 and f64.
	const tcs: [](f64, ffmt, (void | uint), fflags, str) = [
		// First test special values
		(1.0 / 0.0, ffmt::G, void, 0, "infinity"),
		(1.0 / 0.0, ffmt::G, void, fflags::SHOW_POS, "+infinity"),
		(-1.0 / 0.0, ffmt::F, void, 0, "-infinity"),
		(-1.0 / 0.0, ffmt::E, void, fflags::UPPERCASE, "-INFINITY"),
		(0.0 / 0.0, ffmt::G, void, 0, "nan"),
		(0.0 / 0.0, ffmt::E, void, fflags::UPPERCASE, "NAN"),

		// Then a million tests for zero.
		(0.0, ffmt::E, void, 0, "0e0"),
		(0.0, ffmt::E, void, fflags::SHOW_TWO_EXP_DIGITS, "0e00"),
		(0.0, ffmt::E, void, fflags::UPPER_EXP, "0E0"),
		(0.0, ffmt::E, void, fflags::SHOW_POS_EXP, "0e+0"),
		(0.0, ffmt::E, void, fflags::SHOW_POINT, "0.0e0"),
		(0.0, ffmt::F, void, 0, "0"),
		(0.0, ffmt::F, void, fflags::SHOW_POINT, "0.0"),
		(0.0, ffmt::G, void, 0, "0"),
		(-0.0, ffmt::G, void, fflags::SHOW_POS, "-0"),
		(0.0, ffmt::G, void, fflags::SHOW_POS, "+0"),
		(0.0, ffmt::G, void, fflags::SHOW_POINT, "0.0"),

		// ... e and f do not cut trailing zeros
		(0.0, ffmt::E, 0, 0, "0e0"),
		(0.0, ffmt::E, 1, 0, "0.0e0"),
		(0.0, ffmt::E, 2, 0, "0.00e0"),
		(0.0, ffmt::F, 0, 0, "0"),
		(0.0, ffmt::F, 1, 0, "0.0"),
		(0.0, ffmt::F, 2, 0, "0.00"),
		// ... g cuts trailing zeros
		(0.0, ffmt::G, 0, 0, "0"),
		(0.0, ffmt::G, 1, 0, "0"),
		(0.0, ffmt::G, 2, 0, "0"),

		// ... SHOW_POINT only changes precision 0
		(0.0, ffmt::E, 0, fflags::SHOW_POINT, "0.0e0"),
		(0.0, ffmt::E, 1, fflags::SHOW_POINT, "0.0e0"),
		(0.0, ffmt::E, 2, fflags::SHOW_POINT, "0.00e0"),
		(0.0, ffmt::F, 0, fflags::SHOW_POINT, "0.0"),
		(0.0, ffmt::F, 1, fflags::SHOW_POINT, "0.0"),
		(0.0, ffmt::F, 2, fflags::SHOW_POINT, "0.00"),
		// ... g with SHOW_POINT only has the one extra 0
		(0.0, ffmt::G, 0, fflags::SHOW_POINT, "0.0"),
		(0.0, ffmt::G, 1, fflags::SHOW_POINT, "0.0"),
		(0.0, ffmt::G, 2, fflags::SHOW_POINT, "0.0"),

		// Now we can test actual numbers.
		(10.0, ffmt::F, void, 0, "10"),
		(1.0, ffmt::F, void, 0, "1"),
		(1.1, ffmt::F, void, 0, "1.1"),
		(13.37, ffmt::G, void, 0, "13.37"),
		(0.3, ffmt::F, void, 0, "0.3"),
		(0.0031415, ffmt::F, void, 0, "0.0031415"),
		(-6345.972, ffmt::F, void, 0, "-6345.972"),
		(1.414, ffmt::F, void, 0, "1.414"),
		(1000000.0e9, ffmt::F, void, 0, "1000000000000000"),
		(10.0, ffmt::E, void, 0, "1e1"),
		(10.0, ffmt::E, void, fflags::SHOW_TWO_EXP_DIGITS, "1e01"),
		(10.0, ffmt::E, void, fflags::UPPER_EXP, "1E1"),
		(10.0, ffmt::E, void, fflags::SHOW_POS_EXP, "1e+1"),
		(0.1, ffmt::E, void, fflags::SHOW_POS_EXP, "1e-1"),
		(1.0, ffmt::E, void, 0, "1e0"),
		(0.3, ffmt::E, void, 0, "3e-1"),
		(0.0031415, ffmt::E, void, 0, "3.1415e-3"),
		(0.12345, ffmt::E, void, 0, "1.2345e-1"),

		// ... g is shortest
		(12345.0, ffmt::G, void, 0, "12345"),
		(10000.0, ffmt::G, void, 0, "1e4"),
		(11000.0, ffmt::G, void, 0, "1.1e4"),
		(1000.0, ffmt::G, void, 0, "1e3"),
		(1100.0, ffmt::G, void, 0, "1100"),
		(100.0, ffmt::G, void, 0, "100"),
		(10.0, ffmt::G, void, 0, "10"),
		(1.0, ffmt::G, void, 0, "1"),
		(0.1, ffmt::G, void, 0, "0.1"),
		(0.01, ffmt::G, void, 0, "0.01"),
		(0.011, ffmt::G, void, 0, "0.011"),
		(0.001, ffmt::G, void, 0, "1e-3"), // one shorter than f
		(0.0011, ffmt::G, void, 0, "1.1e-3"), // same length as f
		(0.0001, ffmt::G, void, 0, "1e-4"),

		// ... fixed precision stuff
		(0.5, ffmt::F, 0, 0, "0"),
		(1.0 / 3.0, ffmt::F, 2, 0, "0.33"),
		(1.0 / 3.0, ffmt::F, 1, 0, "0.3"),
		(1.0 / 3.0, ffmt::F, 0, 0, "0"),
		(1.0 / 3.0, ffmt::F, 0, fflags::SHOW_POINT, "0.3"),
		(2.0 / 3.0, ffmt::F, 2, 0, "0.67"),
		(2.0 / 3.0, ffmt::F, 1, 0, "0.7"),
		(2.0 / 3.0, ffmt::F, 0, 0, "1"),
		(2.0 / 3.0, ffmt::F, 0, fflags::SHOW_POINT, "0.7"),
		(2.0 / 30.0, ffmt::F, 5, 0, "0.06667"),
		(2.0 / 30.0, ffmt::F, 2, 0, "0.07"),
		(2.0 / 30.0, ffmt::F, 1, 0, "0.1"),
		(2.0 / 30.0, ffmt::F, 1, fflags::SHOW_POINT, "0.1"),
		(200.0 / 3.0, ffmt::F, 4, 0, "66.6667"),
		(100.0 / 3.0, ffmt::F, 4, 0, "33.3333"),
		(100.0 / 3.0, ffmt::F, 0, 0, "33"),
		(100.0 / 3.0, ffmt::F, 0, fflags::SHOW_POINT, "33.3"),
		(0.00001, ffmt::F, 1, 0, "0.0"),
		(0.001, ffmt::F, 2, 0, "0.00"),
		(0.006, ffmt::F, 2, 0, "0.01"),
		(0.001, ffmt::F, 6, 0, "0.001000"),
		(1.0, ffmt::F, 6, 0, "1.000000"),
		(100.0, ffmt::F, 6, 0, "100.000000"),

		// ... scientific notation stuff
		(460.0, ffmt::E, 2, 0, "4.60e2"),
		(1.0 / 3.0, ffmt::E, 2, 0, "3.33e-1"),
		(1.0 / 3.0, ffmt::E, 1, 0, "3.3e-1"),
		(1.0 / 3.0, ffmt::E, 0, 0, "3e-1"),
		(1.0 / 3.0, ffmt::E, 0, fflags::SHOW_POINT, "3.3e-1"),
		(2.0 / 3.0, ffmt::E, 2, 0, "6.67e-1"),
		(2.0 / 3.0, ffmt::E, 1, 0, "6.7e-1"),
		(2.0 / 3.0, ffmt::E, 0, 0, "7e-1"),
		(2.0 / 3.0, ffmt::E, 0, fflags::SHOW_POINT, "6.7e-1"),
		(2.0 / 30.0, ffmt::E, 5, 0, "6.66667e-2"),
		(2.0 / 30.0, ffmt::E, 2, 0, "6.67e-2"),
		(2.0 / 30.0, ffmt::E, 0, 0, "7e-2"),
		(2.0 / 30.0, ffmt::E, 0, fflags::SHOW_POINT, "6.7e-2"),
		(200.0 / 3.0, ffmt::E, 5, 0, "6.66667e1"),
		(100.0 / 3.0, ffmt::E, 5, 0, "3.33333e1"),
		(100.0 / 3.0, ffmt::E, 0, 0, "3e1"),
		(100.0 / 3.0, ffmt::E, 0, fflags::SHOW_POINT, "3.3e1"),
		(0.001, ffmt::E, 2, 0, "1.00e-3"),
		(1.0, ffmt::E, 6, 0, "1.000000e0"),
		(100.0, ffmt::E, 6, 0, "1.000000e2"),

		// ... and G. The behavior with SHOW_POINT is gnarly.
		(1.0 / 3.0, ffmt::G, 2, 0, "0.33"),
		(1.0 / 3.0, ffmt::G, 0, 0, "0.3"),
		(0.01, ffmt::G, void, fflags::SHOW_POINT, "0.01"),
		(1.0, ffmt::G, void, fflags::SHOW_POINT, "1.0"),
		(0.0001, ffmt::G, 0, 0, "1e-4"),
		(0.001, ffmt::G, 0, 0, "1e-3"),
		(0.00123, ffmt::G, 2, 0, "1.2e-3"),
		(0.01, ffmt::G, 5, 0, "0.01"), // trim trailing zeros
		(0.1, ffmt::G, 5, 0, "0.1"),
		(1.0, ffmt::G, 0, 0, "1"),
		(10.0, ffmt::G, 0, 0, "10"),
		(120.0, ffmt::G, 2, 0, "120"),
		(12000.0, ffmt::G, 2, 0, "1.2e4"),
		(0.0001, ffmt::G, 0, fflags::SHOW_POINT, "1.0e-4"),
		(0.001, ffmt::G, 0, fflags::SHOW_POINT, "1.0e-3"),
		(0.01, ffmt::G, 0, fflags::SHOW_POINT, "0.01"),
		(0.1, ffmt::G, 0, fflags::SHOW_POINT, "0.1"),
		(1.0, ffmt::G, 0, fflags::SHOW_POINT, "1.0"),
		(10.0, ffmt::G, 0, fflags::SHOW_POINT, "10.0"),
		(100.0, ffmt::G, 0, fflags::SHOW_POINT, "100.0"),
		(1000.0, ffmt::G, 0, fflags::SHOW_POINT, "1.0e3"),
		(0.0123, ffmt::G, 2, fflags::SHOW_POINT, "0.012"),
		(0.0123, ffmt::G, 5, fflags::SHOW_POINT, "0.0123"),
	];
	for (let i = 0z; i < len(tcs); i += 1) {
		const res64 = f64tosf(tcs[i].0, tcs[i].1, tcs[i].2, tcs[i].3);
		//defer free(res64);
		assert(res64 == tcs[i].4, res64);
		if (tcs[i].2 is void) {
			// void precision should guarantee that it parses back
			// to the original number.
			const back = stof64(res64)!;
			assert(math::isnan(back) == math::isnan(tcs[i].0));
			if (!math::isnan(back))
				assert(back == tcs[i].0);
		};

		const res32 = f32tosf(tcs[i].0: f32, tcs[i].1,
			tcs[i].2, tcs[i].3);
		defer free(res32);
		assert(res32 == tcs[i].4);
		if (tcs[i].2 is void) {
			const back = stof32(res32)!;
			assert(math::isnan(back) == math::isnan(tcs[i].0));
			if (!math::isnan(back))
				assert(back == tcs[i].0: f32);
		};
	};
	// These tests will only pass for f64
	const tcsf64: [](f64, ffmt, (void | uint), fflags, str) = [
		(9007199254740991.0, ffmt::F, void, 0, "9007199254740991"),
		(90071992547409915.0, ffmt::F, void, 0, "90071992547409920"),
		(90071992547409925.0, ffmt::F, void, 0, "90071992547409920"),
		(math::F64_MIN, ffmt::E, void, 0, "5e-324"),
		(math::F64_MIN, ffmt::E, void, fflags::SHOW_TWO_EXP_DIGITS, "5e-324"),
		(-math::F64_MIN, ffmt::E, void, 0, "-5e-324"),
		(math::F64_MIN_NORMAL, ffmt::E, void, 0, "2.2250738585072014e-308"),
		(math::F64_MAX_NORMAL, ffmt::E, void, 0, "1.7976931348623157e308"),
		(math::F64_MIN, ffmt::E, 2, 0, "4.94e-324"),
		(math::F64_MIN_NORMAL, ffmt::E, 0, 0, "2e-308"),
		(math::F64_MAX_NORMAL, ffmt::E, 3, 0, "1.798e308"),
	];
	for (let i = 0z; i < len(tcsf64); i += 1) {
		const res64 = f64tosf(tcsf64[i].0, tcsf64[i].1,
			tcsf64[i].2, tcsf64[i].3);
		defer free(res64);
		assert(res64 == tcsf64[i].4);
	};
	// These tests will only pass for f32
	const tcsf32: [](f32, ffmt, (void | uint), fflags, str) = [
		(math::F32_MIN, ffmt::G, void, 0, "1e-45"),
		(math::F32_MIN_NORMAL, ffmt::G, void, 0, "1.1754944e-38"),
		(math::F32_MAX_NORMAL, ffmt::G, void, 0, "3.4028235e38"),
	];
	for (let i = 0z; i < len(tcsf32); i += 1) {
		const res32 = f32tosf(tcsf32[i].0, tcsf32[i].1,
			tcsf32[i].2, tcsf32[i].3);
		defer free(res32);
		assert(res32 == tcsf32[i].4);
	};
	// Just make sure we can generate big numbers without breaking anything.
	const tcslen: [](f64, ffmt, (void | uint), fflags, size) = [
		(9007199254740991.0, ffmt::F, void, 0, 16),
		(-math::F64_MIN, ffmt::E, 100, 0, 108),
		(1.0, ffmt::F, 1000, 0, 1002),
		(2.22507385850720088902458687609E-308, ffmt::F, 1000, 0, 1002),
	];
	for (let i = 0z; i < len(tcslen); i += 1) {
		const res64 = f64tosf(tcslen[i].0, tcslen[i].1,
			tcslen[i].2, tcslen[i].3);
		defer free(res64);
		assert(len(res64) == tcslen[i].4);
	};
	assert(f64tos(13.37) == "13.37");
};
diff --git a/strconv/ftos.ha b/strconv/ftos.ha
index da996d33..5c725565 100644
--- a/strconv/ftos.ha
+++ b/strconv/ftos.ha
@@ -1,270 +1,48 @@
// License: MPL-2.0
// (c) 2021 Bor Grošelj Simić <bor.groseljsimic@telemach.net>
// (c) 2021 Drew DeVault <sir@cmpwn.com>
// (c) 2021 Ember Sawady <ecs@d2evs.net>
// (c) 2021 Sudipto Mallick <smlckz@disroot.org>
// (c) 2021 Sudipto Mallick <smlckz@envs.net>
// (c) 2021 Vlad-Stefan Harbuz <vlad@vladh.net>

// Using Ryū: fast float-to-string conversion algorithm by Ulf Adams.
// https://doi.org/10.1145/3192366.3192369
// This Hare implementation is translated from the original
// C implementation here: https://github.com/ulfjack/ryu
// Uses Ryū for shortest, falls back to multiprecision for fixed precision.

use ascii;
use io;
use math;
use types;

type r128 = struct {
	hi: u64,
	lo: u64,
};

// TODO: use 128-bit integers when implemented
fn u128mul(a: u64, b: u64) r128 = {
	const a0 = a: u32: u64, a1 = a >> 32;
	const b0 = b: u32: u64, b1 = b >> 32;
	const p00 = a0 * b0, p01 = a0 * b1, p10 = a1 * b0, p11 = a1 * b1;
	const p00_lo = p00: u32: u64, p00_hi = p00 >> 32;
	const mid1 = p10 + p00_hi;
	const mid1_lo = mid1: u32: u64, mid1_hi = mid1 >> 32;
	const mid2 = p01 + mid1_lo;
	const mid2_lo = mid2: u32: u64, mid2_hi = mid2 >> 32;
	const r_hi = p11 + mid1_hi + mid2_hi;
	const r_lo = (mid2_lo << 32) | p00_lo;
	return r128 { hi = r_hi, lo = r_lo };
};

// TODO: Same as above
fn u128rshift(lo: u64, hi: u64, s: u32) u64 = {
	assert(0 <= s);
	assert(s <= 64);
	return (hi << (64 - s)) | (lo >> s);
};

fn pow5fac(value: u64) u32 = {
	const m_inv_5: u64 = 14757395258967641293; // 5 * m_inv_5 = 1 (mod 2^64)
	const n_div_5: u64 = 3689348814741910323;
	let count: u32 = 0;
	for (true) {
		assert(value != 0);
		value *= m_inv_5;
		if (value > n_div_5) break;
		count += 1;
	};
	return count;
};

fn pow5fac32(value: u32) u32 = {
	let count: u32 = 0;
	for (true) {
		assert(value != 0);
		const q = value / 5, r = value % 5;
		if (r != 0) break;
		value = q;
		count += 1;
	};
	return count;
};

fn ibool(b: bool) u8 = if (b) 1 else 0;

fn pow5multiple(v: u64, p: u32) bool = pow5fac(v) >= p;
fn pow5multiple32(v: u32, p: u32) bool = pow5fac32(v) >= p;

fn pow2multiple(v: u64, p: u32) bool = {
	assert(v > 0);
	assert(p < 64);
	return (v & ((1u64 << p) - 1)) == 0;
};

fn pow2multiple32(v: u32, p: u32) bool = {
	assert(v > 0);
	assert(p < 32);
	return (v & ((1u32 << p) - 1)) == 0;
};

fn mulshift64(m: u64, mul: (u64, u64), j: u32) u64 = {
	// m is maximum 55 bits
	let r0 = u128mul(m, mul.0), r1 = u128mul(m, mul.1);
	const sum = r1.lo + r0.hi;
	r1.hi += ibool(sum < r0.hi);
	return u128rshift(sum, r1.hi, j - 64);
};

fn mulshiftall64(m: u64, mul: (u64, u64), j: i32, mm_shift: u32) (u64, u64, u64) = {
	m <<= 1;
	const r0 = u128mul(m, mul.0), r1 = u128mul(m, mul.1);
	const lo = r0.lo, tmp = r0.hi, mid = tmp + r1.lo;
	const hi = r1.hi + ibool(mid < tmp);
	const lo2 = lo + mul.0;
	const mid2 = mid + mul.1 + ibool(lo2 < lo);
	const hi2 = hi + ibool(mid2 < mid);
	const v_plus = u128rshift(mid2, hi2, (j - 64 - 1): u32);
	const v_minus = if (mm_shift == 1) {
		const lo3 = lo - mul.0;
		const mid3 = mid - mul.1 - ibool(lo3 > lo);
		const hi3 = hi - ibool(mid3 > mid);
		yield u128rshift(mid3, hi3, (j - 64 - 1): u32);
	} else {
		const lo3 = lo + lo;
		const mid3 = mid + mid + ibool(lo3 < lo);
		const hi3 = hi + hi + ibool(mid3 < mid);
		const lo4 = lo3 - mul.0;
		const mid4 = mid3 - mul.1 - ibool(lo4 > lo3);
		const hi4 = hi3 - ibool(mid4 > mid3);
		yield u128rshift(mid4, hi4, (j - 64): u32);
	};
	const v_rounded = u128rshift(mid, hi, (j - 64 - 1): u32);
	return (v_plus, v_rounded, v_minus);
};

fn mulshift32(m: u32, a: u64, s: u32) u32 = {
	assert(s > 32);
	const a_lo = a: u32: u64, a_hi = a >> 32;
	const b0 = m * a_lo, b1 = m * a_hi;
	const sum = (b0 >> 32) + b1, ss = sum >> (s - 32);
	assert(ss <= types::U32_MAX);
	return ss: u32;
};

fn mulpow5inv_divpow2(m: u32, q: u32, j: i32) u32 = {
	const pow5 = f64computeinvpow5(q);
	return mulshift32(m, pow5.1 + 1, j: u32);
};

fn mulpow5_divpow2(m: u32, i: u32, j: i32) u32 = {
	const pow5 = f64computepow5(i);
	return mulshift32(m, pow5.1, j: u32);
};

fn log2pow5(e: u32) u32 = {
	assert(e <= 3528);
	return ((e * 1217359) >> 19);
};

fn ceil_log2pow5(e: u32) u32 = log2pow5(e) + 1;

fn pow5bits(e: u32) u32 = ceil_log2pow5(e);

fn log10pow2(e: u32) u32 = {
	assert(e <= 1650);
	return ((e * 78913) >> 18);
use memio;
use strings;

export type ffmt = enum {
	// Scientific notation. Consists of a number in [1, 10), an 'e' (or 'E',
	// if UPPER_EXP flag is present), then an exponent.
	E,
	// Fixed-point notation.
	F,
	// General format. Uses whichever of E and F is shortest, not accounting
	// for flags.
	G,
};

fn log10pow5(e: u32) u32 = {
	assert(e <= 2620);
	return ((e * 732923) >> 20);
export type fflags = enum uint {
	// Use a sign for both positive and negative numbers.
	SHOW_POS = 1 << 0,
	// Include at least one decimal digit.
	SHOW_POINT = 1 << 1,
	// Uppercase INFINITY and NAN.
	UPPERCASE = 1 << 2,
	// Uppercase exponent symbols E and P rather than e and p.
	UPPER_EXP = 1 << 3,
	// Use a sign for both positive and negative exponents.
	SHOW_POS_EXP = 1 << 4,
	// Show at least two digits of the exponent.
	SHOW_TWO_EXP_DIGITS = 1 << 5,
};

def F64_POW5_INV_BITCOUNT: u8 = 125;
def F64_POW5_BITCOUNT: u8 = 125;

def F32_POW5_INV_BITCOUNT: u8 = F64_POW5_INV_BITCOUNT - 64;
def F32_POW5_BITCOUNT: u8 = F64_POW5_BITCOUNT - 64;

const F64_POW5_INV_SPLIT2: [15][2]u64 = [
	[1, 2305843009213693952],
	[5955668970331000884, 1784059615882449851],
	[8982663654677661702, 1380349269358112757],
	[7286864317269821294, 2135987035920910082],
	[7005857020398200553, 1652639921975621497],
	[17965325103354776697, 1278668206209430417],
	[8928596168509315048, 1978643211784836272],
	[10075671573058298858, 1530901034580419511],
	[597001226353042382, 1184477304306571148],
	[1527430471115325346, 1832889850782397517],
	[12533209867169019542, 1418129833677084982],
	[5577825024675947042, 2194449627517475473],
	[11006974540203867551, 1697873161311732311],
	[10313493231639821582, 1313665730009899186],
	[12701016819766672773, 2032799256770390445],
];

const POW5_INV_OFFSETS: [19]u32 = [
	0x54544554, 0x04055545, 0x10041000, 0x00400414, 0x40010000, 0x41155555,
	0x00000454, 0x00010044, 0x40000000, 0x44000041, 0x50454450, 0x55550054,
	0x51655554, 0x40004000, 0x01000001, 0x00010500, 0x51515411, 0x05555554,
	0x00000000
];

const F64_POW5_SPLIT2: [13][2]u64 = [
	[0, 1152921504606846976],
	[0, 1490116119384765625],
	[1032610780636961552, 1925929944387235853],
	[7910200175544436838, 1244603055572228341],
	[16941905809032713930, 1608611746708759036],
	[13024893955298202172, 2079081953128979843],
	[6607496772837067824, 1343575221513417750],
	[17332926989895652603, 1736530273035216783],
	[13037379183483547984, 2244412773384604712],
	[1605989338741628675, 1450417759929778918],
	[9630225068416591280, 1874621017369538693],
	[665883850346957067, 1211445438634777304],
	[14931890668723713708, 1565756531257009982]
];

const POW5_OFFSETS: [21]u32 = [
	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, 0x59695995,
	0x55545555, 0x56555515, 0x41150504, 0x40555410, 0x44555145, 0x44504540,
	0x45555550, 0x40004000, 0x96440440, 0x55565565, 0x54454045, 0x40154151,
	0x55559155, 0x51405555, 0x00000105
];

def POW5_TABLE_SZ: u8 = 26;

const POW5_TABLE: [POW5_TABLE_SZ]u64 = [
	1u64, 5u64, 25u64, 125u64, 625u64, 3125u64, 15625u64, 78125u64,
	390625u64, 1953125u64, 9765625u64, 48828125u64, 244140625u64,
	1220703125u64, 6103515625u64, 30517578125u64, 152587890625u64,
	762939453125u64, 3814697265625u64, 19073486328125u64, 95367431640625u64,
	476837158203125u64, 2384185791015625u64, 11920928955078125u64,
	59604644775390625u64, 298023223876953125u64 //, 1490116119384765625u64
];

fn f64computeinvpow5(i: u32) (u64, u64) = {
	const base = ((i + POW5_TABLE_SZ - 1) / POW5_TABLE_SZ): u32;
	const base2 = base * POW5_TABLE_SZ;
	const mul = F64_POW5_INV_SPLIT2[base];
	const off = base2 - i;
	if (off == 0) {
		return (mul[0], mul[1]);
	};
	const m = POW5_TABLE[off];
	const r1 = u128mul(m, mul[1]), r0 = u128mul(m, mul[0] - 1);
	let high1 = r1.hi, low1 = r1.lo, high0 = r0.hi, low0 = r0.lo;
	const sum = high0 + low1;
	if (sum < high0) {
		high1 += 1;
	};
	const delta = pow5bits(base2) - pow5bits(i);
	const res0 = u128rshift(low0, sum, delta) + 1 +
		((POW5_INV_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3);
	const res1 = u128rshift(sum, high1, delta);
	return (res0, res1);
};

fn f64computepow5(i: u32) (u64, u64) = {
	const base = i / POW5_TABLE_SZ, base2 = base * POW5_TABLE_SZ;
	const mul = F64_POW5_SPLIT2[base];
	const off = i - base2;
	if (off == 0) {
		return (mul[0], mul[1]);
	};
	const m = POW5_TABLE[off];
	const r1 = u128mul(m, mul[1]), r0 = u128mul(m, mul[0]);
	let high1 = r1.hi, low1 = r1.lo, high0 = r0.hi, low0 = r0.lo;
	const sum = high0 + low1;
	if (sum < high0) {
		high1 += 1;
	};
	const delta = pow5bits(i) - pow5bits(base2);
	const res0 = u128rshift(low0, sum, delta) +
		((POW5_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3);
	const res1 = u128rshift(sum, high1, delta);
	return (res0, res1);
};
// Just for convenience... inline functions when?
fn ffpos(f: fflags) bool = f & fflags::SHOW_POS > 0;
fn ffpoint(f: fflags) bool = f & fflags::SHOW_POINT > 0;
fn ffcaps(f: fflags) bool = f & fflags::UPPERCASE > 0;
fn ffcaps_exp(f: fflags) bool = f & fflags::UPPER_EXP > 0;
fn ffpos_exp(f: fflags) bool = f & fflags::SHOW_POS_EXP > 0;
fn fftwodigs(f: fflags) bool = f & fflags::SHOW_TWO_EXP_DIGITS > 0;

fn declen(n: u64) u8 = {
fn declen(n: u64) uint = {
	assert(n <= 1e17);
	return if (n >= 1e17) 18
	else if (n >= 1e16) 17
@@ -286,341 +64,269 @@ fn declen(n: u64) u8 = {
	else 1;
};

type decf64 = struct {
	mantissa: u64,
	exponent: i32,
fn writestr(h: io::handle, s: str) (void | io::error) = {
	io::writeall(h, strings::toutf8(s))?;
};

fn f64todecf64(mantissa: u64, exponent: u32) decf64 = {
	let e2 = (math::F64_EXPONENT_BIAS + math::F64_MANTISSA_BITS + 2): i32;
	let m2: u64 = 0;
	if (exponent == 0) {
		e2 = 1 - e2;
		m2 = mantissa;
	} else {
		e2 = (exponent: i32) - e2;
		m2 = (1u64 << math::F64_MANTISSA_BITS) | mantissa;
	};
	const accept_bounds = (m2 & 1) == 0;
	const mv = 4 * m2;
	const mm_shift = ibool(mantissa != 0 || exponent <= 1);
	let vp: u64 = 0, vr: u64 = 0, vm: u64 = 0;
	let e10: i32 = 0;
	let vm_trailing_zeros = false, vr_trailing_zeros = false;
	if (e2 >= 0) {
		const q = log10pow2(e2: u32) - ibool(e2 > 3);
		e10 = q: i32;
		const k = F64_POW5_INV_BITCOUNT + pow5bits(q) - 1;
		const i = -e2 + (q + k): i32;
		let pow5 = f64computeinvpow5(q);
		const res = mulshiftall64(m2, pow5, i, mm_shift);
		vp = res.0; vr = res.1; vm = res.2;
		if (q <= 21) {
			if ((mv - 5 * (mv / 5)) == 0) {
				vr_trailing_zeros = pow5multiple(mv, q);
			} else if (accept_bounds) {
				vm_trailing_zeros = pow5multiple(mv - 1 - mm_shift, q);
			} else {
				vp -= ibool(pow5multiple(mv + 2, q));
			};
		};
	} else {
		const q = log10pow5((-e2): u32) - ibool(-e2 > 1);
		e10 = e2 + (q: i32);
		const i = -e2 - (q: i32);
		const k = pow5bits(i: u32): i32 - F64_POW5_BITCOUNT: i32;
		const j = (q: i32) - k;
		let pow5 = f64computepow5(i: u32);
		const res = mulshiftall64(m2, pow5, j, mm_shift);
		vp = res.0; vr = res.1; vm = res.2;
		if (q <= 1) {
			vr_trailing_zeros = true;
			if (accept_bounds) {
				vm_trailing_zeros = mm_shift == 1;
			} else {
				vp -= 1;
// XXX: this can likely be dedup'd with the other encode functions.
fn encode_zero(
	h: io::handle,
	f: ffmt,
	prec: (void | uint),
	flag: fflags,
) (void | io::error) = {
	memio::appendrune(h, '0')?;
	let hasdec = false;
	match (prec) {
	case let u: uint =>
		if (u > 0 && f != ffmt::G) {
			memio::appendrune(h, '.')?;
			for (let i = 0u; i < u; i += 1) {
				memio::appendrune(h, '0')?;
			};
		} else if (q < 63) {
			vr_trailing_zeros = pow2multiple(mv, q);
			hasdec = true;
		};
	case void => yield;
	};
	let removed: i32 = 0, last_removed_digit: u8 = 0;
	let output: u64 = 0;
	if (vm_trailing_zeros || vr_trailing_zeros) {
		for (true) {
			const vpby10 = vp / 10, vmby10 = vm / 10;
			if (vpby10 <= vmby10) break;
			const vmmod10 = (vm: u32) - 10 * (vmby10: u32);
			const vrby10 = vr / 10;
			const vrmod10 = (vr: u32) - 10 * (vrby10: u32);
			vm_trailing_zeros &&= vmmod10 == 0;
			vr_trailing_zeros &&= last_removed_digit == 0;
			last_removed_digit = vrmod10: u8;
			vr = vrby10; vp = vpby10; vm = vmby10;
			removed += 1;
		};
		if (vm_trailing_zeros) {
			for (true) {
				const vmby10 = vm / 10;
				const vmmod10 = (vm: u32) - 10 * (vmby10: u32);
				if (vmmod10 != 0) break;
				const vpby10 = vp / 10, vrby10 = vr / 10;
				const vrmod10 = (vr: u32) - 10 * (vrby10: u32);
				vr_trailing_zeros &&= last_removed_digit == 0;
				last_removed_digit = vrmod10: u8;
				vr = vrby10; vp = vpby10; vm = vmby10;
				removed += 1;
	if (!hasdec && ffpoint(flag)) {
		memio::appendrune(h, '.')?;
		memio::appendrune(h, '0')?;
	};
	if (f == ffmt::E) {
		memio::appendrune(h, if (ffcaps_exp(flag)) 'E' else 'e')?;
		if (ffpos_exp(flag))
			memio::appendrune(h, '+')?;
		memio::appendrune(h, '0')?;
		if (fftwodigs(flag))
			memio::appendrune(h, '0')?;
	};
};

fn encode_f_mp(
	m: *mp,
	h: io::handle,
	f: ffmt,
	prec: (void | uint),
	flag: fflags,
) (void | io::error) = {
	// we will loop from lo <= i < hi, printing either zeros or a digit.
	// lo is simple, but hi depends intricately on f, prec, and the
	// SHOW_POINT flag.
	const lo = if (m.dp <= 0) m.dp - 1 else 0;
	let hi = match (prec) {
	case void =>
		yield if (m.nd: int > m.dp) m.nd: int else m.dp;
	case let u: uint =>
		yield if (m.dp <= 0) lo + u: int + 1 else m.dp + u: int;
	};
	// ffmt::G: we need to remove trailing zeros
	if (f == ffmt::G) {
		// first, make sure we include at least prec digits
		if (prec is uint) {
			const p = prec as uint;
			if (m.dp <= 0 && hi < p: int) {
				hi = p: int;
			};
		};
		if (vr_trailing_zeros && last_removed_digit == 5 && (vr & 1 == 0)) {
			// round to even
			last_removed_digit = 4;
		// then, cut back to the decimal point or nd
		if (hi > m.nd: int && m.dp <= 0) {
			hi = m.nd: int;
		} else if (hi > m.dp && m.dp > 0) {
			hi = if (m.nd: int > m.dp) m.nd: int else m.dp;
		};
		output = vr + ibool((vr == vm &&
			(!accept_bounds || !vm_trailing_zeros)) ||
			last_removed_digit >= 5);
	} else {
		let round_up = false;
		const vpby100 = vp / 100, vmby100 = vm / 100;
		if (vpby100 > vmby100) {
			const vrby100 = vr / 100;
			const vrmod100 = (vr: u32) - 100 * (vrby100: u32);
			round_up = vrmod100 >= 50;
			vr = vrby100; vp = vpby100; vm = vmby100;
			removed += 2;
	};
	// SHOW_POINT: we need to go at least one past the decimal
	if (ffpoint(flag) && hi <= m.dp) {
		hi = m.dp + 1;
	};
	for (let i = lo; i < hi; i += 1) {
		if (i == m.dp) {
			memio::appendrune(h, '.')?;
		};
		for (true) {
			const vmby10 = vm / 10, vpby10 = vp / 10;
			if (vpby10 <= vmby10) break;
			const vrby10 = vr / 10;
			const vrmod10 = (vr: u32) - 10 * (vrby10: u32);
			round_up = vrmod10 >= 5;
			vr = vrby10; vp = vpby10; vm = vmby10;
			removed += 1;
		if (0 <= i && i < m.nd: int) {
			memio::appendrune(h, (m.buf[i] + '0'): rune)?;
		} else {
			memio::appendrune(h, '0')?;
		};
		output = vr + ibool(vr == vm || round_up);
	};
	const exp = e10 + removed;
	return decf64 { exponent = exp, mantissa = output };
};

type decf32 = struct {
	mantissa: u32,
	exponent: i32,
fn encode_e_mp(
	m: *mp,
	h: io::handle,
	f: ffmt,
	prec: (void | uint),
	flag: fflags,
) (void | io::error) = {
	assert(m.nd > 0);
	memio::appendrune(h, (m.buf[0] + '0'): rune)?;
	const zeros: uint = match (prec) {
	case void =>
		yield 0;
	case let u: uint =>
		yield switch (f) {
		case ffmt::G =>
			yield if (m.nd + 1 < u) u - m.nd + 1 else 0;
		case ffmt::E =>
			yield if (m.nd < u + 1) u - m.nd + 1 else 0;
		case => abort();
		};
	};
	if (m.nd <= 1 && ffpoint(flag) && zeros < 1) {
		zeros = 1;
	};
	if (m.nd > 1 || zeros > 0) {
		memio::appendrune(h, '.')?;
	};
	for (let i = 1z; i < m.nd; i += 1) {
		memio::appendrune(h, (m.buf[i] + '0'): rune)?;
	};
	for (let i = 0u; i < zeros; i += 1) {
		memio::appendrune(h, '0')?;
	};
	memio::appendrune(h, if (ffcaps_exp(flag)) 'E' else 'e')?;
	let e = m.dp - 1;
	if (e < 0) {
		e = -e;
		memio::appendrune(h, '-')?;
	} else if (ffpos_exp(flag)) {
		memio::appendrune(h, '+')?;
	};
	let ebuf: [3]u8 = [0...]; // max and min exponents are 3 digits
	let l = declen(e: u64);
	for (let i = 0z; i < l; i += 1) {
		ebuf[2 - i] = (e % 10): u8;
		e /= 10;
	};
	if (fftwodigs(flag) && l == 1) {
		l = 2;
	};
	for (let i = 3 - l; i < 3; i += 1) {
		memio::appendrune(h, (ebuf[i] + '0'): rune)?;
	};
};

fn f32todecf32(mantissa: u32, exponent: u32) decf32 = {
	let e2 = (math::F32_EXPONENT_BIAS + math::F32_MANTISSA_BITS + 2): i32;
	let m2: u32 = 0;
	if (exponent == 0) {
		e2 = 1 - e2;
		m2 = mantissa;
	} else {
		e2 = (exponent: i32) - e2;
		m2 = (1u32 << math::F32_MANTISSA_BITS: u32) | mantissa;
fn ftosf_handle_generic(
	n: (f32 | f64),
	h: io::handle,
	f: ffmt,
	prec: (void | uint),
	flag: fflags,
) (void | io::error) = {
	const (mantissa, exponent, sign, special) = match (n) {
	case let n: f64 =>
		const bits = math::f64bits(n);
		const mantissa = bits & math::F64_MANTISSA_MASK;
		const exponent = ((bits >> math::F64_MANTISSA_BITS) &
			math::F64_EXPONENT_MASK): u32;
		const sign = bits >> (math::F64_EXPONENT_BITS +
			math::F64_MANTISSA_BITS) > 0;
		const special = exponent == math::F64_EXPONENT_MASK;
		yield (mantissa, exponent, sign, special);
	case let n: f32 =>
		const bits = math::f32bits(n);
		const mantissa = bits & math::F32_MANTISSA_MASK;
		const exponent = ((bits >> math::F32_MANTISSA_BITS) &
			math::F32_EXPONENT_MASK): u32;
		const sign = bits >> (math::F32_EXPONENT_BITS +
			math::F32_MANTISSA_BITS) > 0;
		const special = exponent == math::F32_EXPONENT_MASK;
		yield (mantissa, exponent, sign, special);
	};
	const accept_bounds = (m2 & 1) == 0;
	const mv = 4 * m2, mp = mv + 2;
	const mm_shift = ibool(mantissa != 0 || exponent <= 1);
	const mm = mv - 1 - mm_shift;
	let vr: u32 = 0, vp: u32 = 0, vm: u32 = 0;
	let e10: i32 = 0;
	let vm_trailing_zeroes = false, vr_trailing_zeroes = false;
	let last_removed_digit: u8 = 0;
	if (e2 >= 0) {
		const q = log10pow2(e2: u32);
		e10 = q: i32;
		const k = F32_POW5_INV_BITCOUNT + pow5bits(q) - 1;
		const i = -e2 + (q + k): i32;
		vr = mulpow5inv_divpow2(mv, q, i);
		vp = mulpow5inv_divpow2(mp, q, i);
		vm = mulpow5inv_divpow2(mm, q, i);
		if (q != 0 && (vp - 1) / 10 <= vm / 10) {
			const l = F32_POW5_INV_BITCOUNT + pow5bits(q - 1) - 1;
			last_removed_digit = (mulpow5inv_divpow2(mv, q - 1,
				-e2 + ((q + l): i32) - 1) % 10): u8;
		};
		if (q <= 9) {
			if (mv % 5 == 0) {
				vr_trailing_zeroes = pow5multiple32(mv, q);
			} else if (accept_bounds) {
				vm_trailing_zeroes = pow5multiple32(mm, q);
			} else {
				vp -= ibool(pow5multiple32(mp, q));
			};
		};
	} else {
		const q = log10pow5((-e2): u32);
		e10 = (q: i32) + e2;
		const i = (-e2 - (q: i32)): u32;
		const k = pow5bits(i) - F32_POW5_BITCOUNT;
		let j = (q: i32) - k: i32;
		vr = mulpow5_divpow2(mv, i, j);
		vp = mulpow5_divpow2(mp, i, j);
		vm = mulpow5_divpow2(mm, i, j);
		if (q != 0 && (vp - 1) / 10 <= vm / 10) {
			j = (q: i32) - 1 - (pow5bits(i + 1): i32 -
				F32_POW5_BITCOUNT: i32);
			last_removed_digit = (mulpow5_divpow2(mv,
				(i + 1), j) % 10): u8;
		};
		if (q <= 1) {
			vr_trailing_zeroes = true;
			if (accept_bounds) {
				vm_trailing_zeroes = mm_shift == 1;
			} else {
				vp -= 1;
			};
		} else if (q < 31) {
			vr_trailing_zeroes = pow2multiple32(mv, q - 1);
		};

	if (special && mantissa != 0) {
		return writestr(h, if (ffcaps(flag)) "NAN" else "nan");
	};
	let removed: i32 = 0, output: u32 = 0;
	if (vm_trailing_zeroes || vr_trailing_zeroes) {
		for (vp / 10 > vm / 10) {
			vm_trailing_zeroes &&= (vm - (vm / 10) * 10) == 0;
			vr_trailing_zeroes &&= last_removed_digit == 0;
			last_removed_digit = (vr % 10): u8;
			vr /= 10;
			vp /= 10;
			vm /= 10;
			removed += 1;
		};
		if (vm_trailing_zeroes) {
			for (vm % 10 == 0) {
				vr_trailing_zeroes &&= last_removed_digit == 0;
				last_removed_digit = (vr % 10): u8;
				vr /= 10;
				vp /= 10;
				vm /= 10;
				removed += 1;
			};
		};
		if (vr_trailing_zeroes && last_removed_digit == 5 && vr % 2 == 0) {
			// round to even
			last_removed_digit = 4;

	if (sign) {
		memio::appendrune(h, '-')?;
	} else if (ffpos(flag)) {
		memio::appendrune(h, '+')?;
	};

	if (special) {
		return writestr(h,
			if (ffcaps(flag)) "INFINITY" else "infinity");
	} else if (exponent == 0 && mantissa == 0) {
		return encode_zero(h, f, prec, flag);
	};

	let m = mp { ... };
	let ok = false;
	if (prec is void) {
		// shortest via Ryū
		const (mdec, edec) = match (n) {
		case f64 =>
			const d = f64todecf64(mantissa, exponent);
			yield (d.mantissa, d.exponent);
		case f32 =>
			const d = f32todecf32(mantissa: u32, exponent);
			yield (d.mantissa: u64, d.exponent);
		};
		output = vr + ibool((vr == vm &&
			(!accept_bounds || !vm_trailing_zeroes)) ||
			last_removed_digit >= 5);
	} else {
		for (vp / 10 > vm / 10) {
			last_removed_digit = (vr % 10): u8;
			vr /= 10;
			vp /= 10;
			vm /= 10;
			removed += 1;
		init_mp_dec(&m, mdec, edec);
		// if SHOW_POINT and we have too few digits, then we need to
		// fall back to multiprecision.
		ok = !ffpoint(flag) || m.dp < m.nd: int;
	};

	if (!ok) {
		// fall back to multiprecision
		match (n) {
		case f64 =>
			init_mp(&m, mantissa, exponent);
		case f32 =>
			init_mp(&m, mantissa: u32, exponent);
		};
		output = vr + ibool(vr == vm || last_removed_digit >= 5);
		trim_mp(&m);
		const nd = compute_round_mp(&m, f, prec, flag);
		round_mp(&m, nd);
	};
	const exp = e10 + removed;
	return decf32 { mantissa = output, exponent = exp };
};

def F32_DECIMAL_DIGITS: i32 = 9;
def F64_DECIMAL_DIGITS: i32 = 17;
	if (f == ffmt::G) {
		trim_mp(&m);
	};

fn encode_base10(buf: []u8, mantissa: u64, exponent: i32, digits: i32) size = {
	const n = mantissa, e = exponent, olen = declen(n);
	const exp = e + olen: i32 - 1;
	// use scientific notation for numbers whose exponent is beyond the
	// precision available for the underlying type
	if (exp > -4 && exp < digits) {
		if (e >= 0) {
			let k = exp;
			buf[k + 1] = '.';
			buf[k + 2] = '0';
			for (let a = e; a > 0; a -= 1) {
				buf[k] = '0';
				k -= 1;
			};
			let m = n;
			for (k >= 0; k -= 1) {
				const mby10 = m / 10;
				const mmod10 = (m: u32) - 10 * (mby10: u32);
				buf[k] = '0' + mmod10: u8;
				m = mby10;
			};
			return (e + olen: i32 + 2): size;
		} else if (exp < 0) {
			buf[0] = '0';
			buf[1] = '.';
			let k = -e + 1;
			let m = n;
			for (let a = olen: i32; a > 0; a -= 1) {
				const mby10 = m / 10;
				const mmod10 = (m: u32) - 10 * (mby10: u32);
				buf[k] = '0' + mmod10: u8;
				k -= 1;
				m = mby10;
			};
			for (k > 1; k -= 1) {
				buf[k] = '0';
			};
			return (-e + 2): size;
		} else {
			let k = olen: i32;
			let m = n;
			for (let a = -e; a > 0; a -= 1) {
				const mby10 = m / 10;
				const mmod10 = (m: u32) - 10 * (mby10: u32);
				buf[k] = '0' + mmod10: u8;
				k -= 1;
				m = mby10;
			};
			buf[k] = '.';
			k -= 1;
			for (k >= 0; k -= 1) {
				const mby10 = m / 10;
				const mmod10 = (m: u32) - 10 * (mby10: u32);
				buf[k] = '0' + mmod10: u8;
				m = mby10;
			};
			return (olen + 1): size;
		};
	if (f == ffmt::G && prec is uint) {
		if (prec as uint == 0)
			prec = 1u;
	};

	if (m.nd == 0) {
		// rounded to zero
		encode_zero(h, f, prec, flag)?;
	} else if (f == ffmt::E || (f == ffmt::G &&
			(m.dp < -1 || m.dp - m.nd: int > 2))) {
		encode_e_mp(&m, h, f, prec, flag)?;
	} else {
		let h: i32 = 0;
		let m = n;
		if (olen == 1) {
			buf[0] = '0' + m: u8;
			h = 1;
		} else {
			let k = olen: i32;
			for (let a = k; a > 1; a -= 1) {
				const mby10 = m / 10;
				const mmod10 = (m: u32) - 10 * (mby10: u32);
				buf[k] = '0' + mmod10: u8;
				k -= 1;
				m = mby10;
			};
			buf[k] = '.';
			buf[0] = '0' + m: u8;
			h = olen: i32 + 1;
		};
		buf[h] = 'e';
		h += 1;
		let ex = if (exp < 0) {
			buf[h] = '-';
			h += 1;
			yield -exp;
		} else exp;
		const elen = declen(ex: u64);
		let k = h + elen: i32 - 1;
		for (let a = elen: i32; a > 0; a -= 1) {
			const eby10 = ex / 10;
			const emod10 = (ex: u32) - 10 * (eby10: u32);
			buf[k] = '0' + emod10: u8;
			k -= 1;
			ex = eby10;
		};
		h += elen: i32;
		return h: size;
		encode_f_mp(&m, h, f, prec, flag)?;
	};
};

// Converts a f64 to a string in base 10 and writes it to the handle.  The
// definition of format, precision, and flags are as in [[floatingtosf]].
export fn f64tosf_handle(
	n: f64,
	h: io::handle,
	f: ffmt,
	prec: (void | uint),
	flag: fflags,
) (void | io::error) = {
	ftosf_handle_generic(n, h, f, prec, flag)?;
};

// Converts a f64 to a string in base 10. The return value must be freed. The
// definition of format, precision, and flags are as in [[floatingtosf]].
export fn f64tosf(n: f64, f: ffmt, prec: (void | uint), flag: fflags) str = {
	let m = memio::dynamic();
	ftosf_handle_generic(n, &m, f, prec, flag)!;
	return memio::string(&m)!;
};


// Converts a f64 to a string in base 10. The return value is statically
// allocated and will be overwritten on subsequent calls; see [[strings::dup]]
// to duplicate the result.
// to duplicate the result. The result is equivalent to [[f64tosf]] with format
// G and precision void.
export fn f64tos(n: f64) const str = {
	// The biggest string produced by a f64 number in base 10 would have the
	// negative sign, followed by a digit and decimal point, and then
@@ -628,111 +334,43 @@ export fn f64tos(n: f64) const str = {
	// sign and the maximum of three digits for exponent.
	// (1 + 1 + 1 + 16 + 1 + 1 + 3) = 24
	static let buf: [24]u8 = [0...];
	const bits = math::f64bits(n);
	const sign = (bits >> (math::F64_MANTISSA_BITS +
	math::F64_EXPONENT_BITS)): size; // bits >> 63
	const mantissa = bits & ((1u64 << math::F64_MANTISSA_BITS) - 1);
	const exponent = ((bits >> math::F64_MANTISSA_BITS) &
			(1u64 << math::F64_EXPONENT_BITS) - 1): u32;
	if (mantissa == 0 && exponent == 0) {
		return if (sign == 0) "0.0" else "-0.0";
	} else if (exponent == ((1 << math::F64_EXPONENT_BITS) - 1)) {
		if (mantissa != 0) {
			return "NaN";
		};
		return if (sign == 0) "Infinity" else "-Infinity";
	};
	const d = f64todecf64(mantissa, exponent);
	if (sign != 0) {
		buf[0] = '-';
	};
	let z = encode_base10(buf[sign..], d.mantissa, d.exponent,
			F64_DECIMAL_DIGITS) + sign;
	let s = types::string { data = &buf, length = z, ... };
	return *(&s: *str);
	let m = memio::fixed(buf);
	ftosf_handle_generic(n, &m, ffmt::G, void, 0)!;
	return memio::string(&m)!;
};

// Converts a f32 to a string in base 10 and writes it to the handle.  The
// definition of format, precision, and flags are as in [[floatingtosf]].
export fn f32tosf_handle(
	n: f32,
	h: io::handle,
	f: ffmt,
	prec: (void | uint),
	flag: fflags,
) (void | io::error) = {
	ftosf_handle_generic(n, h, f, prec, flag)?;
};

// Converts a f32 to a string in base 10. The return value must be freed. The
// definition of format, precision, and flags are as in [[floatingtosf]].
export fn f32tosf(n: f32, f: ffmt, prec: (void | uint), flag: fflags) str = {
	let m = memio::dynamic();
	ftosf_handle_generic(n, &m, f, prec, flag)!;
	return memio::string(&m)!;
};

// Converts a f32 to a string in base 10. The return value is statically
// allocated and will be overwritten on subsequent calls; see [[strings::dup]]
// to duplicate the result.
// to duplicate the result. The result is equivalent to [[f32tosf]] with format
// G and precision void.
export fn f32tos(n: f32) const str = {
	// The biggest string produced by a f32 number in base 10 would have the
	// The biggest string produced by a f64 number in base 10 would have the
	// negative sign, followed by a digit and decimal point, and then seven
	// more decimal digits, followed by 'e' and another negative sign and
	// the maximum of two digits for exponent.
	// (1 + 1 + 1 + 7 + 1 + 1 + 2) = 14
	static let buf: [16]u8 = [0...];
	const bits = math::f32bits(n);
	const sign = (bits >> (math::F32_MANTISSA_BITS +
	math::F32_EXPONENT_BITS)): size; // bits >> 31
	const mantissa = bits & ((1u32 << math::F32_MANTISSA_BITS) - 1): u32;
	const exponent = (bits >> math::F32_MANTISSA_BITS): u32 &
		((1u32 << math::F32_EXPONENT_BITS) - 1): u32;
	if (mantissa == 0 && exponent == 0) {
		return if (sign == 0) "0.0" else "-0.0";
	} else if (exponent == ((1 << math::F32_EXPONENT_BITS) - 1): u32) {
		if (mantissa != 0) {
			return "NaN";
		};
		return if (sign == 0) "Infinity" else "-Infinity";
	};
	const d = f32todecf32(mantissa, exponent);
	if (sign != 0) {
		buf[0] = '-';
	};
	let z = encode_base10(buf[sign..], d.mantissa, d.exponent,
			F32_DECIMAL_DIGITS) + sign;
	const s = types::string { data = &buf, length = z, ... };
	return *(&s: *str);
};

@test fn f64tos() void = {
	assert(f64tos(0.0) == "0.0");
	assert(f64tos(-0.0) == "-0.0");
	assert(f64tos(1.0 / 0.0) == "Infinity");
	assert(f64tos(-1.0 / 0.0) == "-Infinity");
	assert(f64tos(0.0 / 0.0) == "NaN");
	assert(f64tos(1.0) == "1.0");
	assert(f64tos(0.3) == "0.3");
	assert(f64tos(0.0031415) == "0.0031415");
	assert(f64tos(0.0000012345678) == "1.2345678e-6");
	assert(f64tos(1.414) == "1.414");
	assert(f64tos(1e234f64) == "1e234");
	assert(f64tos(1.2e-34) == "1.2e-34");
	assert(f64tos(-6345.9721) == "-6345.9721");
	assert(f64tos(1.23456789e67) == "1.23456789e67");
	assert(f64tos(11.2233445566778899e20) == "1.122334455667789e21");
	assert(f64tos(1000000.0e9) == "1000000000000000.0");
	assert(f64tos(9007199254740991.0) == "9007199254740991.0");
	assert(f64tos(90071992547409915.0) == "90071992547409920.0");
	assert(f64tos(90071992547409925.0) == "90071992547409920.0");
	assert(f64tos(5.0e-324) == "5e-324");
	assert(f64tos(2.2250738585072014e-308) == "2.2250738585072014e-308");
	assert(f64tos(1.7976931348623157e308) == "1.7976931348623157e308");
};

@test fn f32tos() void = {
	assert(f32tos(0.0) == "0.0");
	assert(f32tos(-0.0) == "-0.0");
	assert(f32tos(1.0 / 0.0) == "Infinity");
	assert(f32tos(-1.0 / 0.0) == "-Infinity");
	assert(f32tos(0.0 / 0.0) == "NaN");
	assert(f32tos(1.0) == "1.0");
	assert(f32tos(-8.0) == "-8.0");
	assert(f32tos(1.23) == "1.23");
	assert(f32tos(-0.618) == "-0.618");
	assert(f32tos(0.00456) == "0.00456");
	assert(f32tos(0.00000000000434655) == "4.34655e-12");
	assert(f32tos(123456.78) == "123456.78");
	assert(f32tos(-1.234567) == "-1.234567");
	assert(f32tos(12345.6789) == "12345.679");
	assert(f32tos(1.23e30) == "1.23e30");
	assert(f32tos(1.23e-30) == "1.23e-30");
	assert(f32tos(16777215.0) == "16777215.0");
	assert(f32tos(167772155.0) == "167772160.0");
	assert(f32tos(167772145.0) == "167772140.0");
	assert(f32tos(1.0e-45) == "1e-45");
	assert(f32tos(1.1754944e-38) == "1.1754944e-38");
	assert(f32tos(3.4028235e+38) == "3.4028235e38");
	static let buf: [14]u8 = [0...];
	let m = memio::fixed(buf);
	ftosf_handle_generic(n, &m, ffmt::G, void, 0)!;
	return memio::string(&m)!;
};

diff --git a/strconv/ftos_multiprecision.ha b/strconv/ftos_multiprecision.ha
new file mode 100644
index 00000000..b9d95e4c
--- /dev/null
+++ b/strconv/ftos_multiprecision.ha
@@ -0,0 +1,310 @@
// License: MPL-2.0

// Multiprecision float to string based on golang's strconv/decimal.go.

use ascii;
use math;
use strings;

type mp = struct {
	// Numbers 0-9, not ascii. Length for small numbers is
	// log10(mantissa * 5^-exp). Subnormal doubles have min exp -1074 and
	// max mantissa 4e16, giving at most 767 digits.
	buf: [768]u8,
	// Number of valid digits in buf. May be 0 if the number rounds to 0.
	nd: uint,
	// Decimal point index, may be negative.
	// -1 means 0.0ddd...
	// 0 means 0.ddd...
	// 1 means d.dd...
	// and so on
	dp: int,
};

// These come from golang. The index into the table is amount of shift, up to
// 60. The number is the count of new digits that will be added by the shift,
// but one fewer if the number's prefix is smaller than the string prefix.
//
// For example, leftcheats[2] is (1, "25"). Any number left shifted by 2 will
// therefore be 1 digit longer, or zero digits longer if its first two digits
// are smaller than 25.
const leftcheats: [](size, str) = [
	(0, ""),
	(1, "5"),
	(1, "25"),
	(1, "125"),
	(2, "625"),
	(2, "3125"),
	(2, "15625"),
	(3, "78125"),
	(3, "390625"),
	(3, "1953125"),
	(4, "9765625"),
	(4, "48828125"),
	(4, "244140625"),
	(4, "1220703125"),
	(5, "6103515625"),
	(5, "30517578125"),
	(5, "152587890625"),
	(6, "762939453125"),
	(6, "3814697265625"),
	(6, "19073486328125"),
	(7, "95367431640625"),
	(7, "476837158203125"),
	(7, "2384185791015625"),
	(7, "11920928955078125"),
	(8, "59604644775390625"),
	(8, "298023223876953125"),
	(8, "1490116119384765625"),
	(9, "7450580596923828125"),
	(9, "37252902984619140625"),
	(9, "186264514923095703125"),
	(10, "931322574615478515625"),
	(10, "4656612873077392578125"),
	(10, "23283064365386962890625"),
	(10, "116415321826934814453125"),
	(11, "582076609134674072265625"),
	(11, "2910383045673370361328125"),
	(11, "14551915228366851806640625"),
	(12, "72759576141834259033203125"),
	(12, "363797880709171295166015625"),
	(12, "1818989403545856475830078125"),
	(13, "9094947017729282379150390625"),
	(13, "45474735088646411895751953125"),
	(13, "227373675443232059478759765625"),
	(13, "1136868377216160297393798828125"),
	(14, "5684341886080801486968994140625"),
	(14, "28421709430404007434844970703125"),
	(14, "142108547152020037174224853515625"),
	(15, "710542735760100185871124267578125"),
	(15, "3552713678800500929355621337890625"),
	(15, "17763568394002504646778106689453125"),
	(16, "88817841970012523233890533447265625"),
	(16, "444089209850062616169452667236328125"),
	(16, "2220446049250313080847263336181640625"),
	(16, "11102230246251565404236316680908203125"),
	(17, "55511151231257827021181583404541015625"),
	(17, "277555756156289135105907917022705078125"),
	(17, "1387778780781445675529539585113525390625"),
	(18, "6938893903907228377647697925567626953125"),
	(18, "34694469519536141888238489627838134765625"),
	(18, "173472347597680709441192448139190673828125"),
	(19, "867361737988403547205962240695953369140625"),
];

fn prefix_less_than_mp(m: *mp, s: str) bool = {
	const u = strings::toutf8(s);
	for (let i = 0z; i < len(s); i += 1) {
		if (i >= m.nd)
			return true;
		if (m.buf[i] + '0': u8 != u[i])
			return m.buf[i] + '0': u8 < u[i];
	};
	return false;
};

// Shift left by k.
fn shl_mp(m: *mp, k: u64) void = {
	let delta = leftcheats[k].0;
	if (prefix_less_than_mp(m, leftcheats[k].1))
		delta -= 1;
	let r = (m.nd - 1): int;
	let w = m.nd + delta;
	let n = 0u64;
	for (r >= 0; r -= 1) {
		n += m.buf[r]: u64 << k;
		const quo = n / 10;
		const rem = n - 10 * quo;
		w -= 1;
		m.buf[w] = rem: u8;
		n = quo;
	};
	for (n > 0) {
		const quo = n / 10;
		const rem = n - 10 * quo;
		w -= 1;
		m.buf[w] = rem: u8;
		n = quo;
	};
	m.nd += delta: uint;
	m.dp += delta: int;
};

// Shift right by k.
fn shr_mp(m: *mp, k: u64) void = {
	let r = 0z;
	let w = 0z;
	let n = 0u64;
	const mask = (1 << k) - 1;

	for (n >> k == 0; r += 1) {
		if (r >= m.nd) {
			for (n >> k == 0) {
				n *= 10;
				r += 1;
			};
			break;
		};
		n = 10 * n + m.buf[r];
	};
	m.dp -= r: int - 1;

	for (r < m.nd; r += 1) {
		const c = m.buf[r];
		const dig = n >> k;
		n &= mask;
		m.buf[w] = dig: u8;
		w += 1;
		n = n * 10 + c;
	};

	for (n > 0; w += 1) {
		const dig = n >> k;
		n &= mask;
		m.buf[w] = dig: u8;
		n = n * 10;
	};
	m.nd = w: uint;
};

// Shift right (k < 0) or left (k > 0). We can only shift up to 60 at a time
// without losing bits, so break up big shifts.
fn shift_mp(m: *mp, k: int) void = {
	if (k < 0) {
		let nk = (-k): uint;
		for (nk > 60) {
			shr_mp(m, 60);
			nk -= 60;
		};
		shr_mp(m, nk);
	} else if (k > 0) {
		for (k > 60) {
			shl_mp(m, 60);
			k -= 60;
		};
		shl_mp(m, k: uint);
	};
};

fn init_mp(m: *mp, mantissa: (u64 | u32), exponent: u32) void = {
	const (eb, mb, mant) = match (mantissa) {
	case let u: u32 =>
		yield (math::F32_EXPONENT_BIAS, math::F32_MANTISSA_BITS, u: u64);
	case let u: u64 =>
		yield (math::F64_EXPONENT_BIAS, math::F64_MANTISSA_BITS, u);
	};
	let e2 = (eb + mb): i32;
	let m2: u64 = 0;
	if (exponent == 0) {
		e2 = 1 - e2;
		m2 = mant;
	} else {
		e2 = (exponent: i32) - e2;
		m2 = (1u64 << mb) | mant;
	};

	m.nd = declen(m2);
	m.dp = m.nd: int;
	for (let i = 0z; i < m.nd; i += 1) {
		m.buf[m.nd - i - 1] = (m2 % 10): u8;
		m2 /= 10;
	};
	shift_mp(m, e2);
};

fn init_mp_dec(m: *mp, mantissa: u64, exponent: i32) void = {
	const dl = declen(mantissa);
	for (let i = 0u; i < dl; i += 1) {
		m.buf[dl - i - 1] = (mantissa % 10): u8;
		mantissa /= 10;
	};
	m.nd = dl;
	m.dp = dl: i32 + exponent;
};

fn round_up_mp(m: *mp) void = {
	for (let i = 1z; i <= m.nd; i += 1) {
		if (m.buf[m.nd - i] < 9) {
			m.buf[m.nd - i] += 1;
			return;
		} else {
			m.buf[m.nd - i] =  0;
		};
	};
	// All high
	m.buf[0] = 1;
	m.nd = 1;
	m.dp += 1;
};

// Compute the number of figs to round to for the given arguments.
fn compute_round_mp(m: *mp, f: ffmt, prec: (void | uint), flag: fflags) uint = {
	// nd is the number of sig figs that we want to end up with
	let nd: int = match (prec) {
	case void =>
		// we should only get here if Ryu did not extend past the
		// decimal point
		assert(ffpoint(flag));
		yield m.nd: int + (if (m.dp > 0) m.dp else 0);
	case let u: uint =>
		yield switch (f) {
		case ffmt::E =>
			yield u: int + 1;
		case ffmt::F =>
			yield u: int + m.dp;
		case ffmt::G =>
			yield if (u == 0) 1 else u: int;
		};
	};
	const nde = if (nd < 2) 2 else nd;
	const ndf = if (m.dp >= 0 && nd < m.dp + 1) m.dp + 1 else nd;
	if (ffpoint(flag)) {
		nd = switch (f) {
		case ffmt::E =>
			// need at least two digits, d.de0.
			yield nde;
		case ffmt::F =>
			// need enough to clear the decimal point by one.
			yield ndf;
		case ffmt::G =>
			// XXX: dup'd with the condition in ftosf_handle
			if (m.dp < -1 || m.dp - m.nd: int > 2)
				yield nde;
			yield ndf;
		};
	};
	if (nd <= 0) {
		nd = 0;
	};
	return if (nd: uint > m.nd) m.nd else nd: uint;
};

fn round_mp(m: *mp, nd: uint) void = {
	assert(nd <= m.nd);
	if (nd == m.nd)
		return;
	const oldnd = m.nd;
	m.nd = nd;
	if (m.buf[nd] > 5) {
		round_up_mp(m);
	} else if (m.buf[nd] == 5) {
		let gt = false;
		for (let i = m.nd + 1; i < oldnd; i += 1) {
			if (m.buf[i] > 0) {
				round_up_mp(m);
				gt = true;
				break;
			};
		};
		if (!gt && nd > 0 && m.buf[nd - 1] & 1 > 0) {
			round_up_mp(m);
		};
	};
};

// Remove trailing zeros.
fn trim_mp(m: *mp) void = {
	for (m.nd > 1 && m.buf[m.nd - 1] == 0) {
		m.nd -= 1;
	};
};
diff --git a/strconv/ftos_ryu.ha b/strconv/ftos_ryu.ha
new file mode 100644
index 00000000..fccc9ba2
--- /dev/null
+++ b/strconv/ftos_ryu.ha
@@ -0,0 +1,508 @@
// License: MPL-2.0
// (c) 2021 Bor Grošelj Simić <bor.groseljsimic@telemach.net>
// (c) 2021 Drew DeVault <sir@cmpwn.com>
// (c) 2021 Ember Sawady <ecs@d2evs.net>
// (c) 2021 Sudipto Mallick <smlckz@disroot.org>
// (c) 2021 Sudipto Mallick <smlckz@envs.net>
// (c) 2021 Vlad-Stefan Harbuz <vlad@vladh.net>

// Using Ryū: fast float-to-string conversion algorithm by Ulf Adams.
// https://doi.org/10.1145/3192366.3192369
// This Hare implementation is translated from the original
// C implementation here: https://github.com/ulfjack/ryu

use ascii;
use math;
use types;

type r128 = struct {
	hi: u64,
	lo: u64,
};

// TODO: use 128-bit integers when implemented
fn u128mul(a: u64, b: u64) r128 = {
	const a0 = a: u32: u64, a1 = a >> 32;
	const b0 = b: u32: u64, b1 = b >> 32;
	const p00 = a0 * b0, p01 = a0 * b1, p10 = a1 * b0, p11 = a1 * b1;
	const p00_lo = p00: u32: u64, p00_hi = p00 >> 32;
	const mid1 = p10 + p00_hi;
	const mid1_lo = mid1: u32: u64, mid1_hi = mid1 >> 32;
	const mid2 = p01 + mid1_lo;
	const mid2_lo = mid2: u32: u64, mid2_hi = mid2 >> 32;
	const r_hi = p11 + mid1_hi + mid2_hi;
	const r_lo = (mid2_lo << 32) | p00_lo;
	return r128 { hi = r_hi, lo = r_lo };
};

// TODO: Same as above
fn u128rshift(lo: u64, hi: u64, s: u32) u64 = {
	assert(0 <= s);
	assert(s <= 64);
	return (hi << (64 - s)) | (lo >> s);
};

fn pow5fac(value: u64) u32 = {
	const m_inv_5: u64 = 14757395258967641293; // 5 * m_inv_5 = 1 (mod 2^64)
	const n_div_5: u64 = 3689348814741910323;
	let count: u32 = 0;
	for (true) {
		assert(value != 0);
		value *= m_inv_5;
		if (value > n_div_5) break;
		count += 1;
	};
	return count;
};

fn pow5fac32(value: u32) u32 = {
	let count: u32 = 0;
	for (true) {
		assert(value != 0);
		const q = value / 5, r = value % 5;
		if (r != 0) break;
		value = q;
		count += 1;
	};
	return count;
};

fn ibool(b: bool) u8 = if (b) 1 else 0;

fn pow5multiple(v: u64, p: u32) bool = pow5fac(v) >= p;
fn pow5multiple32(v: u32, p: u32) bool = pow5fac32(v) >= p;

fn pow2multiple(v: u64, p: u32) bool = {
	assert(v > 0);
	assert(p < 64);
	return (v & ((1u64 << p) - 1)) == 0;
};

fn pow2multiple32(v: u32, p: u32) bool = {
	assert(v > 0);
	assert(p < 32);
	return (v & ((1u32 << p) - 1)) == 0;
};

fn mulshift64(m: u64, mul: (u64, u64), j: u32) u64 = {
	// m is maximum 55 bits
	let r0 = u128mul(m, mul.0), r1 = u128mul(m, mul.1);
	const sum = r1.lo + r0.hi;
	r1.hi += ibool(sum < r0.hi);
	return u128rshift(sum, r1.hi, j - 64);
};

fn mulshiftall64(
	m: u64,
	mul: (u64, u64),
	j: i32,
	mm_shift: u32,
) (u64, u64, u64) = {
	m <<= 1;
	const r0 = u128mul(m, mul.0), r1 = u128mul(m, mul.1);
	const lo = r0.lo, tmp = r0.hi, mid = tmp + r1.lo;
	const hi = r1.hi + ibool(mid < tmp);
	const lo2 = lo + mul.0;
	const mid2 = mid + mul.1 + ibool(lo2 < lo);
	const hi2 = hi + ibool(mid2 < mid);
	const v_plus = u128rshift(mid2, hi2, (j - 64 - 1): u32);
	const v_minus = if (mm_shift == 1) {
		const lo3 = lo - mul.0;
		const mid3 = mid - mul.1 - ibool(lo3 > lo);
		const hi3 = hi - ibool(mid3 > mid);
		yield u128rshift(mid3, hi3, (j - 64 - 1): u32);
	} else {
		const lo3 = lo + lo;
		const mid3 = mid + mid + ibool(lo3 < lo);
		const hi3 = hi + hi + ibool(mid3 < mid);
		const lo4 = lo3 - mul.0;
		const mid4 = mid3 - mul.1 - ibool(lo4 > lo3);
		const hi4 = hi3 - ibool(mid4 > mid3);
		yield u128rshift(mid4, hi4, (j - 64): u32);
	};
	const v_rounded = u128rshift(mid, hi, (j - 64 - 1): u32);
	return (v_plus, v_rounded, v_minus);
};

fn mulshift32(m: u32, a: u64, s: u32) u32 = {
	assert(s > 32);
	const a_lo = a: u32: u64, a_hi = a >> 32;
	const b0 = m * a_lo, b1 = m * a_hi;
	const sum = (b0 >> 32) + b1, ss = sum >> (s - 32);
	assert(ss <= types::U32_MAX);
	return ss: u32;
};

fn mulpow5inv_divpow2(m: u32, q: u32, j: i32) u32 = {
	const pow5 = f64computeinvpow5(q);
	return mulshift32(m, pow5.1 + 1, j: u32);
};

fn mulpow5_divpow2(m: u32, i: u32, j: i32) u32 = {
	const pow5 = f64computepow5(i);
	return mulshift32(m, pow5.1, j: u32);
};

fn log2pow5(e: u32) u32 = {
	assert(e <= 3528);
	return ((e * 1217359) >> 19);
};

fn ceil_log2pow5(e: u32) u32 = log2pow5(e) + 1;

fn pow5bits(e: u32) u32 = ceil_log2pow5(e);

fn log10pow2(e: u32) u32 = {
	assert(e <= 1650);
	return ((e * 78913) >> 18);
};

fn log10pow5(e: u32) u32 = {
	assert(e <= 2620);
	return ((e * 732923) >> 20);
};

def F64_POW5_INV_BITCOUNT: u8 = 125;
def F64_POW5_BITCOUNT: u8 = 125;

def F32_POW5_INV_BITCOUNT: u8 = F64_POW5_INV_BITCOUNT - 64;
def F32_POW5_BITCOUNT: u8 = F64_POW5_BITCOUNT - 64;

const F64_POW5_INV_SPLIT2: [15][2]u64 = [
	[1, 2305843009213693952],
	[5955668970331000884, 1784059615882449851],
	[8982663654677661702, 1380349269358112757],
	[7286864317269821294, 2135987035920910082],
	[7005857020398200553, 1652639921975621497],
	[17965325103354776697, 1278668206209430417],
	[8928596168509315048, 1978643211784836272],
	[10075671573058298858, 1530901034580419511],
	[597001226353042382, 1184477304306571148],
	[1527430471115325346, 1832889850782397517],
	[12533209867169019542, 1418129833677084982],
	[5577825024675947042, 2194449627517475473],
	[11006974540203867551, 1697873161311732311],
	[10313493231639821582, 1313665730009899186],
	[12701016819766672773, 2032799256770390445],
];

const POW5_INV_OFFSETS: [19]u32 = [
	0x54544554, 0x04055545, 0x10041000, 0x00400414, 0x40010000, 0x41155555,
	0x00000454, 0x00010044, 0x40000000, 0x44000041, 0x50454450, 0x55550054,
	0x51655554, 0x40004000, 0x01000001, 0x00010500, 0x51515411, 0x05555554,
	0x00000000
];

const F64_POW5_SPLIT2: [13][2]u64 = [
	[0, 1152921504606846976],
	[0, 1490116119384765625],
	[1032610780636961552, 1925929944387235853],
	[7910200175544436838, 1244603055572228341],
	[16941905809032713930, 1608611746708759036],
	[13024893955298202172, 2079081953128979843],
	[6607496772837067824, 1343575221513417750],
	[17332926989895652603, 1736530273035216783],
	[13037379183483547984, 2244412773384604712],
	[1605989338741628675, 1450417759929778918],
	[9630225068416591280, 1874621017369538693],
	[665883850346957067, 1211445438634777304],
	[14931890668723713708, 1565756531257009982]
];

const POW5_OFFSETS: [21]u32 = [
	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, 0x59695995,
	0x55545555, 0x56555515, 0x41150504, 0x40555410, 0x44555145, 0x44504540,
	0x45555550, 0x40004000, 0x96440440, 0x55565565, 0x54454045, 0x40154151,
	0x55559155, 0x51405555, 0x00000105
];

def POW5_TABLE_SZ: u8 = 26;

const POW5_TABLE: [POW5_TABLE_SZ]u64 = [
	1u64, 5u64, 25u64, 125u64, 625u64, 3125u64, 15625u64, 78125u64,
	390625u64, 1953125u64, 9765625u64, 48828125u64, 244140625u64,
	1220703125u64, 6103515625u64, 30517578125u64, 152587890625u64,
	762939453125u64, 3814697265625u64, 19073486328125u64, 95367431640625u64,
	476837158203125u64, 2384185791015625u64, 11920928955078125u64,
	59604644775390625u64, 298023223876953125u64 //, 1490116119384765625u64
];

fn f64computeinvpow5(i: u32) (u64, u64) = {
	const base = ((i + POW5_TABLE_SZ - 1) / POW5_TABLE_SZ): u32;
	const base2 = base * POW5_TABLE_SZ;
	const mul = F64_POW5_INV_SPLIT2[base];
	const off = base2 - i;
	if (off == 0) {
		return (mul[0], mul[1]);
	};
	const m = POW5_TABLE[off];
	const r1 = u128mul(m, mul[1]), r0 = u128mul(m, mul[0] - 1);
	let high1 = r1.hi, low1 = r1.lo, high0 = r0.hi, low0 = r0.lo;
	const sum = high0 + low1;
	if (sum < high0) {
		high1 += 1;
	};
	const delta = pow5bits(base2) - pow5bits(i);
	const res0 = u128rshift(low0, sum, delta) + 1 +
		((POW5_INV_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3);
	const res1 = u128rshift(sum, high1, delta);
	return (res0, res1);
};

fn f64computepow5(i: u32) (u64, u64) = {
	const base = i / POW5_TABLE_SZ, base2 = base * POW5_TABLE_SZ;
	const mul = F64_POW5_SPLIT2[base];
	const off = i - base2;
	if (off == 0) {
		return (mul[0], mul[1]);
	};
	const m = POW5_TABLE[off];
	const r1 = u128mul(m, mul[1]), r0 = u128mul(m, mul[0]);
	let high1 = r1.hi, low1 = r1.lo, high0 = r0.hi, low0 = r0.lo;
	const sum = high0 + low1;
	if (sum < high0) {
		high1 += 1;
	};
	const delta = pow5bits(i) - pow5bits(base2);
	const res0 = u128rshift(low0, sum, delta) +
		((POW5_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3);
	const res1 = u128rshift(sum, high1, delta);
	return (res0, res1);
};

type decf64 = struct {
	mantissa: u64,
	exponent: i32,
};

fn f64todecf64(mantissa: u64, exponent: u32) decf64 = {
	let e2 = (math::F64_EXPONENT_BIAS + math::F64_MANTISSA_BITS + 2): i32;
	let m2: u64 = 0;
	if (exponent == 0) {
		e2 = 1 - e2;
		m2 = mantissa;
	} else {
		e2 = (exponent: i32) - e2;
		m2 = (1u64 << math::F64_MANTISSA_BITS) | mantissa;
	};
	const accept_bounds = (m2 & 1) == 0;
	const mv = 4 * m2;
	const mm_shift = ibool(mantissa != 0 || exponent <= 1);
	let vp: u64 = 0, vr: u64 = 0, vm: u64 = 0;
	let e10: i32 = 0;
	let vm_trailing_zeros = false, vr_trailing_zeros = false;
	if (e2 >= 0) {
		const q = log10pow2(e2: u32) - ibool(e2 > 3);
		e10 = q: i32;
		const k = F64_POW5_INV_BITCOUNT + pow5bits(q) - 1;
		const i = -e2 + (q + k): i32;
		let pow5 = f64computeinvpow5(q);
		const res = mulshiftall64(m2, pow5, i, mm_shift);
		vp = res.0; vr = res.1; vm = res.2;
		if (q <= 21) {
			if ((mv - 5 * (mv / 5)) == 0) {
				vr_trailing_zeros = pow5multiple(mv, q);
			} else if (accept_bounds) {
				vm_trailing_zeros = pow5multiple(mv - 1 -
					mm_shift, q);
			} else {
				vp -= ibool(pow5multiple(mv + 2, q));
			};
		};
	} else {
		const q = log10pow5((-e2): u32) - ibool(-e2 > 1);
		e10 = e2 + (q: i32);
		const i = -e2 - (q: i32);
		const k = pow5bits(i: u32): i32 - F64_POW5_BITCOUNT: i32;
		const j = (q: i32) - k;
		let pow5 = f64computepow5(i: u32);
		const res = mulshiftall64(m2, pow5, j, mm_shift);
		vp = res.0; vr = res.1; vm = res.2;
		if (q <= 1) {
			vr_trailing_zeros = true;
			if (accept_bounds) {
				vm_trailing_zeros = mm_shift == 1;
			} else {
				vp -= 1;
			};
		} else if (q < 63) {
			vr_trailing_zeros = pow2multiple(mv, q);
		};
	};
	let removed: i32 = 0, last_removed_digit: u8 = 0;
	let output: u64 = 0;
	if (vm_trailing_zeros || vr_trailing_zeros) {
		for (true) {
			const vpby10 = vp / 10, vmby10 = vm / 10;
			if (vpby10 <= vmby10) break;
			const vmmod10 = (vm: u32) - 10 * (vmby10: u32);
			const vrby10 = vr / 10;
			const vrmod10 = (vr: u32) - 10 * (vrby10: u32);
			vm_trailing_zeros &&= vmmod10 == 0;
			vr_trailing_zeros &&= last_removed_digit == 0;
			last_removed_digit = vrmod10: u8;
			vr = vrby10; vp = vpby10; vm = vmby10;
			removed += 1;
		};
		if (vm_trailing_zeros) {
			for (true) {
				const vmby10 = vm / 10;
				const vmmod10 = (vm: u32) - 10 * (vmby10: u32);
				if (vmmod10 != 0) break;
				const vpby10 = vp / 10, vrby10 = vr / 10;
				const vrmod10 = (vr: u32) - 10 * (vrby10: u32);
				vr_trailing_zeros &&= last_removed_digit == 0;
				last_removed_digit = vrmod10: u8;
				vr = vrby10; vp = vpby10; vm = vmby10;
				removed += 1;
			};
		};
		if (vr_trailing_zeros && last_removed_digit == 5 &&
				(vr & 1 == 0)) {
			// round to even
			last_removed_digit = 4;
		};
		output = vr + ibool((vr == vm &&
			(!accept_bounds || !vm_trailing_zeros)) ||
			last_removed_digit >= 5);
	} else {
		let round_up = false;
		const vpby100 = vp / 100, vmby100 = vm / 100;
		if (vpby100 > vmby100) {
			const vrby100 = vr / 100;
			const vrmod100 = (vr: u32) - 100 * (vrby100: u32);
			round_up = vrmod100 >= 50;
			vr = vrby100; vp = vpby100; vm = vmby100;
			removed += 2;
		};
		for (true) {
			const vmby10 = vm / 10, vpby10 = vp / 10;
			if (vpby10 <= vmby10) break;
			const vrby10 = vr / 10;
			const vrmod10 = (vr: u32) - 10 * (vrby10: u32);
			round_up = vrmod10 >= 5;
			vr = vrby10; vp = vpby10; vm = vmby10;
			removed += 1;
		};
		output = vr + ibool(vr == vm || round_up);
	};
	const exp = e10 + removed;
	return decf64 { exponent = exp, mantissa = output };
};

type decf32 = struct {
	mantissa: u32,
	exponent: i32,
};

fn f32todecf32(mantissa: u32, exponent: u32) decf32 = {
	let e2 = (math::F32_EXPONENT_BIAS + math::F32_MANTISSA_BITS + 2): i32;
	let m2: u32 = 0;
	if (exponent == 0) {
		e2 = 1 - e2;
		m2 = mantissa;
	} else {
		e2 = (exponent: i32) - e2;
		m2 = (1u32 << math::F32_MANTISSA_BITS: u32) | mantissa;
	};
	const accept_bounds = (m2 & 1) == 0;
	const mv = 4 * m2, mp = mv + 2;
	const mm_shift = ibool(mantissa != 0 || exponent <= 1);
	const mm = mv - 1 - mm_shift;
	let vr: u32 = 0, vp: u32 = 0, vm: u32 = 0;
	let e10: i32 = 0;
	let vm_trailing_zeroes = false, vr_trailing_zeroes = false;
	let last_removed_digit: u8 = 0;
	if (e2 >= 0) {
		const q = log10pow2(e2: u32);
		e10 = q: i32;
		const k = F32_POW5_INV_BITCOUNT + pow5bits(q) - 1;
		const i = -e2 + (q + k): i32;
		vr = mulpow5inv_divpow2(mv, q, i);
		vp = mulpow5inv_divpow2(mp, q, i);
		vm = mulpow5inv_divpow2(mm, q, i);
		if (q != 0 && (vp - 1) / 10 <= vm / 10) {
			const l = F32_POW5_INV_BITCOUNT + pow5bits(q - 1) - 1;
			last_removed_digit = (mulpow5inv_divpow2(mv, q - 1,
				-e2 + ((q + l): i32) - 1) % 10): u8;
		};
		if (q <= 9) {
			if (mv % 5 == 0) {
				vr_trailing_zeroes = pow5multiple32(mv, q);
			} else if (accept_bounds) {
				vm_trailing_zeroes = pow5multiple32(mm, q);
			} else {
				vp -= ibool(pow5multiple32(mp, q));
			};
		};
	} else {
		const q = log10pow5((-e2): u32);
		e10 = (q: i32) + e2;
		const i = (-e2 - (q: i32)): u32;
		const k = pow5bits(i) - F32_POW5_BITCOUNT;
		let j = (q: i32) - k: i32;
		vr = mulpow5_divpow2(mv, i, j);
		vp = mulpow5_divpow2(mp, i, j);
		vm = mulpow5_divpow2(mm, i, j);
		if (q != 0 && (vp - 1) / 10 <= vm / 10) {
			j = (q: i32) - 1 - (pow5bits(i + 1): i32 -
				F32_POW5_BITCOUNT: i32);
			last_removed_digit = (mulpow5_divpow2(mv,
				(i + 1), j) % 10): u8;
		};
		if (q <= 1) {
			vr_trailing_zeroes = true;
			if (accept_bounds) {
				vm_trailing_zeroes = mm_shift == 1;
			} else {
				vp -= 1;
			};
		} else if (q < 31) {
			vr_trailing_zeroes = pow2multiple32(mv, q - 1);
		};
	};
	let removed: i32 = 0, output: u32 = 0;
	if (vm_trailing_zeroes || vr_trailing_zeroes) {
		for (vp / 10 > vm / 10) {
			vm_trailing_zeroes &&= (vm - (vm / 10) * 10) == 0;
			vr_trailing_zeroes &&= last_removed_digit == 0;
			last_removed_digit = (vr % 10): u8;
			vr /= 10;
			vp /= 10;
			vm /= 10;
			removed += 1;
		};
		if (vm_trailing_zeroes) {
			for (vm % 10 == 0) {
				vr_trailing_zeroes &&= last_removed_digit == 0;
				last_removed_digit = (vr % 10): u8;
				vr /= 10;
				vp /= 10;
				vm /= 10;
				removed += 1;
			};
		};
		if (vr_trailing_zeroes && last_removed_digit == 5 &&
				vr % 2 == 0) {
			// round to even
			last_removed_digit = 4;
		};
		output = vr + ibool((vr == vm &&
			(!accept_bounds || !vm_trailing_zeroes)) ||
			last_removed_digit >= 5);
	} else {
		for (vp / 10 > vm / 10) {
			last_removed_digit = (vr % 10): u8;
			vr /= 10;
			vp /= 10;
			vm /= 10;
			removed += 1;
		};
		output = vr + ibool(vr == vm || last_removed_digit >= 5);
	};
	const exp = e10 + removed;
	return decf32 { mantissa = output, exponent = exp };
};

def F32_DECIMAL_DIGITS: i32 = 9;
def F64_DECIMAL_DIGITS: i32 = 17;
diff --git a/strconv/ftostof+test.ha b/strconv/ftostof+test.ha
deleted file mode 100644
index 1264e7b0..00000000
--- a/strconv/ftostof+test.ha
@@ -1,30 +0,0 @@
use math;

// test round-trip accuracy of ftos followed by stof.
@test fn ftostof() void = {
	const tcs: []f64 = [
		1.0,
		0.0,
		0.1,
		-0.0,
		1f64 / 3f64,
		4f64 / 3f64,

		1f64 / 0f64,
		-1f64 / 0f64,

		0f64 / 0f64,
	];
	for (let i = 0z; i < len(tcs); i += 1) {
		const res64 = stof64(f64tos(tcs[i]))!;
		const res32 = stof32(f32tos(tcs[i]: f32))!;
		// there can be multiple NaNs. only test isnan.
		if (math::isnan(tcs[i])) {
			assert(math::isnan(res64));
			assert(math::isnan(res32));
		} else {
			assert(tcs[i] == res64);
			assert(tcs[i]: f32 == res32);
		};
	};
};
diff --git a/strconv/numeric.ha b/strconv/numeric.ha
index e38e510e..bd45009f 100644
--- a/strconv/numeric.ha
+++ b/strconv/numeric.ha
@@ -86,6 +86,28 @@ export fn floatingtosb(n: types::floating, b: base) const str = {
// [[strings::dup]] to duplicate the result.
export fn floatingtos(n: types::floating) const str = floatingtosb(n, base::DEC);

// Converts any [[types::floating]] to a string in base 10. The return value
// must be freed.
//
// A precision of void yields the smallest number of digits that can be parsed
// into the exact same number. Otherwise, the meaning depends on f:
// - ffmt::F, ffmt::E: Number of digits after the decimal point.
// - ffmt::G: Number of significant digits. 0 is equivalent to 1 precision, and
//   trailing zeros are removed.
export fn floatingtosf(
	n: types::floating,
	f: ffmt,
	prec: (void | uint),
	flag: fflags,
) str = {
	match (n) {
	case let v: f32 =>
		return f32tosf(v, f, prec, flag);
	case let v: f64 =>
		return f64tosf(v, f, prec, flag);
	};
};

// Converts any [[types::numeric]] to a string in a given base. The return value
// is statically allocated and will be overwritten on subsequent calls; see
// [[strings::dup]] to duplicate the result.
-- 
2.42.0
hare/patches: SUCCESS in 1m36s

[strconv: implement ftosf.][0] v3 from [Joe Finney][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/44928
[1]: mailto:me@spxtr.net

✓ #1060798 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1060798
✓ #1060797 SUCCESS hare/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/1060797