~sircmpwn/hare-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
2 2

[PATCH harec v4] lex: allow digit separators

Details
Message ID
<20240414233754.92305-2-malloryadams@fastmail.com>
DKIM signature
pass
Download raw message
Patch: +48 -5
From: jturtle <jturtl@pm.me>

Allows the separator byte '_' to be placed between digits.

Co-authored-by: Jonas Fentker <jonas@fentker.eu>
Co-authored-by: Mallory Adams <malloryadams@fastmail.com>
Signed-off-by: Mallory Adams <malloryadams@fastmail.com>
---
 src/lex.c            | 15 +++++++++++----
 tests/00-literals.ha | 38 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/src/lex.c b/src/lex.c
index c97355f..506f56c 100644
--- a/src/lex.c
+++ b/src/lex.c
@@ -339,7 +339,7 @@ lex_number(struct lexer *lexer, struct token *out)
 	};
 	static_assert((BIN | OCT | HEX | DEC) == DEC, "DEC bits must be a superset of all other bases");
 	enum flags {
-		FLT = 3, EXP, SUFF, DIG,
+		FLT = 3, EXP, SUFF, DIG, SEP,
 	};

 	static const char chrs[][24] = {
@@ -349,7 +349,7 @@ lex_number(struct lexer *lexer, struct token *out)
 		[HEX] = "0123456789abcdefABCDEF",
 	};

-	static const char matching_states[0x80][6] = {
+	static const char matching_states[0x80][7] = {
 		['.'] = {DEC, HEX, 0},
 		['e'] = {DEC, DEC | 1<<FLT, 0},
 		['E'] = {DEC, DEC | 1<<FLT, 0},
@@ -361,13 +361,14 @@ lex_number(struct lexer *lexer, struct token *out)
 		['u'] = {BIN, OCT, HEX, DEC, DEC | 1<<EXP, 0},
 		['z'] = {BIN, OCT, HEX, DEC, DEC | 1<<EXP, 0},
 		['f'] = {DEC, DEC | 1<<FLT, DEC | 1<<EXP, DEC | 1<<FLT | 1<<EXP, 0},
+		['_'] = {BIN, OCT, HEX, DEC, DEC | 1<<FLT, HEX | 1<<FLT, 0},
 	};
 	int state = DEC, base = 10, oldstate = DEC;
 	uint32_t c = next(lexer, &out->loc, true), last = 0;
 	assert(c != C_EOF && c <= 0x7F && isdigit(c));
 	if (c == '0') {
 		c = next(lexer, NULL, true);
-		if (c <= 0x7F && isdigit(c)) {
+		if (c <= 0x7F && (isdigit(c) || c == '_')) {
 			error(out->loc, "Leading zero in base 10 literal");
 		} else if (c == 'b') {
 			state = BIN | 1 << DIG;
@@ -387,9 +388,11 @@ lex_number(struct lexer *lexer, struct token *out)
 	size_t exp = 0, suff = 0;
 	do {
 		if (strchr(chrs[state & MASK], c)) {
-			state &= ~(1 << DIG);
+			state &= ~(1 << DIG | 1 << SEP);
 			last = c;
 			continue;
+		} else if (state & 1 << SEP) {
+			error(out->loc, "Expected digit after separator");
 		} else if (c > 0x7f || !strchr(matching_states[c], state)) {
 			goto end;
 		}
@@ -421,6 +424,10 @@ lex_number(struct lexer *lexer, struct token *out)
 			state |= DEC | 1 << SUFF;
 			suff = lexer->buflen - 1;
 			break;
+		case '_':
+			consume(lexer, 1);
+			state |= 1 << SEP;
+			break;
 		default:
 			goto end;
 		}
diff --git a/tests/00-literals.ha b/tests/00-literals.ha
index 04cded8..f1064c8 100644
--- a/tests/00-literals.ha
+++ b/tests/00-literals.ha
@@ -260,7 +260,8 @@ fn numeric() void = {
 		0.0e01, 0.0e+01, 0.0e+00, 0.0e-00, 0e-0, 0e-00, 0e-1, 0e-01,
 		0x0p0, 0x0p1, 0x0p-1, 0x0p+1,
 		0x0.0p0, 0x0.00p0, 0x0.0p1, 0x0.00p1, 0x0.0p+0, 0x0.0p+1, 0x0.0p-0, 0x0.0p00,
-		0x0.0p01, 0x0.0p+01, 0x0.0p+00, 0x0.0p-00, 0x0p-0, 0x0p-00, 0x0p-1, 0x0p-01];
+		0x0.0p01, 0x0.0p+01, 0x0.0p+00, 0x0.0p-00, 0x0p-0, 0x0p-00, 0x0p-1, 0x0p-01,
+		0.00_00];
 	for (let j = 0z; j < len(f); j+= 1) {
 		assert(f[j] == 0.0);
 	};
@@ -295,6 +296,22 @@ fn numeric() void = {
 	assert(0x0P0 == 0.0);
 	assert(0E0 == 0);

+	// separators
+	assert(1_000 == 1000);
+	assert(1_000_000 == 1000000);
+	assert(1_0 == 10);
+	assert(0xAB_CD == 0xABCD);
+	assert(0b1_0_0_1 == 0b1001);
+	assert(0o542_11 == 0o54211);
+	assert(1_6e2 == 16e2);
+	assert(1_000u32 == 1000u32);
+	assert(0x1B_AD_C0_DEu32 == 0x1BADC0DE);
+	assert(1_000.0f32 == 1000f32);
+	assert(0.00_01 == 0.0001);
+	assert(1_00.00_1 == 100.001);
+	assert(1_6.0e2 == 16.0e2);
+	assert(1_6e-2 == 16e-2);
+
 	// double tuple subscript special case
 	let tup = (('a', 'b'), 'c');
 	assert(tup.0.0 == 'a');
@@ -330,6 +347,10 @@ fn numeric() void = {
 	assert(tup.0.0x0 == 'a');
 	assert(tup.0x0.0x0 == 'a');

+	// tuple with separator
+	let tup = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k');
+	assert(tup.1_0 == 'k');
+
 	// zero with large exponent
 	assert(0e10000000 == 0);
 	assert(0e010000000 == 0);
@@ -397,6 +418,7 @@ fn numeric() void = {
 		"05p3", "00000010p3", "00.0p3", "01.0p3",
 		"05p+3", "00000010p+3", "00.0p+3", "01.0p+3",
 		"05p-3", "00000010p-3", "00.0p-3", "01.0p-3",
+		"0_10",

 		// invalid sequences of special characters
 		"1.",
@@ -479,6 +501,14 @@ fn numeric() void = {
 		"1.p-1", "1p-.1",
 		"1.1p-", "1p-1.",
 		"1p-1.1",
+
+		// invalid digit separators
+		"1_", "100_", "1_000_",
+		"1__0", "1__000_0", "1_000__0", "1___0",
+		"2e_8", "2_e8", "2e8_", "3e1__1", "2e+_5", "2e_+5",
+		"0x_FFFF", "0b_1010", "0b1111_0000_", "0o6__6",
+		"0_b1010", "0_o77", "0_xFF", "_0b1010", "_0o77", "_0xFF",
+		"2e1_6", "0x2p1_0", "2e-1_0",
 	];
 	let extra: [_]str = [
 		"let t = 4e-0i;", "let t = 4e-1i;",
@@ -493,6 +523,12 @@ fn numeric() void = {

 		// exponent overflow
 		"let t: u64 = 1e1000;",
+
+		"let t = 100u3_2;",
+		"let t = 100u32_;",
+		"let t = 100u_32;",
+		"let t = 100_u32;",
+		"let t = _100u32;",
 	];
 	let suffix = [";", "i;", "i8;", "f32;"];
 	let buf: [256]u8 = [0...];
-- 
2.44.0

[harec/patches] build success

builds.sr.ht <builds@sr.ht>
Details
Message ID
<D0K8PS6JUP5D.3MY65EEP4ENMG@fra01>
In-Reply-To
<20240414233754.92305-2-malloryadams@fastmail.com> (view parent)
DKIM signature
missing
Download raw message
harec/patches: SUCCESS in 40s

[lex: allow digit separators][0] v4 from [Mallory Adams][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/51024
[1]: malloryadams@fastmail.com

✓ #1195962 SUCCESS harec/patches/openbsd.yml https://builds.sr.ht/~sircmpwn/job/1195962
✓ #1195959 SUCCESS harec/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/1195959
✓ #1195960 SUCCESS harec/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1195960
✓ #1195961 SUCCESS harec/patches/netbsd.yml  https://builds.sr.ht/~sircmpwn/job/1195961
Details
Message ID
<D0KY1P7YW82C.7WIZK68AUIK0@turminal.net>
In-Reply-To
<20240414233754.92305-2-malloryadams@fastmail.com> (view parent)
DKIM signature
pass
Download raw message
Thanks!

To git@git.sr.ht:~sircmpwn/harec
   868b7f0..5d525a5  master -> master
Reply to thread Export thread (mbox)