~sircmpwn/hare-dev

harec: lex: don't hang on big integer exponents v1 PROPOSED

Bor Grošelj Simić: 4
 lex: don't hang on big integer exponents
 lex: add tests for 53f2eb16
 lex: don't ignore invalid literal errors
 lex: error out on leading zeros in base 10 literals

 7 files changed, 53 insertions(+), 21 deletions(-)
#933536 alpine.yml success
#933537 freebsd.yml success
#933538 netbsd.yml success
harec/patches: SUCCESS in 1m21s

[lex: don't hang on big integer exponents][0] from [Bor Grošelj Simić][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/38677
[1]: mailto:bgs@turminal.net

✓ #933536 SUCCESS harec/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/933536
✓ #933538 SUCCESS harec/patches/netbsd.yml  https://builds.sr.ht/~sircmpwn/job/933538
✓ #933537 SUCCESS harec/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/933537
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~sircmpwn/hare-dev/patches/38677/mbox | git am -3
Learn more about email & git

[PATCH harec 1/4] lex: don't hang on big integer exponents Export this patch

(10^m) mod (2^n) will always be zero for m >= n, so we can speed up such
cases.

References: https://todo.sr.ht/~sircmpwn/hare/737
Signed-off-by: Bor Grošelj Simić <bgs@turminal.net>
---
 src/lex.c             | 37 +++++++++++++++++++++----------------
 tests/00-constants.ha |  6 +++++-
 2 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/src/lex.c b/src/lex.c
index e4a56cf..74067b7 100644
--- a/src/lex.c
+++ b/src/lex.c
@@ -297,6 +297,19 @@ lex_name(struct lexer *lexer, struct token *out)
	return out->token;
}

/*
 * Returns 10^exponent reduced modulo 2^N, where N is the bit width of
 * uintmax_t (unsigned multiplication wraps). A non-positive exponent yields 1.
 *
 * Since 10^m = 2^m * 5^m, the wrapped product is exactly zero once m >= N and
 * stays zero afterwards, so the loop stops as soon as that happens. This
 * bounds the work to at most N iterations no matter how large the exponent
 * is, without hard-coding the width of uintmax_t.
 */
static uintmax_t
compute_exp(int exponent)
{
	uintmax_t out = 1;
	for (int i = 0; i < exponent && out != 0; i++) {
		out *= 10;
	}
	return out;
}

static uint32_t
lex_literal(struct lexer *lexer, struct token *out)
{
@@ -488,16 +501,12 @@ finalize:
	case STORAGE_UINT:
	case STORAGE_U64:
	case STORAGE_SIZE:
		out->uval = strtoumax(lexer->buf, NULL, base);
		for (intmax_t i = 0; i < exponent; i++) {
			out->uval *= 10;
		}
		out->uval = strtoumax(lexer->buf, NULL, base)
			* compute_exp(exponent);
		break;
	case STORAGE_ICONST:
		out->uval = strtoumax(lexer->buf, NULL, base);
		for (intmax_t i = 0; i < exponent; i++) {
			out->uval *= 10;
		}
		out->uval = strtoumax(lexer->buf, NULL, base)
			* compute_exp(exponent);
		if (out->uval > (uintmax_t)INT64_MAX) {
			out->storage = STORAGE_U64;
			break;
@@ -508,19 +517,15 @@ finalize:
	case STORAGE_I32:
	case STORAGE_INT:
	case STORAGE_I64:
		out->uval = strtoumax(lexer->buf, NULL, base);
		for (intmax_t i = 0; i < exponent; i++) {
			out->uval *= 10;
		}
		out->uval = strtoumax(lexer->buf, NULL, base)
			* compute_exp(exponent);
		if (out->uval == (uintmax_t)INT64_MIN) {
			// XXX: Hack
			out->ival = INT64_MIN;
			break;
		}
		out->ival = strtoimax(lexer->buf, NULL, base);
		for (intmax_t i = 0; i < exponent; i++) {
			out->ival *= 10;
		}
		out->ival = strtoimax(lexer->buf, NULL, base)
			* (intmax_t)compute_exp(exponent);
		break;
	case STORAGE_F32:
	case STORAGE_F64:
diff --git a/tests/00-constants.ha b/tests/00-constants.ha
index b7c513f..22b4943 100644
--- a/tests/00-constants.ha
+++ b/tests/00-constants.ha
@@ -137,7 +137,7 @@ fn aggregates() void = {
	u2arr as [3]u8;
};

export fn main() void = {
fn basics() void = {
	let i1 = 13, i2 = 13i, i3 = 13i8, i4 = 13i16, i5 = 13i32, i6 = 13i64;
	let u1 = 13u, u2 = 13z, u3 = 13u8, u4 = 13u16, u5 = 13u32, u6 = 13u64;
	let n1 = -13, n2 = -13u;
@@ -149,9 +149,13 @@ export fn main() void = {
		r15 = '\U12345678';
	let f1 = 1.0, f2 = 1f32, f3 = 1.0e2, f4 = 1.0f64;
	let f5 = 1.23e+45, f6 = 9.87e-65, f7 = 1e-7, f8 = 5.0e-324;
	let ie1 = 1e5i, ie2 = 1e50000000000i;
};

// Entry point: runs the basics, assignment and aggregates test functions, in
// that order.
export fn main() void = {
	// The interaction between constants and result type reduction is tested
	// in 30-reduction.c
	basics();
	assignment();
	aggregates();
};
-- 
2.36.4
Hm, shouldn't we error out in this case rather than treating it like e0?
It's just an out-of-range literal.

[PATCH harec 2/4] lex: add tests for 53f2eb16 Export this patch

Signed-off-by: Bor Grošelj Simić <bgs@turminal.net>
---
 tests/00-constants.ha | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/00-constants.ha b/tests/00-constants.ha
index 22b4943..1ba16bb 100644
--- a/tests/00-constants.ha
+++ b/tests/00-constants.ha
@@ -150,6 +150,18 @@ fn basics() void = {
	let f1 = 1.0, f2 = 1f32, f3 = 1.0e2, f4 = 1.0f64;
	let f5 = 1.23e+45, f6 = 9.87e-65, f7 = 1e-7, f8 = 5.0e-324;
	let ie1 = 1e5i, ie2 = 1e50000000000i;

	let failures: [_]str = [
		"let x = 0x;",
		"let x = 0xz;",
		"let x = 0xu;",
		"let x = 0xu64;",
		"let x = 0be+0;",
		"let x = 0bf64;",
	];
	for (let i = 0z; i < len(failures); i += 1) {
		assert(compile(failures[i]) as exited != EXIT_SUCCESS);
	};
};

export fn main() void = {
-- 
2.36.4

[PATCH harec 3/4] lex: don't ignore invalid literal errors Export this patch

The lexer completely ignores T_ERROR tokens, so using them for signaling
errors doesn't have the desired effect.

Signed-off-by: Bor Grošelj Simić <bgs@turminal.net>
---
 src/lex.c             | 4 +---
 tests/00-constants.ha | 5 +++++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/lex.c b/src/lex.c
index 74067b7..0080060 100644
--- a/src/lex.c
+++ b/src/lex.c
@@ -418,9 +418,7 @@ lex_literal(struct lexer *lexer, struct token *out)

finalize:
	if (!started) {
		out->token = T_ERROR;
		consume(lexer, -1);
		return out->token;
		error(&out->loc, "Invalid literal");
	}
	lexer->require_int = false;
	out->token = T_LITERAL;
diff --git a/tests/00-constants.ha b/tests/00-constants.ha
index 1ba16bb..5e2bf01 100644
--- a/tests/00-constants.ha
+++ b/tests/00-constants.ha
@@ -158,6 +158,11 @@ fn basics() void = {
		"let x = 0xu64;",
		"let x = 0be+0;",
		"let x = 0bf64;",

		"let x = 0b41;",
		"let x = 0b14;",
		"let x = 0o82;",
		"let x = 0o28;",
	];
	for (let i = 0z; i < len(failures); i += 1) {
		assert(compile(failures[i]) as exited != EXIT_SUCCESS);
-- 
2.36.4
harec/patches: SUCCESS in 1m21s

[lex: don't hang on big integer exponents][0] from [Bor Grošelj Simić][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/38677
[1]: mailto:bgs@turminal.net

✓ #933536 SUCCESS harec/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/933536
✓ #933538 SUCCESS harec/patches/netbsd.yml  https://builds.sr.ht/~sircmpwn/job/933538
✓ #933537 SUCCESS harec/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/933537

[PATCH harec 4/4] lex: error out on leading zeros in base 10 literals Export this patch

Signed-off-by: Bor Grošelj Simić <bgs@turminal.net>
---
 src/lex.c             | 6 +++++-
 tests/00-constants.ha | 4 ++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/lex.c b/src/lex.c
index 0080060..ca2c791 100644
--- a/src/lex.c
+++ b/src/lex.c
@@ -316,7 +316,7 @@ lex_literal(struct lexer *lexer, struct token *out)
	uint32_t c = next(lexer, &out->loc, true);
	assert(c != C_EOF && c <= 0x7F && isdigit(c));

	bool started = false;
	bool started = false, leadingzero = false;
	int base = 10;
	const char *basechrs = "0123456789";
	if (c == '0') {
@@ -338,6 +338,7 @@ lex_literal(struct lexer *lexer, struct token *out)
			break;
		default:
			started = true;
			leadingzero = true;
			push(lexer, c, true);
			break;
		}
@@ -420,6 +421,9 @@ finalize:
	if (!started) {
		error(&out->loc, "Invalid literal");
	}
	if (leadingzero && lexer->buflen >= 2 && strchr(basechrs, lexer->buf[1])) {
		error(&out->loc, "Leading zero in base 10 literal");
	}
	lexer->require_int = false;
	out->token = T_LITERAL;
	if (isfloat) {
diff --git a/tests/00-constants.ha b/tests/00-constants.ha
index 5e2bf01..a29e669 100644
--- a/tests/00-constants.ha
+++ b/tests/00-constants.ha
@@ -163,6 +163,10 @@ fn basics() void = {
		"let x = 0b14;",
		"let x = 0o82;",
		"let x = 0o28;",

		// leading zeros
		"let x = 05;"
		"let x = 0000000010;"
	];
	for (let i = 0z; i < len(failures); i += 1) {
		assert(compile(failures[i]) as exited != EXIT_SUCCESS);
-- 
2.36.4