Bor Grošelj Simić: 4 patches
lex: don't hang on big integer exponents
lex: add tests for 53f2eb16
lex: don't ignore invalid literal errors
lex: error out on leading zeros in base 10 literals
7 files changed, 53 insertions(+), 21 deletions(-)
harec/patches: SUCCESS in 1m21s [lex: don't hang on big integer exponents][0] from [Bor Grošelj Simić][1] [0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/38677 [1]: mailto:bgs@turminal.net ✓ #933536 SUCCESS harec/patches/alpine.yml https://builds.sr.ht/~sircmpwn/job/933536 ✓ #933538 SUCCESS harec/patches/netbsd.yml https://builds.sr.ht/~sircmpwn/job/933538 ✓ #933537 SUCCESS harec/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/933537
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~sircmpwn/hare-dev/patches/38677/mbox | git am -3
Learn more about email & git
(10^m) mod (2^n) will always be zero for m >= n, so we can speed up such cases. References: https://todo.sr.ht/~sircmpwn/hare/737 Signed-off-by: Bor Grošelj Simić <bgs@turminal.net> --- src/lex.c | 37 +++++++++++++++++++++---------------- tests/00-constants.ha | 6 +++++- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/lex.c b/src/lex.c index e4a56cf..74067b7 100644 --- a/src/lex.c +++ b/src/lex.c @@ -297,6 +297,19 @@ lex_name(struct lexer *lexer, struct token *out) return out->token; } +static uintmax_t +compute_exp(int exponent) +{ + if (exponent > 64) { + return 0; + } + uintmax_t out = 1; + for (int i = 0; i < exponent; i++) { + out *= 10; + } + return out; +} + static uint32_t lex_literal(struct lexer *lexer, struct token *out) { @@ -488,16 +501,12 @@ finalize: case STORAGE_UINT: case STORAGE_U64: case STORAGE_SIZE: - out->uval = strtoumax(lexer->buf, NULL, base); - for (intmax_t i = 0; i < exponent; i++) { - out->uval *= 10; - } + out->uval = strtoumax(lexer->buf, NULL, base) + * compute_exp(exponent); break; case STORAGE_ICONST: - out->uval = strtoumax(lexer->buf, NULL, base); - for (intmax_t i = 0; i < exponent; i++) { - out->uval *= 10; - } + out->uval = strtoumax(lexer->buf, NULL, base) + * compute_exp(exponent); if (out->uval > (uintmax_t)INT64_MAX) { out->storage = STORAGE_U64; break; @@ -508,19 +517,15 @@ finalize: case STORAGE_I32: case STORAGE_INT: case STORAGE_I64: - out->uval = strtoumax(lexer->buf, NULL, base); - for (intmax_t i = 0; i < exponent; i++) { - out->uval *= 10; - } + out->uval = strtoumax(lexer->buf, NULL, base) + * compute_exp(exponent); if (out->uval == (uintmax_t)INT64_MIN) { // XXX: Hack out->ival = INT64_MIN; break; } - out->ival = strtoimax(lexer->buf, NULL, base); - for (intmax_t i = 0; i < exponent; i++) { - out->ival *= 10; - } + out->ival = strtoimax(lexer->buf, NULL, base) + * (intmax_t)compute_exp(exponent); break; case STORAGE_F32: case STORAGE_F64: diff --git a/tests/00-constants.ha 
b/tests/00-constants.ha index b7c513f..22b4943 100644 --- a/tests/00-constants.ha +++ b/tests/00-constants.ha @@ -137,7 +137,7 @@ fn aggregates() void = { u2arr as [3]u8; }; -export fn main() void = { +fn basics() void = { let i1 = 13, i2 = 13i, i3 = 13i8, i4 = 13i16, i5 = 13i32, i6 = 13i64; let u1 = 13u, u2 = 13z, u3 = 13u8, u4 = 13u16, u5 = 13u32, u6 = 13u64; let n1 = -13, n2 = -13u; @@ -149,9 +149,13 @@ export fn main() void = { r15 = '\U12345678'; let f1 = 1.0, f2 = 1f32, f3 = 1.0e2, f4 = 1.0f64; let f5 = 1.23e+45, f6 = 9.87e-65, f7 = 1e-7, f8 = 5.0e-324; + let ie1 = 1e5i, ie2 = 1e50000000000i; +}; +export fn main() void = { // The interaction between constants and result type reduction is tested // in 30-reduction.c + basics(); assignment(); aggregates(); }; -- 2.36.4
Hm, shouldn't we error out in this case rather than treating it like e0? It's just an out-of-range literal.
Signed-off-by: Bor Grošelj Simić <bgs@turminal.net> --- tests/00-constants.ha | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/00-constants.ha b/tests/00-constants.ha index 22b4943..1ba16bb 100644 --- a/tests/00-constants.ha +++ b/tests/00-constants.ha @@ -150,6 +150,18 @@ fn basics() void = { let f1 = 1.0, f2 = 1f32, f3 = 1.0e2, f4 = 1.0f64; let f5 = 1.23e+45, f6 = 9.87e-65, f7 = 1e-7, f8 = 5.0e-324; let ie1 = 1e5i, ie2 = 1e50000000000i; + + let failures: [_]str = [ + "let x = 0x;", + "let x = 0xz;", + "let x = 0xu;", + "let x = 0xu64;", + "let x = 0be+0;", + "let x = 0bf64;", + ]; + for (let i = 0z; i < len(failures); i += 1) { + assert(compile(failures[i]) as exited != EXIT_SUCCESS); + }; }; export fn main() void = { -- 2.36.4
The lexer completely ignores T_ERROR tokens, so using them for signaling errors doesn't have the desired effect. Signed-off-by: Bor Grošelj Simić <bgs@turminal.net> --- src/lex.c | 4 +--- tests/00-constants.ha | 5 +++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lex.c b/src/lex.c index 74067b7..0080060 100644 --- a/src/lex.c +++ b/src/lex.c @@ -418,9 +418,7 @@ lex_literal(struct lexer *lexer, struct token *out) finalize: if (!started) { - out->token = T_ERROR; - consume(lexer, -1); - return out->token; + error(&out->loc, "Invalid literal"); } lexer->require_int = false; out->token = T_LITERAL; diff --git a/tests/00-constants.ha b/tests/00-constants.ha index 1ba16bb..5e2bf01 100644 --- a/tests/00-constants.ha +++ b/tests/00-constants.ha @@ -158,6 +158,11 @@ fn basics() void = { "let x = 0xu64;", "let x = 0be+0;", "let x = 0bf64;", + + "let x = 0b41;", + "let x = 0b14;", + "let x = 0o82;", + "let x = 0o28;", ]; for (let i = 0z; i < len(failures); i += 1) { assert(compile(failures[i]) as exited != EXIT_SUCCESS); -- 2.36.4
builds.sr.ht <builds@sr.ht>
harec/patches: SUCCESS in 1m21s [lex: don't hang on big integer exponents][0] from [Bor Grošelj Simić][1] [0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/38677 [1]: mailto:bgs@turminal.net ✓ #933536 SUCCESS harec/patches/alpine.yml https://builds.sr.ht/~sircmpwn/job/933536 ✓ #933538 SUCCESS harec/patches/netbsd.yml https://builds.sr.ht/~sircmpwn/job/933538 ✓ #933537 SUCCESS harec/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/933537
Signed-off-by: Bor Grošelj Simić <bgs@turminal.net> --- src/lex.c | 6 +++++- tests/00-constants.ha | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/lex.c b/src/lex.c index 0080060..ca2c791 100644 --- a/src/lex.c +++ b/src/lex.c @@ -316,7 +316,7 @@ lex_literal(struct lexer *lexer, struct token *out) uint32_t c = next(lexer, &out->loc, true); assert(c != C_EOF && c <= 0x7F && isdigit(c)); - bool started = false; + bool started = false, leadingzero = false; int base = 10; const char *basechrs = "0123456789"; if (c == '0') { @@ -338,6 +338,7 @@ lex_literal(struct lexer *lexer, struct token *out) break; default: started = true; + leadingzero = true; push(lexer, c, true); break; } @@ -420,6 +421,9 @@ finalize: if (!started) { error(&out->loc, "Invalid literal"); } + if (leadingzero && lexer->buflen >= 2 && strchr(basechrs, lexer->buf[1])) { + error(&out->loc, "Leading zero in base 10 literal"); + } lexer->require_int = false; out->token = T_LITERAL; if (isfloat) { diff --git a/tests/00-constants.ha b/tests/00-constants.ha index 5e2bf01..a29e669 100644 --- a/tests/00-constants.ha +++ b/tests/00-constants.ha @@ -163,6 +163,10 @@ fn basics() void = { "let x = 0b14;", "let x = 0o82;", "let x = 0o28;", + + // leading zeros + "let x = 05;" + "let x = 0000000010;" ]; for (let i = 0z; i < len(failures); i += 1) { assert(compile(failures[i]) as exited != EXIT_SUCCESS); -- 2.36.4