It looks like v1 of this patch (#39292) was never applied. I have rebased it to master and extended it to the later added function scan_unread. These are the results for Jose's mini benchmark: master branch: time ./newlines-master l.txt 20.01 secs time ./newlines-master l8k.txt 59.58 secs With this patch (v2) applied: time ./newlines-patch l.txt 230.16 millis time ./newlines-patch l8k.txt 238.67 millis Jose Lombera (1): bufio: remove scanner inefficiency bufio/scanner.ha | 147 +++++++++++++++++++++-------------------------- 1 file changed, 67 insertions(+), 80 deletions(-) -- 2.44.0
hare/patches: SUCCESS in 1m5s [bufio: remove scanner inefficiency][0] v2 from [Max Schillinger][1] [0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/50792 [1]: mailto:max@mxsr.de ✓ #1188413 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1188413 ✓ #1188414 SUCCESS hare/patches/openbsd.yml https://builds.sr.ht/~sircmpwn/job/1188414 ✓ #1188412 SUCCESS hare/patches/alpine.yml https://builds.sr.ht/~sircmpwn/job/1188412
Thank your for his hint. I think it's the C way trying not to call the same [str]len twice. :-)
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~sircmpwn/hare-dev/patches/50792/mbox | git am -3Learn more about email & git
From: Jose Lombera <jose@lombera.dev> The new scanner with an internal read-ahead buffer contains an inefficiency in which the underlying buffer is shifted left every time a token is consumed. Fix this by delaying the shift until an actual read-ahead from the source IO handle is made, and only if the shift is required. Co-authored-by: Max Schillinger <max@mxsr.de> Signed-off-by: Jose Lombera <jose@lombera.dev> Signed-off-by: Max Schillinger <max@mxsr.de>
Lorenz (xha) <me@xha.li>thanks for this patch, it's a huge performance improvement, which is really nice. it took a couple of hours for me to dig through this patch/logic and figure it out; most of this is fine, but there are a couple of nits and one pretty serious bug. can you please send a v3 with this addresseed? you can find a proposed patch from me below.
--- bufio/scanner.ha | 147 +++++++++++++++++++++-------------------------- 1 file changed, 67 insertions(+), 80 deletions(-) diff --git a/bufio/scanner.ha b/bufio/scanner.ha index 1f156050..30f96d07 100644 --- a/bufio/scanner.ha @@ -19,10 +19,10 @@ export type scanner = struct { stream: io::stream, src: io::handle, buffer: []u8, - // Number of bytes available in buffer - pending: size, - // Number of bytes returned to the user - readout: size, + // Index of start of pending bytes in buffer + start: size, + // Sub-slice with pending bytes in buffer + pending: []u8, // User-confirmed maximum size of read buffer maxread: size, }; @@ -44,8 +44,8 @@ export fn newscanner( src = src, buffer = alloc([0...], BUFSZ), maxread = maxread, - pending = 0, - readout = 0, + start = 0, + pending = [], }; }; @@ -59,13 +59,13 @@ export fn newscanner_static(src: io::handle, buffer: []u8) scanner = { src = src, buffer = buffer, maxread = len(buffer), - pending = 0, - readout = 0, + start = 0, + pending = [], }; }; -// Frees resources associated associated with a [[scanner]]. Does not close the -// underlying I/O handle. +// Frees resources associated with a [[scanner]]. Does not close the underlying +// I/O handle. export fn finish(scan: *scanner) void = { free(scan.buffer); }; @@ -73,10 +73,7 @@ export fn finish(scan: *scanner) void = { fn scan_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = { let scan = s: *scanner; - // Consume previous read, if any - scan_shift(scan); - - if (scan.pending == 0) { + if (len(scan.pending) == 0) { match (scan_readahead(scan)?) { case io::EOF => return io::EOF; @@ -84,7 +81,7 @@ fn scan_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = { }; }; - const n = if (len(buf) > scan.pending) scan.pending else len(buf); + const n = if (len(buf) > len(scan.pending)) len(scan.pending) else len(buf); buf[..n] = scan_consume(scan, n)[..]; return n; }; @@ -95,52 +92,50 @@ fn scan_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = { // is updated accordingly. Returns the number of bytes which had been available // prior to the call. fn scan_readahead(scan: *scanner) (size | io::EOF | io::error) = { - if (scan.pending >= len(scan.buffer)) { - let readahead = scan.pending + BUFSZ; - if (readahead > scan.maxread) { - readahead = scan.maxread; - }; - if (scan.pending >= readahead) { - return errors::overflow; + let start = scan.start; + const pending = len(scan.pending); + const buf_len = len(scan.buffer); + + if ((start + pending) >= buf_len) { + if (start > 0) {
Lorenz (xha) <me@xha.li>the "bigger than" part of this check does not make sense, it's logically not possible. this should use "==". also, the extra parentheses are not needed.
+ // Shift buffer to the left to free space at the end + scan.buffer[..buf_len - start] = scan.buffer[start..]; + scan.pending = scan.buffer[..pending]; + start = 0; + scan.start = 0; + } else { + // Buffer is full, expand it + let readahead = pending + BUFSZ; + if (readahead > scan.maxread) { + readahead = scan.maxread; + }; + if (pending >= readahead) { + return errors::overflow; + }; + append(scan.buffer, [0...], readahead); }; - append(scan.buffer, [0...], readahead); }; - const prev = scan.pending; - match (io::read(scan.src, scan.buffer[scan.pending..])?) { + match (io::read(scan.src, scan.buffer[start + pending..])?) { case let z: size => - scan.pending += z; - return prev; + scan.pending = scan.buffer[start..start + pending + z]; + return pending; case io::EOF => return io::EOF; }; }; -// Shifts the buffer towards the start, discarding bytes which were read out. -fn scan_shift(scan: *scanner) void = { - const n = scan.readout; - if (n == 0) { - return; - }; - scan.buffer[..len(scan.buffer) - n] = scan.buffer[n..]; - scan.readout = 0; - scan.pending -= n; -}; - -// Consumes N bytes from the buffer, updating scan.readout. User must call -// [[scan_shift]] before calling scan_consume again. +// Consumes N bytes from the buffer. fn scan_consume(scan: *scanner, n: size) []u8 = { - assert(len(scan.buffer) >= n && scan.readout == 0); - scan.readout = n; - return scan.buffer[..n]; + assert(len(scan.pending) >= n); + scan.start += n; + defer scan.pending = scan.pending[n..]; + return scan.pending[..n]; }; // Reads one byte from a [[scanner]]. export fn scan_byte(scan: *scanner) (u8 | io::EOF | io::error) = { - // Consume previous read, if any - scan_shift(scan); - - if (scan.pending == 0) { + if (len(scan.pending) == 0) { match (scan_readahead(scan)?) { case io::EOF => return io::EOF; @@ -159,26 +154,24 @@ export fn scan_bytes( scan: *scanner, delim: (u8 | []u8), ) ([]u8 | io::EOF | io::error) = { - scan_shift(scan); - - let i = 0z, nread = 0z; + let i = 0z; for (true) { - match (bytes::index(scan.buffer[nread..scan.pending], delim)) { + match (bytes::index(scan.pending[i..], delim)) { case let ix: size => - i = ix; + i += ix; break; case void => void; }; match (scan_readahead(scan)?) { case io::EOF => - if (scan.pending == 0) { + if (len(scan.pending) == 0) { return io::EOF; }; - return scan_consume(scan, scan.pending); + return scan_consume(scan, len(scan.pending)); case let prevpending: size => // No need to re-index the earlier part of the buffer - nread = prevpending; + i = prevpending; }; }; @@ -188,36 +181,27 @@ export fn scan_bytes( case let u: []u8 => yield len(u); }; - const nuser = nread + i, nconsume = nuser + ndelim; - return scan_consume(scan, nconsume)[..nuser]; + const nconsume = i + ndelim; + return scan_consume(scan, nconsume)[..i]; }; // Reads one rune from a [[scanner]]. export fn scan_rune( scan: *scanner, ) (rune | io::EOF | io::error | utf8::invalid) = { - // Consume previous read, if any - scan_shift(scan); - - if (scan.pending == 0) { + if (len(scan.pending) < 4) { match (scan_readahead(scan)?) { case io::EOF => - if (scan.pending == 0) { + if (len(scan.pending) == 0) { return io::EOF; }; case size => void; }; }; - const sz = utf8::utf8sz(scan.buffer[0])?; - - for (scan.pending < sz) { - match (scan_readahead(scan)?) { - case io::EOF => - return utf8::invalid; - case size => void; - }; + const sz = utf8::utf8sz(scan.pending[0])?; + if (len(scan.pending) < sz) { + return utf8::invalid; }; - const buf = scan_consume(scan, sz); const dec = utf8::decode(buf[..sz]); match (utf8::next(&dec)?) { @@ -259,25 +243,28 @@ export fn scan_line( // Returns the internal scanner buffer, which contains all bytes read ahead by // the scanner up to this point. export fn scan_buffer(scan: *scanner) []u8 = { - scan_shift(scan); - return scan.buffer[..scan.pending]; + return scan.pending[..]; }; fn scan_unread(scan: *scanner, buf: []u8) void = { if (len(buf) == 0) { return; }; - if (len(buf) <= scan.readout) { - scan.buffer[scan.readout - len(buf)..scan.readout] = buf; - scan.readout -= len(buf); + if (len(buf) <= scan.start) { + const pending_end = scan.start + len(scan.pending); + scan.buffer[scan.start - len(buf)..scan.start] = buf; + scan.start -= len(buf); + scan.pending = scan.buffer[scan.start..pending_end]; } else { - const n = len(buf) - scan.readout; - assert(n < scan.maxread - scan.pending, + const n = len(buf) - scan.start; + assert(n < scan.maxread - len(scan.pending), "Attempted to unread more data than buffer has available"); + // Shift buffer to the right to free space at the beginning scan.buffer[n..] = scan.buffer[..len(scan.buffer) - n]; - scan.pending += n; scan.buffer[..len(buf)] = buf; - scan.readout = 0; + const pending_end = len(scan.pending) + len(buf); + scan.pending = scan.buffer[..pending_end]; + scan.start = 0; }; }; -- 2.44.0
builds.sr.ht <builds@sr.ht>hare/patches: SUCCESS in 1m5s [bufio: remove scanner inefficiency][0] v2 from [Max Schillinger][1] [0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/50792 [1]: mailto:max@mxsr.de ✓ #1188413 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1188413 ✓ #1188414 SUCCESS hare/patches/openbsd.yml https://builds.sr.ht/~sircmpwn/job/1188414 ✓ #1188412 SUCCESS hare/patches/alpine.yml https://builds.sr.ht/~sircmpwn/job/1188412
On Sat, Apr 06, 2024 at 08:12:35AM +0200, Max Schillinger wrote: > From: Jose Lombera <jose@lombera.dev> > > The new scanner with an internal read-ahead buffer contains an > inefficiency in which the underlying buffer is shifted left every time a > token is consumed. > > Fix this by delaying the shift until an actual read-ahead from the > source IO handle is made, and only if the shift is required. > > Co-authored-by: Max Schillinger <max@mxsr.de> > Signed-off-by: Jose Lombera <jose@lombera.dev> > Signed-off-by: Max Schillinger <max@mxsr.de> thanks for this patch, it's a huge performance improvement, which is really nice. it took a couple of hours for me to dig through this patch/logic and figure it out; most of this is fine, but there are a couple of nits and one pretty serious bug. can you please send a v3 with this addresseed? you can find a proposed patch from me below. > --- > bufio/scanner.ha | 147 +++++++++++++++++++++-------------------------- > 1 file changed, 67 insertions(+), 80 deletions(-) > > diff --git a/bufio/scanner.ha b/bufio/scanner.ha > index 1f156050..30f96d07 100644 > --- a/bufio/scanner.ha > +++ b/bufio/scanner.ha > @@ -95,52 +92,50 @@ fn scan_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = { > // is updated accordingly. Returns the number of bytes which had been available > // prior to the call. > fn scan_readahead(scan: *scanner) (size | io::EOF | io::error) = { > - if (scan.pending >= len(scan.buffer)) { > - let readahead = scan.pending + BUFSZ; > - if (readahead > scan.maxread) { > - readahead = scan.maxread; > - }; > - if (scan.pending >= readahead) { > - return errors::overflow; > + let start = scan.start; > + const pending = len(scan.pending); > + const buf_len = len(scan.buffer); i would remove "buf_len"; it's confusing because there is (1) a variable "buf" in this scope and (2) obscures code, len(scan.buffer) is much more readable in the two places it's used anyways. > + > + if ((start + pending) >= buf_len) { the "bigger than" part of this check does not make sense, it's logically not possible. this should use "==". also, the extra parentheses are not needed. > + if (start > 0) { > + // Shift buffer to the left to free space at the end > + scan.buffer[..buf_len - start] = scan.buffer[start..]; > + scan.pending = scan.buffer[..pending]; > + start = 0; > + scan.start = 0; > + } else { > + // Buffer is full, expand it > + let readahead = pending + BUFSZ; > + if (readahead > scan.maxread) { > + readahead = scan.maxread; > + }; > + if (pending >= readahead) { > + return errors::overflow; > + }; > + append(scan.buffer, [0...], readahead); > }; > - append(scan.buffer, [0...], readahead); > }; > > - const prev = scan.pending; > - match (io::read(scan.src, scan.buffer[scan.pending..])?) { > + match (io::read(scan.src, scan.buffer[start + pending..])?) { > case let z: size => > - scan.pending += z; > - return prev; > + scan.pending = scan.buffer[start..start + pending + z]; > + return pending; > case io::EOF => > return io::EOF; > }; > }; > > -// Shifts the buffer towards the start, discarding bytes which were read out. > -fn scan_shift(scan: *scanner) void = { > - const n = scan.readout; > - if (n == 0) { > - return; > - }; > - scan.buffer[..len(scan.buffer) - n] = scan.buffer[n..]; > - scan.readout = 0; > - scan.pending -= n; > -}; > - > -// Consumes N bytes from the buffer, updating scan.readout. User must call > -// [[scan_shift]] before calling scan_consume again. > +// Consumes N bytes from the buffer. > fn scan_consume(scan: *scanner, n: size) []u8 = { > - assert(len(scan.buffer) >= n && scan.readout == 0); > - scan.readout = n; > - return scan.buffer[..n]; > + assert(len(scan.pending) >= n); > + scan.start += n; > + defer scan.pending = scan.pending[n..]; > + return scan.pending[..n]; > }; > > // Reads one byte from a [[scanner]]. > export fn scan_byte(scan: *scanner) (u8 | io::EOF | io::error) = { > - // Consume previous read, if any > - scan_shift(scan); > - > - if (scan.pending == 0) { > + if (len(scan.pending) == 0) { > match (scan_readahead(scan)?) { > case io::EOF => > return io::EOF; > @@ -259,25 +243,28 @@ export fn scan_line( > // Returns the internal scanner buffer, which contains all bytes read ahead by > // the scanner up to this point. > export fn scan_buffer(scan: *scanner) []u8 = { > - scan_shift(scan); > - return scan.buffer[..scan.pending]; > + return scan.pending[..]; > }; > > fn scan_unread(scan: *scanner, buf: []u8) void = { > if (len(buf) == 0) { > return; > }; > - if (len(buf) <= scan.readout) { > - scan.buffer[scan.readout - len(buf)..scan.readout] = buf; > - scan.readout -= len(buf); > + if (len(buf) <= scan.start) { > + const pending_end = scan.start + len(scan.pending); > + scan.buffer[scan.start - len(buf)..scan.start] = buf; > + scan.start -= len(buf); > + scan.pending = scan.buffer[scan.start..pending_end]; > } else { > - const n = len(buf) - scan.readout; > - assert(n < scan.maxread - scan.pending, > + const n = len(buf) - scan.start; > + assert(n < scan.maxread - len(scan.pending), > "Attempted to unread more data than buffer has available"); > + // Shift buffer to the right to free space at the beginning > scan.buffer[n..] = scan.buffer[..len(scan.buffer) - n]; > - scan.pending += n; > scan.buffer[..len(buf)] = buf; > - scan.readout = 0; > + const pending_end = len(scan.pending) + len(buf); > + scan.pending = scan.buffer[..pending_end]; > + scan.start = 0; the logic in this else branch is completely broken; here's an example:
Yesterday I forgot to say sorry for this bug and thank you for your fix!
suppose we have a buf where len(buf) is 9. now, scan.start is 5, scan.maxread is 15 and len(scan.pending) is 10. the first thing that we are doing is: const n = len(buf) - scan.start; now n is 4. let's see what the assert is doing: assert(n < scan.maxread - len(scan.pending), "Attempted to unread more data than buffer has available"); the calculation is: `4 < 15 - 10` or `4 < 5`. NOW: this passes the assert even though len(buf) is 9 and we only have 5 spare bytes that we can unread; this will cause an overwrite of the existing data further down below in the code! suppose len(buf) is 5, scan.start is 0, scan.maxread is 15 and len(scan.pending) is 10. this should theoretically pass the assert, right? no, because: const n = len(buf) - scan.start; here n is 5. assert(n < scan.maxread - len(scan.pending), "Attempted to unread more data than buffer has available"); the calculation is: `5 < 15 - 10`, or `5 < 5` NOW: this will cause the assertion to fail, even though the inputs to the function are logically correct and should not cause it to fail. apart from these issues, another issue here is that this code assumes that len(scan.buffer) == scan.maxread, which is wrong and will also cause an out-of-bounds write below in some cases (in almost all of them with the default value of scan.maxread). > }; > }; > > -- > 2.44.0 > diff --git a/bufio/scanner.ha b/bufio/scanner.ha index ff63784b..8b8b6b68 100644 --- a/bufio/scanner.ha @@ -94,12 +94,11 @@ fn scan_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = { fn scan_readahead(scan: *scanner) (size | io::EOF | io::error) = { let start = scan.start; const pending = len(scan.pending); - const buf_len = len(scan.buffer); - if ((start + pending) >= buf_len) { + if (start + pending == len(scan.buffer)) { if (start > 0) { // Shift buffer to the left to free space at the end - scan.buffer[..buf_len - start] = scan.buffer[start..]; + scan.buffer[..len(scan.buffer) - start] = scan.buffer[start..]; scan.pending = scan.buffer[..pending]; start = 0; scan.start = 0; @@ -256,14 +255,13 @@ fn scan_unread(scan: *scanner, buf: []u8) void = { scan.start -= len(buf); scan.pending = scan.buffer[scan.start..pending_end]; } else { - const n = len(buf) - scan.start; - assert(n < scan.maxread - len(scan.pending), + assert(len(buf) <= len(scan.buffer) - len(scan.pending), "Attempted to unread more data than buffer has available"); // Shift buffer to the right to free space at the beginning - scan.buffer[n..] = scan.buffer[..len(scan.buffer) - n]; + scan.buffer[len(buf)..len(buf) + len(scan.pending)] = + scan.buffer[scan.start..scan.start + len(scan.pending)]; scan.buffer[..len(buf)] = buf; - const pending_end = len(scan.pending) + len(buf); - scan.pending = scan.buffer[..pending_end]; + scan.pending = scan.buffer[..len(scan.pending) + len(buf)]; scan.start = 0; }; };
Thank your for his hint. I think it's the C way trying not to call the same [str]len twice. :-)