Armin Preiml: 3
- bufio::scanner: implement io::reader
- bufio::scanner: support unread
- bufio::scanner::scan_byte: do not defer scan_shift
6 files changed, 120 insertions(+), 6 deletions(-)
hare/patches: SUCCESS in 1m43s

[bufio::scanner: implement io::reader][0] from [Armin Preiml][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/44904
[1]: mailto:apreiml@strohwolke.at

✓ #1060447 SUCCESS hare/patches/alpine.yml https://builds.sr.ht/~sircmpwn/job/1060447
✓ #1060448 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1060448
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~sircmpwn/hare-dev/patches/44904/mbox | git am -3
Learn more about email & git
--- The reader will make it easier to implement unread. Also a reader for scanner doesn't hurt I guess. bufio/scanner.ha | 27 +++++++++++++++++++++++++++ bufio/scanner_test+test.ha | 12 ++++++++++++ 2 files changed, 39 insertions(+) diff --git a/bufio/scanner.ha b/bufio/scanner.ha index 31d4e188..8a73f8b6 100644 --- a/bufio/scanner.ha @@ -11,7 +11,13 @@ use types; def BUFSZ: size = 4096; +const scanner_vtable = io::vtable { + reader = &scan_read, + ... +}; + export type scanner = struct { + stream: io::stream, src: io::handle, buffer: []u8, // Number of bytes available in buffer @@ -31,6 +37,7 @@ export type scanner = struct { // reached. export fn newscanner(src: io::handle, maxread: size) scanner = { return scanner { + stream = &scanner_vtable, src = src, buffer = alloc([0...], BUFSZ), maxread = maxread, @@ -45,6 +52,7 @@ export fn newscanner(src: io::handle, maxread: size) scanner = { // they wish to free the underlying buffer through bufio. export fn newscanner_static(src: io::handle, buffer: []u8) scanner = { return scanner { + stream = &scanner_vtable, src = src, buffer = buffer, maxread = len(buffer), @@ -59,6 +67,25 @@ export fn finish(scan: *scanner) void = { free(scan.buffer); }; +fn scan_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = { + let scan = s: *scanner; + if (scan.pending == 0) { + match (scan_readahead(scan)?) { + case io::EOF => + return io::EOF; + case size => + yield; + }; + }; + + // Consume previous read, if any + scan_shift(scan); + + const n = if (len(buf) > scan.pending) scan.pending else len(buf); + buf[..n] = scan_consume(scan, n)[..]; + return n; +}; + // Fills up the scanner buffer with data from the underlying I/O handle. If no // space remains in the read buffer, it is expanded by BUFSZ (up to maxread). 
// Then, one read from the underlying I/O handle is performed and scan.pending diff --git a/bufio/scanner_test+test.ha b/bufio/scanner_test+test.ha index 5cdb380c..d23d56fc 100644 --- a/bufio/scanner_test+test.ha @@ -111,3 +111,15 @@ use strings; }; }; }; + +@test fn scan_read() void = { + const expected: [_]u8 = [ + 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, + ]; + let in = memio::fixed(expected); + + let scanner = newscanner(&in, 2); + let result = io::drain(&scanner)!; + defer free(result); + assert(bytes::equal(expected, result)); +}; -- 2.42.0
--- bufio/scanner.ha | 41 ++++++++++++++++++++++++++++++++++++++ bufio/scanner_test+test.ha | 30 ++++++++++++++++++++++++++++ bufio/stream.ha | 13 +++++++++--- 3 files changed, 81 insertions(+), 3 deletions(-) diff --git a/bufio/scanner.ha b/bufio/scanner.ha index 8a73f8b6..92db972c 100644 --- a/bufio/scanner.ha @@ -271,6 +271,47 @@ export fn scan_buffer(scan: *scanner) []u8 = { return scan.buffer[..scan.pending]; }; +fn scan_unread(scan: *scanner, buf: []u8) void = { + if (len(buf) == 0) { + return; + }; + assert(len(buf) <= scan.readout); + + match (slice_borrowed(scan.buffer, buf)) { + case let r: (size, size) => + // Usually buf is borrowed from the scan.buffer. In that case + // we need to move the buffer forward to the end of readout. + for (let i = 0z; i < len(buf); i += 1) { + scan.buffer[scan.readout - 1 - i] = scan.buffer[r.1 - i]; + }; + case void => + const start = scan.readout - len(buf); + scan.buffer[start..scan.readout] = buf[..]; + }; + + scan.readout -= len(buf); +}; + +// Checks whether 'slice' is borrowed from 'src' and returns a tuple of start +// and end index. Or void if it's not borrowed. Asserts if slice is overlapping. +fn slice_borrowed(src: []u8, slice: []u8) ((size , size) | void) = { + const srcstart = &src[0]: uintptr; + const srcend = &src[len(src) - 1]: uintptr; + + const bufstart = &slice[0]: uintptr; + const bufend = &slice[len(slice) - 1]: uintptr; + + if (bufstart < srcstart || bufstart > srcend) { + assert(bufstart > srcend || bufend < srcstart); + return; + }; + + assert(bufstart >= srcstart && bufend <= srcend); + const start = (bufstart - srcstart) / size(u8); + const end = (bufend - srcstart) / size(u8); + return (start, end); +}; + // Reads a single byte from an [[io::handle]]. 
export fn read_byte(file: io::handle) (u8 | io::EOF | io::error) = { let buf: [1]u8 = [0...]; diff --git a/bufio/scanner_test+test.ha b/bufio/scanner_test+test.ha index d23d56fc..51178896 100644 --- a/bufio/scanner_test+test.ha @@ -119,7 +119,37 @@ use strings; let in = memio::fixed(expected); let scanner = newscanner(&in, 2); + defer finish(&scanner); let result = io::drain(&scanner)!; defer free(result); assert(bytes::equal(expected, result)); }; + +@test fn scan_unread() void = { + const expected: str = " I will not repeat \nDone!\n"; + let in = memio::fixed(strings::toutf8(expected)); + + let scanner = newscanner(&in, 32); + defer finish(&scanner); + let l = scan_line(&scanner)! as const str; + assert(l == " I will not repeat "); + + unread(&scanner, strings::toutf8("\n")); + unread(&scanner, strings::toutf8(l)); + let l = scan_line(&scanner)! as const str; + assert(l == " I will not repeat "); + + unread(&scanner, strings::toutf8("\n")); + unread(&scanner, strings::toutf8(strings::trim(l))); + let l = scan_line(&scanner)! as const str; + assert(l == "I will not repeat"); + + unread(&scanner, strings::toutf8("See?\n")); + let l = scan_line(&scanner)! as const str; + assert(l == "See?"); + + let l = scan_line(&scanner)! as const str; + assert(l == "Done!"); + + assert(scan_line(&scanner) is io::EOF); +}; diff --git a/bufio/stream.ha b/bufio/stream.ha index e787429d..f6e89d89 100644 --- a/bufio/stream.ha @@ -128,15 +128,22 @@ export fn setflush(s: io::handle, b: []u8) void = { // buffered stream. Attempting to unread more data than can fit into the read // buffer will abort the program. 
export fn unread(s: io::handle, buf: []u8) void = { - let s = match (s) { + match (s) { case let st: *io::stream => - if (st.reader != &read) { + switch (st.reader) { + case &read => + stream_unread(s: *stream, buf); + case &scan_read => + scan_unread(s: *scanner, buf); + case => abort("Attempted unread on unbuffered stream"); }; - yield st: *stream; case => abort("Attempted unread on unbuffered stream"); }; +}; + +fn stream_unread(s: *stream, buf: []u8) void = { assert(s.rpos >= len(buf), "Attempted to unread more data than buffer has available"); s.rbuffer[s.rpos - len(buf)..s.rpos] = buf; -- 2.42.0
Otherwise we won't have space for unread. --- bufio/scanner.ha | 3 --- 1 file changed, 3 deletions(-) diff --git a/bufio/scanner.ha b/bufio/scanner.ha index 92db972c..8976c8b4 100644 --- a/bufio/scanner.ha @@ -145,9 +145,6 @@ export fn scan_byte(scan: *scanner) (u8 | io::EOF | io::error) = { // Consume previous read, if any scan_shift(scan); - // Consume this read right away - defer scan_shift(scan); - return scan_consume(scan, 1)[0]; }; -- 2.42.0
builds.sr.ht <builds@sr.ht>
hare/patches: SUCCESS in 1m43s

[bufio::scanner: implement io::reader][0] from [Armin Preiml][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/44904
[1]: mailto:apreiml@strohwolke.at

✓ #1060447 SUCCESS hare/patches/alpine.yml https://builds.sr.ht/~sircmpwn/job/1060447
✓ #1060448 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1060448