I think there are some memory leaks which you need to address, like unfreed fmt::asprintf. You could use a static buffer with 5 bytes ('"x', runes can be 4 bytes) and fmt::bsprintf instead. I don't see tests or clearly defined behaviour for the (un)escaping of backslashes, or when (delim == '"'). I didn't want to write the code myself on assumptions, but perhaps you can add something like this to newreader & newwriter: delim = if (delim == '"') abort("format::csv: Invalid delimiter") else delim, Anyways, nice to see the Hare ecosystem grow!
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~blainsmith/public-inbox/patches/38142/mbox | git am -3
Learn more about email & git
--- Tab indents and 80-ish width folding. See https://harelang.org/style See https://harelang.org/editors format/csv/reader.ha | 222 ++++++++++++++++++++++--------------------- format/csv/writer.ha | 116 +++++++++++----------- 2 files changed, 172 insertions(+), 166 deletions(-) diff --git a/format/csv/reader.ha b/format/csv/reader.ha index 0a601bf..ed0d8cd 100644 --- a/format/csv/reader.ha +++ b/format/csv/reader.ha @@ -9,125 +9,131 @@ use encoding::utf8; // reading records from the handle. export type reader = struct { in: io::handle, - delim: rune, + delim: rune, }; // Create a new [[reader]] with a comma as the default delim. export fn newreader(in: io::handle) reader = reader { - in = in, - delim = ',', + in = in, + delim = ',', }; // Read reads one record (a slice of strings) from [[reader]]. export fn read(r: *reader) ([]str | io::EOF | io::error) = { - let line = match (bufio::scanline(r.in)?) { - case let b: []u8 => - yield strings::fromutf8_unsafe(bytes::trim(b, ' ')); - case io::EOF => - return io::EOF; - }; - - if (len(line) == 0) { - return read(r); - }; - - let fieldend = fmt::asprintf("\"{}", r.delim); - - let record: []str = []; - for (true) { - if (len(line) == 0 || !strings::hasprefix(line, '"')) { - match(strings::index(line, r.delim)) { - case let i: size => - append(record, strings::sub(line, 0, i)); - line = strings::sub(line, i + 1, strings::end); - continue; - case void => - append(record, line); - break; - }; - } else { - line = strings::sub(line, 1, strings::end); - match(strings::index(line, fieldend)) { - case let i: size => - append(record, strings::replace(strings::sub(line, 0, i), `""`, `"`)); - line = strings::sub(line, i + 2, strings::end); - case void => - match(strings::index(line, '"')) { - case let i: size => - append(record, strings::replace(strings::sub(line, 0, i), `""`, `"`)); - break; - case void => - append(record, strings::sub(line, 0, len(line) - 1)); - break; - }; - }; - }; - }; - - return record; + let line = match 
(bufio::scanline(r.in)?) { + case let b: []u8 => + yield strings::fromutf8_unsafe(bytes::trim(b, ' ')); + case io::EOF => + return io::EOF; + }; + + if (len(line) == 0) { + return read(r); + }; + + let fieldend = fmt::asprintf("\"{}", r.delim); + + let record: []str = []; + for (true) { + if (len(line) == 0 || !strings::hasprefix(line, '"')) { + match(strings::index(line, r.delim)) { + case let i: size => + append(record, strings::sub(line, 0, i)); + line = strings::sub(line, i + 1, strings::end); + continue; + case void => + append(record, line); + break; + }; + } else { + line = strings::sub(line, 1, strings::end); + match(strings::index(line, fieldend)) { + case let i: size => + append(record, strings::replace( + strings::sub(line, 0, i), + `""`, `"`, + )); + line = strings::sub(line, i + 2, strings::end); + case void => + match(strings::index(line, '"')) { + case let i: size => + append(record, strings::replace( + strings::sub(line, 0, i), + `""`, `"`, + )); + break; + case void => + append(record, strings::sub( + line, 0, len(line) - 1, + )); + break; + }; + }; + }; + }; + + return record; }; @test fn read() void = { - let csv = strings::toutf8(` - col1,col2,col3 - "1,1",12,13 - 21,"2""2",23 - 31,32,"3,,,,3" - `); - let buf = bufio::fixed(csv, io::mode::READ); - - let r = newreader(&buf); - - let record = read(&r)! as []str; - assert(record[0] == "col1"); - assert(record[1] == "col2"); - assert(record[2] == "col3"); - - let record = read(&r)! as []str; - assert(record[0] == "1,1"); - assert(record[1] == "12"); - assert(record[2] == "13"); - - let record = read(&r)! as []str; - assert(record[0] == "21"); - assert(record[1] == `2"2`); - assert(record[2] == "23"); - - let record = read(&r)! 
as []str; - assert(record[0] == "31"); - assert(record[1] == "32"); - assert(record[2] == "3,,,,3"); + let csv = strings::toutf8(`col1,col2,col3 +"1,1",12,13 +21,"2""2",23 +31,32,"3,,,,3" +`); + let buf = bufio::fixed(csv, io::mode::READ); + + let r = newreader(&buf); + + let record = read(&r)! as []str; + assert(record[0] == "col1"); + assert(record[1] == "col2"); + assert(record[2] == "col3"); + + let record = read(&r)! as []str; + assert(record[0] == "1,1"); + assert(record[1] == "12"); + assert(record[2] == "13"); + + let record = read(&r)! as []str; + assert(record[0] == "21"); + assert(record[1] == `2"2`); + assert(record[2] == "23"); + + let record = read(&r)! as []str; + assert(record[0] == "31"); + assert(record[1] == "32"); + assert(record[2] == "3,,,,3"); }; @test fn read_delim() void = { - let csv = strings::toutf8(` - col1|col2|col3 - 1,1|12|13 - 21|"2""2"|23 - 31|32|3,,,,3 - `); - let buf = bufio::fixed(csv, io::mode::READ); - - let r = newreader(&buf); - r.delim = '|'; - - let record = read(&r)! as []str; - assert(record[0] == "col1"); - assert(record[1] == "col2"); - assert(record[2] == "col3"); - - let record = read(&r)! as []str; - assert(record[0] == "1,1"); - assert(record[1] == "12"); - assert(record[2] == "13"); - - let record = read(&r)! as []str; - assert(record[0] == "21"); - assert(record[1] == `2"2`); - assert(record[2] == "23"); - - let record = read(&r)! as []str; - assert(record[0] == "31"); - assert(record[1] == "32"); - assert(record[2] == "3,,,,3"); -}; \ No newline at end of file + let csv = strings::toutf8(`col1|col2|col3 +1,1|12|13 +21|"2""2"|23 +31|32|3,,,,3 +`); + let buf = bufio::fixed(csv, io::mode::READ); + + let r = newreader(&buf); + r.delim = '|'; + + let record = read(&r)! as []str; + assert(record[0] == "col1"); + assert(record[1] == "col2"); + assert(record[2] == "col3"); + + let record = read(&r)! 
as []str; + assert(record[0] == "1,1"); + assert(record[1] == "12"); + assert(record[2] == "13"); + + let record = read(&r)! as []str; + assert(record[0] == "21"); + assert(record[1] == `2"2`); + assert(record[2] == "23"); + + let record = read(&r)! as []str; + assert(record[0] == "31"); + assert(record[1] == "32"); + assert(record[2] == "3,,,,3"); +}; diff --git a/format/csv/writer.ha b/format/csv/writer.ha index 7515eee..cadcd9b 100644 --- a/format/csv/writer.ha +++ b/format/csv/writer.ha @@ -8,96 +8,96 @@ use strio; // [[writer]] wraps an [[io::handle]] and defines a delim as the separator when // writing records from the handle. export type writer = struct { - out: io::handle, - delim: rune, + out: io::handle, + delim: rune, }; // Create a new [[writer]] with a comma as the default delim. export fn newwriter(out: io::handle) writer = writer { - out = out, - delim = ',', + out = out, + delim = ',', }; // Write one record (a slice of strings) to [[writer]] escaping double-quotes // and enclosing fields in quotes when necessary. 
export fn write(w: *writer, record: []str) (void | utf8::invalid | io::error) = { - let quote = `"`; - let escquote = `""`; - - for (let i = 0z; i < len(record); i += 1z) { - let hasdelim = strings::contains(record[i], w.delim); - let hasquote = strings::contains(record[i], quote); - - let rec = record[i]; - if (hasquote) { - rec = strings::replace(rec, quote, escquote); - }; - - if (hasdelim || hasquote) { - strio::concat(w.out, quote, rec, quote)?; - } else { - strio::concat(w.out, rec)?; - }; - - if (i == len(record) - 1) { - fmt::fprintln(w.out)?; - } else { - fmt::fprint(w.out, w.delim)?; - }; - }; - - return; + let quote = `"`; + let escquote = `""`; + + for (let i = 0z; i < len(record); i += 1z) { + let hasdelim = strings::contains(record[i], w.delim); + let hasquote = strings::contains(record[i], quote); + + let rec = record[i]; + if (hasquote) { + rec = strings::replace(rec, quote, escquote); + }; + + if (hasdelim || hasquote) { + strio::concat(w.out, quote, rec, quote)?; + } else { + strio::concat(w.out, rec)?; + }; + + if (i == len(record) - 1) { + fmt::fprintln(w.out)?; + } else { + fmt::fprint(w.out, w.delim)?; + }; + }; + + return; }; @test fn write() void = { - let buf = strio::dynamic(); + let buf = strio::dynamic(); - let w = newwriter(&buf); + let w = newwriter(&buf); - let record: []str = ["col1", "col2", "col3"]; - write(&w, record)!; + let record: []str = ["col1", "col2", "col3"]; + write(&w, record)!; - let record: []str = [`1,1`, "12", "13"]; - write(&w, record)!; + let record: []str = [`1,1`, "12", "13"]; + write(&w, record)!; - let record: []str = ["21", `2"2`, "23"]; - write(&w, record)!; + let record: []str = ["21", `2"2`, "23"]; + write(&w, record)!; - let record: []str = ["31", "32", "3,,,,3"]; - write(&w, record)!; - - let expected = `col1,col2,col3 + let record: []str = ["31", "32", "3,,,,3"]; + write(&w, record)!; + + let expected = `col1,col2,col3 "1,1",12,13 21,"2""2",23 31,32,"3,,,,3" `; - 
assert(strings::compare(strio::string(&buf), expected) == 0); + assert(strings::compare(strio::string(&buf), expected) == 0); }; @test fn write_delim() void = { - let buf = strio::dynamic(); + let buf = strio::dynamic(); - let w = newwriter(&buf); - w.delim = '|'; + let w = newwriter(&buf); + w.delim = '|'; - let record: []str = ["col1", "col2", "col3"]; - write(&w, record)!; + let record: []str = ["col1", "col2", "col3"]; + write(&w, record)!; - let record: []str = [`1,1`, "12", "13"]; - write(&w, record)!; + let record: []str = [`1,1`, "12", "13"]; + write(&w, record)!; - let record: []str = ["21", `2"2`, "23"]; - write(&w, record)!; + let record: []str = ["21", `2"2`, "23"]; + write(&w, record)!; - let record: []str = ["31", "3|2", "3,,,,3"]; - write(&w, record)!; - - let expected = `col1|col2|col3 + let record: []str = ["31", "3|2", "3,,,,3"]; + write(&w, record)!; + + let expected = `col1|col2|col3 1,1|12|13 21|"2""2"|23 31|"3|2"|3,,,,3 `; - assert(strings::compare(strio::string(&buf), expected) == 0); -}; \ No newline at end of file + assert(strings::compare(strio::string(&buf), expected) == 0); +}; -- 2.39.0
--- format/csv/reader.ha | 4 ++-- format/csv/writer.ha | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/format/csv/reader.ha b/format/csv/reader.ha index ed0d8cd..7ee3c42 100644 --- a/format/csv/reader.ha +++ b/format/csv/reader.ha @@ -19,10 +19,10 @@ export fn newreader(in: io::handle) reader = reader { }; // Read reads one record (a slice of strings) from [[reader]]. -export fn read(r: *reader) ([]str | io::EOF | io::error) = { +export fn read(r: *reader) ([]str | io::EOF | encoding::utf8::invalid | io::error) = { let line = match (bufio::scanline(r.in)?) { case let b: []u8 => - yield strings::fromutf8_unsafe(bytes::trim(b, ' ')); + yield strings::fromutf8(bytes::trim(b, ' '))?; case io::EOF => return io::EOF; }; diff --git a/format/csv/writer.ha b/format/csv/writer.ha index cadcd9b..ec6f168 100644 --- a/format/csv/writer.ha +++ b/format/csv/writer.ha @@ -20,7 +20,7 @@ export fn newwriter(out: io::handle) writer = writer { // Write one record (a slice of strings) to [[writer]] escaping double-quotes // and enclosing fields in quotes when necessary. -export fn write(w: *writer, record: []str) (void | utf8::invalid | io::error) = { +export fn write(w: *writer, record: []str) (void | io::error) = { let quote = `"`; let escquote = `""`; -- 2.39.0
--- format/csv/writer.ha | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/format/csv/writer.ha b/format/csv/writer.ha index ec6f168..4b6408b 100644 --- a/format/csv/writer.ha +++ b/format/csv/writer.ha @@ -54,17 +54,10 @@ export fn write(w: *writer, record: []str) (void | io::error) = { let w = newwriter(&buf); - let record: []str = ["col1", "col2", "col3"]; - write(&w, record)!; - - let record: []str = [`1,1`, "12", "13"]; - write(&w, record)!; - - let record: []str = ["21", `2"2`, "23"]; - write(&w, record)!; - - let record: []str = ["31", "32", "3,,,,3"]; - write(&w, record)!; + write(&w, ["col1", "col2", "col3"])!; + write(&w, [`1,1`, "12", "13"])!; + write(&w, ["21", `2"2`, "23"])!; + write(&w, ["31", "32", "3,,,,3"])!; let expected = `col1,col2,col3 "1,1",12,13 @@ -81,17 +74,10 @@ export fn write(w: *writer, record: []str) (void | io::error) = { let w = newwriter(&buf); w.delim = '|'; - let record: []str = ["col1", "col2", "col3"]; - write(&w, record)!; - - let record: []str = [`1,1`, "12", "13"]; - write(&w, record)!; - - let record: []str = ["21", `2"2`, "23"]; - write(&w, record)!; - - let record: []str = ["31", "3|2", "3,,,,3"]; - write(&w, record)!; + write(&w, ["col1", "col2", "col3"])!; + write(&w, [`1,1`, "12", "13"])!; + write(&w, ["21", `2"2`, "23"])!; + write(&w, ["31", "3|2", "3,,,,3"])!; let expected = `col1|col2|col3 1,1|12|13 -- 2.39.0
--- format/csv/README | 21 +++++++++------------ format/csv/reader.ha | 8 ++++---- format/csv/writer.ha | 10 +++++----- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/format/csv/README b/format/csv/README index 397f5de..1576c02 100644 --- a/format/csv/README +++ b/format/csv/README @@ -1,18 +1,15 @@ -The csv module provides basic reading and writing of comma-separated text. By -default, the comma is used as the default delimiter when creating a [[reader]] -or [[writer]]. You may change this by setting the `delim` field. +The format::csv module provides basic reading and writing of comma-separated +text. By default, the comma is used as the default delimiter when creating a +[[reader]] or [[writer]]. You may change this by setting the `delim` field. let w = csv::newwriter(os::stdout); w.delim = '|'; - let fields: []str = ["field1", "field2", "field3"]; - csv::write(&w, fields)!; + csv::write(&w, ["field1", "field2", "field3"])!; + // "field1|field2|field3\n" - // field1|field2|field3 +When reading, fields wrapped in double-quotes will be unwrapped, and +backslash-escaped double-quotes will be unescaped. -Reading and writing will handle quoted fields appropriately and only when -necessary. When reading fields that are wrapped in double quotes the read field -will have the trimmed off the ends. Escaped double quotes will be unescaped as -well. When writing fields that contain the delimiter then resulting field will -be wrapped in double quotes. Any double quotes appearing in fields will be -automatically escaped as well. \ No newline at end of file +When writing, double-quotes will be backslash-escaped, and fields containing an +occurence of the delimiter will be wrapped in double-quotes. 
diff --git a/format/csv/reader.ha b/format/csv/reader.ha index 7ee3c42..6a89f78 100644 --- a/format/csv/reader.ha +++ b/format/csv/reader.ha @@ -5,20 +5,20 @@ use io; use strings; use encoding::utf8; -// [[reader]] wraps an [[io::handle]] and defines a delim as the separator when -// reading records from the handle. +// An [[io::handle]] wrapper for reading CSV data, with a configurable +// delimiter. export type reader = struct { in: io::handle, delim: rune, }; -// Create a new [[reader]] with a comma as the default delim. +// Creates a new [[reader]] with a comma (',') as the delimiter. export fn newreader(in: io::handle) reader = reader { in = in, delim = ',', }; -// Read reads one record (a slice of strings) from [[reader]]. +// Reads one record (a slice of strings) from a [[reader]]. export fn read(r: *reader) ([]str | io::EOF | encoding::utf8::invalid | io::error) = { let line = match (bufio::scanline(r.in)?) { case let b: []u8 => diff --git a/format/csv/writer.ha b/format/csv/writer.ha index 4b6408b..9e3ed4e 100644 --- a/format/csv/writer.ha +++ b/format/csv/writer.ha @@ -5,21 +5,21 @@ use fmt; use strings; use strio; -// [[writer]] wraps an [[io::handle]] and defines a delim as the separator when -// writing records from the handle. +// An [[io::handle]] wrapper for rriting CSV data, with a configurable +// delimiter. export type writer = struct { out: io::handle, delim: rune, }; -// Create a new [[writer]] with a comma as the default delim. +// Creates a new [[writer]] with a comma (',') as the delimiter. export fn newwriter(out: io::handle) writer = writer { out = out, delim = ',', }; -// Write one record (a slice of strings) to [[writer]] escaping double-quotes -// and enclosing fields in quotes when necessary. +// Writes one record (a slice of strings) to a [[writer]], escaping +// double-quotes and enclosing fields in quotes when necessary. export fn write(w: *writer, record: []str) (void | io::error) = { let quote = `"`; let escquote = `""`; -- 2.39.0
--- The two-step {let r = newreader(); r.deilm = d;} defeats the purpose of newreader(), and in the future, you may want to validate the delimiter (e.g. error on '"'). format/csv/README | 8 +++----- format/csv/reader.ha | 11 +++++------ format/csv/writer.ha | 11 +++++------ 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/format/csv/README b/format/csv/README index 1576c02..72dbc39 100644 --- a/format/csv/README +++ b/format/csv/README @@ -1,9 +1,7 @@ -The format::csv module provides basic reading and writing of comma-separated -text. By default, the comma is used as the default delimiter when creating a -[[reader]] or [[writer]]. You may change this by setting the `delim` field. +The format::csv module handles the reading and writing of comma-separated text, +or newline-delimited records of text with generic field delimiters. - let w = csv::newwriter(os::stdout); - w.delim = '|'; + let w = csv::newwriter(os::stdout, '|'); csv::write(&w, ["field1", "field2", "field3"])!; // "field1|field2|field3\n" diff --git a/format/csv/reader.ha b/format/csv/reader.ha index 6a89f78..1b243f2 100644 --- a/format/csv/reader.ha +++ b/format/csv/reader.ha @@ -12,10 +12,10 @@ export type reader = struct { delim: rune, }; -// Creates a new [[reader]] with a comma (',') as the delimiter. -export fn newreader(in: io::handle) reader = reader { +// Creates a new [[reader]] with a specified delimiter. +export fn newreader(in: io::handle, delim: rune) reader = reader { in = in, - delim = ',', + delim = delim, }; // Reads one record (a slice of strings) from a [[reader]]. @@ -83,7 +83,7 @@ export fn read(r: *reader) ([]str | io::EOF | encoding::utf8::invalid | io::erro `); let buf = bufio::fixed(csv, io::mode::READ); - let r = newreader(&buf); + let r = newreader(&buf, ','); let record = read(&r)! 
as []str; assert(record[0] == "col1"); @@ -114,8 +114,7 @@ export fn read(r: *reader) ([]str | io::EOF | encoding::utf8::invalid | io::erro `); let buf = bufio::fixed(csv, io::mode::READ); - let r = newreader(&buf); - r.delim = '|'; + let r = newreader(&buf, '|'); let record = read(&r)! as []str; assert(record[0] == "col1"); diff --git a/format/csv/writer.ha b/format/csv/writer.ha index 9e3ed4e..57ad986 100644 --- a/format/csv/writer.ha +++ b/format/csv/writer.ha @@ -12,10 +12,10 @@ export type writer = struct { delim: rune, }; -// Creates a new [[writer]] with a comma (',') as the delimiter. -export fn newwriter(out: io::handle) writer = writer { +// Creates a new [[writer]] with a specified delimiter. +export fn newwriter(out: io::handle, delim: rune) writer = writer { out = out, - delim = ',', + delim = delim, }; // Writes one record (a slice of strings) to a [[writer]], escaping @@ -52,7 +52,7 @@ export fn write(w: *writer, record: []str) (void | io::error) = { @test fn write() void = { let buf = strio::dynamic(); - let w = newwriter(&buf); + let w = newwriter(&buf, ','); write(&w, ["col1", "col2", "col3"])!; write(&w, [`1,1`, "12", "13"])!; @@ -71,8 +71,7 @@ export fn write(w: *writer, record: []str) (void | io::error) = { @test fn write_delim() void = { let buf = strio::dynamic(); - let w = newwriter(&buf); - w.delim = '|'; + let w = newwriter(&buf, '|'); write(&w, ["col1", "col2", "col3"])!; write(&w, [`1,1`, "12", "13"])!; -- 2.39.0