~blainsmith/public-inbox

hare-csv: suggestions v1 APPLIED

I think there are some memory leaks which you need to address, like
unfreed fmt::asprintf. You could use a static buffer with 5 bytes ('"x',
runes can be 4 bytes) and fmt::bsprintf instead.

I don't see tests or clearly defined behaviour for the (un)escaping of
backslashes, or when (delim == '"'). I didn't want to write the code
myself on assumptions, but perhaps you can add something like this to
newreader & newwriter:

	delim = if (delim == '"') abort("format::csv: Invalid delimiter") else delim,

Anyways, nice to see the Hare ecosystem grow!
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~blainsmith/public-inbox/patches/38142/mbox | git am -3
Learn more about email & git

[PATCH hare-csv 1/5] code style Export this patch

---

Tab indents and 80-ish width folding.

See https://harelang.org/style
See https://harelang.org/editors


 format/csv/reader.ha | 222 ++++++++++++++++++++++---------------------
 format/csv/writer.ha | 116 +++++++++++-----------
 2 files changed, 172 insertions(+), 166 deletions(-)

diff --git a/format/csv/reader.ha b/format/csv/reader.ha
index 0a601bf..ed0d8cd 100644
--- a/format/csv/reader.ha
+++ b/format/csv/reader.ha
@@ -9,125 +9,131 @@ use encoding::utf8;
// reading records from the handle.
export type reader = struct {
	in: io::handle,
    delim: rune,
	delim: rune,
};

// Create a new [[reader]] with a comma as the default delim.
export fn newreader(in: io::handle) reader = reader {
    in = in,
    delim = ',',
	in = in,
	delim = ',',
};

// Read reads one record (a slice of strings) from [[reader]].
export fn read(r: *reader) ([]str | io::EOF | io::error) = {
    let line = match (bufio::scanline(r.in)?) {
    case let b: []u8 =>
        yield strings::fromutf8_unsafe(bytes::trim(b, ' '));
    case io::EOF =>
        return io::EOF;
    };

    if (len(line) == 0) {
        return read(r);
    };

    let fieldend = fmt::asprintf("\"{}", r.delim);

    let record: []str = [];
    for (true) {
        if (len(line) == 0 || !strings::hasprefix(line, '"')) {
            match(strings::index(line, r.delim)) {
            case let i: size =>
                append(record, strings::sub(line, 0, i));
                line = strings::sub(line, i + 1, strings::end);
                continue;
            case void =>
                append(record, line);
                break;
            };
        } else {
            line = strings::sub(line, 1, strings::end);
            match(strings::index(line, fieldend)) {
            case let i: size =>
                append(record, strings::replace(strings::sub(line, 0, i), `""`, `"`));
                line = strings::sub(line, i + 2, strings::end);
            case void =>
                match(strings::index(line, '"')) {
                case let i: size =>
                    append(record, strings::replace(strings::sub(line, 0, i), `""`, `"`));
                    break;
                case void =>
                    append(record, strings::sub(line, 0, len(line) - 1));
                    break;
                };
            };
        };
    };

    return record;
	let line = match (bufio::scanline(r.in)?) {
	case let b: []u8 =>
		yield strings::fromutf8_unsafe(bytes::trim(b, ' '));
	case io::EOF =>
		return io::EOF;
	};

	if (len(line) == 0) {
		return read(r);
	};

	let fieldend = fmt::asprintf("\"{}", r.delim);

	let record: []str = [];
	for (true) {
		if (len(line) == 0 || !strings::hasprefix(line, '"')) {
			match(strings::index(line, r.delim)) {
			case let i: size =>
				append(record, strings::sub(line, 0, i));
				line = strings::sub(line, i + 1, strings::end);
				continue;
			case void =>
				append(record, line);
				break;
			};
		} else {
			line = strings::sub(line, 1, strings::end);
			match(strings::index(line, fieldend)) {
			case let i: size =>
				append(record, strings::replace(
					strings::sub(line, 0, i),
					`""`, `"`,
				));
				line = strings::sub(line, i + 2, strings::end);
			case void =>
				match(strings::index(line, '"')) {
				case let i: size =>
					append(record, strings::replace(
						strings::sub(line, 0, i),
						`""`, `"`,
					));
					break;
				case void =>
					append(record, strings::sub(
						line, 0, len(line) - 1,
					));
					break;
				};
			};
		};
	};

	return record;
};

@test fn read() void = {
    let csv = strings::toutf8(`
        col1,col2,col3
        "1,1",12,13
        21,"2""2",23
        31,32,"3,,,,3"
    `); 
    let buf = bufio::fixed(csv, io::mode::READ);

    let r = newreader(&buf);

    let record = read(&r)! as []str;
    assert(record[0] == "col1");
    assert(record[1] == "col2");
    assert(record[2] == "col3");

    let record = read(&r)! as []str;
    assert(record[0] == "1,1");
    assert(record[1] == "12");
    assert(record[2] == "13");

    let record = read(&r)! as []str;
    assert(record[0] == "21");
    assert(record[1] == `2"2`);
    assert(record[2] == "23");

    let record = read(&r)! as []str;
    assert(record[0] == "31");
    assert(record[1] == "32");
    assert(record[2] == "3,,,,3");
	let csv = strings::toutf8(`col1,col2,col3
"1,1",12,13
21,"2""2",23
31,32,"3,,,,3"
`);
	let buf = bufio::fixed(csv, io::mode::READ);

	let r = newreader(&buf);

	let record = read(&r)! as []str;
	assert(record[0] == "col1");
	assert(record[1] == "col2");
	assert(record[2] == "col3");

	let record = read(&r)! as []str;
	assert(record[0] == "1,1");
	assert(record[1] == "12");
	assert(record[2] == "13");

	let record = read(&r)! as []str;
	assert(record[0] == "21");
	assert(record[1] == `2"2`);
	assert(record[2] == "23");

	let record = read(&r)! as []str;
	assert(record[0] == "31");
	assert(record[1] == "32");
	assert(record[2] == "3,,,,3");
};

@test fn read_delim() void = {
    let csv = strings::toutf8(`
        col1|col2|col3
        1,1|12|13
        21|"2""2"|23
        31|32|3,,,,3
    `); 
    let buf = bufio::fixed(csv, io::mode::READ);

    let r = newreader(&buf);
    r.delim = '|';

    let record = read(&r)! as []str;
    assert(record[0] == "col1");
    assert(record[1] == "col2");
    assert(record[2] == "col3");

    let record = read(&r)! as []str;
    assert(record[0] == "1,1");
    assert(record[1] == "12");
    assert(record[2] == "13");

    let record = read(&r)! as []str;
    assert(record[0] == "21");
    assert(record[1] == `2"2`);
    assert(record[2] == "23");

    let record = read(&r)! as []str;
    assert(record[0] == "31");
    assert(record[1] == "32");
    assert(record[2] == "3,,,,3");
};
\ No newline at end of file
	let csv = strings::toutf8(`col1|col2|col3
1,1|12|13
21|"2""2"|23
31|32|3,,,,3
`);
	let buf = bufio::fixed(csv, io::mode::READ);

	let r = newreader(&buf);
	r.delim = '|';

	let record = read(&r)! as []str;
	assert(record[0] == "col1");
	assert(record[1] == "col2");
	assert(record[2] == "col3");

	let record = read(&r)! as []str;
	assert(record[0] == "1,1");
	assert(record[1] == "12");
	assert(record[2] == "13");

	let record = read(&r)! as []str;
	assert(record[0] == "21");
	assert(record[1] == `2"2`);
	assert(record[2] == "23");

	let record = read(&r)! as []str;
	assert(record[0] == "31");
	assert(record[1] == "32");
	assert(record[2] == "3,,,,3");
};
diff --git a/format/csv/writer.ha b/format/csv/writer.ha
index 7515eee..cadcd9b 100644
--- a/format/csv/writer.ha
+++ b/format/csv/writer.ha
@@ -8,96 +8,96 @@ use strio;
// [[writer]] wraps an [[io::handle]] and defines a delim as the separator when
// writing records from the handle.
export type writer = struct {
    out: io::handle,
    delim: rune,
	out: io::handle,
	delim: rune,
};

// Create a new [[writer]] with a comma as the default delim.
export fn newwriter(out: io::handle) writer = writer {
    out = out,
    delim = ',',
	out = out,
	delim = ',',
};

// Write one record (a slice of strings) to [[writer]] escaping double-quotes
// and enclosing fields in quotes when necessary.
export fn write(w: *writer, record: []str) (void | utf8::invalid | io::error) = {
    let quote = `"`;
    let escquote = `""`;

    for (let i = 0z; i < len(record); i += 1z) {
        let hasdelim = strings::contains(record[i], w.delim);
        let hasquote = strings::contains(record[i], quote);

        let rec = record[i];
        if (hasquote) {
            rec = strings::replace(rec, quote, escquote);
        };

        if (hasdelim || hasquote) {
            strio::concat(w.out, quote, rec, quote)?;
        } else {
            strio::concat(w.out, rec)?;
        };

        if (i == len(record) - 1) {
            fmt::fprintln(w.out)?;
        } else {
            fmt::fprint(w.out, w.delim)?;
        };
    };

    return;
	let quote = `"`;
	let escquote = `""`;

	for (let i = 0z; i < len(record); i += 1z) {
		let hasdelim = strings::contains(record[i], w.delim);
		let hasquote = strings::contains(record[i], quote);

		let rec = record[i];
		if (hasquote) {
			rec = strings::replace(rec, quote, escquote);
		};

		if (hasdelim || hasquote) {
			strio::concat(w.out, quote, rec, quote)?;
		} else {
			strio::concat(w.out, rec)?;
		};

		if (i == len(record) - 1) {
			fmt::fprintln(w.out)?;
		} else {
			fmt::fprint(w.out, w.delim)?;
		};
	};

	return;
};

@test fn write() void = {
    let buf = strio::dynamic();
	let buf = strio::dynamic();

    let w = newwriter(&buf);
	let w = newwriter(&buf);

    let record: []str = ["col1", "col2", "col3"];
    write(&w, record)!;
	let record: []str = ["col1", "col2", "col3"];
	write(&w, record)!;

    let record: []str = [`1,1`, "12", "13"];
    write(&w, record)!;
	let record: []str = [`1,1`, "12", "13"];
	write(&w, record)!;

    let record: []str = ["21", `2"2`, "23"];
    write(&w, record)!;
	let record: []str = ["21", `2"2`, "23"];
	write(&w, record)!;

    let record: []str = ["31", "32", "3,,,,3"];
    write(&w, record)!;
    
    let expected = `col1,col2,col3
	let record: []str = ["31", "32", "3,,,,3"];
	write(&w, record)!;
	
	let expected = `col1,col2,col3
"1,1",12,13
21,"2""2",23
31,32,"3,,,,3"
`;

    assert(strings::compare(strio::string(&buf), expected) == 0);
	assert(strings::compare(strio::string(&buf), expected) == 0);
};

@test fn write_delim() void = {
    let buf = strio::dynamic();
	let buf = strio::dynamic();

    let w = newwriter(&buf);
    w.delim = '|';
	let w = newwriter(&buf);
	w.delim = '|';

    let record: []str = ["col1", "col2", "col3"];
    write(&w, record)!;
	let record: []str = ["col1", "col2", "col3"];
	write(&w, record)!;

    let record: []str = [`1,1`, "12", "13"];
    write(&w, record)!;
	let record: []str = [`1,1`, "12", "13"];
	write(&w, record)!;

    let record: []str = ["21", `2"2`, "23"];
    write(&w, record)!;
	let record: []str = ["21", `2"2`, "23"];
	write(&w, record)!;

    let record: []str = ["31", "3|2", "3,,,,3"];
    write(&w, record)!;
    
    let expected = `col1|col2|col3
	let record: []str = ["31", "3|2", "3,,,,3"];
	write(&w, record)!;
	
	let expected = `col1|col2|col3
1,1|12|13
21|"2""2"|23
31|"3|2"|3,,,,3
`;

    assert(strings::compare(strio::string(&buf), expected) == 0);
};
\ No newline at end of file
	assert(strings::compare(strio::string(&buf), expected) == 0);
};
-- 
2.39.0

[PATCH hare-csv 2/5] use utf8::invalid Export this patch

---
 format/csv/reader.ha | 4 ++--
 format/csv/writer.ha | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/format/csv/reader.ha b/format/csv/reader.ha
index ed0d8cd..7ee3c42 100644
--- a/format/csv/reader.ha
+++ b/format/csv/reader.ha
@@ -19,10 +19,10 @@ export fn newreader(in: io::handle) reader = reader {
};

// Read reads one record (a slice of strings) from [[reader]].
export fn read(r: *reader) ([]str | io::EOF | io::error) = {
export fn read(r: *reader) ([]str | io::EOF | encoding::utf8::invalid | io::error) = {
	let line = match (bufio::scanline(r.in)?) {
	case let b: []u8 =>
		yield strings::fromutf8_unsafe(bytes::trim(b, ' '));
		yield strings::fromutf8(bytes::trim(b, ' '))?;
	case io::EOF =>
		return io::EOF;
	};
diff --git a/format/csv/writer.ha b/format/csv/writer.ha
index cadcd9b..ec6f168 100644
--- a/format/csv/writer.ha
+++ b/format/csv/writer.ha
@@ -20,7 +20,7 @@ export fn newwriter(out: io::handle) writer = writer {

// Write one record (a slice of strings) to [[writer]] escaping double-quotes
// and enclosing fields in quotes when necessary.
export fn write(w: *writer, record: []str) (void | utf8::invalid | io::error) = {
export fn write(w: *writer, record: []str) (void | io::error) = {
	let quote = `"`;
	let escquote = `""`;

-- 
2.39.0

[PATCH hare-csv 3/5] simplify tests Export this patch

---
 format/csv/writer.ha | 30 ++++++++----------------------
 1 file changed, 8 insertions(+), 22 deletions(-)

diff --git a/format/csv/writer.ha b/format/csv/writer.ha
index ec6f168..4b6408b 100644
--- a/format/csv/writer.ha
+++ b/format/csv/writer.ha
@@ -54,17 +54,10 @@ export fn write(w: *writer, record: []str) (void | io::error) = {

	let w = newwriter(&buf);

	let record: []str = ["col1", "col2", "col3"];
	write(&w, record)!;

	let record: []str = [`1,1`, "12", "13"];
	write(&w, record)!;

	let record: []str = ["21", `2"2`, "23"];
	write(&w, record)!;

	let record: []str = ["31", "32", "3,,,,3"];
	write(&w, record)!;
	write(&w, ["col1", "col2", "col3"])!;
	write(&w, [`1,1`, "12", "13"])!;
	write(&w, ["21", `2"2`, "23"])!;
	write(&w, ["31", "32", "3,,,,3"])!;
	
	let expected = `col1,col2,col3
"1,1",12,13
@@ -81,17 +74,10 @@ export fn write(w: *writer, record: []str) (void | io::error) = {
	let w = newwriter(&buf);
	w.delim = '|';

	let record: []str = ["col1", "col2", "col3"];
	write(&w, record)!;

	let record: []str = [`1,1`, "12", "13"];
	write(&w, record)!;

	let record: []str = ["21", `2"2`, "23"];
	write(&w, record)!;

	let record: []str = ["31", "3|2", "3,,,,3"];
	write(&w, record)!;
	write(&w, ["col1", "col2", "col3"])!;
	write(&w, [`1,1`, "12", "13"])!;
	write(&w, ["21", `2"2`, "23"])!;
	write(&w, ["31", "3|2", "3,,,,3"])!;
	
	let expected = `col1|col2|col3
1,1|12|13
-- 
2.39.0

[PATCH hare-csv 4/5] improve docs Export this patch

---
 format/csv/README    | 21 +++++++++------------
 format/csv/reader.ha |  8 ++++----
 format/csv/writer.ha | 10 +++++-----
 3 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/format/csv/README b/format/csv/README
index 397f5de..1576c02 100644
--- a/format/csv/README
+++ b/format/csv/README
@@ -1,18 +1,15 @@
The csv module provides basic reading and writing of comma-separated text. By
default, the comma is used as the default delimiter when creating a [[reader]]
or [[writer]]. You may change this by setting the `delim` field.
The format::csv module provides basic reading and writing of comma-separated
text. By default, the comma is used as the default delimiter when creating a
[[reader]] or [[writer]]. You may change this by setting the `delim` field.

    let w = csv::newwriter(os::stdout);
    w.delim = '|';

    let fields: []str = ["field1", "field2", "field3"];
    csv::write(&w, fields)!;
    csv::write(&w, ["field1", "field2", "field3"])!;
    // "field1|field2|field3\n"

    // field1|field2|field3
When reading, fields wrapped in double-quotes will be unwrapped, and doubled
double-quotes (`""`) will be unescaped to a single double-quote.

Reading and writing will handle quoted fields appropriately and only when
necessary. When reading fields that are wrapped in double quotes, the read field
will have the quotes trimmed off the ends. Escaped double quotes will be
unescaped as well. When writing fields that contain the delimiter, the resulting
field will be wrapped in double quotes. Any double quotes appearing in fields
will be automatically escaped as well.
\ No newline at end of file
When writing, double-quotes will be escaped by doubling them (`""`), and fields
containing a double-quote or an occurrence of the delimiter will be wrapped in
double-quotes.
diff --git a/format/csv/reader.ha b/format/csv/reader.ha
index 7ee3c42..6a89f78 100644
--- a/format/csv/reader.ha
+++ b/format/csv/reader.ha
@@ -5,20 +5,20 @@ use io;
use strings;
use encoding::utf8;

// [[reader]] wraps an [[io::handle]] and defines a delim as the separator when
// reading records from the handle.
// An [[io::handle]] wrapper for reading CSV data, with a configurable
// delimiter.
export type reader = struct {
	in: io::handle,
	delim: rune,
};

// Create a new [[reader]] with a comma as the default delim.
// Creates a new [[reader]] with a comma (',') as the delimiter.
export fn newreader(in: io::handle) reader = reader {
	in = in,
	delim = ',',
};

// Read reads one record (a slice of strings) from [[reader]].
// Reads one record (a slice of strings) from a [[reader]].
export fn read(r: *reader) ([]str | io::EOF | encoding::utf8::invalid | io::error) = {
	let line = match (bufio::scanline(r.in)?) {
	case let b: []u8 =>
diff --git a/format/csv/writer.ha b/format/csv/writer.ha
index 4b6408b..9e3ed4e 100644
--- a/format/csv/writer.ha
+++ b/format/csv/writer.ha
@@ -5,21 +5,21 @@ use fmt;
use strings;
use strio;

// [[writer]] wraps an [[io::handle]] and defines a delim as the separator when
// writing records from the handle.
// An [[io::handle]] wrapper for writing CSV data, with a configurable
// delimiter.
export type writer = struct {
	out: io::handle,
	delim: rune,
};

// Create a new [[writer]] with a comma as the default delim.
// Creates a new [[writer]] with a comma (',') as the delimiter.
export fn newwriter(out: io::handle) writer = writer {
	out = out,
	delim = ',',
};

// Write one record (a slice of strings) to [[writer]] escaping double-quotes
// and enclosing fields in quotes when necessary.
// Writes one record (a slice of strings) to a [[writer]], escaping
// double-quotes and enclosing fields in quotes when necessary.
export fn write(w: *writer, record: []str) (void | io::error) = {
	let quote = `"`;
	let escquote = `""`;
-- 
2.39.0

[PATCH hare-csv 5/5] add delim parameter to newreader, newwriter Export this patch

---

The two-step {let r = newreader(); r.delim = d;} defeats the purpose of
newreader(), and in the future, you may want to validate the delimiter
(e.g. error on '"').


 format/csv/README    |  8 +++-----
 format/csv/reader.ha | 11 +++++------
 format/csv/writer.ha | 11 +++++------
 3 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/format/csv/README b/format/csv/README
index 1576c02..72dbc39 100644
--- a/format/csv/README
+++ b/format/csv/README
@@ -1,9 +1,7 @@
The format::csv module provides basic reading and writing of comma-separated
text. By default, the comma is used as the default delimiter when creating a
[[reader]] or [[writer]]. You may change this by setting the `delim` field.
The format::csv module handles the reading and writing of comma-separated text,
or newline-delimited records of text with generic field delimiters.

    let w = csv::newwriter(os::stdout);
    w.delim = '|';
    let w = csv::newwriter(os::stdout, '|');

    csv::write(&w, ["field1", "field2", "field3"])!;
    // "field1|field2|field3\n"
diff --git a/format/csv/reader.ha b/format/csv/reader.ha
index 6a89f78..1b243f2 100644
--- a/format/csv/reader.ha
+++ b/format/csv/reader.ha
@@ -12,10 +12,10 @@ export type reader = struct {
	delim: rune,
};

// Creates a new [[reader]] with a comma (',') as the delimiter.
export fn newreader(in: io::handle) reader = reader {
// Creates a new [[reader]] with a specified delimiter.
export fn newreader(in: io::handle, delim: rune) reader = reader {
	in = in,
	delim = ',',
	delim = delim,
};

// Reads one record (a slice of strings) from a [[reader]].
@@ -83,7 +83,7 @@ export fn read(r: *reader) ([]str | io::EOF | encoding::utf8::invalid | io::erro
`);
	let buf = bufio::fixed(csv, io::mode::READ);

	let r = newreader(&buf);
	let r = newreader(&buf, ',');

	let record = read(&r)! as []str;
	assert(record[0] == "col1");
@@ -114,8 +114,7 @@ export fn read(r: *reader) ([]str | io::EOF | encoding::utf8::invalid | io::erro
`);
	let buf = bufio::fixed(csv, io::mode::READ);

	let r = newreader(&buf);
	r.delim = '|';
	let r = newreader(&buf, '|');

	let record = read(&r)! as []str;
	assert(record[0] == "col1");
diff --git a/format/csv/writer.ha b/format/csv/writer.ha
index 9e3ed4e..57ad986 100644
--- a/format/csv/writer.ha
+++ b/format/csv/writer.ha
@@ -12,10 +12,10 @@ export type writer = struct {
	delim: rune,
};

// Creates a new [[writer]] with a comma (',') as the delimiter.
export fn newwriter(out: io::handle) writer = writer {
// Creates a new [[writer]] with a specified delimiter.
export fn newwriter(out: io::handle, delim: rune) writer = writer {
	out = out,
	delim = ',',
	delim = delim,
};

// Writes one record (a slice of strings) to a [[writer]], escaping
@@ -52,7 +52,7 @@ export fn write(w: *writer, record: []str) (void | io::error) = {
@test fn write() void = {
	let buf = strio::dynamic();

	let w = newwriter(&buf);
	let w = newwriter(&buf, ',');

	write(&w, ["col1", "col2", "col3"])!;
	write(&w, [`1,1`, "12", "13"])!;
@@ -71,8 +71,7 @@ export fn write(w: *writer, record: []str) (void | io::error) = {
@test fn write_delim() void = {
	let buf = strio::dynamic();

	let w = newwriter(&buf);
	w.delim = '|';
	let w = newwriter(&buf, '|');

	write(&w, ["col1", "col2", "col3"])!;
	write(&w, [`1,1`, "12", "13"])!;
-- 
2.39.0