~technomancy/fennel

Store column information in AST nodes. v1 PROPOSED

Phil Hagelberg: 2
 Store column information in AST nodes.
 Use column number information from parser in error messages.

 10 files changed, 90 insertions(+), 75 deletions(-)
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~technomancy/fennel/patches/33716/mbox | git am -3
Learn more about email & git

[PATCH 1/2] Store column information in AST nodes. Export this patch

This will help make fennel.friend's implementation much cleaner, and
it will also help editor integration tooling.
---
 api.md                | 10 ++++++----
 changelog.md          |  1 +
 src/fennel/parser.fnl | 31 ++++++++++++++++---------------
 test/parser.fnl       | 19 ++++++++++++++++++-
 4 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/api.md b/api.md
index 0461df4..e39cb20 100644
--- a/api.md
+++ b/api.md
@@ -253,8 +253,9 @@ Note that lists are compile-time constructs in Fennel. They do not exist at
runtime, except in such cases as the compiler is in use at runtime.

The list also contains these keys indicating where it was defined:
`filename`, `line`, `bytestart`, and `byteend`. This data is used for
stack traces and for pinpointing compiler error messages.
`filename`, `line`, `col`, `bytestart`, and `byteend`. This data is used
for stack traces and for pinpointing compiler error messages. Note that
column numbers are based on bytes, not characters.

### sequence/kv table

@@ -283,8 +284,9 @@ Symbols typically represent identifiers in Fennel code. Symbols can be
identified with `fennel.sym?` and constructed with `fennel.sym` which
takes a string name as its first argument and a source data table as
the second. Symbols are represented as tables which store their source
data in fields on themselves. Unlike the other tables in the AST, they
do not represent collections; they are used as scalar types.
data (`filename`, `line`, `col`, etc) in fields on themselves. Unlike
the other tables in the AST, they do not represent collections; they
are used as scalar types.

**Note:** `nil` is not a valid AST; code that references nil will have
the symbol named `"nil"` which unfortunately prints in a way that is
diff --git a/changelog.md b/changelog.md
index 2030324..92f520e 100644
--- a/changelog.md
+++ b/changelog.md
@@ -12,6 +12,7 @@ deprecated forms.
* Add `fcollect` macro for range "comprehension"

### New Features
* Parser now includes column information (byte-based) in AST nodes
* For greater consistency, add `&into`/`&until` to certain looping constructs

### Bug Fixes
diff --git a/src/fennel/parser.fnl b/src/fennel/parser.fnl
index d30fea9..f72099b 100644
--- a/src/fennel/parser.fnl
+++ b/src/fennel/parser.fnl
@@ -62,13 +62,11 @@ Also returns a second function to clear the buffer in the byte stream"
(fn parser-fn [getbyte filename {: source : unfriendly : comments}]
  (var stack []) ; stack of unfinished values
  ;; Provide one character buffer and keep track of current line and byte index
  (var line 1)
  (var byteindex 0)
  (var lastb nil)
  (var (line line-start prev-line-start byteindex lastb) (values 1 0 0 0 nil))

  (fn ungetb [ub]
    (when (= ub 10)
      (set line (- line 1)))
      (set (line line-start) (values (- line 1) prev-line-start)))
    (set byteindex (- byteindex 1))
    (set lastb ub))

@@ -79,7 +77,8 @@ Also returns a second function to clear the buffer in the byte stream"
        (set r (getbyte {:stack-size (length stack)})))
    (set byteindex (+ byteindex 1))
    (when (= r 10)
      (set line (+ line 1)))
      (set prev-line-start line-start)
      (set (line line-start) (values (+ line 1) byteindex)))
    r)

  ;; If you add new calls to this function, please update fennel.friend as well
@@ -99,12 +98,15 @@ Also returns a second function to clear the buffer in the byte stream"
  (fn parse-stream []
    (var (whitespace-since-dispatch done? retval) true)

    (fn set-source-fields [source]
      (set source.col (- source.bytestart source.line-start 1))
      (set (source.byteend source.line-start) byteindex))

    (fn dispatch [v]
      "Dispatch when we complete a value"
      (match (. stack (length stack))
        nil (set (retval done? whitespace-since-dispatch) (values v true false))
        {: prefix} (let [source (doto (table.remove stack)
                                  (tset :byteend byteindex))
        {: prefix} (let [source (doto (table.remove stack) set-source-fields)
                         list (utils.list (utils.sym prefix source) v)]
                     (each [k v (pairs source)]
                       (tset list k v))
@@ -142,7 +144,7 @@ Also returns a second function to clear the buffer in the byte stream"
        (parse-error (.. "expected whitespace before opening delimiter "
                         (string.char b))))
      (table.insert stack {:bytestart byteindex :closer (. delims b)
                           : filename : line}))
                           : filename : line : line-start}))

    (fn close-list [list]
      (dispatch (setmetatable list (getmetatable (utils.list)))))
@@ -215,7 +217,7 @@ Also returns a second function to clear the buffer in the byte stream"
        (when (and top.closer (not= top.closer b))
          (parse-error (.. "mismatched closing delimiter " (string.char b)
                           ", expected " (string.char top.closer))))
        (set top.byteend byteindex) ; set closing byte index
        (set-source-fields top)
        (if (= b 41) (close-list top)
            (= b 93) (close-sequence top)
            (close-curly-table top))))
@@ -250,9 +252,8 @@ Also returns a second function to clear the buffer in the byte stream"

    (fn parse-prefix [b]
      "expand prefix byte into wrapping form eg. '`a' into '(quote a)'"
      (table.insert stack {:prefix (. prefixes b)
                           : filename : line
                           :bytestart byteindex})
      (table.insert stack {:prefix (. prefixes b) : filename : line
                           :bytestart byteindex : line-start})
      (let [nextb (getb)]
        (when (or (whitespace? nextb) (= true (. delims nextb)))
          (when (not= b 35)
@@ -312,9 +313,9 @@ Also returns a second function to clear the buffer in the byte stream"
          rawstr))

    (fn parse-sym [b] ; not just syms actually...
      (let [bytestart byteindex
            rawstr (string.char (unpack (parse-sym-loop [b] (getb))))
            source {:byteend byteindex : bytestart : filename : line}]
      (let [source {:bytestart byteindex : filename : line : line-start}
            rawstr (string.char (unpack (parse-sym-loop [b] (getb))))]
        (set-source-fields source)
        (if (= rawstr :true)
            (dispatch true)
            (= rawstr :false)
diff --git a/test/parser.fnl b/test/parser.fnl
index a69c43e..22b7724 100644
--- a/test/parser.fnl
+++ b/test/parser.fnl
@@ -66,11 +66,28 @@
(fn test-prefixes []
  (let [code "\n\n`(let\n  ,abc #(+ 2 3))"
        (ok? ast) ((fennel.parser code))]
    (l.assertTrue ok?)
    (l.assertEquals ast.line 3)
    (l.assertEquals (. ast 2 2 :line) 4)
    (l.assertEquals (. ast 2 3 :line) 4)))

(fn line-col [{: line : col}] [line col])

(fn test-source-meta []
  (let [code "\n\n  (  let [x 5 \n        y {:z 66}]\n (+ x y.z))"
        (ok? ast) ((fennel.parser code))
        [let* [_ _ _ tbl]] ast
        [_ seq] ast]
    (l.assertTrue ok?)
    (l.assertEquals (line-col ast) [3 2] "line and column on lists")
    (l.assertEquals (line-col let*) [3 5] "line and column on symbols")
    (l.assertEquals (line-col (getmetatable seq)) [3 9]
                    "line and column on sequences")
    (l.assertEquals (line-col (getmetatable tbl)) [4 10]
                    "line and column on tables")))

{: test-basics
 : test-control-codes
 : test-comments
 : test-prefixes}
 : test-prefixes
 : test-source-meta}
-- 
2.20.1

[PATCH 2/2] Use column number information from parser in error messages. Export this patch

This introduces a utils.len function which will give the length of a
string in characters when that is available (lua 5.3+ or older luas
with a backport of the utf8 library, such as love2d) but falls back to
bytes otherwise. This means that the pinpoint ------^ feature in error
messages will make a best-effort to calculate the offsets correctly
but can sometimes be wrong. Even when the utf8 library is available,
it cannot reliably tell you the *width* of a string; for
instance (utf8.len "วัด") gives 3, which is the number of codepoints in
the string, but the string is only 2 characters wide for purposes of
calculating the column.

Currently the attempt to be utf8-aware is only done in the error
pinpointing code. The parser column number information is strictly
byte-based.

We are also emitting column numbers in the textual error messages
emitted whether or not fennel.friend is active:

    Compile error in scratch.fnl:4:13

The presence of column information on the AST nodes allows us to
simplify the reading of source files in fennel.friend fairly
significantly because we no longer need to depend on counting as it
steps thru the file to track byte offsets that way.
---
 src/fennel/compiler.fnl |  4 ++-
 src/fennel/friend.fnl   | 68 ++++++++++++++++-------------------------
 src/fennel/parser.fnl   | 10 +++---
 src/fennel/utils.fnl    |  6 ++++
 test/bad/all.sh         |  2 +-
 test/failures.fnl       | 14 ++++-----
 6 files changed, 49 insertions(+), 55 deletions(-)

diff --git a/src/fennel/compiler.fnl b/src/fennel/compiler.fnl
index c965822..6af220a 100644
--- a/src/fennel/compiler.fnl
+++ b/src/fennel/compiler.fnl
@@ -36,8 +36,10 @@ implement nesting. "
        m (getmetatable ast)
        filename (or (and m m.filename) ast-tbl.filename :unknown)
        line (or (and m m.line) ast-tbl.line "?")
        col (or (and m m.col) ast-tbl.col "?")
        target (tostring (or (utils.sym? (. ast-tbl 1)) (. ast-tbl 1) "()"))]
    (string.format "%s:%s: Compile error in '%s': %s" filename line target msg)))
    (string.format "%s:%s:%s Compile error in '%s': %s"
                   filename line col target msg)))

;; If you add new calls to this function, please update fennel.friend
;; as well to add suggestions for how to fix the new error!
diff --git a/src/fennel/friend.fnl b/src/fennel/friend.fnl
index 8b457ab..166113a 100644
--- a/src/fennel/friend.fnl
+++ b/src/fennel/friend.fnl
@@ -88,47 +88,32 @@
                            (sug matches))))))
  suggestion)

(fn read-line-from-file [filename line]
  (var bytes 0)
  (let [f (assert (io.open filename))
        _ (for [_ 1 (- line 1)]
            (set bytes (+ bytes 1 (length (f:read)))))
        codeline (f:read)]
    (f:close)
    (values codeline bytes)))
(fn read-line [filename line ?source]
  (if ?source
      (let [matcher (string.gmatch (.. ?source "\n") "(.-)(\r?\n)")]
        (for [_ 2 line] (matcher))
        (matcher))
      (with-open [f (assert (io.open filename))]
        (for [_ 2 line] (f:read))
        (f:read))))

(fn read-line-from-string [matcher target-line ?current-line ?bytes]
  (let [(this-line newline) (matcher)
        current-line (or ?current-line 1)
        bytes (+ (or ?bytes 0) (length this-line) (length newline))]
    (if (= target-line current-line)
        (values this-line (- bytes (length this-line) 1))
        this-line
        (read-line-from-string matcher target-line (+ current-line 1) bytes))))
(fn pinpoint [col len bytestart byteend]
  (let [bol (- bytestart col) eol (+ bol len)]
    (.. (string.rep " " col) "^"
        (string.rep "^" (- (math.min byteend eol) bytestart)))))

(fn read-line [filename line source]
  (if source
      (read-line-from-string (string.gmatch (.. source "\n") "(.-)(\r?\n)")
                             line)
      (read-line-from-file filename line)))

(fn friendly-msg [msg {: filename : line : bytestart : byteend} source]
  (let [(ok codeline bol) (pcall read-line filename line source)
(fn friendly-msg [msg {: filename : line : bytestart : byteend : col} source]
  (let [(ok codeline) (pcall read-line filename line source)
        suggestions (suggest msg)
        out [msg ""]]
    ;; don't assume the file can be read as-is
    ;; (when (not ok) (print :err codeline))
    (when (and ok codeline)
      (table.insert out codeline))
    (when (and ok codeline bytestart byteend)
      (table.insert out
                    (.. (string.rep " " (- bytestart bol 1)) "^"
                        (string.rep "^"
                                    (math.min (- byteend bytestart)
                                              (- (+ bol (length codeline))
                                                 bytestart))))))
    (when (and ok codeline bytestart (not byteend))
      (table.insert out (.. (string.rep "-" (- bytestart bol 1)) "^"))
    (when (and ok codeline bytestart byteend col)
      (table.insert out (pinpoint col (utils.len codeline) bytestart byteend)))
    (when (and ok codeline (not byteend) col)
      (table.insert out (.. (string.rep "-" col) "^"))
      (table.insert out ""))
    (when suggestions
      (each [_ suggestion (ipairs suggestions)]
@@ -138,18 +123,19 @@
(fn assert-compile [condition msg ast source]
  "A drop-in replacement for the internal assert-compile with friendly messages."
  (when (not condition)
    (let [{: filename : line} (utils.ast-source ast)]
      (error (friendly-msg (: "Compile error in %s:%s\n  %s" :format
    (let [{: filename : line : col} (utils.ast-source ast)]
      (error (friendly-msg (: "Compile error in %s:%s:%s\n  %s" :format
                              ;; still need fallbacks because backtick erases
                              ;; source data, and vararg has no source data
                              (or filename :unknown) (or line "?") msg)
                              ;; source and macros can generate source-less ast
                              (or filename :unknown) (or line "?")
                              (or col "?") msg)
                           (utils.ast-source ast) source) 0)))
  condition)

(fn parse-error [msg filename line bytestart source]
(fn parse-error [msg filename line col source]
  "A drop-in replacement for the internal parse-error with friendly messages."
  (error (friendly-msg (: "Parse error in %s:%s\n  %s" :format filename line
                          msg)
                       {: filename : line : bytestart} source) 0))
  (error (friendly-msg (: "Parse error in %s:%s:%s\n  %s" :format
                          filename line col msg)
                       {: filename : line : col} source) 0))

{: assert-compile : parse-error}
diff --git a/src/fennel/parser.fnl b/src/fennel/parser.fnl
index f72099b..99fea43 100644
--- a/src/fennel/parser.fnl
+++ b/src/fennel/parser.fnl
@@ -89,11 +89,11 @@ Also returns a second function to clear the buffer in the byte stream"
                             (or line "?") (or byteindex-override byteindex)
                             source utils.root.reset))
      (utils.root.reset)
      (if (or unfriendly (not friend) (not _G.io) (not _G.io.read))
          (error (string.format "%s:%s: Parse error: %s"
                                filename (or line "?") msg) 0)
          (friend.parse-error msg filename (or line "?")
                              (or byteindex-override byteindex) source))))
      (let [col (- (or byteindex-override byteindex) line-start 1)]
        (if (or unfriendly (not friend) (not _G.io) (not _G.io.read))
            (error (string.format "%s:%s:%s Parse error: %s"
                                  filename (or line "?") col msg) 0)
            (friend.parse-error msg filename (or line "?") col source)))))

  (fn parse-stream []
    (var (whitespace-since-dispatch done? retval) true)
diff --git a/src/fennel/utils.fnl b/src/fennel/utils.fnl
index d60d58f..29d42e0 100644
--- a/src/fennel/utils.fnl
+++ b/src/fennel/utils.fnl
@@ -41,6 +41,11 @@
  (when (and _G.io _G.io.stderr)
    (_G.io.stderr:write (: "--WARNING: %s\n" :format (tostring message)))))

;; if utf8-aware len is available, use it, otherwise use bytes
(local len (match (pcall require :utf8)
             (true utf8) utf8.len
             _ string.len))

(fn mt-keys-in-order [t out used-keys]
  ;; the metatable keys list gives us ordering; it is not canonical for what
  ;; keys actually exist in the table. for instance a macro can modify a k/v
@@ -418,6 +423,7 @@ handlers will be skipped."
 : ast-source
 : version
 : runtime-version
 : len
 :path (table.concat [:./?.fnl :./?/init.fnl (getenv :FENNEL_PATH)] ";")
 :macro-path (table.concat [:./?.fnl :./?/init-macros.fnl :./?/init.fnl
                            (getenv :FENNEL_MACRO_PATH)] ";")}
diff --git a/test/bad/all.sh b/test/bad/all.sh
index d98482b..ff387cb 100755
--- a/test/bad/all.sh
+++ b/test/bad/all.sh
@@ -4,5 +4,5 @@

for f in test/bad/*.fnl; do
    echo "============================================ $f"
    ./fennel --check-unused-locals $f || true
    ./fennel $f || true
done
diff --git a/test/failures.fnl b/test/failures.fnl
index b26ec0f..633418d 100644
--- a/test/failures.fnl
+++ b/test/failures.fnl
@@ -76,7 +76,7 @@
  (test-failures
   {"(let [:x 1] 1)" "unable to bind"
    "(let [[a & c d] [1 2]] c)" "rest argument before last parameter"
    "(let [b 9\nq (.)] q)" "unknown:2: Compile error in '.': expected table"
    "(let [b 9\nq (.)] q)" "unknown:2:2 Compile error in '.': expected table"
    "(let [false 1] 9)" "unable to bind boolean false"
    "(let [next #(next $)] print)" "aliased by a local"
    "(let [nil 1] 9)" "unable to bind"
@@ -107,18 +107,18 @@

(fn test-parse-fails []
  (test-failures
   {"\n\n(+))" "unknown:3: Parse error: unexpected closing delimiter )"
   {"\n\n(+))" "unknown:3:3 Parse error: unexpected closing delimiter )"
    "(foo:)" "malformed multisym"
    "(foo.bar:)" "malformed multisym"}))

(fn test-core-fails []
  (test-failures
   {"\n\n(let [f (lambda []\n(local))] (f))" "unknown:4: "
    "\n\n(let [x.y 9] nil)" "unknown:3: Compile error in 'let': unexpected multi"
    "\n(when)" "unknown:2: Compile error in 'when'"
   {"\n\n(let [f (lambda []\n(local))] (f))" "unknown:4:0 "
    "\n\n(let [x.y 9] nil)" "unknown:3:0 Compile error in 'let': unexpected multi"
    "\n(when)" "unknown:2:0 Compile error in 'when'"
    "()" "expected a function, macro, or special"
    "(789)" "cannot call literal value"
    "(do\n\n\n(each \n[x (pairs {})] (when)))" "unknown:5: "
    "(do\n\n\n(each \n[x (pairs {})] (when)))" "unknown:5:15 "
    "(each [k v (pairs {})] (BAD k v))" "BAD"
    "(f" "expected closing delimiter )"
    "(match [1 2 3] [a & b c] nil)" "rest argument before last parameter"
@@ -136,7 +136,7 @@
    "(each [k (do-iter) :igloo 33] nil)" "unexpected iterator clause igloo"
    "(for [i 1 3 2 other-stuff] nil)" "unexpected arguments"
    "(do\n\n\n(each \n[x 34 (pairs {})] 21))"
    "unknown:5: Compile error in 'x': unable to bind number 34"
    "unknown:5:0 Compile error in 'x': unable to bind number 34"
    "(with-open [(x y z) (values 1 2 3)])"
    "with-open only allows symbols in bindings"}))

-- 
2.20.1