~cdv/public-inbox

scfg-rs: Hand-made parser v1 APPLIED

Hubert Hirtz: 1
 Hand-made parser

 3 files changed, 137 insertions(+), 73 deletions(-)
#505447 alpine.yml success
#505448 archlinux.yml success
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~cdv/public-inbox/patches/22670/mbox | git am -3
Learn more about email & git

[PATCH scfg-rs] Hand-made parser Export this patch

Removes dependency on peg.
---

and replaces it with a 100-line parser. :)

Implementation has been taken from emersion's go-scfg parser:
<https://git.sr.ht/~emersion/go-scfg/tree/master/item/reader.go#L72>

Made sure all tests pass.

 Cargo.toml    |   1 -
 src/lib.rs    |  82 ++++----------------------------
 src/parser.rs | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 137 insertions(+), 73 deletions(-)
 create mode 100644 src/parser.rs

diff --git a/Cargo.toml b/Cargo.toml
index 3fd2b65..995d921 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,7 +15,6 @@ default = []
preserve_order = ["indexmap"]

[dependencies]
peg = "0.6.3"
shell-words = "1.0.0"

[dependencies.indexmap]
diff --git a/src/lib.rs b/src/lib.rs
index 76ffecb..3325ad4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -57,7 +57,9 @@ use indexmap::IndexMap;
#[cfg(not(feature = "preserve_order"))]
use std::collections::BTreeMap;

pub type ParseError = peg::error::ParseError<peg::str::LineCol>;
mod parser;

pub type ParseError = parser::Error;

/// An scfg document. Implemented as a multimap.
///
@@ -138,8 +140,12 @@ impl Scfg {
    /// This does not validate that `name` is a legal scfg word. It is possible to create
    /// unparsable documents should `name` contain control characters or newlines.
    pub fn add(&mut self, name: impl Into<String>) -> &mut Directive {
        self.add_directive(name, Directive::default())
    }

    fn add_directive(&mut self, name: impl Into<String>, directive: Directive) -> &mut Directive {
        let entry = self.directives.entry(name.into()).or_insert_with(Vec::new);
        entry.push(Directive::default());
        entry.push(directive);
        entry.last_mut().unwrap()
    }

@@ -209,7 +215,8 @@ impl Scfg {
impl FromStr for Scfg {
    type Err = ParseError;
    fn from_str(src: &str) -> Result<Self, Self::Err> {
        scfg_parser::document(src)
        let r = std::io::Cursor::new(src.as_bytes());
        parser::document(r)
    }
}

@@ -291,75 +298,6 @@ impl Directive {
    }
}

peg::parser! {
    grammar scfg_parser() for str {
        pub(crate) rule document() -> Scfg =
            blank() _ directives:directive()*
        {
            directives.into_iter().collect()
        }

        rule directive() -> (String, Directive) =
            blank() _ name:word() _ params:params() _ child:block()? newline()
        {
            (name, Directive { params, child })
        }

        rule block() -> Scfg =
            "{" newline() document:document() "}"
        {
            document
        }

        rule word() -> String =
            val:(atom() / dquote() / squote())
        {
            val
        }

        rule params() -> Vec<String> = vals:word() ** _ { vals }

        rule atom() -> String =
            s:$(quiet!{(['\x21'|'\x23'..='\x26'|'\x28'..='\x5b'|'\x5d'..='\x7a'
                 |'\x7c'|'\x7e'|'\u{80}'..='\u{10FFFF}'] / esc_pair())+} /
                expected!("an atom"))
        {
            if s.contains('\\') {
                s.chars().filter(|&c| c != '\\').collect::<String>()
            } else {
                String::from(s)
            }
        }

        rule dquote() -> String =
            s:$(quiet!{"\"" (['\t'|'\x20'|'\x21'|'\x23'..='\x5b'|'\x5d'..='\x7e'
                      |'\u{80}'..='\u{10FFFF}'] / esc_pair())+"\""}
                / expected!("a double quoted string"))
        {
            let s = &s[1..s.len() - 1];
            if s.contains('\\') {
                s.chars().filter(|&c| c != '\\').collect::<String>()
            } else {
                String::from(s)
            }
        }

        rule squote() -> String =
            s:$(quiet!{"'" ['\t'|'\x20'..='\x26'|'\x28'..='\x7e'|'\u{80}'..='\u{10FFFF}']+ "'"}
                / expected!("a single quoted string"))
        {
            s[1..s.len()-1].to_owned()
        }

        rule esc_pair() = ['\\']['\t'|'\x20'..='\x7e'|'\u{80}'..='\u{10FFFF}']

        rule blank() = quiet!{([' '|'\t']* ("#" [c if c != '\n']*)? ['\n'])*}
        rule _() = quiet!{[' '|'\t']*}
        rule newline() = quiet!{(_ "\n")+ _ / ![_]}
        rule __() = newline()*
    }
}

#[cfg(test)]
mod test {
    use super::*;
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..261356c
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,127 @@
use crate::Directive;
use crate::Scfg;
use std::fmt;
use std::io;

/// The underlying cause of a parsing [`Error`].
#[derive(Debug)]
enum ErrorKind {
    /// A `}` line ended the top-level block, where there is no block to close.
    UnexpectedClosingBrace,
    /// Reading from the input failed. Also used, with `io::ErrorKind::UnexpectedEof`, when a
    /// child block reaches the end of input before its closing `}`.
    Io(io::Error),
    /// `shell_words::split` could not split a line into words.
    ShellWords(shell_words::ParseError),
}

/// An error produced while parsing an scfg document.
///
/// Displayed as `parsing error at line N: <cause>`.
#[derive(Debug)]
pub struct Error {
    // What went wrong.
    kind: ErrorKind,
    // Value of the parser's line counter when the error was raised (counting starts at 1).
    lineno: usize,
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "parsing error at line {}: ", self.lineno)?;
        match &self.kind {
            ErrorKind::UnexpectedClosingBrace => write!(f, "unexpected '}}'"),
            ErrorKind::Io(err) => write!(f, "io: {}", err),
            ErrorKind::ShellWords(err) => write!(f, "{}", err),
        }
    }
}

impl std::error::Error for Error {
    /// Exposes the wrapped I/O or word-splitting error, if this error has one.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match &self.kind {
            ErrorKind::UnexpectedClosingBrace => None,
            ErrorKind::Io(inner) => Some(inner),
            ErrorKind::ShellWords(inner) => Some(inner),
        }
    }
}

pub fn document(mut r: impl io::BufRead) -> Result<Scfg, Error> {
    let mut lineno = 1;
    let (block, closing_brace) = read_block(&mut r, &mut lineno)?;
    if closing_brace {
        return Err(Error {
            kind: ErrorKind::UnexpectedClosingBrace,
            lineno,
        });
    }
    Ok(block)
}

/// Reads a block.
///
/// Returns `(block, closing_brace)` where `closing_brace` is true if parsing stopped on '}', and
/// false if parsing stopped on EOF.
///
/// `lineno` must be set the line number of the first line of the block, and is set to the line
/// number of the closing bracket or EOF.
fn read_block<R: io::BufRead>(r: &mut R, lineno: &mut usize) -> Result<(Scfg, bool), Error> {
    let mut block = Scfg::new();
    let mut line = String::new();

    loop {
        line.clear();
        let n = r.read_line(&mut line).map_err(|err| Error {
            kind: ErrorKind::Io(err),
            lineno: *lineno,
        })?;
        if n == 0 {
            // reached EOF.
            return Ok((block, false));
        }
        let line = line.trim();

        let mut words = shell_words::split(&line).map_err(|err| Error {
            kind: ErrorKind::ShellWords(err),
            lineno: *lineno,
        })?;
        if words.is_empty() {
            // line is either empty or a comment.
            continue;
        }

        let last_byte = *line.as_bytes().last().unwrap();
        if words.len() == 1 && last_byte == b'}' {
            // The line is a litteral '}' (end of block).
            return Ok((block, true));
        }

        let has_child = words.last().unwrap() == "{" && last_byte == b'{'; // avoid matching `"{"`
        let (name, directive) = if has_child {
            words.pop(); // remove brace
            let name = if words.is_empty() {
                String::new()
            } else {
                words.remove(0)
            };
            *lineno += 1;
            let (child, closing_brace) = read_block(r, lineno)?;
            if !closing_brace {
                return Err(Error {
                    kind: ErrorKind::Io(io::ErrorKind::UnexpectedEof.into()),
                    lineno: *lineno,
                });
            }
            (
                name,
                Directive {
                    params: words,
                    child: Some(child),
                },
            )
        } else {
            let name = words.remove(0);
            (
                name,
                Directive {
                    params: words,
                    child: None,
                },
            )
        };
        block.add_directive(name, directive);

        *lineno += 1;
    }
}
--
2.31.1
scfg-rs/patches: SUCCESS in 1m21s

[Hand-made parser][0] from [Hubert Hirtz][1]

[0]: https://lists.sr.ht/~cdv/public-inbox/patches/22670
[1]: mailto:hubert@hirtz.pm

✓ #505447 SUCCESS scfg-rs/patches/alpine.yml    https://builds.sr.ht/~cdv/job/505447
✓ #505448 SUCCESS scfg-rs/patches/archlinux.yml https://builds.sr.ht/~cdv/job/505448
Thanks! Pushed and uploaded a new version.

Chris