Hubert Hirtz: 1 Hand-made parser 3 files changed, 137 insertions(+), 73 deletions(-)
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~cdv/public-inbox/patches/22670/mbox | git am -3
Learn more about email & git
Removes dependency on peg. --- and replaces it with a 100-line parser. :) Implementation has been taken from emersion's go-scfg parser: <https://git.sr.ht/~emersion/go-scfg/tree/master/item/reader.go#L72> made sure all tests pass. Cargo.toml | 1 - src/lib.rs | 82 ++++---------------------------- src/parser.rs | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 137 insertions(+), 73 deletions(-) create mode 100644 src/parser.rs diff --git a/Cargo.toml b/Cargo.toml index 3fd2b65..995d921 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,6 @@ default = [] preserve_order = ["indexmap"] [dependencies] -peg = "0.6.3" shell-words = "1.0.0" [dependencies.indexmap] diff --git a/src/lib.rs b/src/lib.rs index 76ffecb..3325ad4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -57,7 +57,9 @@ use indexmap::IndexMap; #[cfg(not(feature = "preserve_order"))] use std::collections::BTreeMap; -pub type ParseError = peg::error::ParseError<peg::str::LineCol>; +mod parser; + +pub type ParseError = parser::Error; /// An scfg document. Implemented as a multimap. /// @@ -138,8 +140,12 @@ impl Scfg { /// This does not validate that `name` is a legal scfg word. It is possible to create /// unparsable documents should `name` contain control characters or newlines. pub fn add(&mut self, name: impl Into<String>) -> &mut Directive { + self.add_directive(name, Directive::default()) + } + + fn add_directive(&mut self, name: impl Into<String>, directive: Directive) -> &mut Directive { let entry = self.directives.entry(name.into()).or_insert_with(Vec::new); - entry.push(Directive::default()); + entry.push(directive); entry.last_mut().unwrap() } @@ -209,7 +215,8 @@ impl Scfg { impl FromStr for Scfg { type Err = ParseError; fn from_str(src: &str) -> Result<Self, Self::Err> { - scfg_parser::document(src) + let r = std::io::Cursor::new(src.as_bytes()); + parser::document(r) } } @@ -291,75 +298,6 @@ impl Directive { } } -peg::parser! 
{ - grammar scfg_parser() for str { - pub(crate) rule document() -> Scfg = - blank() _ directives:directive()* - { - directives.into_iter().collect() - } - - rule directive() -> (String, Directive) = - blank() _ name:word() _ params:params() _ child:block()? newline() - { - (name, Directive { params, child }) - } - - rule block() -> Scfg = - "{" newline() document:document() "}" - { - document - } - - rule word() -> String = - val:(atom() / dquote() / squote()) - { - val - } - - rule params() -> Vec<String> = vals:word() ** _ { vals } - - rule atom() -> String = - s:$(quiet!{(['\x21'|'\x23'..='\x26'|'\x28'..='\x5b'|'\x5d'..='\x7a' - |'\x7c'|'\x7e'|'\u{80}'..='\u{10FFFF}'] / esc_pair())+} / - expected!("an atom")) - { - if s.contains('\\') { - s.chars().filter(|&c| c != '\\').collect::<String>() - } else { - String::from(s) - } - } - - rule dquote() -> String = - s:$(quiet!{"\"" (['\t'|'\x20'|'\x21'|'\x23'..='\x5b'|'\x5d'..='\x7e' - |'\u{80}'..='\u{10FFFF}'] / esc_pair())+"\""} - / expected!("a double quoted string")) - { - let s = &s[1..s.len() - 1]; - if s.contains('\\') { - s.chars().filter(|&c| c != '\\').collect::<String>() - } else { - String::from(s) - } - } - - rule squote() -> String = - s:$(quiet!{"'" ['\t'|'\x20'..='\x26'|'\x28'..='\x7e'|'\u{80}'..='\u{10FFFF}']+ "'"} - / expected!("a single quoted string")) - { - s[1..s.len()-1].to_owned() - } - - rule esc_pair() = ['\\']['\t'|'\x20'..='\x7e'|'\u{80}'..='\u{10FFFF}'] - - rule blank() = quiet!{([' '|'\t']* ("#" [c if c != '\n']*)? 
['\n'])*} - rule _() = quiet!{[' '|'\t']*} - rule newline() = quiet!{(_ "\n")+ _ / ![_]} - rule __() = newline()* - } -} - #[cfg(test)] mod test { use super::*; diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..261356c --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,127 @@ +use crate::Directive; +use crate::Scfg; +use std::fmt; +use std::io; + +#[derive(Debug)] +enum ErrorKind { + UnexpectedClosingBrace, + Io(io::Error), + ShellWords(shell_words::ParseError), +} + +#[derive(Debug)] +pub struct Error { + kind: ErrorKind, + lineno: usize, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "parsing error at line {}: ", self.lineno)?; + match &self.kind { + ErrorKind::UnexpectedClosingBrace => write!(f, "unexpected '}}'"), + ErrorKind::Io(err) => write!(f, "io: {}", err), + ErrorKind::ShellWords(err) => write!(f, "{}", err), + } + } +} + +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match &self.kind { + ErrorKind::Io(err) => Some(err), + ErrorKind::ShellWords(err) => Some(err), + _ => None, + } + } +} + +pub fn document(mut r: impl io::BufRead) -> Result<Scfg, Error> { + let mut lineno = 1; + let (block, closing_brace) = read_block(&mut r, &mut lineno)?; + if closing_brace { + return Err(Error { + kind: ErrorKind::UnexpectedClosingBrace, + lineno, + }); + } + Ok(block) +} + +/// Reads a block. +/// +/// Returns `(block, closing_brace)` where `closing_brace` is true if parsing stopped on '}', and +/// false if parsing stopped on EOF. +/// +/// `lineno` must be set the line number of the first line of the block, and is set to the line +/// number of the closing bracket or EOF. 
+fn read_block<R: io::BufRead>(r: &mut R, lineno: &mut usize) -> Result<(Scfg, bool), Error> { + let mut block = Scfg::new(); + let mut line = String::new(); + + loop { + line.clear(); + let n = r.read_line(&mut line).map_err(|err| Error { + kind: ErrorKind::Io(err), + lineno: *lineno, + })?; + if n == 0 { + // reached EOF. + return Ok((block, false)); + } + let line = line.trim(); + + let mut words = shell_words::split(&line).map_err(|err| Error { + kind: ErrorKind::ShellWords(err), + lineno: *lineno, + })?; + if words.is_empty() { + // line is either empty or a comment. + continue; + } + + let last_byte = *line.as_bytes().last().unwrap(); + if words.len() == 1 && last_byte == b'}' { + // The line is a litteral '}' (end of block). + return Ok((block, true)); + } + + let has_child = words.last().unwrap() == "{" && last_byte == b'{'; // avoid matching `"{"` + let (name, directive) = if has_child { + words.pop(); // remove brace + let name = if words.is_empty() { + String::new() + } else { + words.remove(0) + }; + *lineno += 1; + let (child, closing_brace) = read_block(r, lineno)?; + if !closing_brace { + return Err(Error { + kind: ErrorKind::Io(io::ErrorKind::UnexpectedEof.into()), + lineno: *lineno, + }); + } + ( + name, + Directive { + params: words, + child: Some(child), + }, + ) + } else { + let name = words.remove(0); + ( + name, + Directive { + params: words, + child: None, + }, + ) + }; + block.add_directive(name, directive); + + *lineno += 1; + } +} -- 2.31.1
builds.sr.ht <builds@sr.ht>
scfg-rs/patches: SUCCESS in 1m21s
[Hand-made parser][0] from [Hubert Hirtz][1]
[0]: https://lists.sr.ht/~cdv/public-inbox/patches/22670
[1]: mailto:hubert@hirtz.pm
✓ #505447 SUCCESS scfg-rs/patches/alpine.yml https://builds.sr.ht/~cdv/job/505447
✓ #505448 SUCCESS scfg-rs/patches/archlinux.yml https://builds.sr.ht/~cdv/job/505448
Thanks! Pushed and uploaded a new version. Chris