The lexer treated either ' or " as the end of a string, regardless of
which quote character had opened it, so constructions like "It's ..."
were split in the wrong place.
Previously, the input was tokenized like this:
"Hello, it's me"
|_________||_____?
string 1 string 2 with EOL
Now it is tokenized correctly:
"Hello, it's me"
|______________|
one string
---
src/lexer/mod.rs | 12 +++++-----
src/lexer/tests.rs | 55 ++++++++++++++++++++++++++++++++++++----------
2 files changed, 49 insertions(+), 18 deletions(-)
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index e5ec7fe..030d37a 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -208,7 +208,7 @@ impl Cursor<'_> {
let token_kind = match first_char {
c if is_whitespace(c) => self.whitespace(),
'0'..='9' => self.number(),
- '"' | '\'' => self.string(),
+ '"' | '\'' => self.string(first_char),
'.' => Dot,
'+' => match self.first() {
'=' => {
@@ -347,8 +347,8 @@ impl Cursor<'_> {
TokenKind::Literal(Value::Int)
}
- fn string(&mut self) -> TokenKind {
- self.eat_string();
+ fn string(&mut self, end: char) -> TokenKind {
+ self.eat_string(end);
TokenKind::Literal(Value::Str)
}
@@ -408,12 +408,10 @@ impl Cursor<'_> {
has_digits
}
- fn eat_string(&mut self) {
- // FIXME: double quoted strings could probably be ended by single quoted, and vice versa.
- // Possible fix: Pass the token of the string beginning down to this method and check against it.
+ fn eat_string(&mut self, end: char) {
loop {
match self.first() {
- '"' | '\'' => break,
+ ch if ch == end => break,
'\n' => panic!(
"String does not end on same line. At {}:{}",
self.pos().line,
diff --git a/src/lexer/tests.rs b/src/lexer/tests.rs
index f11f59f..42881c4 100644
--- a/src/lexer/tests.rs
+++ b/src/lexer/tests.rs
@@ -139,19 +139,19 @@ fn test_tokenizing_without_whitespace() {
}
#[test]
-fn test_booleans() {
- let mut tokens = tokenize("true false").into_iter();
+fn test_string() {
+ let mut tokens = tokenize("'aaa' \"bbb\"").into_iter();
assert_eq!(
tokens.next().unwrap(),
Token {
- len: 4,
- kind: TokenKind::Keyword(Keyword::Boolean),
- raw: "true".to_owned(),
+ len: 5,
+ kind: TokenKind::Literal(Value::Str),
+ raw: "'aaa'".to_owned(),
pos: Position {
- raw: 3,
+ raw: 4,
line: 1,
- offset: 3
+ offset: 4
}
}
);
@@ -160,12 +160,45 @@ fn test_booleans() {
tokens.nth(1).unwrap(),
Token {
len: 5,
- kind: TokenKind::Keyword(Keyword::Boolean),
- raw: "false".to_owned(),
+ kind: TokenKind::Literal(Value::Str),
+ raw: "\"bbb\"".to_owned(),
+ pos: Position {
+ raw: 10,
+ line: 1,
+ offset: 10
+ }
+ }
+ );
+}
+
+#[test]
+fn test_string_markers_within_string() {
+ let mut tokens = tokenize("'\"aaa' \"'bbb\"").into_iter();
+
+ assert_eq!(
+ tokens.next().unwrap(),
+ Token {
+ len: 6,
+ kind: TokenKind::Literal(Value::Str),
+ raw: "'\"aaa'".to_owned(),
+ pos: Position {
+ raw: 5,
+ line: 1,
+ offset: 5
+ }
+ }
+ );
+
+ assert_eq!(
+ tokens.nth(1).unwrap(),
+ Token {
+ len: 6,
+ kind: TokenKind::Literal(Value::Str),
+ raw: "\"'bbb\"".to_owned(),
pos: Position {
- raw: 9,
+ raw: 12,
line: 1,
- offset: 9
+ offset: 12
}
}
);
--
2.30.1