Cristian Adrián Ontivero: 1 Parse arithmetic expressions with shifts 2 files changed, 118 insertions(+), 5 deletions(-)
I mean suffix, not prefix (thanks Drew!).
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~emersion/mrsh-dev/patches/3002/mbox | git am -3Learn more about email & git
We introduce the function arithmetic_word() to parse arithmetic expressions instead of reusing the general word(), and generalize word_list() to receive a pointer to function, so that word_list() may be used to parse a list of whatever type of word we need. This fixes #51, and enables properly parsing parenthesized expressions inside arithmetic expressions, e.g. $(((2+1)-1)). --- This is mostly a working proof of concept. There is (as might be expected) quite a bit of similarity between word() and arithmetic_word(), but I think that it would be better in the long term to remove the "end" parameter, and have a couple of different *_word() functions that are called whenever appropriate. As discussed, the alternative would be adding a mrsh_word_type parameter to word() and word_list(), but I think that this would eventually lead to a word() function with a lot of if-statements for each distinct context, and might lead to a more complex word() function (although preventing the repetition intrinsic to the *_word() functions alternative). What do you think?
+1, I like it. Yeah, I agree, the `end` parameter should go away. Is there anything that prevents this from being merged?
include/parser.h | 3 ++ parser/word.c | 120 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 118 insertions(+), 5 deletions(-) diff --git a/include/parser.h b/include/parser.h index 56e549f..d4b32b9 100644 --- a/include/parser.h +++ b/include/parser.h @@ -63,6 +63,8 @@ struct mrsh_parser { void *alias_user_data; }; +typedef struct mrsh_word * (*word_fn)(struct mrsh_parser *, char end);
Nitpick: the convention I've used so far is to use the `_func` prefix for function types.
+ size_t parser_peek(struct mrsh_parser *state, char *buf, size_t size); char parser_peek_char(struct mrsh_parser *state); size_t parser_read(struct mrsh_parser *state, char *buf, size_t size); @@ -90,5 +92,6 @@ size_t peek_word(struct mrsh_parser *state, char end); struct mrsh_word *expect_dollar(struct mrsh_parser *state); struct mrsh_word *back_quotes(struct mrsh_parser *state); struct mrsh_word *word(struct mrsh_parser *state, char end); +struct mrsh_word *arithmetic_word(struct mrsh_parser *state, char end); #endif diff --git a/parser/word.c b/parser/word.c index 3abede3..17b327a 100644 --- a/parser/word.c +++ b/parser/word.c @@ -173,7 +173,7 @@ char *read_token(struct mrsh_parser *state, size_t len, return tok; } -static struct mrsh_word *word_list(struct mrsh_parser *state, char end) { +static struct mrsh_word *word_list(struct mrsh_parser *state, char end, word_fn fn) { struct mrsh_array children = {0}; while (true) { @@ -181,7 +181,7 @@ static struct mrsh_word *word_list(struct mrsh_parser *state, char end) { break; } - struct mrsh_word *child = word(state, end); + struct mrsh_word *child = fn(state, end); if (child == NULL) { break; } @@ -309,7 +309,7 @@ static struct mrsh_word_parameter *expect_parameter_expression( return NULL; } op_range.end = state->pos; - arg = word_list(state, '}'); + arg = word_list(state, '}', word); } struct mrsh_position rbrace_pos = state->pos; @@ -355,7 +355,7 @@ static struct mrsh_word_arithmetic *expect_word_arithmetic( c = parser_read_char(state); assert(c == '('); - struct mrsh_word *body = word_list(state, ')'); + struct mrsh_word *body = word_list(state, ')', arithmetic_word); if (body == NULL) { if (!mrsh_parser_error(state, NULL)) { parser_set_error(state, "expected an arithmetic expression"); @@ -695,7 +695,117 @@ struct mrsh_word *word(struct mrsh_parser *state, char end) { } } +/* TODO remove end parameter when no *_word function takes it */ +struct mrsh_word *arithmetic_word(struct mrsh_parser *state, char end) { + if (!symbol(state, TOKEN)) { + return NULL; + } + + char c = parser_peek_char(state); + if (is_operator_start(c)) { + return NULL; + } + + char next[3] = {0}; + if (c == ')') { + parser_peek(state, next, sizeof(*next) * 2); + if (!strcmp(next, "))")) { + return NULL; + } + } + + struct mrsh_array children = {0}; + struct mrsh_buffer buf = {0}; + struct mrsh_position child_begin = {0}; + + while (true) { + if (!mrsh_position_valid(&child_begin)) { + child_begin = state->pos; + } + + parser_peek(state, next, sizeof(*next) * 2); + c = next[0]; + if (c == '\0' || c == '\n' || !strcmp(next, "))")) { + break; + } + + if (c == '$') { + push_buffer_word_string(state, &children, &buf, &child_begin); + struct mrsh_word *t = expect_dollar(state); + if (t == NULL) { + return NULL; + } + mrsh_array_add(&children, t); + continue; + } + + if (c == '`') { + push_buffer_word_string(state, &children, &buf, &child_begin); + struct mrsh_word *t = back_quotes(state); + if (t == NULL) { + return NULL; + } + mrsh_array_add(&children, t); + continue; + } + + // Quoting + if (c == '\'') { + push_buffer_word_string(state, &children, &buf, &child_begin); + struct mrsh_word *t = single_quotes(state); + if (t == NULL) { + return NULL; + } + mrsh_array_add(&children, t); + continue; + } + if (c == '"') { + push_buffer_word_string(state, &children, &buf, &child_begin); + struct mrsh_word *t = double_quotes(state); + if (t == NULL) { + return NULL; + } + mrsh_array_add(&children, t); + continue; + } + + if (c == '\\') { + // Unquoted backslash + parser_read_char(state); + c = parser_peek_char(state); + if (c == '\n') { + // Continuation line + read_continuation_line(state); + continue; + } + } else if (is_operator_start(c) || isblank(c)) { + if (strcmp(next, "<<") && strcmp(next, ">>")) { + break; + } + parser_read_char(state); + mrsh_buffer_append_char(&buf, c); + } + + parser_read_char(state); + mrsh_buffer_append_char(&buf, c); + } + + push_buffer_word_string(state, &children, &buf, &child_begin); + mrsh_buffer_finish(&buf); + + consume_symbol(state); + + if (children.len == 1) { + struct mrsh_word *word = children.data[0]; + mrsh_array_finish(&children); // TODO: don't allocate this array + return word; + } else { + struct mrsh_word_list *wl = mrsh_word_list_create(&children, false); + return &wl->word; + } +} + struct mrsh_word *mrsh_parse_word(struct mrsh_parser *state) { parser_begin(state); - return word_list(state, 0); + return word_list(state, 0, word); } -- 2.20.1