~emersion/mrsh-dev

Parse arithmetic expressions with shifts v2 PROPOSED

Cristian Adrián Ontivero: 1
 Parse arithmetic expressions with shifts

 2 files changed, 118 insertions(+), 5 deletions(-)
I mean suffix, not prefix (thanks Drew!).
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~emersion/mrsh-dev/patches/3002/mbox | git am -3
Learn more about email & git

[PATCH v2] Parse arithmetic expressions with shifts Export this patch

We introduce the function arithmetic_word() to parse arithmetic
expressions instead of reusing the general word(), and generalize
word_list() to take a function pointer, so that word_list() can be
used to parse a list of whichever kind of word is needed.

This fixes #51, and enables properly parsing parenthesized expressions
inside arithmetic expressions, e.g. $(((2+1)-1)).
---

This is mostly a working proof of concept. There is (as might be expected) quite
a bit of similarity between word() and arithmetic_word(), but I think that it
would be better in the long term to remove the "end" parameter, and have a
couple of different *_word() functions that are called whenever appropriate.

As discussed, the alternative would be to add a mrsh_word_type parameter to
word() and word_list(). I think that would eventually leave word() full of
if-statements, one per distinct context, and make it more complex overall
(although it would avoid the repetition inherent in the separate *_word()
functions approach).

What do you think?
 include/parser.h |   3 ++
 parser/word.c    | 120 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 118 insertions(+), 5 deletions(-)

diff --git a/include/parser.h b/include/parser.h
index 56e549f..d4b32b9 100644
--- a/include/parser.h
+++ b/include/parser.h
@@ -63,6 +63,8 @@ struct mrsh_parser {
	void *alias_user_data;
};

typedef struct mrsh_word * (*word_fn)(struct mrsh_parser *, char end);

size_t parser_peek(struct mrsh_parser *state, char *buf, size_t size);
char parser_peek_char(struct mrsh_parser *state);
size_t parser_read(struct mrsh_parser *state, char *buf, size_t size);
@@ -90,5 +92,6 @@ size_t peek_word(struct mrsh_parser *state, char end);
struct mrsh_word *expect_dollar(struct mrsh_parser *state);
struct mrsh_word *back_quotes(struct mrsh_parser *state);
struct mrsh_word *word(struct mrsh_parser *state, char end);
struct mrsh_word *arithmetic_word(struct mrsh_parser *state, char end);

#endif
diff --git a/parser/word.c b/parser/word.c
index 3abede3..17b327a 100644
--- a/parser/word.c
+++ b/parser/word.c
@@ -173,7 +173,7 @@ char *read_token(struct mrsh_parser *state, size_t len,
	return tok;
}

static struct mrsh_word *word_list(struct mrsh_parser *state, char end) {
static struct mrsh_word *word_list(struct mrsh_parser *state, char end, word_fn fn) {
	struct mrsh_array children = {0};

	while (true) {
@@ -181,7 +181,7 @@ static struct mrsh_word *word_list(struct mrsh_parser *state, char end) {
			break;
		}

		struct mrsh_word *child = word(state, end);
		struct mrsh_word *child = fn(state, end);
		if (child == NULL) {
			break;
		}
@@ -309,7 +309,7 @@ static struct mrsh_word_parameter *expect_parameter_expression(
			return NULL;
		}
		op_range.end = state->pos;
		arg = word_list(state, '}');
		arg = word_list(state, '}', word);
	}

	struct mrsh_position rbrace_pos = state->pos;
@@ -355,7 +355,7 @@ static struct mrsh_word_arithmetic *expect_word_arithmetic(
	c = parser_read_char(state);
	assert(c == '(');

	struct mrsh_word *body = word_list(state, ')');
	struct mrsh_word *body = word_list(state, ')', arithmetic_word);
	if (body == NULL) {
		if (!mrsh_parser_error(state, NULL)) {
			parser_set_error(state, "expected an arithmetic expression");
@@ -695,7 +695,117 @@ struct mrsh_word *word(struct mrsh_parser *state, char end) {
	}
}

/*
 * Copy the next two unread characters into `pair` as a NUL-terminated string,
 * without consuming them.
 *
 * The buffer is cleared first so that a short or failed peek can never leave
 * characters from an earlier call behind.
 * NOTE(review): parser_peek()'s behaviour near end of input is not visible
 * from here; when it reports fewer than two characters we fall back to
 * parser_peek_char() for the first one and treat the second as absent.
 */
static void peek_pair(struct mrsh_parser *state, char pair[3]) {
	pair[0] = pair[1] = pair[2] = '\0';
	if (parser_peek(state, pair, 2) < 2) {
		pair[0] = parser_peek_char(state);
		pair[1] = '\0';
	}
}

/*
 * Parse one word inside an arithmetic expansion.
 *
 * Differences from the loop's generic handling that are specific to this
 * function: the word ends at a NUL, a newline or the closing "))", and the
 * shift operators "<<" and ">>" are kept as part of the word instead of
 * terminating it. Any other operator-start character, or a blank, ends the
 * word.
 *
 * Returns NULL when there is no TOKEN symbol, when the input starts with an
 * operator character or with "))", or when a nested `$`, backquote or quoted
 * section fails to parse. Otherwise returns the single child word, or a word
 * list wrapping all children when there is not exactly one.
 *
 * `end` is currently unused; it is only there so the signature matches
 * word_fn.
 */
/* TODO remove end parameter when no *_word function takes it */
struct mrsh_word *arithmetic_word(struct mrsh_parser *state, char end) {
	if (!symbol(state, TOKEN)) {
		return NULL;
	}

	char c = parser_peek_char(state);
	if (is_operator_start(c)) {
		return NULL;
	}

	char next[3] = {0};
	if (c == ')') {
		peek_pair(state, next);
		if (strcmp(next, "))") == 0) {
			return NULL;
		}
	}

	struct mrsh_array children = {0};
	struct mrsh_buffer buf = {0};
	struct mrsh_position child_begin = {0};

	while (true) {
		if (!mrsh_position_valid(&child_begin)) {
			child_begin = state->pos;
		}

		peek_pair(state, next);
		c = next[0];
		if (c == '\0' || c == '\n' || strcmp(next, "))") == 0) {
			break;
		}

		// Expansions and quoting: flush the pending literal text, then let
		// the dedicated sub-parser produce the child word.
		if (c == '$' || c == '`' || c == '\'' || c == '"') {
			push_buffer_word_string(state, &children, &buf, &child_begin);

			struct mrsh_word *t = NULL;
			switch (c) {
			case '$':
				t = expect_dollar(state);
				break;
			case '`':
				t = back_quotes(state);
				break;
			case '\'':
				t = single_quotes(state);
				break;
			default: // '"'
				t = double_quotes(state);
				break;
			}
			if (t == NULL) {
				goto error;
			}
			mrsh_array_add(&children, t);
			continue;
		}

		if (c == '\\') {
			// Unquoted backslash
			parser_read_char(state);
			c = parser_peek_char(state);
			if (c == '\n') {
				// Continuation line
				read_continuation_line(state);
				continue;
			}
		} else if (is_operator_start(c) || isblank((unsigned char)c)) {
			bool is_shift =
				strcmp(next, "<<") == 0 || strcmp(next, ">>") == 0;
			if (!is_shift) {
				break;
			}
			// Consume the first character of the shift operator here; the
			// common tail below consumes the second one, which is the same
			// character.
			parser_read_char(state);
			mrsh_buffer_append_char(&buf, c);
		}

		parser_read_char(state);
		mrsh_buffer_append_char(&buf, c);
	}

	push_buffer_word_string(state, &children, &buf, &child_begin);
	mrsh_buffer_finish(&buf);

	consume_symbol(state);

	if (children.len == 1) {
		struct mrsh_word *word = children.data[0];
		mrsh_array_finish(&children); // TODO: don't allocate this array
		return word;
	} else {
		struct mrsh_word_list *wl = mrsh_word_list_create(&children, false);
		return &wl->word;
	}

error:
	// Release the scratch buffer and the child array storage instead of
	// leaking them on a failed nested parse.
	// NOTE(review): the child words already stored in `children` are not
	// destroyed here; they should be released with the project's word
	// destructor, which is not visible in this patch.
	mrsh_buffer_finish(&buf);
	mrsh_array_finish(&children);
	return NULL;
}

struct mrsh_word *mrsh_parse_word(struct mrsh_parser *state) {
	parser_begin(state);
	return word_list(state, 0);
	return word_list(state, 0, word);
}
-- 
2.20.1