~emersion/mrsh-dev

Mostly implement break and continue builtins v2 PROPOSED

I was about to push with minor edits (see below), but I wrote a basic
test and got it to segfault:

==12346==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000008 (pc 0x7f8893d614c5 bp 0x7ffde5aa28e0 sp 0x7ffde5aa28c0 T0)
==12346==The signal is caused by a READ memory access.
==12346==Hint: address points to the zero page.
    #0 0x7f8893d614c4 in task_poll ../shell/task/task.c:30
    #1 0x7f8893d5e64b in task_loop_clause_poll ../shell/task/loop_clause.c:50
    #2 0x7f8893d61572 in task_poll ../shell/task/task.c:31
    #3 0x7f8893d60885 in task_pipeline_poll ../shell/task/pipeline.c:123
    #4 0x7f8893d61572 in task_poll ../shell/task/task.c:31
    #5 0x7f8893d5dc20 in task_list_poll ../shell/task/list.c:27
    #6 0x7f8893d61572 in task_poll ../shell/task/task.c:31
    #7 0x7f8893d61955 in task_run ../shell/task/task.c:48
    #8 0x7f8893d5095b in mrsh_run_program ../shell/shell.c:145
    #9 0x55ed62bb767d in main ../main.c:132
    #10 0x7f8892f64ce2 in __libc_start_main (/usr/lib/libc.so.6+0x23ce2)
    #11 0x55ed62bb642d in _start (/home/simon/src/mrsh/build/mrsh+0x342d)

Can you check what happens?

Your edited commit and the test suite update are both in the "break"
branch:
https://git.sr.ht/~emersion/mrsh/log/break
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~emersion/mrsh-dev/%3C20190615164425.17025-1-sir%40cmpwn.com%3E/mbox | git am -3
Learn more about email & git

[PATCH v2] Mostly implement break and continue builtins Export this patch

The main thing that's missing here is moving the number of loops into
the call stack:

http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#break

To prevent you from breaking out of your caller's loop in a function.
---
I accidentally deleted your feedback on the last patch, so I can't reply
to it directly without suffering some annoyances.

This version addresses the mistake with checking for invalid loop
numbers.

> Seems like this file is missing.

break & continue use the same file, it checks argv[0] to adjust its
behavior.

> After thinking about it, I believe this should be in the context
> rather than the state. The reason is that we want this to work:

Well, builtins don't have access to the context. If we want to move some
things from state into context, we'll need to address that first. I
think that's out of scope for this patch.

> Hmm, this goto logic makes it very difficult to follow the flow. Can't
> we extract this to a function whose return value indicates whether we
> should return?

This went through 3 revisions before settling on the goto flow, it's the
simplest one I could come up with. Because tasks are designed to be
called repeatedly while polling on their children, this logic is just
going to be complicated. I've wondered before if it wouldn't be wise to
move the polling logic into the tasks instead of returning all the way
up the call chain to avoid cases like this. Anyway, I'll leave
refactoring this function further to you. The goto version is the best I
feel possible without overhauling large swaths of the code.

 builtin/break.c          | 35 ++++++++++++++++++++++
 builtin/builtin.c        |  2 ++
 include/builtin.h        |  1 +
 include/mrsh/shell.h     |  8 +++++
 include/shell/task.h     |  4 +++
 meson.build              |  1 +
 shell/shell.c            |  1 +
 shell/task/for_clause.c  | 49 +++++++++++++++++++++++++-----
 shell/task/loop_clause.c | 65 ++++++++++++++++++++++++++++++----------
 9 files changed, 142 insertions(+), 24 deletions(-)
 create mode 100644 builtin/break.c

diff --git a/builtin/break.c b/builtin/break.c
new file mode 100644
index 0000000..6e53f38
--- /dev/null
@@ -0,0 +1,35 @@
+#define _POSIX_C_SOURCE 200809L
+#include <mrsh/builtin.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "builtin.h"
+#include "shell/shell.h"
+#include "shell/task.h"
+
+static const char break_usage[] = "usage: %s [n]\n";
+
+int builtin_break(struct mrsh_state *state, int argc, char *argv[]) {
+	if (argc > 2) {
+		fprintf(stderr, break_usage, argv[0]);
+		return 1;
+	}
+	int n = 1;
+	if (argc == 2) {
+		char *endptr;
+		n = strtol(argv[1], &endptr, 10);
+		if ((*endptr != '\0' && argv[1][0] == '\0') || n < 0) {
+			fprintf(stderr, "%s: invalid loop number '%s'\n", argv[0], argv[1]);
+			return 1;
+		}
+	}
+
+	if (n > state->nloops) {
+		n = state->nloops;
+	}
+
+	state->nloops -= n - 1;
+	state->branch_control =
+		strcmp(argv[0], "break") == 0 ? BRANCH_BREAK : BRANCH_CONTINUE;
+	return TASK_STATUS_INTERRUPTED;
+}
diff --git a/builtin/builtin.c b/builtin/builtin.c
index ce77af3..677d8f6 100644
--- a/builtin/builtin.c
@@ -18,8 +18,10 @@ static const struct builtin builtins[] = {
 	{ ":", builtin_colon, true },
 	{ "alias", builtin_alias, false },
 	{ "bg", builtin_bg, false },
+	{ "break", builtin_break, true },
 	{ "cd", builtin_cd, false },
 	{ "command", builtin_command, false },
+	{ "continue", builtin_break, true },
 	{ "eval", builtin_eval, true },
 	{ "exit", builtin_exit, true },
 	{ "export", builtin_export, true },
diff --git a/include/builtin.h b/include/builtin.h
index 1f60125..776207b 100644
--- a/include/builtin.h
+++ b/include/builtin.h
@@ -10,6 +10,7 @@ void print_escaped(const char *value);
 
 int builtin_alias(struct mrsh_state *state, int argc, char *argv[]);
 int builtin_bg(struct mrsh_state *state, int argc, char *argv[]);
+int builtin_break(struct mrsh_state *state, int argc, char *argv[]);
 int builtin_cd(struct mrsh_state *state, int argc, char *argv[]);
 int builtin_command(struct mrsh_state *state, int argc, char *argv[]);
 int builtin_colon(struct mrsh_state *state, int argc, char *argv[]);
diff --git a/include/mrsh/shell.h b/include/mrsh/shell.h
index d6ef789..e08dadc 100644
--- a/include/mrsh/shell.h
+++ b/include/mrsh/shell.h
@@ -83,6 +83,12 @@ struct mrsh_call_frame {
 
 struct mrsh_job;
 
+enum branch_control {
+	BRANCH_BREAK,
+	BRANCH_CONTINUE,
+	BRANCH_RETURN,
+};
+
 struct mrsh_state {
 	int exit;
 	int fd;
@@ -100,6 +106,8 @@ struct mrsh_state {
 	struct termios term_modes;
 	struct mrsh_array jobs; // struct mrsh_job *
 	struct mrsh_job *foreground_job;
+	enum branch_control branch_control;
+	int nloops;
 };
 
 void mrsh_function_destroy(struct mrsh_function *fn);
diff --git a/include/shell/task.h b/include/shell/task.h
index dc630ea..6e38d29 100644
--- a/include/shell/task.h
+++ b/include/shell/task.h
@@ -16,6 +16,10 @@
  * The task has been stopped and the job has been put in the background.
  */
 #define TASK_STATUS_STOPPED -3
+/**
+ * The task has been interrupted for some reason.
+ */
+#define TASK_STATUS_INTERRUPTED -4
 
 struct task_interface;
 
diff --git a/meson.build b/meson.build
index 9c1f67a..8f333da 100644
--- a/meson.build
+++ b/meson.build
@@ -74,6 +74,7 @@ lib_mrsh = library(
 		'buffer.c',
 		'builtin/alias.c',
 		'builtin/bg.c',
+		'builtin/break.c',
 		'builtin/builtin.c',
 		'builtin/cd.c',
 		'builtin/colon.c',
diff --git a/shell/shell.c b/shell/shell.c
index ec855b3..556f8ad 100644
--- a/shell/shell.c
+++ b/shell/shell.c
@@ -22,6 +22,7 @@ void mrsh_function_destroy(struct mrsh_function *fn) {
 void mrsh_state_init(struct mrsh_state *state) {
 	state->exit = -1;
 	state->fd = -1;
+	state->nloops = 0;
 	state->interactive = isatty(STDIN_FILENO);
 	state->options = state->interactive ? MRSH_OPT_INTERACTIVE : 0;
 	state->args = calloc(1, sizeof(struct mrsh_call_frame));
diff --git a/shell/task/for_clause.c b/shell/task/for_clause.c
index 6e9d904..69acac4 100644
--- a/shell/task/for_clause.c
+++ b/shell/task/for_clause.c
@@ -15,7 +15,8 @@ struct task_for_clause {
 		struct task *word, *body;
 	} tasks;
 	size_t index;
-	int last_body_status;
+	int exit_status;
+	int loop_num;
 };
 
 static void task_for_clause_destroy(struct task *task) {
@@ -27,20 +28,30 @@ static void task_for_clause_destroy(struct task *task) {
 
 static int task_for_clause_poll(struct task *task, struct context *ctx) {
 	struct task_for_clause *tfc = (struct task_for_clause *)task;
+	if (tfc->loop_num == -1) {
+		tfc->loop_num = ++ctx->state->nloops;
+	}
+
 	while (true) {
+		int status;
+
 		if (tfc->tasks.body) {
 			/* wait for body */
-			int body_status = task_poll(tfc->tasks.body, ctx);
-			if (body_status < 0) {
-				return body_status;
+			status = task_poll(tfc->tasks.body, ctx);
+			if (status == TASK_STATUS_INTERRUPTED) {
+				goto interrupt;
+			} else if (status < 0) {
+				return status;
 			}
 			task_destroy(tfc->tasks.body);
 			tfc->tasks.body = NULL;
 		} else if (tfc->tasks.word) {
 			/* wait for word */
-			int word_status = task_poll(tfc->tasks.word, ctx);
-			if (word_status < 0) {
-				return word_status;
+			status = task_poll(tfc->tasks.word, ctx);
+			if (status == TASK_STATUS_INTERRUPTED) {
+				goto interrupt;
+			} else if (status < 0) {
+				return status;
 			}
 			struct mrsh_word_string *word = (struct mrsh_word_string *)
 				tfc->ast.word_list->data[tfc->index - 1];
@@ -53,14 +64,34 @@ static int task_for_clause_poll(struct task *task, struct context *ctx) {
 		} else {
 			/* create a new word */
 			if (tfc->index == tfc->ast.word_list->len) {
-				return 0;
+				goto exit;
 			}
 			struct mrsh_word **word_ptr =
 				(struct mrsh_word **)&tfc->ast.word_list->data[tfc->index++];
 			tfc->tasks.word = task_word_create(
 				word_ptr, TILDE_EXPANSION_NAME);
 		}
+		continue;
+
+interrupt:
+		if (ctx->state->nloops < tfc->loop_num) {
+			/* break to parent loop */
+			return status;
+		}
+		if (ctx->state->branch_control == BRANCH_BREAK) {
+			tfc->exit_status = 0;
+			goto exit;
+		} else if (ctx->state->branch_control == BRANCH_CONTINUE) {
+			task_destroy(tfc->tasks.body);
+			tfc->tasks.body = NULL;
+		} else {
+			assert(0 && "Unknown task interruption cause");
+		}
 	}
+
+exit:
+	--ctx->state->nloops;
+	return tfc->exit_status;
 }
 
 static const struct task_interface task_for_clause_impl = {
@@ -76,5 +107,7 @@ struct task *task_for_clause_create(const char *name,
 	tfc->ast.word_list = word_list;
 	tfc->ast.body = body;
 	tfc->index = 0;
+	tfc->exit_status = 0;
+	tfc->loop_num = -1;
 	return &tfc->task;
 }
diff --git a/shell/task/loop_clause.c b/shell/task/loop_clause.c
index cfb9418..a8a14f8 100644
--- a/shell/task/loop_clause.c
+++ b/shell/task/loop_clause.c
@@ -1,3 +1,4 @@
+#include <assert.h>
 #include <stdbool.h>
 #include <stdlib.h>
 #include "shell/task.h"
@@ -11,7 +12,8 @@ struct task_loop_clause {
 		struct task *condition, *body;
 	} tasks;
 	bool until;
-	int last_body_status;
+	int exit_status;
+	int loop_num;
 };
 
 static void task_loop_clause_destroy(struct task *task) {
@@ -23,34 +25,64 @@ static void task_loop_clause_destroy(struct task *task) {
 
 static int task_loop_clause_poll(struct task *task, struct context *ctx) {
 	struct task_loop_clause *tlc = (struct task_loop_clause *)task;
+	if (tlc->loop_num == -1) {
+		tlc->loop_num = ++ctx->state->nloops;
+	}
 
 	while (ctx->state->exit == -1) {
+		int status;
+
 		if (tlc->tasks.condition) {
-			int condition_status = task_poll(tlc->tasks.condition, ctx);
-			if (condition_status < 0) {
-				return condition_status;
-			} else if (condition_status > 0 && !tlc->until) {
-				return tlc->last_body_status;
-			} else if (condition_status == 0 && tlc->until) {
-				return tlc->last_body_status;
+			status = task_poll(tlc->tasks.condition, ctx);
+			if (status == TASK_STATUS_INTERRUPTED) {
+				goto interrupt;
+			} else if (status < 0) {
+				return status;
+			} else if (status > 0 && !tlc->until) {
+				goto exit;
+			} else if (status == 0 && tlc->until) {
+				goto exit;
 			}
 			task_destroy(tlc->tasks.condition);
 			tlc->tasks.condition = NULL;
 		}
 
-		int body_status = task_poll(tlc->tasks.body, ctx);
-		if (body_status < 0) {
-			return body_status;
-		} else {
-			tlc->last_body_status = body_status;
+		status = task_poll(tlc->tasks.body, ctx);
+		if (status == TASK_STATUS_INTERRUPTED) {
+			goto interrupt;
+		} else if (status < 0) {
+			return status;
+		}
+
+		tlc->exit_status = status;
+		task_destroy(tlc->tasks.body);
+		tlc->tasks.condition =
+			task_for_command_list_array(tlc->ast.condition);
+		tlc->tasks.body = task_for_command_list_array(tlc->ast.body);
+		continue;
+
+interrupt:
+		if (ctx->state->nloops < tlc->loop_num) {
+			/* break to parent loop */
+			return status;
+		}
+		if (ctx->state->branch_control == BRANCH_BREAK) {
+			tlc->exit_status = 0;
+			goto exit;
+		} else if (ctx->state->branch_control == BRANCH_CONTINUE) {
 			task_destroy(tlc->tasks.body);
-			tlc->tasks.condition =
-				task_for_command_list_array(tlc->ast.condition);
-			tlc->tasks.body = task_for_command_list_array(tlc->ast.body);
+			tlc->tasks.body = NULL;
+		} else {
+			assert(0 && "Unknown task interruption cause");
 		}
 	}
 
+	--ctx->state->nloops;
 	return ctx->state->exit;
+
+exit:
+	--ctx->state->nloops;
+	return tlc->exit_status;
 }
 
 static const struct task_interface task_loop_clause_impl = {
@@ -67,5 +99,6 @@ struct task *task_loop_clause_create(const struct mrsh_array *condition,
 	tlc->tasks.condition = task_for_command_list_array(condition);
 	tlc->tasks.body = task_for_command_list_array(body);
 	tlc->until = until;
+	tlc->loop_num = -1;
 	return &tlc->task;
 }
-- 
2.22.0
I was about to push with minor edits (see below), but I wrote a basic
test and got it to segfault:

==12346==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000008 (pc 0x7f8893d614c5 bp 0x7ffde5aa28e0 sp 0x7ffde5aa28c0 T0)
==12346==The signal is caused by a READ memory access.
==12346==Hint: address points to the zero page.
    #0 0x7f8893d614c4 in task_poll ../shell/task/task.c:30
    #1 0x7f8893d5e64b in task_loop_clause_poll ../shell/task/loop_clause.c:50
    #2 0x7f8893d61572 in task_poll ../shell/task/task.c:31
    #3 0x7f8893d60885 in task_pipeline_poll ../shell/task/pipeline.c:123
    #4 0x7f8893d61572 in task_poll ../shell/task/task.c:31
    #5 0x7f8893d5dc20 in task_list_poll ../shell/task/list.c:27
    #6 0x7f8893d61572 in task_poll ../shell/task/task.c:31
    #7 0x7f8893d61955 in task_run ../shell/task/task.c:48
    #8 0x7f8893d5095b in mrsh_run_program ../shell/shell.c:145
    #9 0x55ed62bb767d in main ../main.c:132
    #10 0x7f8892f64ce2 in __libc_start_main (/usr/lib/libc.so.6+0x23ce2)
    #11 0x55ed62bb642d in _start (/home/simon/src/mrsh/build/mrsh+0x342d)

Can you check what happens?

Your edited commit and the test suite update are both in the "break"
branch:
https://git.sr.ht/~emersion/mrsh/log/break
View this thread in the archives