~emersion/mrsh-dev

1

[PATCH] Mostly implement break and continue builtins

Details
Message ID
<20190614215931.29615-1-sir@cmpwn.com>
Sender timestamp
1560549571
DKIM signature
pass
Download raw message
Patch: +142 -24
The main thing that's missing here is moving the number of loops into
the call stack:

http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#break

To prevent you from breaking out of your caller's loop in a function.
---
 builtin/break.c          | 35 ++++++++++++++++++++++
 builtin/builtin.c        |  2 ++
 include/builtin.h        |  1 +
 include/mrsh/shell.h     |  8 +++++
 include/shell/task.h     |  4 +++
 meson.build              |  1 +
 shell/shell.c            |  1 +
 shell/task/for_clause.c  | 49 +++++++++++++++++++++++++-----
 shell/task/loop_clause.c | 65 ++++++++++++++++++++++++++++++----------
 9 files changed, 142 insertions(+), 24 deletions(-)
 create mode 100644 builtin/break.c

diff --git a/builtin/break.c b/builtin/break.c
new file mode 100644
index 0000000..63667aa
--- /dev/null
@@ -0,0 +1,35 @@
+#define _POSIX_C_SOURCE 200809L
+#include <mrsh/builtin.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "builtin.h"
+#include "shell/shell.h"
+#include "shell/task.h"
+
+static const char break_usage[] = "usage: %s [n]\n";
+
+int builtin_break(struct mrsh_state *state, int argc, char *argv[]) {
+	if (argc > 2) {
+		fprintf(stderr, break_usage, argv[0]);
+		return 1;
+	}
+	int n = 1;
+	if (argc == 2) {
+		char *endptr;
+		n = strtol(argv[1], &endptr, 10);
+		if ((*endptr != '\0' && argv[0][0] != '\0') || n < 0) {
+			fprintf(stderr, "%s: invalid loop number '%s'\n", argv[0], argv[1]);
+			return 1;
+		}
+	}
+
+	if (n > state->nloops) {
+		n = state->nloops;
+	}
+
+	state->nloops -= n - 1;
+	state->branch_control =
+		strcmp(argv[0], "break") == 0 ? BRANCH_BREAK : BRANCH_CONTINUE;
+	return TASK_STATUS_INTERRUPTED;
+}
diff --git a/builtin/builtin.c b/builtin/builtin.c
index ce77af3..677d8f6 100644
--- a/builtin/builtin.c
@@ -18,8 +18,10 @@ static const struct builtin builtins[] = {
 	{ ":", builtin_colon, true },
 	{ "alias", builtin_alias, false },
 	{ "bg", builtin_bg, false },
+	{ "break", builtin_break, true },
 	{ "cd", builtin_cd, false },
 	{ "command", builtin_command, false },
+	{ "continue", builtin_break, true },
 	{ "eval", builtin_eval, true },
 	{ "exit", builtin_exit, true },
 	{ "export", builtin_export, true },
diff --git a/include/builtin.h b/include/builtin.h
index 1f60125..776207b 100644
--- a/include/builtin.h
+++ b/include/builtin.h
@@ -10,6 +10,7 @@ void print_escaped(const char *value);
 
 int builtin_alias(struct mrsh_state *state, int argc, char *argv[]);
 int builtin_bg(struct mrsh_state *state, int argc, char *argv[]);
+int builtin_break(struct mrsh_state *state, int argc, char *argv[]);
 int builtin_cd(struct mrsh_state *state, int argc, char *argv[]);
 int builtin_command(struct mrsh_state *state, int argc, char *argv[]);
 int builtin_colon(struct mrsh_state *state, int argc, char *argv[]);
diff --git a/include/mrsh/shell.h b/include/mrsh/shell.h
index d6ef789..e08dadc 100644
--- a/include/mrsh/shell.h
+++ b/include/mrsh/shell.h
@@ -83,6 +83,12 @@ struct mrsh_call_frame {
 
 struct mrsh_job;
 
+enum branch_control {
+	BRANCH_BREAK,
+	BRANCH_CONTINUE,
+	BRANCH_RETURN,
+};
+
 struct mrsh_state {
 	int exit;
 	int fd;
@@ -100,6 +106,8 @@ struct mrsh_state {
 	struct termios term_modes;
 	struct mrsh_array jobs; // struct mrsh_job *
 	struct mrsh_job *foreground_job;
+	enum branch_control branch_control;
+	int nloops;
 };
 
 void mrsh_function_destroy(struct mrsh_function *fn);
diff --git a/include/shell/task.h b/include/shell/task.h
index dc630ea..6e38d29 100644
--- a/include/shell/task.h
+++ b/include/shell/task.h
@@ -16,6 +16,10 @@
  * The task has been stopped and the job has been put in the background.
  */
 #define TASK_STATUS_STOPPED -3
+/**
+ * The task has been interrupted for some reason.
+ */
+#define TASK_STATUS_INTERRUPTED -4
 
 struct task_interface;
 
diff --git a/meson.build b/meson.build
index 9c1f67a..8f333da 100644
--- a/meson.build
+++ b/meson.build
@@ -74,6 +74,7 @@ lib_mrsh = library(
 		'buffer.c',
 		'builtin/alias.c',
 		'builtin/bg.c',
+		'builtin/break.c',
 		'builtin/builtin.c',
 		'builtin/cd.c',
 		'builtin/colon.c',
diff --git a/shell/shell.c b/shell/shell.c
index ec855b3..556f8ad 100644
--- a/shell/shell.c
+++ b/shell/shell.c
@@ -22,6 +22,7 @@ void mrsh_function_destroy(struct mrsh_function *fn) {
 void mrsh_state_init(struct mrsh_state *state) {
 	state->exit = -1;
 	state->fd = -1;
+	state->nloops = 0;
 	state->interactive = isatty(STDIN_FILENO);
 	state->options = state->interactive ? MRSH_OPT_INTERACTIVE : 0;
 	state->args = calloc(1, sizeof(struct mrsh_call_frame));
diff --git a/shell/task/for_clause.c b/shell/task/for_clause.c
index 6e9d904..69acac4 100644
--- a/shell/task/for_clause.c
+++ b/shell/task/for_clause.c
@@ -15,7 +15,8 @@ struct task_for_clause {
 		struct task *word, *body;
 	} tasks;
 	size_t index;
-	int last_body_status;
+	int exit_status;
+	int loop_num;
 };
 
 static void task_for_clause_destroy(struct task *task) {
@@ -27,20 +28,30 @@ static void task_for_clause_destroy(struct task *task) {
 
 static int task_for_clause_poll(struct task *task, struct context *ctx) {
 	struct task_for_clause *tfc = (struct task_for_clause *)task;
+	if (tfc->loop_num == -1) {
+		tfc->loop_num = ++ctx->state->nloops;
+	}
+
 	while (true) {
+		int status;
+
 		if (tfc->tasks.body) {
 			/* wait for body */
-			int body_status = task_poll(tfc->tasks.body, ctx);
-			if (body_status < 0) {
-				return body_status;
+			status = task_poll(tfc->tasks.body, ctx);
+			if (status == TASK_STATUS_INTERRUPTED) {
+				goto interrupt;
+			} else if (status < 0) {
+				return status;
 			}
 			task_destroy(tfc->tasks.body);
 			tfc->tasks.body = NULL;
 		} else if (tfc->tasks.word) {
 			/* wait for word */
-			int word_status = task_poll(tfc->tasks.word, ctx);
-			if (word_status < 0) {
-				return word_status;
+			status = task_poll(tfc->tasks.word, ctx);
+			if (status == TASK_STATUS_INTERRUPTED) {
+				goto interrupt;
+			} else if (status < 0) {
+				return status;
 			}
 			struct mrsh_word_string *word = (struct mrsh_word_string *)
 				tfc->ast.word_list->data[tfc->index - 1];
@@ -53,14 +64,34 @@ static int task_for_clause_poll(struct task *task, struct context *ctx) {
 		} else {
 			/* create a new word */
 			if (tfc->index == tfc->ast.word_list->len) {
-				return 0;
+				goto exit;
 			}
 			struct mrsh_word **word_ptr =
 				(struct mrsh_word **)&tfc->ast.word_list->data[tfc->index++];
 			tfc->tasks.word = task_word_create(
 				word_ptr, TILDE_EXPANSION_NAME);
 		}
+		continue;
+
+interrupt:
+		if (ctx->state->nloops < tfc->loop_num) {
+			/* break to parent loop */
+			return status;
+		}
+		if (ctx->state->branch_control == BRANCH_BREAK) {
+			tfc->exit_status = 0;
+			goto exit;
+		} else if (ctx->state->branch_control == BRANCH_CONTINUE) {
+			task_destroy(tfc->tasks.body);
+			tfc->tasks.body = NULL;
+		} else {
+			assert(0 && "Unknown task interruption cause");
+		}
 	}
+
+exit:
+	--ctx->state->nloops;
+	return tfc->exit_status;
 }
 
 static const struct task_interface task_for_clause_impl = {
@@ -76,5 +107,7 @@ struct task *task_for_clause_create(const char *name,
 	tfc->ast.word_list = word_list;
 	tfc->ast.body = body;
 	tfc->index = 0;
+	tfc->exit_status = 0;
+	tfc->loop_num = -1;
 	return &tfc->task;
 }
diff --git a/shell/task/loop_clause.c b/shell/task/loop_clause.c
index cfb9418..a8a14f8 100644
--- a/shell/task/loop_clause.c
+++ b/shell/task/loop_clause.c
@@ -1,3 +1,4 @@
+#include <assert.h>
 #include <stdbool.h>
 #include <stdlib.h>
 #include "shell/task.h"
@@ -11,7 +12,8 @@ struct task_loop_clause {
 		struct task *condition, *body;
 	} tasks;
 	bool until;
-	int last_body_status;
+	int exit_status;
+	int loop_num;
 };
 
 static void task_loop_clause_destroy(struct task *task) {
@@ -23,34 +25,64 @@ static void task_loop_clause_destroy(struct task *task) {
 
 static int task_loop_clause_poll(struct task *task, struct context *ctx) {
 	struct task_loop_clause *tlc = (struct task_loop_clause *)task;
+	if (tlc->loop_num == -1) {
+		tlc->loop_num = ++ctx->state->nloops;
+	}
 
 	while (ctx->state->exit == -1) {
+		int status;
+
 		if (tlc->tasks.condition) {
-			int condition_status = task_poll(tlc->tasks.condition, ctx);
-			if (condition_status < 0) {
-				return condition_status;
-			} else if (condition_status > 0 && !tlc->until) {
-				return tlc->last_body_status;
-			} else if (condition_status == 0 && tlc->until) {
-				return tlc->last_body_status;
+			status = task_poll(tlc->tasks.condition, ctx);
+			if (status == TASK_STATUS_INTERRUPTED) {
+				goto interrupt;
+			} else if (status < 0) {
+				return status;
+			} else if (status > 0 && !tlc->until) {
+				goto exit;
+			} else if (status == 0 && tlc->until) {
+				goto exit;
 			}
 			task_destroy(tlc->tasks.condition);
 			tlc->tasks.condition = NULL;
 		}
 
-		int body_status = task_poll(tlc->tasks.body, ctx);
-		if (body_status < 0) {
-			return body_status;
-		} else {
-			tlc->last_body_status = body_status;
+		status = task_poll(tlc->tasks.body, ctx);
+		if (status == TASK_STATUS_INTERRUPTED) {
+			goto interrupt;
+		} else if (status < 0) {
+			return status;
+		}
+
+		tlc->exit_status = status;
+		task_destroy(tlc->tasks.body);
+		tlc->tasks.condition =
+			task_for_command_list_array(tlc->ast.condition);
+		tlc->tasks.body = task_for_command_list_array(tlc->ast.body);
+		continue;
+
+interrupt:
+		if (ctx->state->nloops < tlc->loop_num) {
+			/* break to parent loop */
+			return status;
+		}
+		if (ctx->state->branch_control == BRANCH_BREAK) {
+			tlc->exit_status = 0;
+			goto exit;
+		} else if (ctx->state->branch_control == BRANCH_CONTINUE) {
 			task_destroy(tlc->tasks.body);
-			tlc->tasks.condition =
-				task_for_command_list_array(tlc->ast.condition);
-			tlc->tasks.body = task_for_command_list_array(tlc->ast.body);
+			tlc->tasks.body = NULL;
+		} else {
+			assert(0 && "Unknown task interruption cause");
 		}
 	}
 
+	--ctx->state->nloops;
 	return ctx->state->exit;
+
+exit:
+	--ctx->state->nloops;
+	return tlc->exit_status;
 }
 
 static const struct task_interface task_loop_clause_impl = {
@@ -67,5 +99,6 @@ struct task *task_loop_clause_create(const struct mrsh_array *condition,
 	tlc->tasks.condition = task_for_command_list_array(condition);
 	tlc->tasks.body = task_for_command_list_array(body);
 	tlc->until = until;
+	tlc->loop_num = -1;
 	return &tlc->task;
 }
-- 
2.22.0
Details
Message ID
<xdmlIcikak5qAi7NmaJa2tR_S-zW7Me7zD7E9S8wwulgbh21tTo3mxiFHhb-51jXBX0Qr7_QD2UaDH7CYP8F2wNm3WC9r0OV2GpJIZpQA4k=@emersion.fr>
In-Reply-To
<20190614215931.29615-1-sir@cmpwn.com> (view parent)
Sender timestamp
1560613657
DKIM signature
pass
Download raw message
> diff --git a/builtin/break.c b/builtin/break.c
> new file mode 100644
> index 0000000..63667aa
> --- /dev/null
> +++ b/builtin/break.c
> @@ -0,0 +1,35 @@
> +#define _POSIX_C_SOURCE 200809L
> +#include <mrsh/builtin.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include "builtin.h"
> +#include "shell/shell.h"
> +#include "shell/task.h"
> +
> +static const char break_usage[] = "usage: %s [n]\n";
> +
> +int builtin_break(struct mrsh_state *state, int argc, char *argv[]) {
> +	if (argc > 2) {
> +		fprintf(stderr, break_usage, argv[0]);
> +		return 1;
> +	}
> +	int n = 1;
> +	if (argc == 2) {
> +		char *endptr;
> +		n = strtol(argv[1], &endptr, 10);
> +		if ((*endptr != '\0' && argv[0][0] != '\0') || n < 0) {

Should be:

    endptr[0] != '\0' || argv[1][0] == '\0' || n < 0

since we want to fail if any of these conditions apply:

* The string is empty
* strtol hasn't consumed the whole string
* n < 0

> +			fprintf(stderr, "%s: invalid loop number '%s'\n", argv[0], argv[1]);
> +			return 1;
> +		}
> +	}
> +
> +	if (n > state->nloops) {
> +		n = state->nloops;
> +	}
> +
> +	state->nloops -= n - 1;
> +	state->branch_control =
> +		strcmp(argv[0], "break") == 0 ? BRANCH_BREAK : BRANCH_CONTINUE;
> +	return TASK_STATUS_INTERRUPTED;
> +}
> diff --git a/builtin/builtin.c b/builtin/builtin.c
> index ce77af3..677d8f6 100644
> --- a/builtin/builtin.c
> +++ b/builtin/builtin.c
> @@ -18,8 +18,10 @@ static const struct builtin builtins[] = {
>  	{ ":", builtin_colon, true },
>  	{ "alias", builtin_alias, false },
>  	{ "bg", builtin_bg, false },
> +	{ "break", builtin_break, true },
>  	{ "cd", builtin_cd, false },
>  	{ "command", builtin_command, false },
> +	{ "continue", builtin_break, true },

Seems like this file is missing.

>  	{ "eval", builtin_eval, true },
>  	{ "exit", builtin_exit, true },
>  	{ "export", builtin_export, true },
> diff --git a/include/builtin.h b/include/builtin.h
> index 1f60125..776207b 100644
> --- a/include/builtin.h
> +++ b/include/builtin.h
> @@ -10,6 +10,7 @@ void print_escaped(const char *value);
>
>  int builtin_alias(struct mrsh_state *state, int argc, char *argv[]);
>  int builtin_bg(struct mrsh_state *state, int argc, char *argv[]);
> +int builtin_break(struct mrsh_state *state, int argc, char *argv[]);
>  int builtin_cd(struct mrsh_state *state, int argc, char *argv[]);
>  int builtin_command(struct mrsh_state *state, int argc, char *argv[]);
>  int builtin_colon(struct mrsh_state *state, int argc, char *argv[]);
> diff --git a/include/mrsh/shell.h b/include/mrsh/shell.h
> index d6ef789..e08dadc 100644
> --- a/include/mrsh/shell.h
> +++ b/include/mrsh/shell.h
> @@ -83,6 +83,12 @@ struct mrsh_call_frame {
>
>  struct mrsh_job;
>
> +enum branch_control {
> +	BRANCH_BREAK,
> +	BRANCH_CONTINUE,
> +	BRANCH_RETURN,
> +};
> +
>  struct mrsh_state {
>  	int exit;
>  	int fd;
> @@ -100,6 +106,8 @@ struct mrsh_state {
>  	struct termios term_modes;
>  	struct mrsh_array jobs; // struct mrsh_job *
>  	struct mrsh_job *foreground_job;
> +	enum branch_control branch_control;
> +	int nloops;

After thinking about it, I believe this should be in the context rather than the
state. The reason is that we want this to work:

    {
    	# for loop with potentially break/continue statements
    } | {
    	# for loop with potentially break/continue statements
    }

In this situation, the state is shared between the two loops.

>  };
>
>  void mrsh_function_destroy(struct mrsh_function *fn);
> diff --git a/include/shell/task.h b/include/shell/task.h
> index dc630ea..6e38d29 100644
> --- a/include/shell/task.h
> +++ b/include/shell/task.h
> @@ -16,6 +16,10 @@
>   * The task has been stopped and the job has been put in the background.
>   */
>  #define TASK_STATUS_STOPPED -3
> +/**
> + * The task has been interrupted for some reason.
> + */
> +#define TASK_STATUS_INTERRUPTED -4

Adding this looks like a good idea to me.

>  struct task_interface;
>
> diff --git a/meson.build b/meson.build
> index 9c1f67a..8f333da 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -74,6 +74,7 @@ lib_mrsh = library(
>  		'buffer.c',
>  		'builtin/alias.c',
>  		'builtin/bg.c',
> +		'builtin/break.c',
>  		'builtin/builtin.c',
>  		'builtin/cd.c',
>  		'builtin/colon.c',
> diff --git a/shell/shell.c b/shell/shell.c
> index ec855b3..556f8ad 100644
> --- a/shell/shell.c
> +++ b/shell/shell.c
> @@ -22,6 +22,7 @@ void mrsh_function_destroy(struct mrsh_function *fn) {
>  void mrsh_state_init(struct mrsh_state *state) {
>  	state->exit = -1;
>  	state->fd = -1;
> +	state->nloops = 0;
>  	state->interactive = isatty(STDIN_FILENO);
>  	state->options = state->interactive ? MRSH_OPT_INTERACTIVE : 0;
>  	state->args = calloc(1, sizeof(struct mrsh_call_frame));
> diff --git a/shell/task/for_clause.c b/shell/task/for_clause.c
> index 6e9d904..69acac4 100644
> --- a/shell/task/for_clause.c
> +++ b/shell/task/for_clause.c
> @@ -15,7 +15,8 @@ struct task_for_clause {
>  		struct task *word, *body;
>  	} tasks;
>  	size_t index;
> -	int last_body_status;
> +	int exit_status;
> +	int loop_num;
>  };
>
>  static void task_for_clause_destroy(struct task *task) {
> @@ -27,20 +28,30 @@ static void task_for_clause_destroy(struct task *task) {
>
>  static int task_for_clause_poll(struct task *task, struct context *ctx) {
>  	struct task_for_clause *tfc = (struct task_for_clause *)task;
> +	if (tfc->loop_num == -1) {
> +		tfc->loop_num = ++ctx->state->nloops;
> +	}
> +
>  	while (true) {
> +		int status;
> +
>  		if (tfc->tasks.body) {
>  			/* wait for body */
> -			int body_status = task_poll(tfc->tasks.body, ctx);
> -			if (body_status < 0) {
> -				return body_status;
> +			status = task_poll(tfc->tasks.body, ctx);
> +			if (status == TASK_STATUS_INTERRUPTED) {
> +				goto interrupt;
> +			} else if (status < 0) {
> +				return status;
>  			}
>  			task_destroy(tfc->tasks.body);
>  			tfc->tasks.body = NULL;
>  		} else if (tfc->tasks.word) {
>  			/* wait for word */
> -			int word_status = task_poll(tfc->tasks.word, ctx);
> -			if (word_status < 0) {
> -				return word_status;
> +			status = task_poll(tfc->tasks.word, ctx);
> +			if (status == TASK_STATUS_INTERRUPTED) {
> +				goto interrupt;
> +			} else if (status < 0) {
> +				return status;
>  			}
>  			struct mrsh_word_string *word = (struct mrsh_word_string *)
>  				tfc->ast.word_list->data[tfc->index - 1];
> @@ -53,14 +64,34 @@ static int task_for_clause_poll(struct task *task, struct context *ctx) {
>  		} else {
>  			/* create a new word */
>  			if (tfc->index == tfc->ast.word_list->len) {
> -				return 0;
> +				goto exit;
>  			}
>  			struct mrsh_word **word_ptr =
>  				(struct mrsh_word **)&tfc->ast.word_list->data[tfc->index++];
>  			tfc->tasks.word = task_word_create(
>  				word_ptr, TILDE_EXPANSION_NAME);
>  		}
> +		continue;

Hmm, this goto logic makes it very difficult to follow the flow. Can't we
extract this to a function whose return value indicates whether we should
return?

> +
> +interrupt:
> +		if (ctx->state->nloops < tfc->loop_num) {
> +			/* break to parent loop */
> +			return status;
> +		}
> +		if (ctx->state->branch_control == BRANCH_BREAK) {
> +			tfc->exit_status = 0;
> +			goto exit;
> +		} else if (ctx->state->branch_control == BRANCH_CONTINUE) {
> +			task_destroy(tfc->tasks.body);
> +			tfc->tasks.body = NULL;
> +		} else {
> +			assert(0 && "Unknown task interruption cause");
> +		}
>  	}
> +
> +exit:
> +	--ctx->state->nloops;
> +	return tfc->exit_status;
>  }