~emersion/mrsh-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
1

[PATCH] Mostly implement break and continue builtins

Details
Message ID
<20190614215931.29615-1-sir@cmpwn.com>
Sender timestamp
1560549571
DKIM signature
pass
Download raw message
Patch: +142 -24
The main thing that's missing here is moving the number of loops into
the call stack:

http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#break

To prevent you from breaking out of your caller's loop in a function.
---
 builtin/break.c          | 35 ++++++++++++++++++++++
 builtin/builtin.c        |  2 ++
 include/builtin.h        |  1 +
 include/mrsh/shell.h     |  8 +++++
 include/shell/task.h     |  4 +++
 meson.build              |  1 +
 shell/shell.c            |  1 +
 shell/task/for_clause.c  | 49 +++++++++++++++++++++++++-----
 shell/task/loop_clause.c | 65 ++++++++++++++++++++++++++++++----------
 9 files changed, 142 insertions(+), 24 deletions(-)
 create mode 100644 builtin/break.c

diff --git a/builtin/break.c b/builtin/break.c
new file mode 100644
index 0000000..63667aa
--- /dev/null
@@ -0,0 +1,35 @@
#define _POSIX_C_SOURCE 200809L
#include <mrsh/builtin.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "builtin.h"
#include "shell/shell.h"
#include "shell/task.h"

static const char break_usage[] = "usage: %s [n]\n";

int builtin_break(struct mrsh_state *state, int argc, char *argv[]) {
	if (argc > 2) {
		fprintf(stderr, break_usage, argv[0]);
		return 1;
	}
	int n = 1;
	if (argc == 2) {
		char *endptr;
		n = strtol(argv[1], &endptr, 10);
		if ((*endptr != '\0' && argv[0][0] != '\0') || n < 0) {
			fprintf(stderr, "%s: invalid loop number '%s'\n", argv[0], argv[1]);
			return 1;
		}
	}

	if (n > state->nloops) {
		n = state->nloops;
	}

	state->nloops -= n - 1;
	state->branch_control =
		strcmp(argv[0], "break") == 0 ? BRANCH_BREAK : BRANCH_CONTINUE;
	return TASK_STATUS_INTERRUPTED;
}
diff --git a/builtin/builtin.c b/builtin/builtin.c
index ce77af3..677d8f6 100644
--- a/builtin/builtin.c
@@ -18,8 +18,10 @@ static const struct builtin builtins[] = {
	{ ":", builtin_colon, true },
	{ "alias", builtin_alias, false },
	{ "bg", builtin_bg, false },
	{ "break", builtin_break, true },
	{ "cd", builtin_cd, false },
	{ "command", builtin_command, false },
	{ "continue", builtin_break, true },
	{ "eval", builtin_eval, true },
	{ "exit", builtin_exit, true },
	{ "export", builtin_export, true },
diff --git a/include/builtin.h b/include/builtin.h
index 1f60125..776207b 100644
--- a/include/builtin.h
+++ b/include/builtin.h
@@ -10,6 +10,7 @@ void print_escaped(const char *value);

int builtin_alias(struct mrsh_state *state, int argc, char *argv[]);
int builtin_bg(struct mrsh_state *state, int argc, char *argv[]);
int builtin_break(struct mrsh_state *state, int argc, char *argv[]);
int builtin_cd(struct mrsh_state *state, int argc, char *argv[]);
int builtin_command(struct mrsh_state *state, int argc, char *argv[]);
int builtin_colon(struct mrsh_state *state, int argc, char *argv[]);
diff --git a/include/mrsh/shell.h b/include/mrsh/shell.h
index d6ef789..e08dadc 100644
--- a/include/mrsh/shell.h
+++ b/include/mrsh/shell.h
@@ -83,6 +83,12 @@ struct mrsh_call_frame {

struct mrsh_job;

enum branch_control {
	BRANCH_BREAK,
	BRANCH_CONTINUE,
	BRANCH_RETURN,
};

struct mrsh_state {
	int exit;
	int fd;
@@ -100,6 +106,8 @@ struct mrsh_state {
	struct termios term_modes;
	struct mrsh_array jobs; // struct mrsh_job *
	struct mrsh_job *foreground_job;
	enum branch_control branch_control;
	int nloops;
};

void mrsh_function_destroy(struct mrsh_function *fn);
diff --git a/include/shell/task.h b/include/shell/task.h
index dc630ea..6e38d29 100644
--- a/include/shell/task.h
+++ b/include/shell/task.h
@@ -16,6 +16,10 @@
 * The task has been stopped and the job has been put in the background.
 */
#define TASK_STATUS_STOPPED -3
/**
 * The task has been interrupted for some reason.
 */
#define TASK_STATUS_INTERRUPTED -4

struct task_interface;

diff --git a/meson.build b/meson.build
index 9c1f67a..8f333da 100644
--- a/meson.build
+++ b/meson.build
@@ -74,6 +74,7 @@ lib_mrsh = library(
		'buffer.c',
		'builtin/alias.c',
		'builtin/bg.c',
		'builtin/break.c',
		'builtin/builtin.c',
		'builtin/cd.c',
		'builtin/colon.c',
diff --git a/shell/shell.c b/shell/shell.c
index ec855b3..556f8ad 100644
--- a/shell/shell.c
+++ b/shell/shell.c
@@ -22,6 +22,7 @@ void mrsh_function_destroy(struct mrsh_function *fn) {
void mrsh_state_init(struct mrsh_state *state) {
	state->exit = -1;
	state->fd = -1;
	state->nloops = 0;
	state->interactive = isatty(STDIN_FILENO);
	state->options = state->interactive ? MRSH_OPT_INTERACTIVE : 0;
	state->args = calloc(1, sizeof(struct mrsh_call_frame));
diff --git a/shell/task/for_clause.c b/shell/task/for_clause.c
index 6e9d904..69acac4 100644
--- a/shell/task/for_clause.c
+++ b/shell/task/for_clause.c
@@ -15,7 +15,8 @@ struct task_for_clause {
		struct task *word, *body;
	} tasks;
	size_t index;
	int last_body_status;
	int exit_status;
	int loop_num;
};

static void task_for_clause_destroy(struct task *task) {
@@ -27,20 +28,30 @@ static void task_for_clause_destroy(struct task *task) {

static int task_for_clause_poll(struct task *task, struct context *ctx) {
	struct task_for_clause *tfc = (struct task_for_clause *)task;
	if (tfc->loop_num == -1) {
		tfc->loop_num = ++ctx->state->nloops;
	}

	while (true) {
		int status;

		if (tfc->tasks.body) {
			/* wait for body */
			int body_status = task_poll(tfc->tasks.body, ctx);
			if (body_status < 0) {
				return body_status;
			status = task_poll(tfc->tasks.body, ctx);
			if (status == TASK_STATUS_INTERRUPTED) {
				goto interrupt;
			} else if (status < 0) {
				return status;
			}
			task_destroy(tfc->tasks.body);
			tfc->tasks.body = NULL;
		} else if (tfc->tasks.word) {
			/* wait for word */
			int word_status = task_poll(tfc->tasks.word, ctx);
			if (word_status < 0) {
				return word_status;
			status = task_poll(tfc->tasks.word, ctx);
			if (status == TASK_STATUS_INTERRUPTED) {
				goto interrupt;
			} else if (status < 0) {
				return status;
			}
			struct mrsh_word_string *word = (struct mrsh_word_string *)
				tfc->ast.word_list->data[tfc->index - 1];
@@ -53,14 +64,34 @@ static int task_for_clause_poll(struct task *task, struct context *ctx) {
		} else {
			/* create a new word */
			if (tfc->index == tfc->ast.word_list->len) {
				return 0;
				goto exit;
			}
			struct mrsh_word **word_ptr =
				(struct mrsh_word **)&tfc->ast.word_list->data[tfc->index++];
			tfc->tasks.word = task_word_create(
				word_ptr, TILDE_EXPANSION_NAME);
		}
		continue;

interrupt:
		if (ctx->state->nloops < tfc->loop_num) {
			/* break to parent loop */
			return status;
		}
		if (ctx->state->branch_control == BRANCH_BREAK) {
			tfc->exit_status = 0;
			goto exit;
		} else if (ctx->state->branch_control == BRANCH_CONTINUE) {
			task_destroy(tfc->tasks.body);
			tfc->tasks.body = NULL;
		} else {
			assert(0 && "Unknown task interruption cause");
		}
	}

exit:
	--ctx->state->nloops;
	return tfc->exit_status;
}

static const struct task_interface task_for_clause_impl = {
@@ -76,5 +107,7 @@ struct task *task_for_clause_create(const char *name,
	tfc->ast.word_list = word_list;
	tfc->ast.body = body;
	tfc->index = 0;
	tfc->exit_status = 0;
	tfc->loop_num = -1;
	return &tfc->task;
}
diff --git a/shell/task/loop_clause.c b/shell/task/loop_clause.c
index cfb9418..a8a14f8 100644
--- a/shell/task/loop_clause.c
+++ b/shell/task/loop_clause.c
@@ -1,3 +1,4 @@
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include "shell/task.h"
@@ -11,7 +12,8 @@ struct task_loop_clause {
		struct task *condition, *body;
	} tasks;
	bool until;
	int last_body_status;
	int exit_status;
	int loop_num;
};

static void task_loop_clause_destroy(struct task *task) {
@@ -23,34 +25,64 @@ static void task_loop_clause_destroy(struct task *task) {

static int task_loop_clause_poll(struct task *task, struct context *ctx) {
	struct task_loop_clause *tlc = (struct task_loop_clause *)task;
	if (tlc->loop_num == -1) {
		tlc->loop_num = ++ctx->state->nloops;
	}

	while (ctx->state->exit == -1) {
		int status;

		if (tlc->tasks.condition) {
			int condition_status = task_poll(tlc->tasks.condition, ctx);
			if (condition_status < 0) {
				return condition_status;
			} else if (condition_status > 0 && !tlc->until) {
				return tlc->last_body_status;
			} else if (condition_status == 0 && tlc->until) {
				return tlc->last_body_status;
			status = task_poll(tlc->tasks.condition, ctx);
			if (status == TASK_STATUS_INTERRUPTED) {
				goto interrupt;
			} else if (status < 0) {
				return status;
			} else if (status > 0 && !tlc->until) {
				goto exit;
			} else if (status == 0 && tlc->until) {
				goto exit;
			}
			task_destroy(tlc->tasks.condition);
			tlc->tasks.condition = NULL;
		}

		int body_status = task_poll(tlc->tasks.body, ctx);
		if (body_status < 0) {
			return body_status;
		} else {
			tlc->last_body_status = body_status;
		status = task_poll(tlc->tasks.body, ctx);
		if (status == TASK_STATUS_INTERRUPTED) {
			goto interrupt;
		} else if (status < 0) {
			return status;
		}

		tlc->exit_status = status;
		task_destroy(tlc->tasks.body);
		tlc->tasks.condition =
			task_for_command_list_array(tlc->ast.condition);
		tlc->tasks.body = task_for_command_list_array(tlc->ast.body);
		continue;

interrupt:
		if (ctx->state->nloops < tlc->loop_num) {
			/* break to parent loop */
			return status;
		}
		if (ctx->state->branch_control == BRANCH_BREAK) {
			tlc->exit_status = 0;
			goto exit;
		} else if (ctx->state->branch_control == BRANCH_CONTINUE) {
			task_destroy(tlc->tasks.body);
			tlc->tasks.condition =
				task_for_command_list_array(tlc->ast.condition);
			tlc->tasks.body = task_for_command_list_array(tlc->ast.body);
			tlc->tasks.body = NULL;
		} else {
			assert(0 && "Unknown task interruption cause");
		}
	}

	--ctx->state->nloops;
	return ctx->state->exit;

exit:
	--ctx->state->nloops;
	return tlc->exit_status;
}

static const struct task_interface task_loop_clause_impl = {
@@ -67,5 +99,6 @@ struct task *task_loop_clause_create(const struct mrsh_array *condition,
	tlc->tasks.condition = task_for_command_list_array(condition);
	tlc->tasks.body = task_for_command_list_array(body);
	tlc->until = until;
	tlc->loop_num = -1;
	return &tlc->task;
}
-- 
2.22.0
Details
Message ID
<xdmlIcikak5qAi7NmaJa2tR_S-zW7Me7zD7E9S8wwulgbh21tTo3mxiFHhb-51jXBX0Qr7_QD2UaDH7CYP8F2wNm3WC9r0OV2GpJIZpQA4k=@emersion.fr>
In-Reply-To
<20190614215931.29615-1-sir@cmpwn.com> (view parent)
Sender timestamp
1560613657
DKIM signature
pass
Download raw message
> diff --git a/builtin/break.c b/builtin/break.c
> new file mode 100644
> index 0000000..63667aa
> --- /dev/null
> +++ b/builtin/break.c
> @@ -0,0 +1,35 @@
> +#define _POSIX_C_SOURCE 200809L
> +#include <mrsh/builtin.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include "builtin.h"
> +#include "shell/shell.h"
> +#include "shell/task.h"
> +
> +static const char break_usage[] = "usage: %s [n]\n";
> +
> +int builtin_break(struct mrsh_state *state, int argc, char *argv[]) {
> +	if (argc > 2) {
> +		fprintf(stderr, break_usage, argv[0]);
> +		return 1;
> +	}
> +	int n = 1;
> +	if (argc == 2) {
> +		char *endptr;
> +		n = strtol(argv[1], &endptr, 10);
> +		if ((*endptr != '\0' && argv[0][0] != '\0') || n < 0) {

Should be:

    endptr[0] != '\0' || argv[1][0] == '\0' || n < 0

since we want to fail if any of these conditions apply:

* The string is empty
* strtol hasn't consumed the whole string
* n < 0

> +			fprintf(stderr, "%s: invalid loop number '%s'\n", argv[0], argv[1]);
> +			return 1;
> +		}
> +	}
> +
> +	if (n > state->nloops) {
> +		n = state->nloops;
> +	}
> +
> +	state->nloops -= n - 1;
> +	state->branch_control =
> +		strcmp(argv[0], "break") == 0 ? BRANCH_BREAK : BRANCH_CONTINUE;
> +	return TASK_STATUS_INTERRUPTED;
> +}
> diff --git a/builtin/builtin.c b/builtin/builtin.c
> index ce77af3..677d8f6 100644
> --- a/builtin/builtin.c
> +++ b/builtin/builtin.c
> @@ -18,8 +18,10 @@ static const struct builtin builtins[] = {
>  	{ ":", builtin_colon, true },
>  	{ "alias", builtin_alias, false },
>  	{ "bg", builtin_bg, false },
> +	{ "break", builtin_break, true },
>  	{ "cd", builtin_cd, false },
>  	{ "command", builtin_command, false },
> +	{ "continue", builtin_break, true },

Seems like this file is missing.

>  	{ "eval", builtin_eval, true },
>  	{ "exit", builtin_exit, true },
>  	{ "export", builtin_export, true },
> diff --git a/include/builtin.h b/include/builtin.h
> index 1f60125..776207b 100644
> --- a/include/builtin.h
> +++ b/include/builtin.h
> @@ -10,6 +10,7 @@ void print_escaped(const char *value);
>
>  int builtin_alias(struct mrsh_state *state, int argc, char *argv[]);
>  int builtin_bg(struct mrsh_state *state, int argc, char *argv[]);
> +int builtin_break(struct mrsh_state *state, int argc, char *argv[]);
>  int builtin_cd(struct mrsh_state *state, int argc, char *argv[]);
>  int builtin_command(struct mrsh_state *state, int argc, char *argv[]);
>  int builtin_colon(struct mrsh_state *state, int argc, char *argv[]);
> diff --git a/include/mrsh/shell.h b/include/mrsh/shell.h
> index d6ef789..e08dadc 100644
> --- a/include/mrsh/shell.h
> +++ b/include/mrsh/shell.h
> @@ -83,6 +83,12 @@ struct mrsh_call_frame {
>
>  struct mrsh_job;
>
> +enum branch_control {
> +	BRANCH_BREAK,
> +	BRANCH_CONTINUE,
> +	BRANCH_RETURN,
> +};
> +
>  struct mrsh_state {
>  	int exit;
>  	int fd;
> @@ -100,6 +106,8 @@ struct mrsh_state {
>  	struct termios term_modes;
>  	struct mrsh_array jobs; // struct mrsh_job *
>  	struct mrsh_job *foreground_job;
> +	enum branch_control branch_control;
> +	int nloops;

After thinking about it, I believe this should be in the context rather than the
state. The reason is that we want this to work:

    {
    	# for loop with potentially break/continue statements
    } | {
    	# for loop with potentially break/continue statements
    }

In this situation, the state is shared between the two loops.

>  };
>
>  void mrsh_function_destroy(struct mrsh_function *fn);
> diff --git a/include/shell/task.h b/include/shell/task.h
> index dc630ea..6e38d29 100644
> --- a/include/shell/task.h
> +++ b/include/shell/task.h
> @@ -16,6 +16,10 @@
>   * The task has been stopped and the job has been put in the background.
>   */
>  #define TASK_STATUS_STOPPED -3
> +/**
> + * The task has been interrupted for some reason.
> + */
> +#define TASK_STATUS_INTERRUPTED -4

Adding this looks like a good idea to me.

>  struct task_interface;
>
> diff --git a/meson.build b/meson.build
> index 9c1f67a..8f333da 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -74,6 +74,7 @@ lib_mrsh = library(
>  		'buffer.c',
>  		'builtin/alias.c',
>  		'builtin/bg.c',
> +		'builtin/break.c',
>  		'builtin/builtin.c',
>  		'builtin/cd.c',
>  		'builtin/colon.c',
> diff --git a/shell/shell.c b/shell/shell.c
> index ec855b3..556f8ad 100644
> --- a/shell/shell.c
> +++ b/shell/shell.c
> @@ -22,6 +22,7 @@ void mrsh_function_destroy(struct mrsh_function *fn) {
>  void mrsh_state_init(struct mrsh_state *state) {
>  	state->exit = -1;
>  	state->fd = -1;
> +	state->nloops = 0;
>  	state->interactive = isatty(STDIN_FILENO);
>  	state->options = state->interactive ? MRSH_OPT_INTERACTIVE : 0;
>  	state->args = calloc(1, sizeof(struct mrsh_call_frame));
> diff --git a/shell/task/for_clause.c b/shell/task/for_clause.c
> index 6e9d904..69acac4 100644
> --- a/shell/task/for_clause.c
> +++ b/shell/task/for_clause.c
> @@ -15,7 +15,8 @@ struct task_for_clause {
>  		struct task *word, *body;
>  	} tasks;
>  	size_t index;
> -	int last_body_status;
> +	int exit_status;
> +	int loop_num;
>  };
>
>  static void task_for_clause_destroy(struct task *task) {
> @@ -27,20 +28,30 @@ static void task_for_clause_destroy(struct task *task) {
>
>  static int task_for_clause_poll(struct task *task, struct context *ctx) {
>  	struct task_for_clause *tfc = (struct task_for_clause *)task;
> +	if (tfc->loop_num == -1) {
> +		tfc->loop_num = ++ctx->state->nloops;
> +	}
> +
>  	while (true) {
> +		int status;
> +
>  		if (tfc->tasks.body) {
>  			/* wait for body */
> -			int body_status = task_poll(tfc->tasks.body, ctx);
> -			if (body_status < 0) {
> -				return body_status;
> +			status = task_poll(tfc->tasks.body, ctx);
> +			if (status == TASK_STATUS_INTERRUPTED) {
> +				goto interrupt;
> +			} else if (status < 0) {
> +				return status;
>  			}
>  			task_destroy(tfc->tasks.body);
>  			tfc->tasks.body = NULL;
>  		} else if (tfc->tasks.word) {
>  			/* wait for word */
> -			int word_status = task_poll(tfc->tasks.word, ctx);
> -			if (word_status < 0) {
> -				return word_status;
> +			status = task_poll(tfc->tasks.word, ctx);
> +			if (status == TASK_STATUS_INTERRUPTED) {
> +				goto interrupt;
> +			} else if (status < 0) {
> +				return status;
>  			}
>  			struct mrsh_word_string *word = (struct mrsh_word_string *)
>  				tfc->ast.word_list->data[tfc->index - 1];
> @@ -53,14 +64,34 @@ static int task_for_clause_poll(struct task *task, struct context *ctx) {
>  		} else {
>  			/* create a new word */
>  			if (tfc->index == tfc->ast.word_list->len) {
> -				return 0;
> +				goto exit;
>  			}
>  			struct mrsh_word **word_ptr =
>  				(struct mrsh_word **)&tfc->ast.word_list->data[tfc->index++];
>  			tfc->tasks.word = task_word_create(
>  				word_ptr, TILDE_EXPANSION_NAME);
>  		}
> +		continue;

Hmm, this goto logic makes it very difficult to follow the flow. Can't we
extract this to a function whose return value indicates whether we should
return?

> +
> +interrupt:
> +		if (ctx->state->nloops < tfc->loop_num) {
> +			/* break to parent loop */
> +			return status;
> +		}
> +		if (ctx->state->branch_control == BRANCH_BREAK) {
> +			tfc->exit_status = 0;
> +			goto exit;
> +		} else if (ctx->state->branch_control == BRANCH_CONTINUE) {
> +			task_destroy(tfc->tasks.body);
> +			tfc->tasks.body = NULL;
> +		} else {
> +			assert(0 && "Unknown task interruption cause");
> +		}
>  	}
> +
> +exit:
> +	--ctx->state->nloops;
> +	return tfc->exit_status;
>  }