~mpu/qbe

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
8 4

[PATCH] Add top level inline assembly.

Details
Message ID
<20211022040849.1916823-1-ac@acha.ninja>
DKIM signature
missing
Download raw message
Patch: +69 -5
---
 all.h          |  3 ++-
 doc/il.txt     | 14 ++++++++++++++
 gas.c          | 19 +++++++++++++++++++
 main.c         | 10 +++++++++-
 parse.c        | 16 ++++++++++++++--
 test/tlasm.ssa | 10 ++++++++++
 tools/lexh.c   |  2 +-
 7 files changed, 69 insertions(+), 5 deletions(-)
 create mode 100644 test/tlasm.ssa

diff --git a/all.h b/all.h
index 4b9eb0e..de5719d 100644
--- a/all.h
+++ b/all.h
@@ -458,7 +458,7 @@ bshas(BSet *bs, uint elt)

/* parse.c */
extern Op optab[NOp];
void parse(FILE *, char *, void (Dat *), void (Fn *));
void parse(FILE *, char *, void (Dat *), void (Fn *), void (char *));
void printfn(Fn *, FILE *);
void printref(Ref, Fn *, FILE *);
void err(char *, ...) __attribute__((noreturn));
@@ -516,6 +516,7 @@ void rega(Fn *);
/* gas.c */
extern char *gasloc;
extern char *gassym;
void gasemittlasm(char *, FILE *);
void gasemitdat(Dat *, FILE *);
int gasstash(void *, int);
void gasemitfin(FILE *);
diff --git a/doc/il.txt b/doc/il.txt
index d1ed755..8e68fa4 100644
--- a/doc/il.txt
+++ b/doc/il.txt
@@ -19,6 +19,7 @@
      * <@ Aggregate Types >
      * <@ Data >
      * <@ Functions >
      * <@ Inline Assembly >
  5. <@ Control >
      * <@ Blocks >
      * <@ Jumps >
@@ -384,6 +385,19 @@ is provided in the call instructions.
The syntax and semantics for the body of functions
are described in the <@ Control > section.

~ Inline Assembly
~~~~~~~~~~~~~~~~~

    `bnf
    ASMDEF :=
        'asm' '"' ... '"'

Top level inline assembly is copied verbatim into the output with the
following exceptions:

  * A single leading newline will be stripped if present.
  * '\' followed by '"' or '\' will omit the leading '\'.

- 5. Control
------------

diff --git a/gas.c b/gas.c
index 8c31794..5e46f8d 100644
--- a/gas.c
+++ b/gas.c
@@ -3,6 +3,25 @@

char *gasloc, *gassym;

/*
 * Emit a top-level inline assembly string to f, framed by
 * begin/end marker comments.
 *
 * s is the string token with its surrounding '"' still attached:
 * the first character (opening quote) and the last character
 * (closing quote) are never copied.  A single newline directly
 * after the opening quote is dropped.  A '\' followed by '"' or
 * '\' is emitted without the leading '\'; every other character,
 * including other backslash sequences, is copied unchanged.
 * A newline is added before the end marker when the copied text
 * does not already end with one, so the marker always starts on
 * its own line.
 */
void
gasemittlasm(char *s, FILE *f)
{
	int c, last;

	fputs("/* begin inline asm */\n", f);
	/* tolerate strings too short to hold both quotes */
	if (*s)
		s++;
	if (*s == '\n')
		s++;
	last = '\n';
	/* s[1] == 0 means s is at the final character, the closing quote */
	while (s[0] && s[1]) {
		c = *s++;
		/* only unescape when the escaped character is not the closing quote */
		if (c == '\\' && s[1] && (*s == '"' || *s == '\\'))
			c = *s++;
		fputc(c, f);
		last = c;
	}
	if (last != '\n')
		fputc('\n', f);
	fputs("/* end inline asm */\n", f);
}

void
gasemitdat(Dat *d, FILE *f)
{
diff --git a/main.c b/main.c
index abef591..390b122 100644
--- a/main.c
+++ b/main.c
@@ -38,6 +38,14 @@ char debug['Z'+1] = {
static FILE *outf;
static int dbg;

/*
 * Callback for top-level inline assembly: forwards the string s to
 * gasemittlasm() on the output file outf.  Nothing is emitted when
 * dbg is set.
 */
static void
tlasm(char *s)
{
	if (!dbg)
		gasemittlasm(s, outf);
}

static void
data(Dat *d)
{
@@ -190,7 +198,7 @@ main(int ac, char *av[])
				exit(1);
			}
		}
		parse(inf, f, data, func);
		parse(inf, f, data, func, tlasm);
	} while (++optind < ac);

	if (!dbg) {
diff --git a/parse.c b/parse.c
index da88f58..b49e422 100644
--- a/parse.c
+++ b/parse.c
@@ -37,6 +37,7 @@ enum {
	Tjmp,
	Tjnz,
	Tret,
	Tasm,
	Texport,
	Tfunc,
	Ttype,
@@ -87,6 +88,7 @@ static char *kwmap[Ntok] = {
	[Tjmp] = "jmp",
	[Tjnz] = "jnz",
	[Tret] = "ret",
	[Tasm] = "asm",
	[Texport] = "export",
	[Tfunc] = "function",
	[Ttype] = "type",
@@ -109,7 +111,7 @@ enum {
	TMask = 16383, /* for temps hash */
	BMask = 8191, /* for blocks hash */

	K = 3233235, /* found using tools/lexh.c */
	K = 4331239, /* found using tools/lexh.c */
	M = 23,
};

@@ -785,6 +787,13 @@ typecheck(Fn *fn)
	}
}

/*
 * Parse the operand of a top-level 'asm' definition.
 * The next token, as returned by nextnl(), must be a string
 * literal (Tstr); any other token is reported through err().
 * Returns the string value stored in tokval.
 */
static char *
parsetlasm(void)
{
	int tok;

	tok = nextnl();
	if (tok != Tstr)
		err("string literal expected");
	return tokval.str;
}

static Fn *
parsefn(int export)
{
@@ -1065,7 +1074,7 @@ Done:
}

void
parse(FILE *f, char *path, void data(Dat *), void func(Fn *))
parse(FILE *f, char *path, void data(Dat *), void func(Fn *), void tlasm(char *))
{
	int t, export;

@@ -1081,6 +1090,9 @@ parse(FILE *f, char *path, void data(Dat *), void func(Fn *))
		switch (nextnl()) {
		default:
			err("top-level definition expected");
		case Tasm:
			tlasm(parsetlasm());
			break;
		case Texport:
			export = 1;
			t = nextnl();
diff --git a/test/tlasm.ssa b/test/tlasm.ssa
new file mode 100644
index 0000000..eb42609
--- /dev/null
+++ b/test/tlasm.ssa
@@ -0,0 +1,10 @@

asm "
/* do nothing \\ \n\n \" */
"

export
function w $main() {
@start
	ret 0
}
diff --git a/tools/lexh.c b/tools/lexh.c
index 2ebb022..30874c9 100644
--- a/tools/lexh.c
+++ b/tools/lexh.c
@@ -24,7 +24,7 @@ char *tok[] = {
	"cgtd", "cged", "cned", "ceqd", "cod", "cuod",
	"vaarg", "vastart", "...", "env",

	"call", "phi", "jmp", "jnz", "ret", "export",
	"call", "phi", "jmp", "jnz", "ret", "asm", "export",
	"function", "type", "data", "section", "align",
	"l", "w", "h", "b", "d", "s", "z", "loadw", "loadl",
	"loads", "loadd", "alloc1", "alloc2",
-- 
2.33.0
Details
Message ID
<CF5OID13778E.3UE24V3WAOTUB@taiga>
In-Reply-To
<20211022040849.1916823-1-ac@acha.ninja> (view parent)
DKIM signature
fail
Download raw message
DKIM signature: fail
This seems to be of highly limited utility without the ability to map
temporaries to registers and back, ala GCC's inline assembly.
Details
Message ID
<4b098abd-e440-4ec1-a303-fcc330748323@acha.ninja>
In-Reply-To
<CF5OID13778E.3UE24V3WAOTUB@taiga> (view parent)
DKIM signature
missing
Download raw message
> This seems to be of highly limited utility without the ability to map
> temporaries to registers and back, ala GCC's inline assembly.

So to add context, this allows you to do things like implement 
__attribute__((weak)) and __attribute__((alias("sym"))) for cproc, both 
of which are used in musl libc. For example attribute alias maps to asm 
".set sym1, sym2".

This also allowed me to compile crt1.c from musl which uses top level 
asm in this way. In the end I was able to compile a patched musl using 
this and other patches to cproc.
Details
Message ID
<c1136de0-a0dd-496d-9f3a-c1685c406374@www.fastmail.com>
In-Reply-To
<4b098abd-e440-4ec1-a303-fcc330748323@acha.ninja> (view parent)
DKIM signature
pass
Download raw message
On Fri, Oct 22, 2021, at 23:54, Andrew Chambers wrote:
> This also allowed me to compile crt1.c from musl which uses top level 
> asm in this way. In the end I was able to compile a patched musl using 
> this and other patches to cproc.

Hi Andrew, thanks for your patch! Would you mind linking the patches
to cproc?

-- 
QC
Details
Message ID
<519c4519-28fe-5b47-2814-7fc7ca13b486@acha.ninja>
In-Reply-To
<c1136de0-a0dd-496d-9f3a-c1685c406374@www.fastmail.com> (view parent)
DKIM signature
missing
Download raw message
 > Hi Andrew, thanks for your patch! Would you mind linking the patches
 > to cproc?


Simple top level 'basic asm' - http://sprunge.us/NclFXg

I also absolutely brutalized musl libc here:

https://git.sr.ht/~ach/cprocmusl/commit/2d0e166e92637649b982e15ebf630deaf28a5a64
Quentin Rameau
Details
Message ID
<20211023055500.1daa632e@tpx.quinq.eu.org>
In-Reply-To
<20211022040849.1916823-1-ac@acha.ninja> (view parent)
DKIM signature
permerror
Download raw message
Hi AC,

> ---
>  all.h          |  3 ++-
>  doc/il.txt     | 14 ++++++++++++++
>  gas.c          | 19 +++++++++++++++++++
>  main.c         | 10 +++++++++-
>  parse.c        | 16 ++++++++++++++--
>  test/tlasm.ssa | 10 ++++++++++
>  tools/lexh.c   |  2 +-
>  7 files changed, 69 insertions(+), 5 deletions(-)
>  create mode 100644 test/tlasm.ssa
> 
> diff --git a/all.h b/all.h
> index 4b9eb0e..de5719d 100644
> --- a/all.h
> +++ b/all.h
> @@ -458,7 +458,7 @@ bshas(BSet *bs, uint elt)
>  
>  /* parse.c */
>  extern Op optab[NOp];
> -void parse(FILE *, char *, void (Dat *), void (Fn *));
> +void parse(FILE *, char *, void (Dat *), void (Fn *), void (char *));
>  void printfn(Fn *, FILE *);
>  void printref(Ref, Fn *, FILE *);
>  void err(char *, ...) __attribute__((noreturn));
> @@ -516,6 +516,7 @@ void rega(Fn *);
>  /* gas.c */
>  extern char *gasloc;
>  extern char *gassym;
> +void gasemittlasm(char *, FILE *);
>  void gasemitdat(Dat *, FILE *);
>  int gasstash(void *, int);
>  void gasemitfin(FILE *);
> diff --git a/doc/il.txt b/doc/il.txt
> index d1ed755..8e68fa4 100644
> --- a/doc/il.txt
> +++ b/doc/il.txt
> @@ -19,6 +19,7 @@
>        * <@ Aggregate Types >
>        * <@ Data >
>        * <@ Functions >
> +      * <@ Inline Assembly >
>    5. <@ Control >
>        * <@ Blocks >
>        * <@ Jumps >
> @@ -384,6 +385,19 @@ is provided in the call instructions.
>  The syntax and semantics for the body of functions
>  are described in the <@ Control > section.
>  
> +~ Inline Assembly
> +~~~~~~~~~~~~~~~~~
> +
> +    `bnf
> +    ASMDEF :=
> +        'asm' '"' ... '"'
> +
> +Top level inline assembly is copied verbatim into the output with the
> +following exceptions:
> +
> +  * A single leading newline will stripped if present.
> +  * '\' followed by '"' or '\' will omit the leading '\'.
> +
>  - 5. Control
>  ------------
>  
> diff --git a/gas.c b/gas.c
> index 8c31794..5e46f8d 100644
> --- a/gas.c
> +++ b/gas.c
> @@ -3,6 +3,25 @@
>  
>  char *gasloc, *gassym;
>  
> +void gasemittlasm(char *s, FILE *f) {
> +	int esc;
> +	fputs("/* begin inline asm */\n", f);
> +	esc = 0;
> +	s += s[1] == '\n' ? 2 : 1;

This would be UB if s is an empty string, could such thing happen?

> +	while (s[1]) {

This truncates the last char of s always.

> +		if (!esc && *s == '\\') {
> +			esc = 1;
> +			continue;
> +		}
> +		if (esc && *s != '"' && *s != '\\')
> +			fputc('\\', f);
> +		fputc(*s, f);
> +		esc = 0;
> +		s++;
> +	}
> +	fputs("/* end inline asm */\n", f);

It seems there's a missing newline before printing this string, or is
it part of s always?

> +}

I would suggest something like:

/*
 * Copy the inline assembly text s to f between begin/end marker
 * comments.  A '\' followed by '"' or '\' is emitted without the
 * leading '\'; all other characters are copied unchanged.
 * The markers are written to f, the same stream as the assembly
 * text, and the end marker is always preceded by a newline.
 */
void gasemittlasm(char *s, FILE *f) {
	char c;

	fputs("/* begin inline asm */\n", f);
	for (; *s; ++s) {
		if (*s == '\\') {
			c = s[1];
			/* skip the backslash so only the escaped character is printed */
			if (c == '"' || c == '\\')
				++s;
		}
		fputc(*s, f);
	}
	fputs("\n/* end inline asm */\n", f);
}

> +
>  void
>  gasemitdat(Dat *d, FILE *f)
>  {
> diff --git a/main.c b/main.c
> index abef591..390b122 100644
> --- a/main.c
> +++ b/main.c
> @@ -38,6 +38,14 @@ char debug['Z'+1] = {
>  static FILE *outf;
>  static int dbg;
>  
> +static void
> +tlasm(char *s)
> +{
> +	if (dbg)
> +		return;
> +	gasemittlasm(s, outf);
> +}
> +
>  static void
>  data(Dat *d)
>  {
> @@ -190,7 +198,7 @@ main(int ac, char *av[])
>  				exit(1);
>  			}
>  		}
> -		parse(inf, f, data, func);
> +		parse(inf, f, data, func, tlasm);
>  	} while (++optind < ac);
>  
>  	if (!dbg) {
> diff --git a/parse.c b/parse.c
> index da88f58..b49e422 100644
> --- a/parse.c
> +++ b/parse.c
> @@ -37,6 +37,7 @@ enum {
>  	Tjmp,
>  	Tjnz,
>  	Tret,
> +	Tasm,
>  	Texport,
>  	Tfunc,
>  	Ttype,
> @@ -87,6 +88,7 @@ static char *kwmap[Ntok] = {
>  	[Tjmp] = "jmp",
>  	[Tjnz] = "jnz",
>  	[Tret] = "ret",
> +	[Tasm] = "asm",
>  	[Texport] = "export",
>  	[Tfunc] = "function",
>  	[Ttype] = "type",
> @@ -109,7 +111,7 @@ enum {
>  	TMask = 16383, /* for temps hash */
>  	BMask = 8191, /* for blocks hash */
>  
> -	K = 3233235, /* found using tools/lexh.c */
> +	K = 4331239, /* found using tools/lexh.c */
>  	M = 23,
>  };
>  
> @@ -785,6 +787,13 @@ typecheck(Fn *fn)
>  	}
>  }
>  
> +static char *
> +parsetlasm(void) {
> +	if (nextnl() != Tstr)
> +		err("string literal expected");
> +	return tokval.str;
> +}
> +
>  static Fn *
>  parsefn(int export)
>  {
> @@ -1065,7 +1074,7 @@ Done:
>  }
>  
>  void
> -parse(FILE *f, char *path, void data(Dat *), void func(Fn *))
> +parse(FILE *f, char *path, void data(Dat *), void func(Fn *), void tlasm(char *))
>  {
>  	int t, export;
>  
> @@ -1081,6 +1090,9 @@ parse(FILE *f, char *path, void data(Dat *), void func(Fn *))
>  		switch (nextnl()) {
>  		default:
>  			err("top-level definition expected");
> +		case Tasm:
> +			tlasm(parsetlasm());
> +			break;
>  		case Texport:
>  			export = 1;
>  			t = nextnl();
> diff --git a/test/tlasm.ssa b/test/tlasm.ssa
> new file mode 100644
> index 0000000..eb42609
> --- /dev/null
> +++ b/test/tlasm.ssa
> @@ -0,0 +1,10 @@
> +
> +asm "
> +/* do nothing \\ \n\n \" */
> +"
> +
> +export
> +function w $main() {
> +@start
> +	ret 0
> +}
> diff --git a/tools/lexh.c b/tools/lexh.c
> index 2ebb022..30874c9 100644
> --- a/tools/lexh.c
> +++ b/tools/lexh.c
> @@ -24,7 +24,7 @@ char *tok[] = {
>  	"cgtd", "cged", "cned", "ceqd", "cod", "cuod",
>  	"vaarg", "vastart", "...", "env",
>  
> -	"call", "phi", "jmp", "jnz", "ret", "export",
> +	"call", "phi", "jmp", "jnz", "ret", "asm", "export",
>  	"function", "type", "data", "section", "align",
>  	"l", "w", "h", "b", "d", "s", "z", "loadw", "loadl",
>  	"loads", "loadd", "alloc1", "alloc2",
> -- 
> 2.33.0
> 
Details
Message ID
<6571bdd3-a2b7-2ea8-1709-0c4979750cc4@acha.ninja>
In-Reply-To
<20211023055500.1daa632e@tpx.quinq.eu.org> (view parent)
DKIM signature
missing
Download raw message
> This would be UB if s is an empty string, could such thing happen?
No - the string is surrounded by '"'.
> This truncates the last char of s always.
As intended.
> It seems there's a missing newline before printing this string, or is
> it part of s always?

This is a bug, i have fixed it locally by printing a newline only if the 
inline asm does not have one already.
Details
Message ID
<CF6KZSXLV1ZH.2B5VP5IS0FBF2@taiga>
In-Reply-To
<4b098abd-e440-4ec1-a303-fcc330748323@acha.ninja> (view parent)
DKIM signature
fail
Download raw message
DKIM signature: fail
On Fri Oct 22, 2021 at 11:54 PM CEST, Andrew Chambers wrote:
> So to add context, this allows you to do things like implement
> __attribute__((weak)) and __attribute__((alias("sym"))) for cproc, both
> of which are used in musl libc. For example attribute alias maps to asm
> ".set sym1, sym2".
>
> This also allowed me to compile crt1.c from musl which uses top level
> asm in this way. In the end I was able to compile a patched musl using
> this and other patches to cproc.

It would be nice to make sure that the syntax is extensible in the
future should someone come along to add register mapping to it, so that
we needn't break existing programs to do so.
Details
Message ID
<1f7cdd52-5f44-7cd9-ef29-8a940fb0eb47@acha.ninja>
In-Reply-To
<CF6KZSXLV1ZH.2B5VP5IS0FBF2@taiga> (view parent)
DKIM signature
missing
Download raw message
> It would be nice to make sure that the syntax is extensible in the
> future should someone come along to add register mapping to it, so that
> we needn't break existing programs to do so.


I suppose the main difference between top level assembly and function 
level assembly would be 2/3 extra arguments in the function context and 
perhaps some extra escaping rules with regard to input output arguments.

asm ".section foobar"

vs

function $foo() {
   asm "movl %0, %1", [%v1], [%v2]
}

My feeling is they would largely be compatible.
Reply to thread Export thread (mbox)