~mpu/qbe

6 4

[PATCH] Add module-level inline assembly

Details
Message ID
<01ad4ad3-b494-4b2e-b9af-0f299d7ef777@notgull.net>
DKIM signature
missing
Download raw message
This patch adds module-level inline assembly. The main idea is to
introduce an "asm" keyword, which takes a string which is echoed
straight into the output file. The only post-processing done on the
string is stripping its backslash escapes.

This feature roughly corresponds to the top-level "asm" macro in most C
compilers or the "global_asm!" macro in Rust. I've added documentation
of this feature to "il.txt" as well.

I have not implemented instruction-level inline assembly, since it is
more controversial and will likely impact more of the codebase.

This is my first patch for this codebase. I may not be aware of some
code idioms.

Signed-off-by: John Nunley <dev@notgull.net>
---
  all.h               |  2 +-
  doc/il.txt          | 16 ++++++++++++++++
  main.c              |  8 +++++++-
  parse.c             | 37 +++++++++++++++++++++++++++++++++++--
  test/global_asm.ssa | 30 ++++++++++++++++++++++++++++++
  tools/lexh.c        |  2 +-
  6 files changed, 90 insertions(+), 5 deletions(-)
  create mode 100644 test/global_asm.ssa

diff --git a/all.h b/all.h
index 97cc41c..c4e176d 100644
--- a/all.h
+++ b/all.h
@@ -515,7 +515,7 @@ bshas(BSet *bs, uint elt)

  /* parse.c */
  extern Op optab[NOp];
-void parse(FILE *, char *, void (char *), void (Dat *), void (Fn *));
+void parse(FILE *, char *, void (char *), void (Dat *), void (Fn *), 
void (const char *));
  void printfn(Fn *, FILE *);
  void printref(Ref, Fn *, FILE *);
  void err(char *, ...) __attribute__((noreturn));
diff --git a/doc/il.txt b/doc/il.txt
index 7ec5fd0..f37bb3e 100644
--- a/doc/il.txt
+++ b/doc/il.txt
@@ -510,6 +510,22 @@ necessary to compile a call is in the instruction 
itself.
  The syntax and semantics for the body of functions
  are described in the <@ Control > section.

+~ Module-Level Inline Assembly
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    `bnf
+    ASM := 'asm' '"' ... '"'
+
+It is often necessary to bypass QBE in order to invoke
+instructions or create assembly code that QBE cannot
+emit. For instance, libc implementations will need to
+invoke a "syscall" instruction, which QBE cannot emit.
+One possible way to invoke these instructions is with
+module-level inline assembly.
+
+The contents of the string are echoed directly into the
+assembly file that QBE is emitting.
+
  - 6. Control
  ------------

diff --git a/main.c b/main.c
index 5ecb4d0..ebbf067 100644
--- a/main.c
+++ b/main.c
@@ -113,6 +113,12 @@ dbgfile(char *fn)
      emitdbgfile(fn, outf);
  }

+static void
+glasm(const char *text)
+{
+    fprintf(outf, "%s", text);
+}
+
  int
  main(int ac, char *av[])
  {
@@ -187,7 +193,7 @@ main(int ac, char *av[])
                  exit(1);
              }
          }
-        parse(inf, f, dbgfile, data, func);
+        parse(inf, f, dbgfile, data, func, glasm);
          fclose(inf);
      } while (++optind < ac);

diff --git a/parse.c b/parse.c
index e896679..9e93fd3 100644
--- a/parse.c
+++ b/parse.c
@@ -41,6 +41,7 @@ enum Token {
      Talloc1,
      Talloc2,

+    Tasm,
      Tblit,
      Tcall,
      Tenv,
@@ -100,6 +101,7 @@ static char *kwmap[Ntok] = {
      [Tloadd] = "loadd",
      [Talloc1] = "alloc1",
      [Talloc2] = "alloc2",
+    [Tasm] = "asm",
      [Tblit] = "blit",
      [Tcall] = "call",
      [Tenv] = "env",
@@ -137,7 +139,7 @@ enum {
      TMask = 16383, /* for temps hash */
      BMask = 8191, /* for blocks hash */

-    K = 11183273, /* found using tools/lexh.c */
+    K = 15286327, /* found using tools/lexh.c */
      M = 23,
  };

@@ -1198,8 +1200,36 @@ parselnk(Lnk *lnk)
          }
  }

+static void
+parseglasm(void glasm(const char *))
+{
+    const char *t;
+    char *scont, *p;
+    int len;
+
+    expect(Tstr);
+    t = tokval.str;
+    assert(*t++ == '"');
+
+    len = strlen(t);
+    scont = malloc(len);
+    if (!scont)
+        die("unable to allocate memory");
+    memcpy(scont, t, len);
+
+    assert(scont[len-1] == '"');
+    scont[len-1] = '\0';
+
+    p = strtok(scont, "\\");
+    do glasm(p);
+    while ((p = strtok(NULL, "\\")));
+
+    glasm("\n");
+    free(scont);
+}
+
  void
-parse(FILE *f, char *path, void dbgfile(char *), void data(Dat *), void 
func(Fn *))
+parse(FILE *f, char *path, void dbgfile(char *), void data(Dat *), void 
func(Fn *), void glasm(const char *))
  {
      Lnk lnk;
      uint n;
@@ -1220,6 +1250,9 @@ parse(FILE *f, char *path, void dbgfile(char *), 
void data(Dat *), void func(Fn
              expect(Tstr);
              dbgfile(tokval.str);
              break;
+        case Tasm:
+            parseglasm(glasm);
+            break;
          case Tfunc:
              lnk.align = 16;
              func(parsefn(&lnk));
diff --git a/test/global_asm.ssa b/test/global_asm.ssa
new file mode 100644
index 0000000..4c25bed
--- /dev/null
+++ b/test/global_asm.ssa
@@ -0,0 +1,30 @@
+# tests global assembly
+
+asm "
+.data
+.balign 8
+x:
+    .ascii \"foobar\"
+
+.text
+.balign 16
+.globl asm_function
+asm_function:
+    ret
+.type asm_function, @function
+.size asm_function, .-asm_function
+"
+
+export function $call_it() {
+@start
+    call $asm_function ()
+    ret
+}
+
+# >>> driver
+# extern void call_it();
+# int main() {
+#     call_it();
+#     return 0;
+# }
+# <<<
diff --git a/tools/lexh.c b/tools/lexh.c
index efc30fe..fea2b5e 100644
--- a/tools/lexh.c
+++ b/tools/lexh.c
@@ -25,7 +25,7 @@ char *tok[] = {
      "cgtd", "cged", "cned", "ceqd", "cod", "cuod",
      "vaarg", "vastart", "...", "env", "dbgloc",

-    "call", "phi", "jmp", "jnz", "ret", "hlt", "export",
+    "asm", "call", "phi", "jmp", "jnz", "ret", "hlt", "export",
      "function", "type", "data", "section", "align", "dbgfile",
      "blit", "l", "w", "sh", "uh", "h", "sb", "ub", "b",
      "d", "s", "z", "loadw", "loadl", "loads", "loadd",
-- 
2.34.1
Details
Message ID
<D5ZXVIMMSDJH.7U9JIZ8T74IZ@d2evs.net>
In-Reply-To
<01ad4ad3-b494-4b2e-b9af-0f299d7ef777@notgull.net> (view parent)
DKIM signature
pass
Download raw message
i'm not sure this is all that helpful from hare's perspective - the main 
reason we'd want qbe to allow us to pass asm through is to be able to 
get rid of the function call overhead for asm bits, which this wouldn't 
allow. i don't know if this would be useful to other folks, but we 
already have the ability to add global-level assembly through separate 
.s files which are compiled into the final binary

(i might see this being useful for eg. passing through more complete 
debug info, but my understanding is that the current plan for that would 
be to make qbe smart enough to generate debug info itself)
Details
Message ID
<cabb9e0f-0212-43a7-b20d-b6014f4fe019@notgull.net>
In-Reply-To
<D5ZXVIMMSDJH.7U9JIZ8T74IZ@d2evs.net> (view parent)
DKIM signature
missing
Download raw message
> i'm not sure this is all that helpful from hare's perspective

I believe it is impossible to implement certain constructs in C and Rust
without this feature. This patch is mostly intended for them. If Hare
doesn't use it, that's okay.

Also, I appear to have messed up the patch formatting. git send-email
wasn't working for me, so I foolishly tried to just paste the patch into
Thunderbird and send it on. Let me try again.
Details
Message ID
<4D1131715D19743AC8FF0ABBE1564F9F@eigenstate.org>
In-Reply-To
<cabb9e0f-0212-43a7-b20d-b6014f4fe019@notgull.net> (view parent)
DKIM signature
permerror
Download raw message
Quoth John Nunley <dev@notgull.net>:
> > i'm not sure this is all that helpful from hare's perspective
> 
> I believe it is impossible to implement certain constructs in C and Rust
> without this feature. This patch is mostly intended for them. If Hare
> doesn't use it, that's okay.
> 
> Also, I appear to have messed up the patch formatting. git send-email
> wasn't working for me, so I foolishly tried to just paste the patch into
> Thunderbird and send it on. Let me try again.
> 

I don't believe C needs it for anything.
Details
Message ID
<a81eca08-a5b3-4a02-acd3-75c3eebc9c94@notgull.net>
In-Reply-To
<4D1131715D19743AC8FF0ABBE1564F9F@eigenstate.org> (view parent)
DKIM signature
missing
Download raw message
> I don't believe C needs it for anything.

C needs it for the top-level "asm" macro, right?

I guess you can push it into a ".S" file and compile that into the
executable, but that seems kind of hacky.
Details
Message ID
<CD92E43D263C9A892A44C06E87233A74@eigenstate.org>
In-Reply-To
<a81eca08-a5b3-4a02-acd3-75c3eebc9c94@notgull.net> (view parent)
DKIM signature
permerror
Download raw message
Quoth John Nunley <dev@notgull.net>:
> > I don't believe C needs it for anything.
> 
> C needs it for the top-level "asm" macro, right?
> 
> I guess you can push it into a ".S" file and compile that into the
> executable, but that seems kind of hacky.

C doesn't have a top level asm macro; there are some
compilers that have extensions for it, but not all
implementations have it, and the implementations are
not all compatible. They also have different levels
of integration with the register allocator.
Details
Message ID
<8855149d-57b1-4d48-a1a3-1288b4e762cd@app.fastmail.com>
In-Reply-To
<CD92E43D263C9A892A44C06E87233A74@eigenstate.org> (view parent)
DKIM signature
pass
Download raw message
My sentiment around inline asm is that we should
either support it properly within functions (that
is, handle it much like regular calls), or not at
all. I'm not too interested in hacks that push
things verbatim to the assembler.
Reply to thread Export thread (mbox)