Quentin Carbonneaux: 4 patches — (1) parse sb,ub,sh,uh abi types; (2) add new target-specific abi0 pass; (3) refine width of parsb/ub/sh/uh ops; (4) new arm64_apple target. 20 files changed, 393 insertions(+), 111 deletions(-)
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~mpu/qbe/patches/35077/mbox | git am -3
Learn more about email & git
--- all.h | 7 +++-- ops.h | 8 +++++ parse.c | 83 ++++++++++++++++++++++++++++++++++++---------------- tools/lexh.c | 7 +++-- 4 files changed, 74 insertions(+), 31 deletions(-) diff --git a/all.h b/all.h index 1ecea8e..d7b75b5 100644 --- a/all.h +++ b/all.h @@ -144,8 +144,9 @@ enum O { enum J { Jxxx, #define JMPS(X) \ - X(ret0) X(retw) X(retl) X(rets) \ - X(retd) X(retc) X(jmp) X(jnz) \ + X(retw) X(retl) X(rets) X(retd) \ + X(retsb) X(retub) X(retsh) X(retuh) \ + X(retc) X(ret0) X(jmp) X(jnz) \ X(jfieq) X(jfine) X(jfisge) X(jfisgt) \ X(jfisle) X(jfislt) X(jfiuge) X(jfiugt) \ X(jfiule) X(jfiult) X(jffeq) X(jffge) \ @@ -181,7 +182,7 @@ enum { #define isext(o) INRANGE(o, Oextsb, Oextuw) #define ispar(o) INRANGE(o, Opar, Opare) #define isarg(o) INRANGE(o, Oarg, Oargv) -#define isret(j) INRANGE(j, Jret0, Jretc) +#define isret(j) INRANGE(j, Jretw, Jret0) enum { Kx = -1, /* "top" class (see usecheck() and clsmerge()) */ diff --git a/ops.h b/ops.h index 285bc5c..3d65081 100644 --- a/ops.h +++ b/ops.h @@ -144,9 +144,17 @@ O(rnez, T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0) V(0) /* Arguments, Parameters, and Calls */ O(par, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0) +O(parsb, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0) +O(parub, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0) +O(parsh, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0) +O(paruh, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0) O(parc, T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0) V(0) O(pare, T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0) V(0) O(arg, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0) V(0) +O(argsb, T(w,e,e,e, x,x,x,x), 0) X(0, 0, 0) V(0) +O(argub, T(w,e,e,e, x,x,x,x), 0) X(0, 0, 0) V(0) +O(argsh, T(w,e,e,e, x,x,x,x), 0) X(0, 0, 0) V(0) +O(arguh, T(w,e,e,e, x,x,x,x), 0) X(0, 0, 0) V(0) O(argc, T(e,x,e,e, e,l,e,e), 0) X(0, 0, 0) V(0) O(arge, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) V(0) O(argv, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0) diff --git a/parse.c b/parse.c index 1912c8b..13843c6 100644 --- a/parse.c +++ b/parse.c @@ -3,8 +3,15 @@ #include <stdarg.h> enum { - Ke = -2, /* 
Erroneous mode */ - Km = Kl, /* Memory pointer */ + Ksb = 4, /* matches Oarg/Opar/Jret */ + Kub, + Ksh, + Kuh, + Kc, + K0, + + Ke = -2, /* erroneous mode */ + Km = Kl, /* memory pointer */ }; Op optab[NOp] = { @@ -45,7 +52,11 @@ enum { Talign, Tl, Tw, + Tsh, + Tuh, Th, + Tsb, + Tub, Tb, Td, Ts, @@ -93,12 +104,16 @@ static char *kwmap[Ntok] = { [Tdata] = "data", [Tsection] = "section", [Talign] = "align", - [Tl] = "l", - [Tw] = "w", - [Th] = "h", + [Tsb] = "sb", + [Tub] = "ub", + [Tsh] = "sh", + [Tuh] = "uh", [Tb] = "b", - [Td] = "d", + [Th] = "h", + [Tw] = "w", + [Tl] = "l", [Ts] = "s", + [Td] = "d", [Tz] = "z", [Tdots] = "...", }; @@ -109,7 +124,7 @@ enum { TMask = 16383, /* for temps hash */ BMask = 8191, /* for blocks hash */ - K = 5041217, /* found using tools/lexh.c */ + K = 9583425, /* found using tools/lexh.c */ M = 23, }; @@ -427,7 +442,15 @@ parsecls(int *tyn) err("invalid class specifier"); case Ttyp: *tyn = findtyp(ntyp); - return 4; + return Kc; + case Tsb: + return Ksb; + case Tub: + return Kub; + case Tsh: + return Ksh; + case Tuh: + return Kuh; case Tw: return Kw; case Tl: @@ -482,16 +505,21 @@ parserefl(int arg) err("invalid argument"); if (!arg && rtype(r) != RTmp) err("invalid function parameter"); - if (k == 4) + if (env) + if (arg) + *curi = (Ins){Oarge, k, R, {r}}; + else + *curi = (Ins){Opare, k, r, {R}}; + else if (k == Kc) if (arg) *curi = (Ins){Oargc, Kl, R, {TYPE(ty), r}}; else *curi = (Ins){Oparc, Kl, r, {TYPE(ty)}}; - else if (env) + else if (k >= Ksb) if (arg) - *curi = (Ins){Oarge, k, R, {r}}; + *curi = (Ins){Oargsb+(k-Ksb), Kw, R, {r}}; else - *curi = (Ins){Opare, k, r, {R}}; + *curi = (Ins){Oparsb+(k-Ksb), Kw, r, {R}}; else if (arg) *curi = (Ins){Oarg, k, R, {r}}; @@ -578,14 +606,10 @@ parseline(PState ps) expect(Tnl); return PPhi; case Tret: - curb->jmp.type = (int[]){ - Jretw, Jretl, - Jrets, Jretd, - Jretc, Jret0 - }[rcls]; + curb->jmp.type = Jretw + rcls; if (peek() == Tnl) curb->jmp.type = Jret0; - else if (rcls < 5) { + else if 
(rcls != K0) { r = parseref(); if (req(r, R)) err("invalid return value"); @@ -632,11 +656,13 @@ DoOp: parserefl(1); op = Ocall; expect(Tnl); - if (k == 4) { + if (k == Kc) { k = Kl; arg[1] = TYPE(ty); } else arg[1] = R; + if (k >= Ksb) + k = Kw; goto Ins; } if (op == Tloadw) @@ -645,7 +671,7 @@ DoOp: op = Oload; if (op == Talloc1 || op == Talloc2) op = Oalloc; - if (k == 4) + if (k >= Ksb) err("size class must be w, l, s, or d"); if (op >= NPubOp) err("invalid instruction"); @@ -774,10 +800,13 @@ typecheck(Fn *fn) } r = b->jmp.arg; if (isret(b->jmp.type)) { - if (b->jmp.type == Jretc) { - if (!usecheck(r, Kl, fn)) - goto JErr; - } else if (!usecheck(r, b->jmp.type-Jretw, fn)) + if (b->jmp.type == Jretc) + k = Kl; + else if (b->jmp.type >= Jretsb) + k = Kw; + else + k = b->jmp.type - Jretw; + if (!usecheck(r, k, fn)) goto JErr; } if (b->jmp.type == Jjnz && !usecheck(r, Kw, fn)) @@ -818,7 +847,7 @@ parsefn(Lnk *lnk) if (peek() != Tglo) rcls = parsecls(&curf->retty); else - rcls = 5; + rcls = K0; if (next() != Tglo) err("function name expected"); strncpy(curf->name, tokval.str, NString-1); @@ -1266,6 +1295,10 @@ printfn(Fn *fn, FILE *f) } switch (b->jmp.type) { case Jret0: + case Jretsb: + case Jretub: + case Jretsh: + case Jretuh: case Jretw: case Jretl: case Jrets: diff --git a/tools/lexh.c b/tools/lexh.c index 8d0af21..1aea3e0 100644 --- a/tools/lexh.c +++ b/tools/lexh.c @@ -27,8 +27,9 @@ char *tok[] = { "call", "phi", "jmp", "jnz", "ret", "export", "function", "type", "data", "section", "align", - "l", "w", "h", "b", "d", "s", "z", "loadw", "loadl", - "loads", "loadd", "alloc1", "alloc2", + "l", "w", "sh", "uh", "h", "sb", "ub", "b", + "d", "s", "z", "loadw", "loadl", "loads", "loadd", + "alloc1", "alloc2", }; enum { @@ -69,7 +70,7 @@ main() th[i] = h; } - for (i=0; 1<<i < Ntok; ++i); + for (i=9; 1<<i < Ntok; ++i); M = 32 - i; for (;; --M) { -- 2.37.2
The general idea is to give abis a chance to talk before we've done all the optimizations. Currently, all targets eliminate {par,arg,ret}{sb,ub,...} during this pass. The forthcoming arm64_apple will, however, insert proper extensions during abi0. Moving forward abis can, for example, lower small-aggregates passing there so that memory optimizations can interact better with function calls. --- Makefile | 4 ++-- abi.c | 25 +++++++++++++++++++++++++ all.h | 9 ++++++++- amd64/targ.c | 3 ++- arm64/targ.c | 3 ++- main.c | 3 ++- rv64/targ.c | 3 ++- 7 files changed, 43 insertions(+), 7 deletions(-) create mode 100644 abi.c diff --git a/Makefile b/Makefile index 9c9b401..64878c7 100644 --- a/Makefile +++ b/Makefile @@ -4,8 +4,8 @@ PREFIX = /usr/local BINDIR = $(PREFIX)/bin -COMMOBJ = main.o util.o parse.o cfg.o mem.o ssa.o alias.o load.o copy.o \ - fold.o live.o spill.o rega.o emit.o +COMMOBJ = main.o util.o parse.o abi.o cfg.o mem.o ssa.o alias.o load.o \ + copy.o fold.o live.o spill.o rega.o emit.o AMD64OBJ = amd64/targ.o amd64/sysv.o amd64/isel.o amd64/emit.o ARM64OBJ = arm64/targ.o arm64/abi.o arm64/isel.o arm64/emit.o RV64OBJ = rv64/targ.o rv64/abi.o rv64/isel.o rv64/emit.o diff --git a/abi.c b/abi.c new file mode 100644 index 0000000..9c83497 --- /dev/null +++ b/abi.c @@ -0,0 +1,25 @@ +#include "all.h" + +/* eliminate sub-word abi op + * variants for targets that + * treat char/short/... 
as + * words with arbitrary high + * bits + */ +void +elimsb(Fn *fn) +{ + Blk *b; + Ins *i; + + for (b=fn->start; b; b=b->link) { + for (i=b->ins; i<&b->ins[b->nins]; i++) { + if (isargbh(i->op)) + i->op = Oarg; + if (isparbh(i->op)) + i->op = Opar; + } + if (isretbh(b->jmp.type)) + b->jmp.type = Jretw; + } +} diff --git a/all.h b/all.h index d7b75b5..04050d4 100644 --- a/all.h +++ b/all.h @@ -52,7 +52,8 @@ struct Target { bits (*retregs)(Ref, int[2]); bits (*argregs)(Ref, int[2]); int (*memargs)(int); - void (*abi)(Fn *); + void (*abi0)(Fn *); + void (*abi1)(Fn *); void (*isel)(Fn *); void (*emitfn)(Fn *, FILE *); void (*emitfin)(FILE *); @@ -183,6 +184,9 @@ enum { #define ispar(o) INRANGE(o, Opar, Opare) #define isarg(o) INRANGE(o, Oarg, Oargv) #define isret(j) INRANGE(j, Jretw, Jret0) +#define isparbh(o) INRANGE(o, Oparsb, Oparuh) +#define isargbh(o) INRANGE(o, Oargsb, Oarguh) +#define isretbh(j) INRANGE(j, Jretsb, Jretuh) enum { Kx = -1, /* "top" class (see usecheck() and clsmerge()) */ @@ -478,6 +482,9 @@ void printfn(Fn *, FILE *); void printref(Ref, Fn *, FILE *); void err(char *, ...) 
__attribute__((noreturn)); +/* abi.c */ +void elimsb(Fn *); + /* cfg.c */ Blk *blknew(void); void edgedel(Blk *, Blk **); diff --git a/amd64/targ.c b/amd64/targ.c index e58ba2f..74fba4d 100644 --- a/amd64/targ.c +++ b/amd64/targ.c @@ -24,7 +24,8 @@ amd64_memargs(int op) .retregs = amd64_sysv_retregs, \ .argregs = amd64_sysv_argregs, \ .memargs = amd64_memargs, \ - .abi = amd64_sysv_abi, \ + .abi0 = elimsb, \ + .abi1 = amd64_sysv_abi, \ .isel = amd64_isel, \ Target T_amd64_sysv = { diff --git a/arm64/targ.c b/arm64/targ.c index ddaee2f..6079236 100644 --- a/arm64/targ.c +++ b/arm64/targ.c @@ -38,7 +38,8 @@ Target T_arm64 = { .retregs = arm64_retregs, .argregs = arm64_argregs, .memargs = arm64_memargs, - .abi = arm64_abi, + .abi0 = elimsb, + .abi1 = arm64_abi, .isel = arm64_isel, .emitfn = arm64_emitfn, .emitfin = elf_emitfin, diff --git a/main.c b/main.c index e82b062..253d0c5 100644 --- a/main.c +++ b/main.c @@ -56,6 +56,7 @@ func(Fn *fn) fprintf(stderr, "\n> After parsing:\n"); printfn(fn, stderr); } + T.abi0(fn); fillrpo(fn); fillpreds(fn); filluse(fn); @@ -71,7 +72,7 @@ func(Fn *fn) copy(fn); filluse(fn); fold(fn); - T.abi(fn); + T.abi1(fn); fillpreds(fn); filluse(fn); T.isel(fn); diff --git a/rv64/targ.c b/rv64/targ.c index 70701db..c0e5e18 100644 --- a/rv64/targ.c +++ b/rv64/targ.c @@ -44,7 +44,8 @@ Target T_rv64 = { .retregs = rv64_retregs, .argregs = rv64_argregs, .memargs = rv64_memargs, - .abi = rv64_abi, + .abi0 = elimsb, + .abi1 = rv64_abi, .isel = rv64_isel, .emitfn = rv64_emitfn, .emitfin = elf_emitfin, -- 2.37.2
--- ssa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ssa.c b/ssa.c index 2de02d1..126113d 100644 --- a/ssa.c +++ b/ssa.c @@ -77,6 +77,8 @@ filluse(Fn *fn) if (!req(i->to, R)) { assert(rtype(i->to) == RTmp); w = WFull; + if (isparbh(i->op)) + w = Wsb + (i->op - Oparsb); if (isload(i->op) && i->op != Oload) w = Wsb + (i->op - Oloadsb); if (isext(i->op)) -- 2.37.2
Should make qbe work on apple arm-based hardware. --- Makefile | 9 +- arm64/abi.c | 233 ++++++++++++++++++++++++++++++++++++++++++-------- arm64/all.h | 3 + arm64/emit.c | 58 ++++++++----- arm64/targ.c | 38 +++++--- main.c | 2 + test/dark.ssa | 2 +- tools/test.sh | 2 +- 8 files changed, 274 insertions(+), 73 deletions(-) diff --git a/Makefile b/Makefile index 64878c7..674f850 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,14 @@ main.o: config.h config.h: @case `uname` in \ *Darwin*) \ - echo "#define Deftgt T_amd64_apple"; \ + case `uname -m` in \ + *arm64*) \ + echo "#define Deftgt T_arm64_apple";\ + ;; \ + *) \ + echo "#define Deftgt T_amd64_apple";\ + ;; \ + esac \ ;; \ *) \ case `uname -m` in \ diff --git a/arm64/abi.c b/arm64/abi.c index b2b5973..dfb95e3 100644 --- a/arm64/abi.c +++ b/arm64/abi.c @@ -1,5 +1,6 @@ #include "all.h" +typedef struct Abi Abi; typedef struct Class Class; typedef struct Insl Insl; typedef struct Params Params; @@ -9,6 +10,12 @@ enum { Cptr = 2, /* replaced by a pointer */ }; +struct Abi { + void (*vastart)(Fn *, Params, Ref); + void (*vaarg)(Fn *, Blk *, Ins *); + int apple; +}; + struct Class { char class; char ishfa; @@ -17,6 +24,7 @@ struct Class { uchar size; } hfa; uint size; + uint align; Typ *t; uchar nreg; uchar ngp; @@ -33,11 +41,15 @@ struct Insl { struct Params { uint ngp; uint nfp; - uint nstk; + uint stk; }; static int gpreg[12] = {R0, R1, R2, R3, R4, R5, R6, R7}; static int fpreg[12] = {V0, V1, V2, V3, V4, V5, V6, V7}; +static int store[] = { + [Kw] = Ostorew, [Kl] = Ostorel, + [Ks] = Ostores, [Kd] = Ostored +}; /* layout of call's second argument (RCall) * @@ -92,9 +104,10 @@ typclass(Class *c, Typ *t, int *gp, int *fp) c->class = 0; c->ngp = 0; c->nfp = 0; + c->align = 8; - if (t->align > 4) - err("alignments larger than 16 are not supported"); + if (t->align > 3) + err("alignments larger than 8 are not supported"); if (t->isdark || sz > 16 || sz == 0) { /* large structs are replaced by a @@ -130,10 +143,6 @@ 
typclass(Class *c, Typ *t, int *gp, int *fp) static void sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn) { - static int st[] = { - [Kw] = Ostorew, [Kl] = Ostorel, - [Ks] = Ostores, [Kd] = Ostored - }; uint n; uint64_t off; Ref r; @@ -143,7 +152,7 @@ sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn) for (n=0; n<nreg; n++) { tmp[n] = newtmp("abi", cls[n], fn); r = newtmp("abi", Kl, fn); - emit(st[cls[n]], 0, R, tmp[n], r); + emit(store[cls[n]], 0, R, tmp[n], r); emit(Oadd, Kl, r, mem, getcon(off, fn)); off += KWIDE(cls[n]) ? 8 : 4; } @@ -206,12 +215,13 @@ selret(Blk *b, Fn *fn) } static int -argsclass(Ins *i0, Ins *i1, Class *carg) +argsclass(Ins *i0, Ins *i1, Class *carg, int apple) { - int envc, ngp, nfp, *gp, *fp; + int va, envc, ngp, nfp, *gp, *fp; Class *c; Ins *i; + va = 0; envc = 0; gp = gpreg; fp = fpreg; @@ -219,10 +229,32 @@ argsclass(Ins *i0, Ins *i1, Class *carg) nfp = 8; for (i=i0, c=carg; i<i1; i++, c++) switch (i->op) { + case Oargsb: + case Oargub: + case Oparsb: + case Oparub: + c->size = 1; + goto Scalar; + case Oargsh: + case Oarguh: + case Oparsh: + case Oparuh: + c->size = 2; + goto Scalar; case Opar: case Oarg: - *c->cls = i->cls; c->size = 8; + if (apple && !KWIDE(i->cls)) + c->size = 4; + Scalar: + c->align = c->size; + *c->cls = i->cls; + if (va) { + c->class |= Cstk; + c->size = 8; + c->align = 8; + break; + } if (KBASE(i->cls) == 0 && ngp > 0) { ngp--; *c->reg = *gp++; @@ -258,6 +290,7 @@ argsclass(Ins *i0, Ins *i1, Class *carg) envc = 1; break; case Oargv: + va = apple != 0; break; default: die("unreachable"); @@ -327,18 +360,23 @@ stkblob(Ref r, Class *c, Fn *fn, Insl **ilp) *ilp = il; } +static uint +align(uint x, uint al) +{ + return (x + al-1) & -al; +} + static void -selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) +selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp, int apple) { Ins *i; Class *ca, *c, cr; - int cty; - uint n; - uint64_t stk, off; + int op, cty; + uint n, stk, off;; Ref r, rstk, tmp[4]; ca = alloc((i1-i0) * 
sizeof ca[0]); - cty = argsclass(i0, i1, ca); + cty = argsclass(i0, i1, ca, apple); stk = 0; for (i=i0, c=ca; i<i1; i++, c++) { @@ -347,10 +385,12 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) stkblob(i->arg[0], c, fn, ilp); i->op = Oarg; } - if (c->class & Cstk) + if (c->class & Cstk) { + stk = align(stk, c->align); stk += c->size; + } } - stk += stk & 15; + stk = align(stk, 16); rstk = getcon(stk, fn); if (stk) emit(Oadd, Kl, TMP(SP), TMP(SP), rstk); @@ -403,9 +443,16 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) for (i=i0, c=ca; i<i1; i++, c++) { if ((c->class & Cstk) == 0) continue; - if (i->op == Oarg) { + off = align(off, c->align); + if (i->op == Oarg || isargbh(i->op)) { r = newtmp("abi", Kl, fn); - emit(Ostorel, 0, R, i->arg[0], r); + switch (c->size) { + case 1: op = Ostoreb; break; + case 2: op = Ostoreh; break; + case 4: + case 8: op = store[*c->cls]; break; + } + emit(op, 0, R, i->arg[0], r); emit(Oadd, Kl, r, TMP(SP), getcon(off, fn)); } if (i->op == Oargc) @@ -421,18 +468,19 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) } static Params -selpar(Fn *fn, Ins *i0, Ins *i1) +selpar(Fn *fn, Ins *i0, Ins *i1, int apple) { Class *ca, *c, cr; Insl *il; Ins *i; - int n, s, cty; + int op, n, cty; + uint off; Ref r, tmp[16], *t; ca = alloc((i1-i0) * sizeof ca[0]); curi = &insb[NIns]; - cty = argsclass(i0, i1, ca); + cty = argsclass(i0, i1, ca, apple); fn->reg = arm64_argregs(CALL(cty), 0); il = 0; @@ -457,26 +505,33 @@ selpar(Fn *fn, Ins *i0, Ins *i1) } t = tmp; - s = 2; + off = 0; for (i=i0, c=ca; i<i1; i++, c++) if (i->op == Oparc && !(c->class & Cptr)) { if (c->class & Cstk) { - fn->tmp[i->to.val].slot = -s; - s += c->size / 8; + off = align(off, c->align); + fn->tmp[i->to.val].slot = -(off+2); + off += c->size; } else for (n=0; n<c->nreg; n++) { r = TMP(c->reg[n]); emit(Ocopy, c->cls[n], *t++, r, R); } } else if (c->class & Cstk) { - emit(Oload, *c->cls, i->to, SLOT(-s), R); - s++; + /* todo, check Cptr && Oparc && Cstk */ + off = align(off, 
c->align); + if (isparbh(i->op)) + op = Oloadsb + (i->op - Oparsb); + else + op = Oload; + emit(op, *c->cls, i->to, SLOT(-(off+2)), R); + off += c->size; } else { emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R); } return (Params){ - .nstk = s - 2, + .stk = align(off, 8), .ngp = (cty >> 5) & 15, .nfp = (cty >> 9) & 15 }; @@ -514,7 +569,24 @@ chpred(Blk *b, Blk *bp, Blk *bp1) } static void -selvaarg(Fn *fn, Blk *b, Ins *i) +apple_selvaarg(Fn *fn, Blk *b, Ins *i) +{ + Ref ap, stk, stk8, c8; + + (void)b; + c8 = getcon(8, fn); + ap = i->arg[0]; + stk8 = newtmp("abi", Kl, fn); + stk = newtmp("abi", Kl, fn); + + emit(Ostorel, 0, R, stk8, ap); + emit(Oadd, Kl, stk8, stk, c8); + emit(Oload, i->cls, i->to, stk, R); + emit(Oload, Kl, stk, ap, R); +} + +static void +arm64_selvaarg(Fn *fn, Blk *b, Ins *i) { Ref loc, lreg, lstk, nr, r0, r1, c8, c16, c24, c28, ap; Blk *b0, *bstk, *breg; @@ -607,7 +679,21 @@ selvaarg(Fn *fn, Blk *b, Ins *i) } static void -selvastart(Fn *fn, Params p, Ref ap) +apple_selvastart(Fn *fn, Params p, Ref ap) +{ + Ref off, stk, arg; + + off = getcon(p.stk, fn); + stk = newtmp("abi", Kl, fn); + arg = newtmp("abi", Kl, fn); + + emit(Ostorel, 0, R, arg, ap); + emit(Oadd, Kl, arg, stk, off); + emit(Oaddr, Kl, stk, SLOT(-1), R); +} + +static void +arm64_selvastart(Fn *fn, Params p, Ref ap) { Ref r0, r1, rsave; @@ -615,7 +701,7 @@ selvastart(Fn *fn, Params p, Ref ap) r0 = newtmp("abi", Kl, fn); emit(Ostorel, Kw, R, r0, ap); - emit(Oadd, Kl, r0, rsave, getcon(p.nstk*8 + 192, fn)); + emit(Oadd, Kl, r0, rsave, getcon(p.stk + 192, fn)); r0 = newtmp("abi", Kl, fn); r1 = newtmp("abi", Kl, fn); @@ -639,8 +725,8 @@ selvastart(Fn *fn, Params p, Ref ap) emit(Oadd, Kl, r0, ap, getcon(28, fn)); } -void -arm64_abi(Fn *fn) +static void +abi(Fn *fn, Abi abi) { Blk *b; Ins *i, *i0, *ip; @@ -655,7 +741,7 @@ arm64_abi(Fn *fn) for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++) if (!ispar(i->op)) break; - p = selpar(fn, b->ins, i); + p = selpar(fn, b->ins, i, abi.apple); n = 
b->nins - (i - b->ins) + (&insb[NIns] - curi); i0 = alloc(n * sizeof(Ins)); ip = icpy(ip = i0, curi, &insb[NIns] - curi); @@ -682,14 +768,14 @@ arm64_abi(Fn *fn) for (i0=i; i0>b->ins; i0--) if (!isarg((i0-1)->op)) break; - selcall(fn, i0, i, &il); + selcall(fn, i0, i, &il, abi.apple); i = i0; break; case Ovastart: - selvastart(fn, p, i->arg[0]); + abi.vastart(fn, p, i->arg[0]); break; case Ovaarg: - selvaarg(fn, b, i); + abi.vaarg(fn, b, i); break; case Oarg: case Oargc: @@ -707,3 +793,74 @@ arm64_abi(Fn *fn) printfn(fn, stderr); } } + +void +arm64_abi(Fn *fn) +{ + abi(fn, (Abi){ + arm64_selvastart, + arm64_selvaarg, + 0 + }); +} + +void +apple_abi(Fn *fn) +{ + abi(fn, (Abi){ + apple_selvastart, + apple_selvaarg, + 1 + }); +} + +/* abi0 for apple target; introduces + * necessery sign extension for arg + * passing & returns + */ +void +apple_extsb(Fn *fn) +{ + Blk *b; + Ins *i0, *i1, *i; + int j, op; + Ref r; + + for (b=fn->start; b; b=b->link) { + curi = &insb[NIns]; + j = b->jmp.type; + if (isretbh(j)) { + r = newtmp("abi", Kw, fn); + op = Oextsb + (j - Jretsb); + emit(op, Kw, r, b->jmp.arg, R); + b->jmp.arg = r; + } + for (i=&b->ins[b->nins]; i>b->ins;) { + emiti(*--i); + if (i->op != Ocall) + continue; + for (i0=i1=i; i0>b->ins; i0--) + if (!isarg((i0-1)->op)) + break; + for (i=i1; i>i0;) { + emiti(*--i); + if (isargbh(i->op)) { + i->to = newtmp("abi", Kl, fn); + curi->arg[0] = i->to; + } + } + for (i=i1; i>i0;) + if (isargbh((--i)->op)) { + op = Oextsb + (i->op - Oargsb); + emit(op, Kw, i->to, i->arg[0], R); + } + } + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } + + if (debug['A']) { + fprintf(stderr, "\n> After apple_extsb:\n"); + printfn(fn, stderr); + } +} diff --git a/arm64/all.h b/arm64/all.h index ff2b3ff..6b7f43e 100644 --- a/arm64/all.h +++ b/arm64/all.h @@ -28,6 +28,8 @@ extern int arm64_rclob[]; bits arm64_retregs(Ref, int[2]); bits arm64_argregs(Ref, int[2]); void arm64_abi(Fn *); +void apple_extsb(Fn *); +void apple_abi(Fn *); 
/* isel.c */ int arm64_logimm(uint64_t, int); @@ -35,3 +37,4 @@ void arm64_isel(Fn *); /* emit.c */ void arm64_emitfn(Fn *, FILE *); +void apple_emitfn(Fn *, FILE *); diff --git a/arm64/emit.c b/arm64/emit.c index 55f5ce6..18c19d2 100644 --- a/arm64/emit.c +++ b/arm64/emit.c @@ -7,6 +7,7 @@ struct E { Fn *fn; uint64_t frame; uint padding; + int apple; }; #define CMP(X) \ @@ -144,10 +145,10 @@ slot(int s, E *e) if (s == -1) return 16 + e->frame; if (s < 0) { - if (e->fn->vararg) - return 16 + e->frame + 192 - (s+2)*8; + if (e->fn->vararg && !e->apple) + return 16 + e->frame + 192 - (s+2); else - return 16 + e->frame - (s+2)*8; + return 16 + e->frame - (s+2); } else return 16 + e->padding + 4 * s; } @@ -243,8 +244,16 @@ emitf(char *s, Ins *i, E *e) } static void -loadcon(Con *c, int r, int k, FILE *f) +loadcon(Con *c, int r, int k, E *e) { + static char *ldsym[][2] = { + /* arm64 */ + [0][0] = "\tadrp\t%s, %s%s%s\n", + [0][1] = "\tadd\t%s, %s, #:lo12:%s%s%s\n", + /* apple */ + [1][0] = "\tadrp\t%s, %s%s@page%s\n", + [1][1] = "\tadd\t%s, %s, %s%s@pageoff%s\n", + }; char *rn, *l, *p, off[32]; int64_t n; int w, sh; @@ -261,24 +270,22 @@ loadcon(Con *c, int r, int k, FILE *f) off[0] = 0; l = str(c->label); p = c->local ? T.asloc : l[0] == '"' ? 
"" : T.assym; - fprintf(f, "\tadrp\t%s, %s%s%s\n", - rn, p, l, off); - fprintf(f, "\tadd\t%s, %s, #:lo12:%s%s%s\n", - rn, rn, p, l, off); + fprintf(e->f, ldsym[e->apple][0], rn, p, l, off); + fprintf(e->f, ldsym[e->apple][1], rn, rn, p, l, off); return; } assert(c->type == CBits); if (!w) n = (int32_t)n; if ((n | 0xffff) == -1 || arm64_logimm(n, k)) { - fprintf(f, "\tmov\t%s, #%"PRIi64"\n", rn, n); + fprintf(e->f, "\tmov\t%s, #%"PRIi64"\n", rn, n); } else { - fprintf(f, "\tmov\t%s, #%d\n", + fprintf(e->f, "\tmov\t%s, #%d\n", rn, (int)(n & 0xffff)); for (sh=16; n>>=16; sh+=16) { if ((!w && sh == 32) || sh == 64) break; - fprintf(f, "\tmovk\t%s, #0x%x, lsl #%d\n", + fprintf(e->f, "\tmovk\t%s, #0x%x, lsl #%d\n", rn, (uint)(n & 0xffff), sh); } } @@ -358,7 +365,7 @@ emitins(Ins *i, E *e) switch (rtype(i->arg[0])) { case RCon: c = &e->fn->con[i->arg[0].val]; - loadcon(c, i->to.val, i->cls, e->f); + loadcon(c, i->to.val, i->cls, e); break; case RSlot: i->op = Oload; @@ -450,8 +457,8 @@ framelayout(E *e) */ -void -arm64_emitfn(Fn *fn, FILE *out) +static void +emitfn(E *e) { static char *ctoa[] = { #define X(c, s) [c] = s, @@ -463,13 +470,11 @@ arm64_emitfn(Fn *fn, FILE *out) uint64_t o; Blk *b, *t; Ins *i; - E *e; - emitlnk(fn->name, &fn->lnk, ".text", out); - e = &(E){.f = out, .fn = fn}; + emitlnk(e->fn->name, &e->fn->lnk, ".text", e->f); framelayout(e); - if (e->fn->vararg) { + if (e->fn->vararg && !e->apple) { for (n=7; n>=0; n--) fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n); for (n=7; n>=0; n-=2) @@ -531,7 +536,7 @@ arm64_emitfn(Fn *fn, FILE *out) if (e->fn->dynalloc) fputs("\tmov sp, x29\n", e->f); o = e->frame + 16; - if (e->fn->vararg) + if (e->fn->vararg && !e->apple) o += 192; if (o <= 504) fprintf(e->f, @@ -589,5 +594,18 @@ arm64_emitfn(Fn *fn, FILE *out) } } id0 += e->fn->nblk; - elf_emitfnfin(e->fn->name, e->f); +} + +void +arm64_emitfn(Fn *fn, FILE *out) +{ + emitfn(&(E){.f = out, .fn = fn, .apple = 0}); + elf_emitfnfin(fn->name, out); +} + +void 
+apple_emitfn(Fn *fn, FILE *out) +{ + fn->lnk.align = 4; + emitfn(&(E){.f = out, .fn = fn, .apple = 1}); } diff --git a/arm64/targ.c b/arm64/targ.c index 6079236..88c40f1 100644 --- a/arm64/targ.c +++ b/arm64/targ.c @@ -25,25 +25,39 @@ arm64_memargs(int op) return 0; } +#define ARM64_COMMON \ + .gpr0 = R0, \ + .ngpr = NGPR, \ + .fpr0 = V0, \ + .nfpr = NFPR, \ + .rglob = RGLOB, \ + .nrglob = 3, \ + .rsave = arm64_rsave, \ + .nrsave = {NGPS, NFPS}, \ + .retregs = arm64_retregs, \ + .argregs = arm64_argregs, \ + .memargs = arm64_memargs, \ + .isel = arm64_isel, \ + Target T_arm64 = { .name = "arm64", - .gpr0 = R0, - .ngpr = NGPR, - .fpr0 = V0, - .nfpr = NFPR, - .rglob = RGLOB, - .nrglob = 3, - .rsave = arm64_rsave, - .nrsave = {NGPS, NFPS}, - .retregs = arm64_retregs, - .argregs = arm64_argregs, - .memargs = arm64_memargs, .abi0 = elimsb, .abi1 = arm64_abi, - .isel = arm64_isel, .emitfn = arm64_emitfn, .emitfin = elf_emitfin, .asloc = ".L", + ARM64_COMMON +}; + +Target T_arm64_apple = { + .name = "arm64_apple", + .abi0 = apple_extsb, + .abi1 = apple_abi, + .emitfn = apple_emitfn, + .emitfin = macho_emitfin, + .asloc = "L", + .assym = "_", + ARM64_COMMON }; MAKESURE(globals_are_not_arguments, diff --git a/main.c b/main.c index 253d0c5..c028503 100644 --- a/main.c +++ b/main.c @@ -21,12 +21,14 @@ char debug['Z'+1] = { extern Target T_amd64_sysv; extern Target T_amd64_apple; extern Target T_arm64; +extern Target T_arm64_apple; extern Target T_rv64; static Target *tlist[] = { &T_amd64_sysv, &T_amd64_apple, &T_arm64, + &T_arm64_apple, &T_rv64, 0 }; diff --git a/test/dark.ssa b/test/dark.ssa index de58e4c..ed9ec21 100644 --- a/test/dark.ssa +++ b/test/dark.ssa @@ -1,4 +1,4 @@ -# skip arm64 rv64 +# skip arm64 arm64_apple rv64 # a hack example, # we use a dark type to get # a pointer to the stack. 
diff --git a/tools/test.sh b/tools/test.sh index 4653b83..9c0f9ee 100755 --- a/tools/test.sh +++ b/tools/test.sh @@ -70,7 +70,7 @@ init() { "") case `uname` in *Darwin*) - cc="cc -Wl,-no_pie" + cc="cc" ;; *OpenBSD*) cc="cc -nopie" -- 2.37.2