~mpu/qbe

A new arm64_apple target for m1 computers v1 PROPOSED

Quentin Carbonneaux: 4
 parse sb,ub,sh,uh abi types
 add new target-specific abi0 pass
 refine width of parsb/ub/sh/uh ops
 new arm64_apple target

 20 files changed, 393 insertions(+), 111 deletions(-)
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~mpu/qbe/patches/35077/mbox | git am -3
Learn more about email & git

[PATCH 1/4] parse sb,ub,sh,uh abi types Export this patch

---
 all.h        |  7 +++--
 ops.h        |  8 +++++
 parse.c      | 83 ++++++++++++++++++++++++++++++++++++----------------
 tools/lexh.c |  7 +++--
 4 files changed, 74 insertions(+), 31 deletions(-)

diff --git a/all.h b/all.h
index 1ecea8e..d7b75b5 100644
--- a/all.h
+++ b/all.h
@@ -144,8 +144,9 @@ enum O {
enum J {
	Jxxx,
#define JMPS(X)                                 \
	X(ret0)   X(retw)   X(retl)   X(rets)   \
	X(retd)   X(retc)   X(jmp)    X(jnz)    \
	X(retw)   X(retl)   X(rets)   X(retd)   \
	X(retsb)  X(retub)  X(retsh)  X(retuh)  \
	X(retc)   X(ret0)   X(jmp)    X(jnz)    \
	X(jfieq)  X(jfine)  X(jfisge) X(jfisgt) \
	X(jfisle) X(jfislt) X(jfiuge) X(jfiugt) \
	X(jfiule) X(jfiult) X(jffeq)  X(jffge)  \
@@ -181,7 +182,7 @@ enum {
#define isext(o) INRANGE(o, Oextsb, Oextuw)
#define ispar(o) INRANGE(o, Opar, Opare)
#define isarg(o) INRANGE(o, Oarg, Oargv)
#define isret(j) INRANGE(j, Jret0, Jretc)
#define isret(j) INRANGE(j, Jretw, Jret0)

enum {
	Kx = -1, /* "top" class (see usecheck() and clsmerge()) */
diff --git a/ops.h b/ops.h
index 285bc5c..3d65081 100644
--- a/ops.h
+++ b/ops.h
@@ -144,9 +144,17 @@ O(rnez,    T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0) V(0)

/* Arguments, Parameters, and Calls */
O(par,     T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
O(parsb,   T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
O(parub,   T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
O(parsh,   T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
O(paruh,   T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
O(parc,    T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
O(pare,    T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
O(arg,     T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0) V(0)
O(argsb,   T(w,e,e,e, x,x,x,x), 0) X(0, 0, 0) V(0)
O(argub,   T(w,e,e,e, x,x,x,x), 0) X(0, 0, 0) V(0)
O(argsh,   T(w,e,e,e, x,x,x,x), 0) X(0, 0, 0) V(0)
O(arguh,   T(w,e,e,e, x,x,x,x), 0) X(0, 0, 0) V(0)
O(argc,    T(e,x,e,e, e,l,e,e), 0) X(0, 0, 0) V(0)
O(arge,    T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
O(argv,    T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
diff --git a/parse.c b/parse.c
index 1912c8b..13843c6 100644
--- a/parse.c
+++ b/parse.c
@@ -3,8 +3,15 @@
#include <stdarg.h>

enum {
	Ke = -2, /* Erroneous mode */
	Km = Kl, /* Memory pointer */
	Ksb = 4, /* matches Oarg/Opar/Jret */
	Kub,
	Ksh,
	Kuh,
	Kc,
	K0,

	Ke = -2, /* erroneous mode */
	Km = Kl, /* memory pointer */
};

Op optab[NOp] = {
@@ -45,7 +52,11 @@ enum {
	Talign,
	Tl,
	Tw,
	Tsh,
	Tuh,
	Th,
	Tsb,
	Tub,
	Tb,
	Td,
	Ts,
@@ -93,12 +104,16 @@ static char *kwmap[Ntok] = {
	[Tdata] = "data",
	[Tsection] = "section",
	[Talign] = "align",
	[Tl] = "l",
	[Tw] = "w",
	[Th] = "h",
	[Tsb] = "sb",
	[Tub] = "ub",
	[Tsh] = "sh",
	[Tuh] = "uh",
	[Tb] = "b",
	[Td] = "d",
	[Th] = "h",
	[Tw] = "w",
	[Tl] = "l",
	[Ts] = "s",
	[Td] = "d",
	[Tz] = "z",
	[Tdots] = "...",
};
@@ -109,7 +124,7 @@ enum {
	TMask = 16383, /* for temps hash */
	BMask = 8191, /* for blocks hash */

	K = 5041217, /* found using tools/lexh.c */
	K = 9583425, /* found using tools/lexh.c */
	M = 23,
};

@@ -427,7 +442,15 @@ parsecls(int *tyn)
		err("invalid class specifier");
	case Ttyp:
		*tyn = findtyp(ntyp);
		return 4;
		return Kc;
	case Tsb:
		return Ksb;
	case Tub:
		return Kub;
	case Tsh:
		return Ksh;
	case Tuh:
		return Kuh;
	case Tw:
		return Kw;
	case Tl:
@@ -482,16 +505,21 @@ parserefl(int arg)
			err("invalid argument");
		if (!arg && rtype(r) != RTmp)
			err("invalid function parameter");
		if (k == 4)
		if (env)
			if (arg)
				*curi = (Ins){Oarge, k, R, {r}};
			else
				*curi = (Ins){Opare, k, r, {R}};
		else if (k == Kc)
			if (arg)
				*curi = (Ins){Oargc, Kl, R, {TYPE(ty), r}};
			else
				*curi = (Ins){Oparc, Kl, r, {TYPE(ty)}};
		else if (env)
		else if (k >= Ksb)
			if (arg)
				*curi = (Ins){Oarge, k, R, {r}};
				*curi = (Ins){Oargsb+(k-Ksb), Kw, R, {r}};
			else
				*curi = (Ins){Opare, k, r, {R}};
				*curi = (Ins){Oparsb+(k-Ksb), Kw, r, {R}};
		else
			if (arg)
				*curi = (Ins){Oarg, k, R, {r}};
@@ -578,14 +606,10 @@ parseline(PState ps)
		expect(Tnl);
		return PPhi;
	case Tret:
		curb->jmp.type = (int[]){
			Jretw, Jretl,
			Jrets, Jretd,
			Jretc, Jret0
		}[rcls];
		curb->jmp.type = Jretw + rcls;
		if (peek() == Tnl)
			curb->jmp.type = Jret0;
		else if (rcls < 5) {
		else if (rcls != K0) {
			r = parseref();
			if (req(r, R))
				err("invalid return value");
@@ -632,11 +656,13 @@ DoOp:
		parserefl(1);
		op = Ocall;
		expect(Tnl);
		if (k == 4) {
		if (k == Kc) {
			k = Kl;
			arg[1] = TYPE(ty);
		} else
			arg[1] = R;
		if (k >= Ksb)
			k = Kw;
		goto Ins;
	}
	if (op == Tloadw)
@@ -645,7 +671,7 @@ DoOp:
		op = Oload;
	if (op == Talloc1 || op == Talloc2)
		op = Oalloc;
	if (k == 4)
	if (k >= Ksb)
		err("size class must be w, l, s, or d");
	if (op >= NPubOp)
		err("invalid instruction");
@@ -774,10 +800,13 @@ typecheck(Fn *fn)
			}
		r = b->jmp.arg;
		if (isret(b->jmp.type)) {
			if (b->jmp.type == Jretc) {
				if (!usecheck(r, Kl, fn))
					goto JErr;
			} else if (!usecheck(r, b->jmp.type-Jretw, fn))
			if (b->jmp.type == Jretc)
				k = Kl;
			else if (b->jmp.type >= Jretsb)
				k = Kw;
			else
				k = b->jmp.type - Jretw;
			if (!usecheck(r, k, fn))
				goto JErr;
		}
		if (b->jmp.type == Jjnz && !usecheck(r, Kw, fn))
@@ -818,7 +847,7 @@ parsefn(Lnk *lnk)
	if (peek() != Tglo)
		rcls = parsecls(&curf->retty);
	else
		rcls = 5;
		rcls = K0;
	if (next() != Tglo)
		err("function name expected");
	strncpy(curf->name, tokval.str, NString-1);
@@ -1266,6 +1295,10 @@ printfn(Fn *fn, FILE *f)
		}
		switch (b->jmp.type) {
		case Jret0:
		case Jretsb:
		case Jretub:
		case Jretsh:
		case Jretuh:
		case Jretw:
		case Jretl:
		case Jrets:
diff --git a/tools/lexh.c b/tools/lexh.c
index 8d0af21..1aea3e0 100644
--- a/tools/lexh.c
+++ b/tools/lexh.c
@@ -27,8 +27,9 @@ char *tok[] = {

	"call", "phi", "jmp", "jnz", "ret", "export",
	"function", "type", "data", "section", "align",
	"l", "w", "h", "b", "d", "s", "z", "loadw", "loadl",
	"loads", "loadd", "alloc1", "alloc2",
	"l", "w", "sh", "uh", "h", "sb", "ub", "b",
	"d", "s", "z", "loadw", "loadl", "loads", "loadd",
	"alloc1", "alloc2",

};
enum {
@@ -69,7 +70,7 @@ main()
		th[i] = h;
	}

	for (i=0; 1<<i < Ntok; ++i);
	for (i=9; 1<<i < Ntok; ++i);
	M = 32 - i;

	for (;; --M) {
-- 
2.37.2

[PATCH 2/4] add new target-specific abi0 pass Export this patch

The general idea is to give abis a
chance to talk before we've done all
the optimizations. Currently, all
targets eliminate {par,arg,ret}{sb,ub,...}
during this pass. The forthcoming
arm64_apple will, however, insert
proper extensions during abi0.

Moving forward, abis can, for example,
lower small-aggregate passing there
so that memory optimizations can
interact better with function calls.
---
 Makefile     |  4 ++--
 abi.c        | 25 +++++++++++++++++++++++++
 all.h        |  9 ++++++++-
 amd64/targ.c |  3 ++-
 arm64/targ.c |  3 ++-
 main.c       |  3 ++-
 rv64/targ.c  |  3 ++-
 7 files changed, 43 insertions(+), 7 deletions(-)
 create mode 100644 abi.c

diff --git a/Makefile b/Makefile
index 9c9b401..64878c7 100644
--- a/Makefile
+++ b/Makefile
@@ -4,8 +4,8 @@
PREFIX = /usr/local
BINDIR = $(PREFIX)/bin

COMMOBJ  = main.o util.o parse.o cfg.o mem.o ssa.o alias.o load.o copy.o \
           fold.o live.o spill.o rega.o emit.o
COMMOBJ  = main.o util.o parse.o abi.o cfg.o mem.o ssa.o alias.o load.o \
           copy.o fold.o live.o spill.o rega.o emit.o
AMD64OBJ = amd64/targ.o amd64/sysv.o amd64/isel.o amd64/emit.o
ARM64OBJ = arm64/targ.o arm64/abi.o arm64/isel.o arm64/emit.o
RV64OBJ  = rv64/targ.o rv64/abi.o rv64/isel.o rv64/emit.o
diff --git a/abi.c b/abi.c
new file mode 100644
index 0000000..9c83497
--- /dev/null
+++ b/abi.c
@@ -0,0 +1,25 @@
#include "all.h"

/* abi0 pass for targets that
 * treat char/short/... as words
 * with arbitrary high bits: every
 * sub-word par/arg/ret variant is
 * rewritten to Opar/Oarg/Jretw
 */
void
elimsb(Fn *fn)
{
	Blk *blk;
	Ins *ins, *end;

	for (blk=fn->start; blk; blk=blk->link) {
		end = &blk->ins[blk->nins];
		for (ins=blk->ins; ins<end; ins++) {
			if (isargbh(ins->op))
				ins->op = Oarg;
			if (isparbh(ins->op))
				ins->op = Opar;
		}
		if (isretbh(blk->jmp.type))
			blk->jmp.type = Jretw;
	}
}
diff --git a/all.h b/all.h
index d7b75b5..04050d4 100644
--- a/all.h
+++ b/all.h
@@ -52,7 +52,8 @@ struct Target {
	bits (*retregs)(Ref, int[2]);
	bits (*argregs)(Ref, int[2]);
	int (*memargs)(int);
	void (*abi)(Fn *);
	void (*abi0)(Fn *);
	void (*abi1)(Fn *);
	void (*isel)(Fn *);
	void (*emitfn)(Fn *, FILE *);
	void (*emitfin)(FILE *);
@@ -183,6 +184,9 @@ enum {
#define ispar(o) INRANGE(o, Opar, Opare)
#define isarg(o) INRANGE(o, Oarg, Oargv)
#define isret(j) INRANGE(j, Jretw, Jret0)
#define isparbh(o) INRANGE(o, Oparsb, Oparuh)
#define isargbh(o) INRANGE(o, Oargsb, Oarguh)
#define isretbh(j) INRANGE(j, Jretsb, Jretuh)

enum {
	Kx = -1, /* "top" class (see usecheck() and clsmerge()) */
@@ -478,6 +482,9 @@ void printfn(Fn *, FILE *);
void printref(Ref, Fn *, FILE *);
void err(char *, ...) __attribute__((noreturn));

/* abi.c */
void elimsb(Fn *);

/* cfg.c */
Blk *blknew(void);
void edgedel(Blk *, Blk **);
diff --git a/amd64/targ.c b/amd64/targ.c
index e58ba2f..74fba4d 100644
--- a/amd64/targ.c
+++ b/amd64/targ.c
@@ -24,7 +24,8 @@ amd64_memargs(int op)
	.retregs = amd64_sysv_retregs, \
	.argregs = amd64_sysv_argregs, \
	.memargs = amd64_memargs, \
	.abi = amd64_sysv_abi, \
	.abi0 = elimsb, \
	.abi1 = amd64_sysv_abi, \
	.isel = amd64_isel, \

Target T_amd64_sysv = {
diff --git a/arm64/targ.c b/arm64/targ.c
index ddaee2f..6079236 100644
--- a/arm64/targ.c
+++ b/arm64/targ.c
@@ -38,7 +38,8 @@ Target T_arm64 = {
	.retregs = arm64_retregs,
	.argregs = arm64_argregs,
	.memargs = arm64_memargs,
	.abi = arm64_abi,
	.abi0 = elimsb,
	.abi1 = arm64_abi,
	.isel = arm64_isel,
	.emitfn = arm64_emitfn,
	.emitfin = elf_emitfin,
diff --git a/main.c b/main.c
index e82b062..253d0c5 100644
--- a/main.c
+++ b/main.c
@@ -56,6 +56,7 @@ func(Fn *fn)
		fprintf(stderr, "\n> After parsing:\n");
		printfn(fn, stderr);
	}
	T.abi0(fn);
	fillrpo(fn);
	fillpreds(fn);
	filluse(fn);
@@ -71,7 +72,7 @@ func(Fn *fn)
	copy(fn);
	filluse(fn);
	fold(fn);
	T.abi(fn);
	T.abi1(fn);
	fillpreds(fn);
	filluse(fn);
	T.isel(fn);
diff --git a/rv64/targ.c b/rv64/targ.c
index 70701db..c0e5e18 100644
--- a/rv64/targ.c
+++ b/rv64/targ.c
@@ -44,7 +44,8 @@ Target T_rv64 = {
	.retregs = rv64_retregs,
	.argregs = rv64_argregs,
	.memargs = rv64_memargs,
	.abi = rv64_abi,
	.abi0 = elimsb,
	.abi1 = rv64_abi,
	.isel = rv64_isel,
	.emitfn = rv64_emitfn,
	.emitfin = elf_emitfin,
-- 
2.37.2

[PATCH 3/4] refine width of parsb/ub/sh/uh ops Export this patch

---
 ssa.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ssa.c b/ssa.c
index 2de02d1..126113d 100644
--- a/ssa.c
+++ b/ssa.c
@@ -77,6 +77,8 @@ filluse(Fn *fn)
			if (!req(i->to, R)) {
				assert(rtype(i->to) == RTmp);
				w = WFull;
				if (isparbh(i->op))
					w = Wsb + (i->op - Oparsb);
				if (isload(i->op) && i->op != Oload)
					w = Wsb + (i->op - Oloadsb);
				if (isext(i->op))
-- 
2.37.2

[PATCH 4/4] new arm64_apple target Export this patch

Should make qbe work on apple
arm-based hardware.
---
 Makefile      |   9 +-
 arm64/abi.c   | 233 ++++++++++++++++++++++++++++++++++++++++++--------
 arm64/all.h   |   3 +
 arm64/emit.c  |  58 ++++++++-----
 arm64/targ.c  |  38 +++++---
 main.c        |   2 +
 test/dark.ssa |   2 +-
 tools/test.sh |   2 +-
 8 files changed, 274 insertions(+), 73 deletions(-)

diff --git a/Makefile b/Makefile
index 64878c7..674f850 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,14 @@ main.o: config.h
config.h:
	@case `uname` in                               \
	*Darwin*)                                      \
		echo "#define Deftgt T_amd64_apple";   \
		case `uname -m` in                     \
		*arm64*)                               \
			echo "#define Deftgt T_arm64_apple";\
			;;                             \
		*)                                     \
			echo "#define Deftgt T_amd64_apple";\
			;;                             \
		esac                                   \
		;;                                     \
	*)                                             \
		case `uname -m` in                     \
diff --git a/arm64/abi.c b/arm64/abi.c
index b2b5973..dfb95e3 100644
--- a/arm64/abi.c
+++ b/arm64/abi.c
@@ -1,5 +1,6 @@
#include "all.h"

typedef struct Abi Abi;
typedef struct Class Class;
typedef struct Insl Insl;
typedef struct Params Params;
@@ -9,6 +10,12 @@ enum {
	Cptr = 2, /* replaced by a pointer */
};

/* target-specific pieces handed to abi();
 * arm64_abi() and apple_abi() each build one
 */
struct Abi {
	void (*vastart)(Fn *, Params, Ref); /* called by abi() for Ovastart */
	void (*vaarg)(Fn *, Blk *, Ins *); /* called by abi() for Ovaarg */
	int apple; /* non-zero for the apple variant; forwarded to selpar() and selcall() */
};

struct Class {
	char class;
	char ishfa;
@@ -17,6 +24,7 @@ struct Class {
		uchar size;
	} hfa;
	uint size;
	uint align;
	Typ *t;
	uchar nreg;
	uchar ngp;
@@ -33,11 +41,15 @@ struct Insl {
struct Params {
	uint ngp;
	uint nfp;
	uint nstk;
	uint stk;
};

static int gpreg[12] = {R0, R1, R2, R3, R4, R5, R6, R7};
static int fpreg[12] = {V0, V1, V2, V3, V4, V5, V6, V7};
static int store[] = {
	[Kw] = Ostorew, [Kl] = Ostorel,
	[Ks] = Ostores, [Kd] = Ostored
};

/* layout of call's second argument (RCall)
 *
@@ -92,9 +104,10 @@ typclass(Class *c, Typ *t, int *gp, int *fp)
	c->class = 0;
	c->ngp = 0;
	c->nfp = 0;
	c->align = 8;

	if (t->align > 4)
		err("alignments larger than 16 are not supported");
	if (t->align > 3)
		err("alignments larger than 8 are not supported");

	if (t->isdark || sz > 16 || sz == 0) {
		/* large structs are replaced by a
@@ -130,10 +143,6 @@ typclass(Class *c, Typ *t, int *gp, int *fp)
static void
sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
{
	static int st[] = {
		[Kw] = Ostorew, [Kl] = Ostorel,
		[Ks] = Ostores, [Kd] = Ostored
	};
	uint n;
	uint64_t off;
	Ref r;
@@ -143,7 +152,7 @@ sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
	for (n=0; n<nreg; n++) {
		tmp[n] = newtmp("abi", cls[n], fn);
		r = newtmp("abi", Kl, fn);
		emit(st[cls[n]], 0, R, tmp[n], r);
		emit(store[cls[n]], 0, R, tmp[n], r);
		emit(Oadd, Kl, r, mem, getcon(off, fn));
		off += KWIDE(cls[n]) ? 8 : 4;
	}
@@ -206,12 +215,13 @@ selret(Blk *b, Fn *fn)
}

static int
argsclass(Ins *i0, Ins *i1, Class *carg)
argsclass(Ins *i0, Ins *i1, Class *carg, int apple)
{
	int envc, ngp, nfp, *gp, *fp;
	int va, envc, ngp, nfp, *gp, *fp;
	Class *c;
	Ins *i;

	va = 0;
	envc = 0;
	gp = gpreg;
	fp = fpreg;
@@ -219,10 +229,32 @@ argsclass(Ins *i0, Ins *i1, Class *carg)
	nfp = 8;
	for (i=i0, c=carg; i<i1; i++, c++)
		switch (i->op) {
		case Oargsb:
		case Oargub:
		case Oparsb:
		case Oparub:
			c->size = 1;
			goto Scalar;
		case Oargsh:
		case Oarguh:
		case Oparsh:
		case Oparuh:
			c->size = 2;
			goto Scalar;
		case Opar:
		case Oarg:
			*c->cls = i->cls;
			c->size = 8;
			if (apple && !KWIDE(i->cls))
				c->size = 4;
		Scalar:
			c->align = c->size;
			*c->cls = i->cls;
			if (va) {
				c->class |= Cstk;
				c->size = 8;
				c->align = 8;
				break;
			}
			if (KBASE(i->cls) == 0 && ngp > 0) {
				ngp--;
				*c->reg = *gp++;
@@ -258,6 +290,7 @@ argsclass(Ins *i0, Ins *i1, Class *carg)
			envc = 1;
			break;
		case Oargv:
			va = apple != 0;
			break;
		default:
			die("unreachable");
@@ -327,18 +360,23 @@ stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
	*ilp = il;
}

/* round x up to a multiple of al;
 * NOTE(review): assumes al is a
 * power of two, the mask is only
 * meaningful in that case
 */
static uint
align(uint x, uint al)
{
	uint mask;

	mask = al - 1;
	return (x + mask) & ~mask;
}

static void
selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp, int apple)
{
	Ins *i;
	Class *ca, *c, cr;
	int cty;
	uint n;
	uint64_t stk, off;
	int op, cty;
	uint n, stk, off;;
	Ref r, rstk, tmp[4];

	ca = alloc((i1-i0) * sizeof ca[0]);
	cty = argsclass(i0, i1, ca);
	cty = argsclass(i0, i1, ca, apple);

	stk = 0;
	for (i=i0, c=ca; i<i1; i++, c++) {
@@ -347,10 +385,12 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
			stkblob(i->arg[0], c, fn, ilp);
			i->op = Oarg;
		}
		if (c->class & Cstk)
		if (c->class & Cstk) {
			stk = align(stk, c->align);
			stk += c->size;
		}
	}
	stk += stk & 15;
	stk = align(stk, 16);
	rstk = getcon(stk, fn);
	if (stk)
		emit(Oadd, Kl, TMP(SP), TMP(SP), rstk);
@@ -403,9 +443,16 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
	for (i=i0, c=ca; i<i1; i++, c++) {
		if ((c->class & Cstk) == 0)
			continue;
		if (i->op == Oarg) {
		off = align(off, c->align);
		if (i->op == Oarg || isargbh(i->op)) {
			r = newtmp("abi", Kl, fn);
			emit(Ostorel, 0, R, i->arg[0], r);
			switch (c->size) {
			case 1: op = Ostoreb; break;
			case 2: op = Ostoreh; break;
			case 4:
			case 8: op = store[*c->cls]; break;
			}
			emit(op, 0, R, i->arg[0], r);
			emit(Oadd, Kl, r, TMP(SP), getcon(off, fn));
		}
		if (i->op == Oargc)
@@ -421,18 +468,19 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
}

static Params
selpar(Fn *fn, Ins *i0, Ins *i1)
selpar(Fn *fn, Ins *i0, Ins *i1, int apple)
{
	Class *ca, *c, cr;
	Insl *il;
	Ins *i;
	int n, s, cty;
	int op, n, cty;
	uint off;
	Ref r, tmp[16], *t;

	ca = alloc((i1-i0) * sizeof ca[0]);
	curi = &insb[NIns];

	cty = argsclass(i0, i1, ca);
	cty = argsclass(i0, i1, ca, apple);
	fn->reg = arm64_argregs(CALL(cty), 0);

	il = 0;
@@ -457,26 +505,33 @@ selpar(Fn *fn, Ins *i0, Ins *i1)
	}

	t = tmp;
	s = 2;
	off = 0;
	for (i=i0, c=ca; i<i1; i++, c++)
		if (i->op == Oparc && !(c->class & Cptr)) {
			if (c->class & Cstk) {
				fn->tmp[i->to.val].slot = -s;
				s += c->size / 8;
				off = align(off, c->align);
				fn->tmp[i->to.val].slot = -(off+2);
				off += c->size;
			} else
				for (n=0; n<c->nreg; n++) {
					r = TMP(c->reg[n]);
					emit(Ocopy, c->cls[n], *t++, r, R);
				}
		} else if (c->class & Cstk) {
			emit(Oload, *c->cls, i->to, SLOT(-s), R);
			s++;
			/* todo, check Cptr && Oparc && Cstk */
			off = align(off, c->align);
			if (isparbh(i->op))
				op = Oloadsb + (i->op - Oparsb);
			else
				op = Oload;
			emit(op, *c->cls, i->to, SLOT(-(off+2)), R);
			off += c->size;
		} else {
			emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R);
		}

	return (Params){
		.nstk = s - 2,
		.stk = align(off, 8),
		.ngp = (cty >> 5) & 15,
		.nfp = (cty >> 9) & 15
	};
@@ -514,7 +569,24 @@ chpred(Blk *b, Blk *bp, Blk *bp1)
}

/* vaarg lowering for the apple abi (installed
 * as Abi.vaarg by apple_abi()).
 * The emitted sequence reads the pointer stored
 * at ap, loads a value of class i->cls through
 * it into i->to, and stores the pointer advanced
 * by 8 back to ap.
 * NOTE(review): the emit() calls are listed in
 * reverse of execution order, assuming emit()
 * fills the instruction buffer backwards as the
 * other passes here rely on -- confirm
 */
static void
selvaarg(Fn *fn, Blk *b, Ins *i)
apple_selvaarg(Fn *fn, Blk *b, Ins *i)
{
	Ref ap, stk, stk8, c8;

	(void)b; /* unused; present to match the Abi.vaarg signature */
	c8 = getcon(8, fn);
	ap = i->arg[0];
	stk8 = newtmp("abi", Kl, fn);
	stk = newtmp("abi", Kl, fn);

	emit(Ostorel, 0, R, stk8, ap); /* *ap = stk8 */
	emit(Oadd, Kl, stk8, stk, c8); /* stk8 = stk + 8 */
	emit(Oload, i->cls, i->to, stk, R); /* i->to = *stk */
	emit(Oload, Kl, stk, ap, R); /* stk = *ap */
}

static void
arm64_selvaarg(Fn *fn, Blk *b, Ins *i)
{
	Ref loc, lreg, lstk, nr, r0, r1, c8, c16, c24, c28, ap;
	Blk *b0, *bstk, *breg;
@@ -607,7 +679,21 @@ selvaarg(Fn *fn, Blk *b, Ins *i)
}

/* vastart lowering for the apple abi (installed
 * as Abi.vastart by apple_abi()).
 * Takes the address of SLOT(-1), adds p.stk (the
 * value selpar() returns in Params.stk) and
 * stores the result at ap.
 * NOTE(review): the emit() calls are listed in
 * reverse of execution order, assuming emit()
 * fills the instruction buffer backwards -- confirm
 */
static void
selvastart(Fn *fn, Params p, Ref ap)
apple_selvastart(Fn *fn, Params p, Ref ap)
{
	Ref off, stk, arg;

	off = getcon(p.stk, fn);
	stk = newtmp("abi", Kl, fn);
	arg = newtmp("abi", Kl, fn);

	emit(Ostorel, 0, R, arg, ap); /* *ap = arg */
	emit(Oadd, Kl, arg, stk, off); /* arg = stk + p.stk */
	emit(Oaddr, Kl, stk, SLOT(-1), R); /* stk = address of SLOT(-1) */
}

static void
arm64_selvastart(Fn *fn, Params p, Ref ap)
{
	Ref r0, r1, rsave;

@@ -615,7 +701,7 @@ selvastart(Fn *fn, Params p, Ref ap)

	r0 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r0, ap);
	emit(Oadd, Kl, r0, rsave, getcon(p.nstk*8 + 192, fn));
	emit(Oadd, Kl, r0, rsave, getcon(p.stk + 192, fn));

	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
@@ -639,8 +725,8 @@ selvastart(Fn *fn, Params p, Ref ap)
	emit(Oadd, Kl, r0, ap, getcon(28, fn));
}

void
arm64_abi(Fn *fn)
static void
abi(Fn *fn, Abi abi)
{
	Blk *b;
	Ins *i, *i0, *ip;
@@ -655,7 +741,7 @@ arm64_abi(Fn *fn)
	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
		if (!ispar(i->op))
			break;
	p = selpar(fn, b->ins, i);
	p = selpar(fn, b->ins, i, abi.apple);
	n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
	i0 = alloc(n * sizeof(Ins));
	ip = icpy(ip = i0, curi, &insb[NIns] - curi);
@@ -682,14 +768,14 @@ arm64_abi(Fn *fn)
				for (i0=i; i0>b->ins; i0--)
					if (!isarg((i0-1)->op))
						break;
				selcall(fn, i0, i, &il);
				selcall(fn, i0, i, &il, abi.apple);
				i = i0;
				break;
			case Ovastart:
				selvastart(fn, p, i->arg[0]);
				abi.vastart(fn, p, i->arg[0]);
				break;
			case Ovaarg:
				selvaarg(fn, b, i);
				abi.vaarg(fn, b, i);
				break;
			case Oarg:
			case Oargc:
@@ -707,3 +793,74 @@ arm64_abi(Fn *fn)
		printfn(fn, stderr);
	}
}

/* abi1 hook of the arm64 target:
 * runs abi() with the arm64 vararg
 * helpers and the apple flag cleared
 */
void
arm64_abi(Fn *fn)
{
	Abi generic = {
		.vastart = arm64_selvastart,
		.vaarg = arm64_selvaarg,
		.apple = 0,
	};

	abi(fn, generic);
}

/* abi1 hook of the arm64_apple target:
 * runs abi() with the apple vararg
 * helpers and the apple flag set
 */
void
apple_abi(Fn *fn)
{
	Abi darwin = {
		.vastart = apple_selvastart,
		.vaarg = apple_selvaarg,
		.apple = 1,
	};

	abi(fn, darwin);
}

/* abi0 for apple target; introduces
 * the necessary sign or zero extensions
 * for sub-word argument passing and
 * returns
 */
void
apple_extsb(Fn *fn)
{
	Blk *b;
	Ins *i0, *i1, *i;
	int j, op;
	Ref r;

	for (b=fn->start; b; b=b->link) {
		/* the block is rebuilt from its
		 * last instruction to its first
		 */
		curi = &insb[NIns];
		j = b->jmp.type;
		if (isretbh(j)) {
			/* extend the returned value and
			 * turn the jump into a plain word
			 * return, the same end state that
			 * elimsb() leaves, so later passes
			 * can derive a class from the jump
			 * type
			 */
			r = newtmp("abi", Kw, fn);
			op = Oextsb + (j - Jretsb);
			emit(op, Kw, r, b->jmp.arg, R);
			b->jmp.arg = r;
			b->jmp.type = Jretw;
		}
		for (i=&b->ins[b->nins]; i>b->ins;) {
			emiti(*--i);
			if (i->op != Ocall)
				continue;
			/* [i0, i1) is the run of argument
			 * instructions preceding the call
			 */
			for (i0=i1=i; i0>b->ins; i0--)
				if (!isarg((i0-1)->op))
					break;
			/* copy the arguments; a sub-word
			 * argument is made to read a fresh
			 * temporary, remembered in i->to
			 */
			for (i=i1; i>i0;) {
				emiti(*--i);
				if (isargbh(i->op)) {
					i->to = newtmp("abi", Kl, fn);
					curi->arg[0] = i->to;
				}
			}
			/* emit the extensions that define
			 * those temporaries; the sub-word
			 * arg ops themselves are kept
			 */
			for (i=i1; i>i0;)
				if (isargbh((--i)->op)) {
					op = Oextsb + (i->op - Oargsb);
					emit(op, Kw, i->to, i->arg[0], R);
				}
		}
		b->nins = &insb[NIns] - curi;
		idup(&b->ins, curi, b->nins);
	}

	if (debug['A']) {
		fprintf(stderr, "\n> After apple_extsb:\n");
		printfn(fn, stderr);
	}
}
diff --git a/arm64/all.h b/arm64/all.h
index ff2b3ff..6b7f43e 100644
--- a/arm64/all.h
+++ b/arm64/all.h
@@ -28,6 +28,8 @@ extern int arm64_rclob[];
bits arm64_retregs(Ref, int[2]);
bits arm64_argregs(Ref, int[2]);
void arm64_abi(Fn *);
void apple_extsb(Fn *);
void apple_abi(Fn *);

/* isel.c */
int arm64_logimm(uint64_t, int);
@@ -35,3 +37,4 @@ void arm64_isel(Fn *);

/* emit.c */
void arm64_emitfn(Fn *, FILE *);
void apple_emitfn(Fn *, FILE *);
diff --git a/arm64/emit.c b/arm64/emit.c
index 55f5ce6..18c19d2 100644
--- a/arm64/emit.c
+++ b/arm64/emit.c
@@ -7,6 +7,7 @@ struct E {
	Fn *fn;
	uint64_t frame;
	uint padding;
	int apple;
};

#define CMP(X) \
@@ -144,10 +145,10 @@ slot(int s, E *e)
	if (s == -1)
		return 16 + e->frame;
	if (s < 0) {
		if (e->fn->vararg)
			return 16 + e->frame + 192 - (s+2)*8;
		if (e->fn->vararg && !e->apple)
			return 16 + e->frame + 192 - (s+2);
		else
			return 16 + e->frame - (s+2)*8;
			return 16 + e->frame - (s+2);
	} else
		return 16 + e->padding + 4 * s;
}
@@ -243,8 +244,16 @@ emitf(char *s, Ins *i, E *e)
}

static void
loadcon(Con *c, int r, int k, FILE *f)
loadcon(Con *c, int r, int k, E *e)
{
	static char *ldsym[][2] = {
		/* arm64 */
		[0][0] = "\tadrp\t%s, %s%s%s\n",
		[0][1] = "\tadd\t%s, %s, #:lo12:%s%s%s\n",
		/* apple */
		[1][0] = "\tadrp\t%s, %s%s@page%s\n",
		[1][1] = "\tadd\t%s, %s, %s%s@pageoff%s\n",
	};
	char *rn, *l, *p, off[32];
	int64_t n;
	int w, sh;
@@ -261,24 +270,22 @@ loadcon(Con *c, int r, int k, FILE *f)
			off[0] = 0;
		l = str(c->label);
		p = c->local ? T.asloc : l[0] == '"' ? "" : T.assym;
		fprintf(f, "\tadrp\t%s, %s%s%s\n",
			rn, p, l, off);
		fprintf(f, "\tadd\t%s, %s, #:lo12:%s%s%s\n",
			rn, rn, p, l, off);
		fprintf(e->f, ldsym[e->apple][0], rn, p, l, off);
		fprintf(e->f, ldsym[e->apple][1], rn, rn, p, l, off);
		return;
	}
	assert(c->type == CBits);
	if (!w)
		n = (int32_t)n;
	if ((n | 0xffff) == -1 || arm64_logimm(n, k)) {
		fprintf(f, "\tmov\t%s, #%"PRIi64"\n", rn, n);
		fprintf(e->f, "\tmov\t%s, #%"PRIi64"\n", rn, n);
	} else {
		fprintf(f, "\tmov\t%s, #%d\n",
		fprintf(e->f, "\tmov\t%s, #%d\n",
			rn, (int)(n & 0xffff));
		for (sh=16; n>>=16; sh+=16) {
			if ((!w && sh == 32) || sh == 64)
				break;
			fprintf(f, "\tmovk\t%s, #0x%x, lsl #%d\n",
			fprintf(e->f, "\tmovk\t%s, #0x%x, lsl #%d\n",
				rn, (uint)(n & 0xffff), sh);
		}
	}
@@ -358,7 +365,7 @@ emitins(Ins *i, E *e)
		switch (rtype(i->arg[0])) {
		case RCon:
			c = &e->fn->con[i->arg[0].val];
			loadcon(c, i->to.val, i->cls, e->f);
			loadcon(c, i->to.val, i->cls, e);
			break;
		case RSlot:
			i->op = Oload;
@@ -450,8 +457,8 @@ framelayout(E *e)

*/

void
arm64_emitfn(Fn *fn, FILE *out)
static void
emitfn(E *e)
{
	static char *ctoa[] = {
	#define X(c, s) [c] = s,
@@ -463,13 +470,11 @@ arm64_emitfn(Fn *fn, FILE *out)
	uint64_t o;
	Blk *b, *t;
	Ins *i;
	E *e;

	emitlnk(fn->name, &fn->lnk, ".text", out);
	e = &(E){.f = out, .fn = fn};
	emitlnk(e->fn->name, &e->fn->lnk, ".text", e->f);
	framelayout(e);

	if (e->fn->vararg) {
	if (e->fn->vararg && !e->apple) {
		for (n=7; n>=0; n--)
			fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n);
		for (n=7; n>=0; n-=2)
@@ -531,7 +536,7 @@ arm64_emitfn(Fn *fn, FILE *out)
			if (e->fn->dynalloc)
				fputs("\tmov sp, x29\n", e->f);
			o = e->frame + 16;
			if (e->fn->vararg)
			if (e->fn->vararg && !e->apple)
				o += 192;
			if (o <= 504)
				fprintf(e->f,
@@ -589,5 +594,18 @@ arm64_emitfn(Fn *fn, FILE *out)
		}
	}
	id0 += e->fn->nblk;
	elf_emitfnfin(e->fn->name, e->f);
}

/* emitfn hook of the arm64 target:
 * runs the shared emitfn() with the
 * apple flag cleared, then calls
 * elf_emitfnfin()
 */
void
arm64_emitfn(Fn *fn, FILE *out)
{
	E e = {.f = out, .fn = fn, .apple = 0};

	emitfn(&e);
	elf_emitfnfin(fn->name, out);
}

/* emitfn hook of the arm64_apple target:
 * sets the function's link alignment
 * to 4, then runs the shared emitfn()
 * with the apple flag set
 */
void
apple_emitfn(Fn *fn, FILE *out)
{
	E e = {.f = out, .fn = fn, .apple = 1};

	fn->lnk.align = 4;
	emitfn(&e);
}
diff --git a/arm64/targ.c b/arm64/targ.c
index 6079236..88c40f1 100644
--- a/arm64/targ.c
+++ b/arm64/targ.c
@@ -25,25 +25,39 @@ arm64_memargs(int op)
	return 0;
}

#define ARM64_COMMON \
	.gpr0 = R0, \
	.ngpr = NGPR, \
	.fpr0 = V0, \
	.nfpr = NFPR, \
	.rglob = RGLOB, \
	.nrglob = 3, \
	.rsave = arm64_rsave, \
	.nrsave = {NGPS, NFPS}, \
	.retregs = arm64_retregs, \
	.argregs = arm64_argregs, \
	.memargs = arm64_memargs, \
	.isel = arm64_isel, \

Target T_arm64 = {
	.name = "arm64",
	.gpr0 = R0,
	.ngpr = NGPR,
	.fpr0 = V0,
	.nfpr = NFPR,
	.rglob = RGLOB,
	.nrglob = 3,
	.rsave = arm64_rsave,
	.nrsave = {NGPS, NFPS},
	.retregs = arm64_retregs,
	.argregs = arm64_argregs,
	.memargs = arm64_memargs,
	.abi0 = elimsb,
	.abi1 = arm64_abi,
	.isel = arm64_isel,
	.emitfn = arm64_emitfn,
	.emitfin = elf_emitfin,
	.asloc = ".L",
	ARM64_COMMON
};

Target T_arm64_apple = {
	.name = "arm64_apple",
	.abi0 = apple_extsb,
	.abi1 = apple_abi,
	.emitfn = apple_emitfn,
	.emitfin = macho_emitfin,
	.asloc = "L",
	.assym = "_",
	ARM64_COMMON
};

MAKESURE(globals_are_not_arguments,
diff --git a/main.c b/main.c
index 253d0c5..c028503 100644
--- a/main.c
+++ b/main.c
@@ -21,12 +21,14 @@ char debug['Z'+1] = {
extern Target T_amd64_sysv;
extern Target T_amd64_apple;
extern Target T_arm64;
extern Target T_arm64_apple;
extern Target T_rv64;

static Target *tlist[] = {
	&T_amd64_sysv,
	&T_amd64_apple,
	&T_arm64,
	&T_arm64_apple,
	&T_rv64,
	0
};
diff --git a/test/dark.ssa b/test/dark.ssa
index de58e4c..ed9ec21 100644
--- a/test/dark.ssa
+++ b/test/dark.ssa
@@ -1,4 +1,4 @@
# skip arm64 rv64
# skip arm64 arm64_apple rv64
# a hack example,
# we use a dark type to get
# a pointer to the stack.
diff --git a/tools/test.sh b/tools/test.sh
index 4653b83..9c0f9ee 100755
--- a/tools/test.sh
+++ b/tools/test.sh
@@ -70,7 +70,7 @@ init() {
	"")
		case `uname` in
		*Darwin*)
			cc="cc -Wl,-no_pie"
			cc="cc"
			;;
		*OpenBSD*)
			cc="cc -nopie"
-- 
2.37.2