~mpu/qbe

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
5 2

A new arm64_apple target for m1 computers

Details
Message ID
<20220901214250.1701511-1-quentin@c9x.me>
DKIM signature
missing
Download raw message
Dear qbe hackers,

The following patches add a new arm64_apple target
that can be used on the latest Apple hardware. The
differences with the regular arm64 backend are, first,
the assembly syntax, and second, a subtly different
and loosely-documented ABI.

I would like to merge these patches in master but,
before doing so, I wanted to let you take a look and
share thoughts.

Here are a couple of interesting things that I would
like to highlight.

While all existing ABIs pass char/shorts in full
words with insignificant high bits, Apple is fussy
and decided that, when in registers, they should be
properly sign/zero extended, and that they would take
only 1/2 bytes on the stack, so as to save space. That
mandates new "ABI types", so I added sb, ub,
sh, uh. On all pre-existing ABIs they are equivalent
to w, but Apple uses the sign & width information.
As a frontend writer, you'll now have to use them
if you want the code you generate to work on Apple
hardware. I'll probably amend the IL spec to require
their use when interacting with C functions that
take/return sub-word integers.

I pondered a bit about the invariants of these
sb/ub/... abi types. My conclusion is the following:
To avoid incurring overhead on targets that don't
care about high bits, we must not make guarantees
about high bits and require frontends to sign/zero
extend function arguments and returns explicitly,
if needed. On arm64_apple, the extensions will be
eliminated because I taught qbe that parsb/... come
with high bits set. But on any other target, the
extensions will stay. We get the best code in both
cases.

To accommodate the new abi types, I introduced
a target-specific 'abi0' pass. On existing targets
it just eliminates sub-word variants and replaces
them with word-sized instructions. On arm64_apple,
it adds the required extensions.

I wanted to introduce abi0 for a while, because
it will let us perform transformations such as:

    type :pair = { w, w }
    ...
    call $f(:pair %foo)

  -->

    %abi.0 =w load %foo
    %foo.4 =l add %foo, 4
    %abi.1 =w load %foo.4
    call $f(w %abi.0, w %abi.1)

(on targets that'd pass the struct in two arguments)

Performing this transformation early on is going to
enable the memory optimization to improve code around
function calls, especially when small aggregates
are involved.

To wrap it up, I hope I demonstrated that, even if
you don't care about the Apple support, the work that
was done sets us up for new horizontal gains.


Cheers.

[PATCH 1/4] parse sb,ub,sh,uh abi types

Details
Message ID
<20220901214250.1701511-2-quentin@c9x.me>
In-Reply-To
<20220901214250.1701511-1-quentin@c9x.me> (view parent)
DKIM signature
missing
Download raw message
Patch: +74 -31
---
 all.h        |  7 +++--
 ops.h        |  8 +++++
 parse.c      | 83 ++++++++++++++++++++++++++++++++++++----------------
 tools/lexh.c |  7 +++--
 4 files changed, 74 insertions(+), 31 deletions(-)

diff --git a/all.h b/all.h
index 1ecea8e..d7b75b5 100644
--- a/all.h
+++ b/all.h
@@ -144,8 +144,9 @@ enum O {
enum J {
	Jxxx,
#define JMPS(X)                                 \
	X(ret0)   X(retw)   X(retl)   X(rets)   \
	X(retd)   X(retc)   X(jmp)    X(jnz)    \
	X(retw)   X(retl)   X(rets)   X(retd)   \
	X(retsb)  X(retub)  X(retsh)  X(retuh)  \
	X(retc)   X(ret0)   X(jmp)    X(jnz)    \
	X(jfieq)  X(jfine)  X(jfisge) X(jfisgt) \
	X(jfisle) X(jfislt) X(jfiuge) X(jfiugt) \
	X(jfiule) X(jfiult) X(jffeq)  X(jffge)  \
@@ -181,7 +182,7 @@ enum {
#define isext(o) INRANGE(o, Oextsb, Oextuw)
#define ispar(o) INRANGE(o, Opar, Opare)
#define isarg(o) INRANGE(o, Oarg, Oargv)
#define isret(j) INRANGE(j, Jret0, Jretc)
#define isret(j) INRANGE(j, Jretw, Jret0)

enum {
	Kx = -1, /* "top" class (see usecheck() and clsmerge()) */
diff --git a/ops.h b/ops.h
index 285bc5c..3d65081 100644
--- a/ops.h
+++ b/ops.h
@@ -144,9 +144,17 @@ O(rnez,    T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0) V(0)

/* Arguments, Parameters, and Calls */
O(par,     T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
O(parsb,   T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
O(parub,   T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
O(parsh,   T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
O(paruh,   T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
O(parc,    T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
O(pare,    T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
O(arg,     T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0) V(0)
O(argsb,   T(w,e,e,e, x,x,x,x), 0) X(0, 0, 0) V(0)
O(argub,   T(w,e,e,e, x,x,x,x), 0) X(0, 0, 0) V(0)
O(argsh,   T(w,e,e,e, x,x,x,x), 0) X(0, 0, 0) V(0)
O(arguh,   T(w,e,e,e, x,x,x,x), 0) X(0, 0, 0) V(0)
O(argc,    T(e,x,e,e, e,l,e,e), 0) X(0, 0, 0) V(0)
O(arge,    T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
O(argv,    T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
diff --git a/parse.c b/parse.c
index 1912c8b..13843c6 100644
--- a/parse.c
+++ b/parse.c
@@ -3,8 +3,15 @@
#include <stdarg.h>

enum {
	Ke = -2, /* Erroneous mode */
	Km = Kl, /* Memory pointer */
	Ksb = 4, /* matches Oarg/Opar/Jret */
	Kub,
	Ksh,
	Kuh,
	Kc,
	K0,

	Ke = -2, /* erroneous mode */
	Km = Kl, /* memory pointer */
};

Op optab[NOp] = {
@@ -45,7 +52,11 @@ enum {
	Talign,
	Tl,
	Tw,
	Tsh,
	Tuh,
	Th,
	Tsb,
	Tub,
	Tb,
	Td,
	Ts,
@@ -93,12 +104,16 @@ static char *kwmap[Ntok] = {
	[Tdata] = "data",
	[Tsection] = "section",
	[Talign] = "align",
	[Tl] = "l",
	[Tw] = "w",
	[Th] = "h",
	[Tsb] = "sb",
	[Tub] = "ub",
	[Tsh] = "sh",
	[Tuh] = "uh",
	[Tb] = "b",
	[Td] = "d",
	[Th] = "h",
	[Tw] = "w",
	[Tl] = "l",
	[Ts] = "s",
	[Td] = "d",
	[Tz] = "z",
	[Tdots] = "...",
};
@@ -109,7 +124,7 @@ enum {
	TMask = 16383, /* for temps hash */
	BMask = 8191, /* for blocks hash */

	K = 5041217, /* found using tools/lexh.c */
	K = 9583425, /* found using tools/lexh.c */
	M = 23,
};

@@ -427,7 +442,15 @@ parsecls(int *tyn)
		err("invalid class specifier");
	case Ttyp:
		*tyn = findtyp(ntyp);
		return 4;
		return Kc;
	case Tsb:
		return Ksb;
	case Tub:
		return Kub;
	case Tsh:
		return Ksh;
	case Tuh:
		return Kuh;
	case Tw:
		return Kw;
	case Tl:
@@ -482,16 +505,21 @@ parserefl(int arg)
			err("invalid argument");
		if (!arg && rtype(r) != RTmp)
			err("invalid function parameter");
		if (k == 4)
		if (env)
			if (arg)
				*curi = (Ins){Oarge, k, R, {r}};
			else
				*curi = (Ins){Opare, k, r, {R}};
		else if (k == Kc)
			if (arg)
				*curi = (Ins){Oargc, Kl, R, {TYPE(ty), r}};
			else
				*curi = (Ins){Oparc, Kl, r, {TYPE(ty)}};
		else if (env)
		else if (k >= Ksb)
			if (arg)
				*curi = (Ins){Oarge, k, R, {r}};
				*curi = (Ins){Oargsb+(k-Ksb), Kw, R, {r}};
			else
				*curi = (Ins){Opare, k, r, {R}};
				*curi = (Ins){Oparsb+(k-Ksb), Kw, r, {R}};
		else
			if (arg)
				*curi = (Ins){Oarg, k, R, {r}};
@@ -578,14 +606,10 @@ parseline(PState ps)
		expect(Tnl);
		return PPhi;
	case Tret:
		curb->jmp.type = (int[]){
			Jretw, Jretl,
			Jrets, Jretd,
			Jretc, Jret0
		}[rcls];
		curb->jmp.type = Jretw + rcls;
		if (peek() == Tnl)
			curb->jmp.type = Jret0;
		else if (rcls < 5) {
		else if (rcls != K0) {
			r = parseref();
			if (req(r, R))
				err("invalid return value");
@@ -632,11 +656,13 @@ DoOp:
		parserefl(1);
		op = Ocall;
		expect(Tnl);
		if (k == 4) {
		if (k == Kc) {
			k = Kl;
			arg[1] = TYPE(ty);
		} else
			arg[1] = R;
		if (k >= Ksb)
			k = Kw;
		goto Ins;
	}
	if (op == Tloadw)
@@ -645,7 +671,7 @@ DoOp:
		op = Oload;
	if (op == Talloc1 || op == Talloc2)
		op = Oalloc;
	if (k == 4)
	if (k >= Ksb)
		err("size class must be w, l, s, or d");
	if (op >= NPubOp)
		err("invalid instruction");
@@ -774,10 +800,13 @@ typecheck(Fn *fn)
			}
		r = b->jmp.arg;
		if (isret(b->jmp.type)) {
			if (b->jmp.type == Jretc) {
				if (!usecheck(r, Kl, fn))
					goto JErr;
			} else if (!usecheck(r, b->jmp.type-Jretw, fn))
			if (b->jmp.type == Jretc)
				k = Kl;
			else if (b->jmp.type >= Jretsb)
				k = Kw;
			else
				k = b->jmp.type - Jretw;
			if (!usecheck(r, k, fn))
				goto JErr;
		}
		if (b->jmp.type == Jjnz && !usecheck(r, Kw, fn))
@@ -818,7 +847,7 @@ parsefn(Lnk *lnk)
	if (peek() != Tglo)
		rcls = parsecls(&curf->retty);
	else
		rcls = 5;
		rcls = K0;
	if (next() != Tglo)
		err("function name expected");
	strncpy(curf->name, tokval.str, NString-1);
@@ -1266,6 +1295,10 @@ printfn(Fn *fn, FILE *f)
		}
		switch (b->jmp.type) {
		case Jret0:
		case Jretsb:
		case Jretub:
		case Jretsh:
		case Jretuh:
		case Jretw:
		case Jretl:
		case Jrets:
diff --git a/tools/lexh.c b/tools/lexh.c
index 8d0af21..1aea3e0 100644
--- a/tools/lexh.c
+++ b/tools/lexh.c
@@ -27,8 +27,9 @@ char *tok[] = {

	"call", "phi", "jmp", "jnz", "ret", "export",
	"function", "type", "data", "section", "align",
	"l", "w", "h", "b", "d", "s", "z", "loadw", "loadl",
	"loads", "loadd", "alloc1", "alloc2",
	"l", "w", "sh", "uh", "h", "sb", "ub", "b",
	"d", "s", "z", "loadw", "loadl", "loads", "loadd",
	"alloc1", "alloc2",

};
enum {
@@ -69,7 +70,7 @@ main()
		th[i] = h;
	}

	for (i=0; 1<<i < Ntok; ++i);
	for (i=9; 1<<i < Ntok; ++i);
	M = 32 - i;

	for (;; --M) {
-- 
2.37.2

[PATCH 3/4] refine width of parsb/ub/sh/uh ops

Details
Message ID
<20220901214250.1701511-4-quentin@c9x.me>
In-Reply-To
<20220901214250.1701511-1-quentin@c9x.me> (view parent)
DKIM signature
missing
Download raw message
Patch: +2 -0
---
 ssa.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ssa.c b/ssa.c
index 2de02d1..126113d 100644
--- a/ssa.c
+++ b/ssa.c
@@ -77,6 +77,8 @@ filluse(Fn *fn)
			if (!req(i->to, R)) {
				assert(rtype(i->to) == RTmp);
				w = WFull;
				if (isparbh(i->op))
					w = Wsb + (i->op - Oparsb);
				if (isload(i->op) && i->op != Oload)
					w = Wsb + (i->op - Oloadsb);
				if (isext(i->op))
-- 
2.37.2

[PATCH 2/4] add new target-specific abi0 pass

Details
Message ID
<20220901214250.1701511-3-quentin@c9x.me>
In-Reply-To
<20220901214250.1701511-1-quentin@c9x.me> (view parent)
DKIM signature
missing
Download raw message
Patch: +43 -7
The general idea is to give abis a
chance to talk before we've done all
the optimizations. Currently, all
targets eliminate {par,arg,ret}{sb,ub,...}
during this pass. The forthcoming
arm64_apple will, however, insert
proper extensions during abi0.

Moving forward abis can, for example,
lower small-aggregates passing there
so that memory optimizations can
interact better with function calls.
---
 Makefile     |  4 ++--
 abi.c        | 25 +++++++++++++++++++++++++
 all.h        |  9 ++++++++-
 amd64/targ.c |  3 ++-
 arm64/targ.c |  3 ++-
 main.c       |  3 ++-
 rv64/targ.c  |  3 ++-
 7 files changed, 43 insertions(+), 7 deletions(-)
 create mode 100644 abi.c

diff --git a/Makefile b/Makefile
index 9c9b401..64878c7 100644
--- a/Makefile
+++ b/Makefile
@@ -4,8 +4,8 @@
PREFIX = /usr/local
BINDIR = $(PREFIX)/bin

COMMOBJ  = main.o util.o parse.o cfg.o mem.o ssa.o alias.o load.o copy.o \
           fold.o live.o spill.o rega.o emit.o
COMMOBJ  = main.o util.o parse.o abi.o cfg.o mem.o ssa.o alias.o load.o \
           copy.o fold.o live.o spill.o rega.o emit.o
AMD64OBJ = amd64/targ.o amd64/sysv.o amd64/isel.o amd64/emit.o
ARM64OBJ = arm64/targ.o arm64/abi.o arm64/isel.o arm64/emit.o
RV64OBJ  = rv64/targ.o rv64/abi.o rv64/isel.o rv64/emit.o
diff --git a/abi.c b/abi.c
new file mode 100644
index 0000000..9c83497
--- /dev/null
+++ b/abi.c
@@ -0,0 +1,25 @@
#include "all.h"

/* abi0 pass for targets that pass
 * char/short values as plain words
 * with unspecified high bits: every
 * sb/ub/sh/uh variant of arg, par,
 * and ret is rewritten to its
 * word-sized counterpart
 */
void
elimsb(Fn *fn)
{
	Blk *blk;
	Ins *ins, *end;

	for (blk=fn->start; blk; blk=blk->link) {
		end = &blk->ins[blk->nins];
		for (ins=blk->ins; ins<end; ins++) {
			/* the two op ranges are disjoint,
			 * so at most one branch applies
			 */
			if (isargbh(ins->op))
				ins->op = Oarg;
			else if (isparbh(ins->op))
				ins->op = Opar;
		}
		/* sub-word returns become word returns */
		if (isretbh(blk->jmp.type))
			blk->jmp.type = Jretw;
	}
}
diff --git a/all.h b/all.h
index d7b75b5..04050d4 100644
--- a/all.h
+++ b/all.h
@@ -52,7 +52,8 @@ struct Target {
	bits (*retregs)(Ref, int[2]);
	bits (*argregs)(Ref, int[2]);
	int (*memargs)(int);
	void (*abi)(Fn *);
	void (*abi0)(Fn *);
	void (*abi1)(Fn *);
	void (*isel)(Fn *);
	void (*emitfn)(Fn *, FILE *);
	void (*emitfin)(FILE *);
@@ -183,6 +184,9 @@ enum {
#define ispar(o) INRANGE(o, Opar, Opare)
#define isarg(o) INRANGE(o, Oarg, Oargv)
#define isret(j) INRANGE(j, Jretw, Jret0)
#define isparbh(o) INRANGE(o, Oparsb, Oparuh)
#define isargbh(o) INRANGE(o, Oargsb, Oarguh)
#define isretbh(j) INRANGE(j, Jretsb, Jretuh)

enum {
	Kx = -1, /* "top" class (see usecheck() and clsmerge()) */
@@ -478,6 +482,9 @@ void printfn(Fn *, FILE *);
void printref(Ref, Fn *, FILE *);
void err(char *, ...) __attribute__((noreturn));

/* abi.c */
void elimsb(Fn *);

/* cfg.c */
Blk *blknew(void);
void edgedel(Blk *, Blk **);
diff --git a/amd64/targ.c b/amd64/targ.c
index e58ba2f..74fba4d 100644
--- a/amd64/targ.c
+++ b/amd64/targ.c
@@ -24,7 +24,8 @@ amd64_memargs(int op)
	.retregs = amd64_sysv_retregs, \
	.argregs = amd64_sysv_argregs, \
	.memargs = amd64_memargs, \
	.abi = amd64_sysv_abi, \
	.abi0 = elimsb, \
	.abi1 = amd64_sysv_abi, \
	.isel = amd64_isel, \

Target T_amd64_sysv = {
diff --git a/arm64/targ.c b/arm64/targ.c
index ddaee2f..6079236 100644
--- a/arm64/targ.c
+++ b/arm64/targ.c
@@ -38,7 +38,8 @@ Target T_arm64 = {
	.retregs = arm64_retregs,
	.argregs = arm64_argregs,
	.memargs = arm64_memargs,
	.abi = arm64_abi,
	.abi0 = elimsb,
	.abi1 = arm64_abi,
	.isel = arm64_isel,
	.emitfn = arm64_emitfn,
	.emitfin = elf_emitfin,
diff --git a/main.c b/main.c
index e82b062..253d0c5 100644
--- a/main.c
+++ b/main.c
@@ -56,6 +56,7 @@ func(Fn *fn)
		fprintf(stderr, "\n> After parsing:\n");
		printfn(fn, stderr);
	}
	T.abi0(fn);
	fillrpo(fn);
	fillpreds(fn);
	filluse(fn);
@@ -71,7 +72,7 @@ func(Fn *fn)
	copy(fn);
	filluse(fn);
	fold(fn);
	T.abi(fn);
	T.abi1(fn);
	fillpreds(fn);
	filluse(fn);
	T.isel(fn);
diff --git a/rv64/targ.c b/rv64/targ.c
index 70701db..c0e5e18 100644
--- a/rv64/targ.c
+++ b/rv64/targ.c
@@ -44,7 +44,8 @@ Target T_rv64 = {
	.retregs = rv64_retregs,
	.argregs = rv64_argregs,
	.memargs = rv64_memargs,
	.abi = rv64_abi,
	.abi0 = elimsb,
	.abi1 = rv64_abi,
	.isel = rv64_isel,
	.emitfn = rv64_emitfn,
	.emitfin = elf_emitfin,
-- 
2.37.2

[PATCH 4/4] new arm64_apple target

Details
Message ID
<20220901214250.1701511-5-quentin@c9x.me>
In-Reply-To
<20220901214250.1701511-1-quentin@c9x.me> (view parent)
DKIM signature
missing
Download raw message
Patch: +274 -73
Should make qbe work on apple
arm-based hardware.
---
 Makefile      |   9 +-
 arm64/abi.c   | 233 ++++++++++++++++++++++++++++++++++++++++++--------
 arm64/all.h   |   3 +
 arm64/emit.c  |  58 ++++++++-----
 arm64/targ.c  |  38 +++++---
 main.c        |   2 +
 test/dark.ssa |   2 +-
 tools/test.sh |   2 +-
 8 files changed, 274 insertions(+), 73 deletions(-)

diff --git a/Makefile b/Makefile
index 64878c7..674f850 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,14 @@ main.o: config.h
config.h:
	@case `uname` in                               \
	*Darwin*)                                      \
		echo "#define Deftgt T_amd64_apple";   \
		case `uname -m` in                     \
		*arm64*)                               \
			echo "#define Deftgt T_arm64_apple";\
			;;                             \
		*)                                     \
			echo "#define Deftgt T_amd64_apple";\
			;;                             \
		esac                                   \
		;;                                     \
	*)                                             \
		case `uname -m` in                     \
diff --git a/arm64/abi.c b/arm64/abi.c
index b2b5973..dfb95e3 100644
--- a/arm64/abi.c
+++ b/arm64/abi.c
@@ -1,5 +1,6 @@
#include "all.h"

typedef struct Abi Abi;
typedef struct Class Class;
typedef struct Insl Insl;
typedef struct Params Params;
@@ -9,6 +10,12 @@ enum {
	Cptr = 2, /* replaced by a pointer */
};

/* per-target customization of the
 * shared abi() lowering pass
 */
struct Abi {
	/* called to lower an Ovastart instruction */
	void (*vastart)(Fn *, Params, Ref);
	/* called to lower an Ovaarg instruction */
	void (*vaarg)(Fn *, Blk *, Ins *);
	/* non-zero for the apple variant; forwarded
	 * to selpar() and selcall()
	 */
	int apple;
};

struct Class {
	char class;
	char ishfa;
@@ -17,6 +24,7 @@ struct Class {
		uchar size;
	} hfa;
	uint size;
	uint align;
	Typ *t;
	uchar nreg;
	uchar ngp;
@@ -33,11 +41,15 @@ struct Insl {
struct Params {
	uint ngp;
	uint nfp;
	uint nstk;
	uint stk;
};

static int gpreg[12] = {R0, R1, R2, R3, R4, R5, R6, R7};
static int fpreg[12] = {V0, V1, V2, V3, V4, V5, V6, V7};
static int store[] = {
	[Kw] = Ostorew, [Kl] = Ostorel,
	[Ks] = Ostores, [Kd] = Ostored
};

/* layout of call's second argument (RCall)
 *
@@ -92,9 +104,10 @@ typclass(Class *c, Typ *t, int *gp, int *fp)
	c->class = 0;
	c->ngp = 0;
	c->nfp = 0;
	c->align = 8;

	if (t->align > 4)
		err("alignments larger than 16 are not supported");
	if (t->align > 3)
		err("alignments larger than 8 are not supported");

	if (t->isdark || sz > 16 || sz == 0) {
		/* large structs are replaced by a
@@ -130,10 +143,6 @@ typclass(Class *c, Typ *t, int *gp, int *fp)
static void
sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
{
	static int st[] = {
		[Kw] = Ostorew, [Kl] = Ostorel,
		[Ks] = Ostores, [Kd] = Ostored
	};
	uint n;
	uint64_t off;
	Ref r;
@@ -143,7 +152,7 @@ sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
	for (n=0; n<nreg; n++) {
		tmp[n] = newtmp("abi", cls[n], fn);
		r = newtmp("abi", Kl, fn);
		emit(st[cls[n]], 0, R, tmp[n], r);
		emit(store[cls[n]], 0, R, tmp[n], r);
		emit(Oadd, Kl, r, mem, getcon(off, fn));
		off += KWIDE(cls[n]) ? 8 : 4;
	}
@@ -206,12 +215,13 @@ selret(Blk *b, Fn *fn)
}

static int
argsclass(Ins *i0, Ins *i1, Class *carg)
argsclass(Ins *i0, Ins *i1, Class *carg, int apple)
{
	int envc, ngp, nfp, *gp, *fp;
	int va, envc, ngp, nfp, *gp, *fp;
	Class *c;
	Ins *i;

	va = 0;
	envc = 0;
	gp = gpreg;
	fp = fpreg;
@@ -219,10 +229,32 @@ argsclass(Ins *i0, Ins *i1, Class *carg)
	nfp = 8;
	for (i=i0, c=carg; i<i1; i++, c++)
		switch (i->op) {
		case Oargsb:
		case Oargub:
		case Oparsb:
		case Oparub:
			c->size = 1;
			goto Scalar;
		case Oargsh:
		case Oarguh:
		case Oparsh:
		case Oparuh:
			c->size = 2;
			goto Scalar;
		case Opar:
		case Oarg:
			*c->cls = i->cls;
			c->size = 8;
			if (apple && !KWIDE(i->cls))
				c->size = 4;
		Scalar:
			c->align = c->size;
			*c->cls = i->cls;
			if (va) {
				c->class |= Cstk;
				c->size = 8;
				c->align = 8;
				break;
			}
			if (KBASE(i->cls) == 0 && ngp > 0) {
				ngp--;
				*c->reg = *gp++;
@@ -258,6 +290,7 @@ argsclass(Ins *i0, Ins *i1, Class *carg)
			envc = 1;
			break;
		case Oargv:
			va = apple != 0;
			break;
		default:
			die("unreachable");
@@ -327,18 +360,23 @@ stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
	*ilp = il;
}

/* round x up to the next multiple
 * of al; al is expected to be a
 * power of two
 */
static uint
align(uint x, uint al)
{
	uint mask;

	mask = al - 1;
	return (x + mask) & ~mask;
}

static void
selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp, int apple)
{
	Ins *i;
	Class *ca, *c, cr;
	int cty;
	uint n;
	uint64_t stk, off;
	int op, cty;
	uint n, stk, off;;
	Ref r, rstk, tmp[4];

	ca = alloc((i1-i0) * sizeof ca[0]);
	cty = argsclass(i0, i1, ca);
	cty = argsclass(i0, i1, ca, apple);

	stk = 0;
	for (i=i0, c=ca; i<i1; i++, c++) {
@@ -347,10 +385,12 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
			stkblob(i->arg[0], c, fn, ilp);
			i->op = Oarg;
		}
		if (c->class & Cstk)
		if (c->class & Cstk) {
			stk = align(stk, c->align);
			stk += c->size;
		}
	}
	stk += stk & 15;
	stk = align(stk, 16);
	rstk = getcon(stk, fn);
	if (stk)
		emit(Oadd, Kl, TMP(SP), TMP(SP), rstk);
@@ -403,9 +443,16 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
	for (i=i0, c=ca; i<i1; i++, c++) {
		if ((c->class & Cstk) == 0)
			continue;
		if (i->op == Oarg) {
		off = align(off, c->align);
		if (i->op == Oarg || isargbh(i->op)) {
			r = newtmp("abi", Kl, fn);
			emit(Ostorel, 0, R, i->arg[0], r);
			switch (c->size) {
			case 1: op = Ostoreb; break;
			case 2: op = Ostoreh; break;
			case 4:
			case 8: op = store[*c->cls]; break;
			}
			emit(op, 0, R, i->arg[0], r);
			emit(Oadd, Kl, r, TMP(SP), getcon(off, fn));
		}
		if (i->op == Oargc)
@@ -421,18 +468,19 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
}

static Params
selpar(Fn *fn, Ins *i0, Ins *i1)
selpar(Fn *fn, Ins *i0, Ins *i1, int apple)
{
	Class *ca, *c, cr;
	Insl *il;
	Ins *i;
	int n, s, cty;
	int op, n, cty;
	uint off;
	Ref r, tmp[16], *t;

	ca = alloc((i1-i0) * sizeof ca[0]);
	curi = &insb[NIns];

	cty = argsclass(i0, i1, ca);
	cty = argsclass(i0, i1, ca, apple);
	fn->reg = arm64_argregs(CALL(cty), 0);

	il = 0;
@@ -457,26 +505,33 @@ selpar(Fn *fn, Ins *i0, Ins *i1)
	}

	t = tmp;
	s = 2;
	off = 0;
	for (i=i0, c=ca; i<i1; i++, c++)
		if (i->op == Oparc && !(c->class & Cptr)) {
			if (c->class & Cstk) {
				fn->tmp[i->to.val].slot = -s;
				s += c->size / 8;
				off = align(off, c->align);
				fn->tmp[i->to.val].slot = -(off+2);
				off += c->size;
			} else
				for (n=0; n<c->nreg; n++) {
					r = TMP(c->reg[n]);
					emit(Ocopy, c->cls[n], *t++, r, R);
				}
		} else if (c->class & Cstk) {
			emit(Oload, *c->cls, i->to, SLOT(-s), R);
			s++;
			/* todo, check Cptr && Oparc && Cstk */
			off = align(off, c->align);
			if (isparbh(i->op))
				op = Oloadsb + (i->op - Oparsb);
			else
				op = Oload;
			emit(op, *c->cls, i->to, SLOT(-(off+2)), R);
			off += c->size;
		} else {
			emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R);
		}

	return (Params){
		.nstk = s - 2,
		.stk = align(off, 8),
		.ngp = (cty >> 5) & 15,
		.nfp = (cty >> 9) & 15
	};
@@ -514,7 +569,24 @@ chpred(Blk *b, Blk *bp, Blk *bp1)
}

static void
selvaarg(Fn *fn, Blk *b, Ins *i)
apple_selvaarg(Fn *fn, Blk *b, Ins *i)
{
	/* lowers a vaarg instruction for the apple
	 * abi: the va_list at i->arg[0] is treated
	 * as a single pointer that is loaded, used
	 * to load the result, advanced by 8, and
	 * stored back
	 *
	 * NOTE(review): the emit() calls below read
	 * in reverse data-flow order, which suggests
	 * emit() prepends to the instruction buffer
	 * -- confirm against emit() in util.c
	 */
	Ref ap, stk, stk8, c8;

	(void)b; /* unused; kept to match the Abi.vaarg signature */
	c8 = getcon(8, fn);
	ap = i->arg[0];
	stk8 = newtmp("abi", Kl, fn);
	stk = newtmp("abi", Kl, fn);

	emit(Ostorel, 0, R, stk8, ap);      /* *ap = stk8 */
	emit(Oadd, Kl, stk8, stk, c8);      /* stk8 = stk + 8 */
	emit(Oload, i->cls, i->to, stk, R); /* i->to = *stk */
	emit(Oload, Kl, stk, ap, R);        /* stk = *ap */
}

static void
arm64_selvaarg(Fn *fn, Blk *b, Ins *i)
{
	Ref loc, lreg, lstk, nr, r0, r1, c8, c16, c24, c28, ap;
	Blk *b0, *bstk, *breg;
@@ -607,7 +679,21 @@ selvaarg(Fn *fn, Blk *b, Ins *i)
}

static void
selvastart(Fn *fn, Params p, Ref ap)
apple_selvastart(Fn *fn, Params p, Ref ap)
{
	/* lowers a vastart instruction for the apple
	 * abi: stores into the va_list at ap the
	 * address of SLOT(-1) plus p.stk, the size
	 * reported by selpar() for stack-passed
	 * named parameters
	 *
	 * NOTE(review): the emit() calls below read
	 * in reverse data-flow order, which suggests
	 * emit() prepends to the instruction buffer
	 * -- confirm against emit() in util.c
	 */
	Ref off, stk, arg;

	off = getcon(p.stk, fn);
	stk = newtmp("abi", Kl, fn);
	arg = newtmp("abi", Kl, fn);

	emit(Ostorel, 0, R, arg, ap);      /* *ap = arg */
	emit(Oadd, Kl, arg, stk, off);     /* arg = stk + p.stk */
	emit(Oaddr, Kl, stk, SLOT(-1), R); /* stk = &SLOT(-1) */
}

static void
arm64_selvastart(Fn *fn, Params p, Ref ap)
{
	Ref r0, r1, rsave;

@@ -615,7 +701,7 @@ selvastart(Fn *fn, Params p, Ref ap)

	r0 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r0, ap);
	emit(Oadd, Kl, r0, rsave, getcon(p.nstk*8 + 192, fn));
	emit(Oadd, Kl, r0, rsave, getcon(p.stk + 192, fn));

	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
@@ -639,8 +725,8 @@ selvastart(Fn *fn, Params p, Ref ap)
	emit(Oadd, Kl, r0, ap, getcon(28, fn));
}

void
arm64_abi(Fn *fn)
static void
abi(Fn *fn, Abi abi)
{
	Blk *b;
	Ins *i, *i0, *ip;
@@ -655,7 +741,7 @@ arm64_abi(Fn *fn)
	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
		if (!ispar(i->op))
			break;
	p = selpar(fn, b->ins, i);
	p = selpar(fn, b->ins, i, abi.apple);
	n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
	i0 = alloc(n * sizeof(Ins));
	ip = icpy(ip = i0, curi, &insb[NIns] - curi);
@@ -682,14 +768,14 @@ arm64_abi(Fn *fn)
				for (i0=i; i0>b->ins; i0--)
					if (!isarg((i0-1)->op))
						break;
				selcall(fn, i0, i, &il);
				selcall(fn, i0, i, &il, abi.apple);
				i = i0;
				break;
			case Ovastart:
				selvastart(fn, p, i->arg[0]);
				abi.vastart(fn, p, i->arg[0]);
				break;
			case Ovaarg:
				selvaarg(fn, b, i);
				abi.vaarg(fn, b, i);
				break;
			case Oarg:
			case Oargc:
@@ -707,3 +793,74 @@ arm64_abi(Fn *fn)
		printfn(fn, stderr);
	}
}

/* abi lowering for the regular arm64
 * target: the shared pass is run with
 * the arm64 vararg hooks and the apple
 * flag cleared
 */
void
arm64_abi(Fn *fn)
{
	Abi std = {
		.vastart = arm64_selvastart,
		.vaarg = arm64_selvaarg,
		.apple = 0,
	};

	abi(fn, std);
}

/* abi lowering for the arm64_apple
 * target: the shared pass is run with
 * the apple vararg hooks and the apple
 * flag set
 */
void
apple_abi(Fn *fn)
{
	Abi darwin = {
		.vastart = apple_selvastart,
		.vaarg = apple_selvaarg,
		.apple = 1,
	};

	abi(fn, darwin);
}

/* abi0 for apple target; introduces
 * necessary sign/zero extensions for
 * sub-word (sb/ub/sh/uh) call arguments
 * and return values
 *
 * each block's instruction list is
 * rebuilt in the insb buffer and then
 * copied back into the block
 */
void
apple_extsb(Fn *fn)
{
	Blk *b;
	Ins *i0, *i1, *i;
	int j, op;
	Ref r;

	for (b=fn->start; b; b=b->link) {
		/* start with an empty buffer; the final
		 * count is &insb[NIns] - curi, so curi
		 * moves down from the end as instructions
		 * are added
		 */
		curi = &insb[NIns];
		j = b->jmp.type;
		if (isretbh(j)) {
			/* return the extended copy instead of
			 * the original value; assumes the ext
			 * ops follow the same sb/ub/sh/uh order
			 * as the ret jumps -- TODO confirm in
			 * ops.h
			 */
			r = newtmp("abi", Kw, fn);
			op = Oextsb + (j - Jretsb);
			emit(op, Kw, r, b->jmp.arg, R);
			b->jmp.arg = r;
		}
		/* walk the block from last to first */
		for (i=&b->ins[b->nins]; i>b->ins;) {
			emiti(*--i);
			if (i->op != Ocall)
				continue;
			/* [i0, i1) is the run of arg
			 * instructions right before the call
			 */
			for (i0=i1=i; i0>b->ins; i0--)
				if (!isarg((i0-1)->op))
					break;
			/* first pass: copy the args; each
			 * sub-word arg gets a fresh temporary,
			 * remembered in the otherwise unused
			 * i->to, and the copy is patched to
			 * pass it (presumably curi points at
			 * the instruction just emitted --
			 * confirm against emiti())
			 */
			for (i=i1; i>i0;) {
				emiti(*--i);
				if (isargbh(i->op)) {
					i->to = newtmp("abi", Kl, fn);
					curi->arg[0] = i->to;
				}
			}
			/* second pass: emit, for each sub-word
			 * arg, the extension of the original
			 * value into its temporary; assumes the
			 * ext ops follow the same order as the
			 * arg ops -- TODO confirm in ops.h
			 */
			for (i=i1; i>i0;)
				if (isargbh((--i)->op)) {
					op = Oextsb + (i->op - Oargsb);
					emit(op, Kw, i->to, i->arg[0], R);
				}
		}
		/* install the rebuilt instruction list */
		b->nins = &insb[NIns] - curi;
		idup(&b->ins, curi, b->nins);
	}

	if (debug['A']) {
		fprintf(stderr, "\n> After apple_extsb:\n");
		printfn(fn, stderr);
	}
}
diff --git a/arm64/all.h b/arm64/all.h
index ff2b3ff..6b7f43e 100644
--- a/arm64/all.h
+++ b/arm64/all.h
@@ -28,6 +28,8 @@ extern int arm64_rclob[];
bits arm64_retregs(Ref, int[2]);
bits arm64_argregs(Ref, int[2]);
void arm64_abi(Fn *);
void apple_extsb(Fn *);
void apple_abi(Fn *);

/* isel.c */
int arm64_logimm(uint64_t, int);
@@ -35,3 +37,4 @@ void arm64_isel(Fn *);

/* emit.c */
void arm64_emitfn(Fn *, FILE *);
void apple_emitfn(Fn *, FILE *);
diff --git a/arm64/emit.c b/arm64/emit.c
index 55f5ce6..18c19d2 100644
--- a/arm64/emit.c
+++ b/arm64/emit.c
@@ -7,6 +7,7 @@ struct E {
	Fn *fn;
	uint64_t frame;
	uint padding;
	int apple;
};

#define CMP(X) \
@@ -144,10 +145,10 @@ slot(int s, E *e)
	if (s == -1)
		return 16 + e->frame;
	if (s < 0) {
		if (e->fn->vararg)
			return 16 + e->frame + 192 - (s+2)*8;
		if (e->fn->vararg && !e->apple)
			return 16 + e->frame + 192 - (s+2);
		else
			return 16 + e->frame - (s+2)*8;
			return 16 + e->frame - (s+2);
	} else
		return 16 + e->padding + 4 * s;
}
@@ -243,8 +244,16 @@ emitf(char *s, Ins *i, E *e)
}

static void
loadcon(Con *c, int r, int k, FILE *f)
loadcon(Con *c, int r, int k, E *e)
{
	static char *ldsym[][2] = {
		/* arm64 */
		[0][0] = "\tadrp\t%s, %s%s%s\n",
		[0][1] = "\tadd\t%s, %s, #:lo12:%s%s%s\n",
		/* apple */
		[1][0] = "\tadrp\t%s, %s%s@page%s\n",
		[1][1] = "\tadd\t%s, %s, %s%s@pageoff%s\n",
	};
	char *rn, *l, *p, off[32];
	int64_t n;
	int w, sh;
@@ -261,24 +270,22 @@ loadcon(Con *c, int r, int k, FILE *f)
			off[0] = 0;
		l = str(c->label);
		p = c->local ? T.asloc : l[0] == '"' ? "" : T.assym;
		fprintf(f, "\tadrp\t%s, %s%s%s\n",
			rn, p, l, off);
		fprintf(f, "\tadd\t%s, %s, #:lo12:%s%s%s\n",
			rn, rn, p, l, off);
		fprintf(e->f, ldsym[e->apple][0], rn, p, l, off);
		fprintf(e->f, ldsym[e->apple][1], rn, rn, p, l, off);
		return;
	}
	assert(c->type == CBits);
	if (!w)
		n = (int32_t)n;
	if ((n | 0xffff) == -1 || arm64_logimm(n, k)) {
		fprintf(f, "\tmov\t%s, #%"PRIi64"\n", rn, n);
		fprintf(e->f, "\tmov\t%s, #%"PRIi64"\n", rn, n);
	} else {
		fprintf(f, "\tmov\t%s, #%d\n",
		fprintf(e->f, "\tmov\t%s, #%d\n",
			rn, (int)(n & 0xffff));
		for (sh=16; n>>=16; sh+=16) {
			if ((!w && sh == 32) || sh == 64)
				break;
			fprintf(f, "\tmovk\t%s, #0x%x, lsl #%d\n",
			fprintf(e->f, "\tmovk\t%s, #0x%x, lsl #%d\n",
				rn, (uint)(n & 0xffff), sh);
		}
	}
@@ -358,7 +365,7 @@ emitins(Ins *i, E *e)
		switch (rtype(i->arg[0])) {
		case RCon:
			c = &e->fn->con[i->arg[0].val];
			loadcon(c, i->to.val, i->cls, e->f);
			loadcon(c, i->to.val, i->cls, e);
			break;
		case RSlot:
			i->op = Oload;
@@ -450,8 +457,8 @@ framelayout(E *e)

*/

void
arm64_emitfn(Fn *fn, FILE *out)
static void
emitfn(E *e)
{
	static char *ctoa[] = {
	#define X(c, s) [c] = s,
@@ -463,13 +470,11 @@ arm64_emitfn(Fn *fn, FILE *out)
	uint64_t o;
	Blk *b, *t;
	Ins *i;
	E *e;

	emitlnk(fn->name, &fn->lnk, ".text", out);
	e = &(E){.f = out, .fn = fn};
	emitlnk(e->fn->name, &e->fn->lnk, ".text", e->f);
	framelayout(e);

	if (e->fn->vararg) {
	if (e->fn->vararg && !e->apple) {
		for (n=7; n>=0; n--)
			fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n);
		for (n=7; n>=0; n-=2)
@@ -531,7 +536,7 @@ arm64_emitfn(Fn *fn, FILE *out)
			if (e->fn->dynalloc)
				fputs("\tmov sp, x29\n", e->f);
			o = e->frame + 16;
			if (e->fn->vararg)
			if (e->fn->vararg && !e->apple)
				o += 192;
			if (o <= 504)
				fprintf(e->f,
@@ -589,5 +594,18 @@ arm64_emitfn(Fn *fn, FILE *out)
		}
	}
	id0 += e->fn->nblk;
	elf_emitfnfin(e->fn->name, e->f);
}

/* emit fn to out with the regular
 * arm64 conventions, followed by the
 * elf function epilogue directives
 */
void
arm64_emitfn(Fn *fn, FILE *out)
{
	E e = {.f = out, .fn = fn, .apple = 0};

	emitfn(&e);
	elf_emitfnfin(fn->name, out);
}

/* emit fn to out with the apple
 * conventions; the function's linkage
 * alignment is set to 4 beforehand
 */
void
apple_emitfn(Fn *fn, FILE *out)
{
	E e = {.f = out, .fn = fn, .apple = 1};

	fn->lnk.align = 4;
	emitfn(&e);
}
diff --git a/arm64/targ.c b/arm64/targ.c
index 6079236..88c40f1 100644
--- a/arm64/targ.c
+++ b/arm64/targ.c
@@ -25,25 +25,39 @@ arm64_memargs(int op)
	return 0;
}

#define ARM64_COMMON \
	.gpr0 = R0, \
	.ngpr = NGPR, \
	.fpr0 = V0, \
	.nfpr = NFPR, \
	.rglob = RGLOB, \
	.nrglob = 3, \
	.rsave = arm64_rsave, \
	.nrsave = {NGPS, NFPS}, \
	.retregs = arm64_retregs, \
	.argregs = arm64_argregs, \
	.memargs = arm64_memargs, \
	.isel = arm64_isel, \

Target T_arm64 = {
	.name = "arm64",
	.gpr0 = R0,
	.ngpr = NGPR,
	.fpr0 = V0,
	.nfpr = NFPR,
	.rglob = RGLOB,
	.nrglob = 3,
	.rsave = arm64_rsave,
	.nrsave = {NGPS, NFPS},
	.retregs = arm64_retregs,
	.argregs = arm64_argregs,
	.memargs = arm64_memargs,
	.abi0 = elimsb,
	.abi1 = arm64_abi,
	.isel = arm64_isel,
	.emitfn = arm64_emitfn,
	.emitfin = elf_emitfin,
	.asloc = ".L",
	ARM64_COMMON
};

Target T_arm64_apple = {
	.name = "arm64_apple",
	.abi0 = apple_extsb,
	.abi1 = apple_abi,
	.emitfn = apple_emitfn,
	.emitfin = macho_emitfin,
	.asloc = "L",
	.assym = "_",
	ARM64_COMMON
};

MAKESURE(globals_are_not_arguments,
diff --git a/main.c b/main.c
index 253d0c5..c028503 100644
--- a/main.c
+++ b/main.c
@@ -21,12 +21,14 @@ char debug['Z'+1] = {
extern Target T_amd64_sysv;
extern Target T_amd64_apple;
extern Target T_arm64;
extern Target T_arm64_apple;
extern Target T_rv64;

static Target *tlist[] = {
	&T_amd64_sysv,
	&T_amd64_apple,
	&T_arm64,
	&T_arm64_apple,
	&T_rv64,
	0
};
diff --git a/test/dark.ssa b/test/dark.ssa
index de58e4c..ed9ec21 100644
--- a/test/dark.ssa
+++ b/test/dark.ssa
@@ -1,4 +1,4 @@
# skip arm64 rv64
# skip arm64 arm64_apple rv64
# a hack example,
# we use a dark type to get
# a pointer to the stack.
diff --git a/tools/test.sh b/tools/test.sh
index 4653b83..9c0f9ee 100755
--- a/tools/test.sh
+++ b/tools/test.sh
@@ -70,7 +70,7 @@ init() {
	"")
		case `uname` in
		*Darwin*)
			cc="cc -Wl,-no_pie"
			cc="cc"
			;;
		*OpenBSD*)
			cc="cc -nopie"
-- 
2.37.2
Details
Message ID
<CMLE8FF97BMM.3QRLDQKVZDJE@taiga>
In-Reply-To
<20220901214250.1701511-1-quentin@c9x.me> (view parent)
DKIM signature
missing
Download raw message
On Thu Sep 1, 2022 at 11:42 PM CEST, Quentin Carbonneaux wrote:
> To wrap it up, I hope I demonstrated that, even if
> you don't care about the Apple support, the work that
> was done sets us up for new horizontal gains.

I don't care about Apple support, and I am indeed excited about these
improvements. Nice work.
Reply to thread Export thread (mbox)