~mpu/qbe

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
2 2

[PATCH] add field offset syntax for load and store

Details
Message ID
<20230724194512.15738-2-jonne@yyny.dev>
DKIM signature
missing
Download raw message
Patch: +54 -8
---
This has been my no. 1 pain point with QBE. Debugging invalid loads and
stores has caused me many problems because the add (and mul)
instructions to calculate the address are interleaved.
The syntax is limited to loads and stores because adding constants to
non-addresses is usually specified explicitly in the front-end.

+%b
+%b*N

would be nice too, but +N deals with the most common case of
a static field access, and this syntax is also already supported in
'data' definitions aswell.

 parse.c         | 36 ++++++++++++++++++++++++++++--------
 test/access.ssa | 26 ++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 8 deletions(-)
 create mode 100644 test/access.ssa

diff --git a/parse.c b/parse.c
index c54448f..3b74a97 100644
--- a/parse.c
+++ b/parse.c
@@ -399,16 +399,20 @@ tmpref(char *v)
}

static Ref
parseref()
parseref(int e)
{
	Con c;
	Ref r;

	memset(&c, 0, sizeof c);
	switch (next()) {
	default:
		return R;
	case Ttmp:
		return tmpref(tokval.str);
		r = tmpref(tokval.str);
		if (!e || peek() != Tplus)
			return r;
		goto Offset;
	case Tint:
		c.type = CBits;
		c.bits.i = tokval.num;
@@ -430,7 +434,23 @@ parseref()
	case Tglo:
		c.type = CAddr;
		c.sym.id = intern(tokval.str);
		break;
		if (!e || peek() != Tplus)
			break;
		r = newcon(&c, curf);
Offset:
		next();
		if (next() != Tint)
			err("invalid token after offset in ref");
		c.type = CBits;
		c.bits.i = tokval.num;
		if (curi - insb >= NIns)
			err("too many instructions");
		curi->op = Oadd;
		curi->cls = Kl;
		curi->to = newtmp("off", Kl, curf);
		curi->arg[0] = r;
		curi->arg[1] = newcon(&c, curf);
		return (curi++)->to;
	}
	return newcon(&c, curf);
}
@@ -510,7 +530,7 @@ parserefl(int arg)
			k = parsecls(&ty);
			break;
		}
		r = parseref();
		r = parseref(0);
		if (req(r, R))
			err("invalid argument");
		if (!arg && rtype(r) != RTmp)
@@ -622,7 +642,7 @@ parseline(PState ps)
		if (peek() == Tnl)
			curb->jmp.type = Jret0;
		else if (rcls != K0) {
			r = parseref();
			r = parseref(0);
			if (req(r, R))
				err("invalid return value");
			curb->jmp.arg = r;
@@ -633,7 +653,7 @@ parseline(PState ps)
		goto Jump;
	case Tjnz:
		curb->jmp.type = Jjnz;
		r = parseref();
		r = parseref(0);
		if (req(r, R))
			err("invalid argument for jnz jump");
		curb->jmp.arg = r;
@@ -662,7 +682,7 @@ parseline(PState ps)
	op = next();
DoOp:
	if (op == Tcall) {
		arg[0] = parseref();
		arg[0] = parseref(0);
		parserefl(1);
		op = Ocall;
		expect(Tnl);
@@ -694,7 +714,7 @@ DoOp:
				expect(Tlbl);
				blk[i] = findblk(tokval.str);
			}
			arg[i] = parseref();
			arg[i] = parseref((isstore(op) && i == 1) || isload(op));
			if (req(arg[i], R))
				err("invalid instruction argument");
			i++;
diff --git a/test/access.ssa b/test/access.ssa
new file mode 100644
index 0000000..3e050e8
--- /dev/null
+++ b/test/access.ssa
@@ -0,0 +1,26 @@
export function $main() {
@start
	%v =l alloc4 16
	storel 34, %v+0
	storel 35, %v+8

	%a =l loadl %v+0
	%b =l loadl %v+8
	%c =l add %a, %b
	call $printf(l $fmt, ..., l %a, l %b, l %c)

	%a =l loadl $v+0
	%b =l loadl $v+8
	%c =l add %a, %b
	call $printf(l $fmt, ..., l %a, l %b, l %c)

	ret
}

data $v = { l 199, l 221 }
data $fmt = { b "%d + %d = %d", b 10, b 0 }

# >>> output
# 34 + 35 = 69
# 199 + 221 = 420
# <<<
-- 
2.41.0
Details
Message ID
<a4d87354-08db-468b-b7da-80c5501414e9@app.fastmail.com>
In-Reply-To
<20230724194512.15738-2-jonne@yyny.dev> (view parent)
DKIM signature
missing
Download raw message
Hi,

That is not getting in qbe, or any variation on the theme. I'd like to keep syntactic sugar out of the IL and consider it rather like some human readable serialization format, faithfully reflecting the internal representation.

For your readability problem I suggest you have a single way of emitting loads and stores, and maybe add a comment in the generated IL that makes it clear for you what's happening.

Best
Details
Message ID
<73825aa0-a01b-9a01-961d-4912e888bb2f@yyny.dev>
In-Reply-To
<a4d87354-08db-468b-b7da-80c5501414e9@app.fastmail.com> (view parent)
DKIM signature
missing
Download raw message
 > I'd like to keep syntactic sugar out of the IL.

A respectable choice.

 > I suggest you have a single way of emitting loads and stores

I have exactly that, but they operate on syntax nodes. There are a few
places in the codegen that implicitly load a field.
(Examples: Bounds checking, tagged unions)

 > and maybe add a comment in the generated IL that makes it clear for
 > you what's happening.

I initially solved my problem by _removing_ all comments and rewriting
the loads/stores to encode their own offsets.
That made the problem clear to me, and hence the patch.
(Side note: debug info would have really helped as well)

---

Anyway, I have fixed (most) codegen bugs quite a while ago now, and my
codegen does not produce QBE syntax, but a QBE compatible representation
(to allow for compile time execution, bounds checking, etc.).
I can easily continue doing the syntactical sugar on my end, and
although I think this syntactical sugar might help people, I respect
your choice to keep the QBE IR simple, that is after all why I chose to
use it in the first place :)
Reply to thread Export thread (mbox)