~mpu/qbe

Fun experiment - simplify int mul/udiv/urem of 2^N into shl/shr/and. v1 PROPOSED

Roland Paterson-Jones: 1
 Fun experiment - simplify int mul/udiv/urem of 2^N into shl/shr/and.

 1 files changed, 57 insertions(+), 1 deletions(-)
Ok, doh!
Next
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~mpu/qbe/patches/53279/mbox | git am -3
Learn more about email & git

[PATCH] Fun experiment - simplify int mul/udiv/urem of 2^N into shl/shr/and. Export this patch

It works in the sense that it passes the "standard" test suite.

(cproc bootstrap, hare[c] make test, roland units, linpack/coremark run)

However linpack benchmark is now notably slower. Coremark is ~2% faster.

As noticed before, linmark timing is dubious, and maybe my cheap (AMD) laptop
prefers mul to shl.

I did struggle a lot with the *new semantics of static void ins() here as you
will see in the patch. Seems like a special case for blit?

Now I'm really struggling with the *new semantics. Will try to grok it but
this works only as fall-through into default:???
---
 simpl.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/simpl.c b/simpl.c
index 7001301..3089f09 100644
--- a/simpl.c
+++ b/simpl.c
@@ -30,15 +30,52 @@ blit(Ref sd[2], int sz, Fn *fn)
		}
}

static int
isconbits(Fn *fn, Ref r)
{
	return rtype(r) == RCon && fn->con[r.val].type == CBits;
}

static int
ispow2(uint64_t v)
{
	return (v & (v - 1)) == 0;
}

static int LOG2_TAB64[64] = {
	63,  0, 58,  1, 59, 47, 53,  2,
	60, 39, 48, 27, 54, 33, 42,  3,
	61, 51, 37, 40, 49, 18, 28, 20,
	55, 30, 34, 11, 43, 14, 22,  4,
	62, 57, 46, 52, 38, 26, 32, 41,
	50, 36, 17, 19, 29, 10, 13, 21,
	56, 45, 25, 31, 35, 16,  9, 12,
	44, 24, 15,  8, 23,  7,  6,  5};

/* caveat: only works on 64-bit systems */
static int log2_64 (uint64_t value)
{
	assert(sizeof(unsigned long) == 8);
	value |= value >> 1;
	value |= value >> 2;
	value |= value >> 4;
	value |= value >> 8;
	value |= value >> 16;
	value |= value >> 32;
	return LOG2_TAB64[((value - (value >> 1))*0x07EDD5E59A4E28C2UL) >> 58];
}

static void
ins(Ins **pi, int *new, Blk *b, Fn *fn)
{
	ulong ni;
	Con *c;
	Ins *i;
	int shift;

	i = *pi;
	/* simplify more instructions here;
	 * copy 0 into xor, mul 2^n into shift,
	 * copy 0 into xor, mul 2^n into shift (DONE),
	 * bit rotations, ... */
	switch (i->op) {
	case Oblit1:
@@ -54,6 +91,25 @@ ins(Ins **pi, int *new, Blk *b, Fn *fn)
		blit((i-1)->arg, rsval(i->arg[0]), fn);
		*pi = i-1;
		break;
	case Omul:
	case Oudiv:
	case Ourem:
		if (!KBASE(i->cls))
		if (isconbits(fn, i->arg[1])) {
			c = &fn->con[i->arg[1].val];
			if (ispow2(c->bits.i)) {
				shift = log2_64(c->bits.i);
				if (i->op == Ourem) {
					i->op = Oand;
					i->arg[1] = getcon((1ul << shift)-1, fn);
				} else {
					i->op = i->op == Omul ? Oshl : Oshr;
					i->arg[1] = getcon(shift, fn);
				}
			}
		}
		/* fall thru - and compiler warning for such */

	default:
		if (*new)
			emiti(*i);
-- 
2.34.1