This thread contains a patchset. You're looking at the original emails,
but you may wish to use the patch review UI.
Review patch
4
3
[PATCH qbe v2 1/2] implement unsigned -> float casts
amd64 lacks an instruction for this so it has to be implemented with
signed -> float casts:
- Word casting is done by zero-extending the word to a long and then doing
a regular signed cast.
- Long casting is done by dividing by two with correct rounding if the
highest bit is set and casting that to float, then adding
1 to mantissa with integer addition
---
v1 -> v2:
Error reporting logic in test/fpcnv was wrong. I also made it a bit
more informative. Sorry for the noise.
amd64/isel.c | 45 +++++++++++++++++++++++++++++++++++++++++++ --
arm64/emit.c | 2 ++
doc/il.txt | 10 +++++++ ---
fold.c | 4 ++++
ops.h | 2 ++
test/fpcnv.ssa | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 107 insertions(+), 5 deletions(-)
diff --git a/amd64/isel.c b/amd64/isel.c
index 607c176..e2d0443 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -204,8 +204,8 @@ selcmp(Ref arg[2], int k, int swap, Fn *fn)
static void
sel(Ins i, ANum *an, Fn *fn)
{
- Ref r0, r1;
- int x, k, kc, swap;
+ Ref r0, r1, tmp[7];
+ int x, j, k, kc, swap;
int64_t sz;
Ins *i0, *i1;
@@ -269,6 +269,47 @@ sel(Ins i, ANum *an, Fn *fn)
emit(Ocopy, Kw, TMP(RCX), r0, R);
fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
break;
+ case Ouwtof:
+ r0 = newtmp("utof", Kl, fn);
+ emit(Osltof, k, i.to, r0, R);
+ emit(Oextuw, Kl, r0, i.arg[0], R);
+ fixarg(&curi->arg[0], k, curi, fn);
+ break;
+ case Oultof:
+ /*
+ %mask =l and %arg.0, 1
+ %isbig =l shr %arg.0, 63
+ %divided =l shr %arg.0, %isbig
+ %or =l or %mask, %divided
+ %float =d sltof %or
+ %cast =l cast %float
+ %addend =l shl %isbig, 52
+ %sum =l add %cast, %addend
+ %result =d cast %sum
+ */
+ r0 = newtmp("utof", k, fn);
+ if (k == Ks)
+ kc = Kw;
+ else
+ kc = Kl;
+ for (j=0; j<4; j++)
+ tmp[j] = newtmp("utof", Kl, fn);
+ for (; j<7; j++)
+ tmp[j] = newtmp("utof", kc, fn);
+ emit(Ocast, k, i.to, tmp[6], R);
+ emit(Oadd, kc, tmp[6], tmp[4], tmp[5]);
+ emit(Oshl, kc, tmp[5], tmp[1], getcon(k == Ks ? 23 : 52, fn));
+ emit(Ocast, kc, tmp[4], r0, R);
+
+ emit(Osltof, k, r0, tmp[3], R);
+ emit(Oor, Kl, tmp[3], tmp[0], tmp[2]);
+ emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]);
+ sel(*curi++, an, fn);
+ emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn));
+ fixarg(&curi->arg[0], Kl, curi, fn);
+ emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
+ fixarg(&curi->arg[0], Kl, curi, fn);
+ break;
case Onop:
break;
case Ostored:
diff --git a/arm64/emit.c b/arm64/emit.c
index 3c27cd8..3903a7f 100644
--- a/arm64/emit.c
+++ b/arm64/emit.c
@@ -89,7 +89,9 @@ static struct {
{ Ostosi, Ka, "fcvtzs %=, %S0" },
{ Odtosi, Ka, "fcvtzs %=, %D0" },
{ Oswtof, Ka, "scvtf %=, %W0" },
+ { Ouwtof, Ka, "ucvtf %=, %W0" },
{ Osltof, Ka, "scvtf %=, %L0" },
+ { Oultof, Ka, "ucvtf %=, %L0" },
{ Ocall, Kw, "blr %L0" },
{ Oacmp, Ki, "cmp %0, %1" },
diff --git a/doc/il.txt b/doc/il.txt
index df6d07c..bce938f 100644
--- a/doc/il.txt
+++ b/doc/il.txt
@@ -714,7 +714,9 @@ or convert a floating point into an integer and vice versa.
* `stosi` -- `I(ss)`
* `dtosi` -- `I(dd)`
* `swtof` -- `F(ww)`
+ * `uwtof` -- `F(ww)`
* `sltof` -- `F(ll)`
+ * `ultof` -- `F(ll)`
Extending the precision of a temporary is done using the
`ext` family of instructions. Because QBE types do not
@@ -733,9 +735,9 @@ zero.
Converting between signed integers and floating points is
done using `stosi` (single to signed integer), `dtosi`
(double to signed integer), `swtof` (signed word to float),
- and `sltof` (signed long to float). These instructions
- only handle signed integers, conversion to and from
- unsigned types are not yet supported.
+ `uwtof` (unsigned word to float), `sltof` (signed long
+ to float) and `ultof` (unsigned long to float). Conversion
+ from unsigned types is not yet supported.
Because of <@ Subtyping >, there is no need to have an
instruction to lower the precision of an integer temporary.
@@ -1006,8 +1008,10 @@ instructions unless you know exactly what you are doing.
* `extuh`
* `extuw`
* `sltof`
+ * `ultof`
* `stosi`
* `swtof`
+ * `uwtof`
* `truncd`
* <@ Cast and Copy > :
diff --git a/fold.c b/fold.c
index 50a862e..f7dbc4e 100644
--- a/fold.c
+++ b/fold.c
@@ -467,7 +467,9 @@ foldflt(Con *res, int op, int w, Con *cl, Con *cr)
case Odiv: xd = ld / rd; break;
case Omul: xd = ld * rd; break;
case Oswtof: xd = (int32_t)cl->bits.i; break;
+ case Ouwtof: xd = (uint32_t)cl->bits.i; break;
case Osltof: xd = (int64_t)cl->bits.i; break;
+ case Oultof: xd = (uint64_t)cl->bits.i; break;
case Oexts: xd = cl->bits.s; break;
case Ocast: xd = ld; break;
default: die("unreachable");
@@ -483,7 +485,9 @@ foldflt(Con *res, int op, int w, Con *cl, Con *cr)
case Odiv: xs = ls / rs; break;
case Omul: xs = ls * rs; break;
case Oswtof: xs = (int32_t)cl->bits.i; break;
+ case Ouwtof: xs = (uint32_t)cl->bits.i; break;
case Osltof: xs = (int64_t)cl->bits.i; break;
+ case Oultof: xs = (uint64_t)cl->bits.i; break;
case Otruncd: xs = cl->bits.d; break;
case Ocast: xs = ls; break;
default: die("unreachable");
diff --git a/ops.h b/ops.h
index 114310c..809b208 100644
--- a/ops.h
+++ b/ops.h
@@ -97,7 +97,9 @@ O(truncd, T(e,e,d,e, e,e,x,e), 1) X(0, 0, 1)
O(stosi, T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1)
O(dtosi, T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1)
O(swtof, T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
+ O(uwtof, T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
O(sltof, T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1)
+ O(ultof, T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1)
O(cast, T(s,d,w,l, x,x,x,x), 1) X(0, 0, 1)
/* Stack Allocation */
diff --git a/test/fpcnv.ssa b/test/fpcnv.ssa
index d9851d8..4dac489 100644
--- a/test/fpcnv.ssa
+++ b/test/fpcnv.ssa
@@ -17,13 +17,62 @@ function d $ftrunc(d %f) {
ret %rt
}
+ export
+ function s $wtos(w %w) {
+ @start
+ %rt =s uwtof %w
+ ret %rt
+ }
+ export
+ function d $wtod(w %w) {
+ @start
+ %rt =d uwtof %w
+ ret %rt
+ }
+
+ export
+ function s $ltos(l %l) {
+ @start
+ %rt =s ultof %l
+ ret %rt
+ }
+ export
+ function d $ltod(l %l) {
+ @start
+ %rt =d ultof %l
+ ret %rt
+ }
+
# >>> driver
# extern float fneg(float);
# extern double ftrunc(double);
+ #
+ # extern float wtos(unsigned int);
+ # extern double wtod(unsigned int);
+ # extern float ltos(long long unsigned int);
+ # extern double ltod(long long unsigned int);
+ #
+ # unsigned long long iin[] = { 0, 1, 16, 234987, 427386245, 0x7fff0000,
+ # 0xffff0000, 23602938196141, 72259248152500195, 9589010795705032704ull,
+ # 0xdcf5fbe299d0148aull, 0xffffffff00000000ull, -1 };
+ #
# int main() {
+ # int i;
+ #
# if (fneg(1.23f) != -1.23f) return 1;
# if (ftrunc(3.1415) != 3.0) return 2;
# if (ftrunc(-1.234) != -1.0) return 3;
+ #
+ # for (i=0; i<sizeof(iin)/sizeof(iin[0]); i++) {
+ # if (wtos(iin[i]) != (float) (unsigned int)iin[i])
+ # return 4;
+ # if (wtod(iin[i]) != (double)(unsigned int)iin[i])
+ # return 5;
+ # if (ltos(iin[i]) != (float) iin[i])
+ # return 6;
+ # if (ltod(iin[i]) != (double)iin[i])
+ # return 7;
+ # }
# return 0;
# }
# <<<
--
2.34.1
[PATCH qbe v2 2/2] implement float -> unsigned casts
amd64 lacks instruction for this so it has to be implemented with
float -> signed casts. The approach is borrowed from llvm.
---
v1 -> v2:
Error reporting logic in test/fpcnv.ssa was wrong. I also made it a
bit more informative.
amd64/isel.c | 26 +++++++++++++++++++++++++
arm64/emit.c | 2 ++
doc/il.txt | 16 ++++++++++ ------
fold.c | 2 ++
ops.h | 2 ++
test/fpcnv.ssa | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 93 insertions(+), 6 deletions(-)
diff --git a/amd64/isel.c b/amd64/isel.c
index e2d0443..826cb2d 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -310,6 +310,32 @@ sel(Ins i, ANum *an, Fn *fn)
emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
fixarg(&curi->arg[0], Kl, curi, fn);
break;
+ case Ostoui:
+ i.op = Ostosi;
+ r0 = newtmp("ftou", Ks, fn);
+ goto Oftoui;
+ case Odtoui:
+ i.op = Odtosi;
+ r0 = newtmp("ftou", Kd, fn);
+ Oftoui:
+ if (k == Kw) {
+ goto Emit;
+ }
+ for (j=0; j<4; j++)
+ tmp[j] = newtmp("ftou", Kl, fn);
+ emit(Oor, Kl, i.to, tmp[0], tmp[3]);
+ emit(Oand, Kl, tmp[3], tmp[2], tmp[1]);
+ emit(i.op, Kl, tmp[2], r0, R);
+ if (i.op == Ostosi)
+ emit(Oadd, Ks, r0, getcon(0xdf000000, fn), i.arg[0]);
+ else
+ emit(Oadd, Kd, r0, getcon(0xc3e0000000000000, fn), i.arg[0]);
+ i1 = curi;
+ fixarg(&i1->arg[0], i.op == Ostosi ? Ks : Kd, i1, fn);
+ fixarg(&i1->arg[1], i.op == Ostosi ? Ks : Kd, i1, fn);
+ emit(Osar, Kl, tmp[1], tmp[0], getcon(63, fn));
+ emit(i.op, Kl, tmp[0], i.arg[0], R);
+ fixarg(&curi->arg[0], Kl, curi, fn);
case Onop:
break;
case Ostored:
diff --git a/arm64/emit.c b/arm64/emit.c
index 3903a7f..be0318a 100644
--- a/arm64/emit.c
+++ b/arm64/emit.c
@@ -87,7 +87,9 @@ static struct {
{ Ocast, Ks, "fmov %=, %W0" },
{ Ocast, Kd, "fmov %=, %L0" },
{ Ostosi, Ka, "fcvtzs %=, %S0" },
+ { Ostoui, Ka, "fcvtzu %=, %S0" },
{ Odtosi, Ka, "fcvtzs %=, %D0" },
+ { Odtoui, Ka, "fcvtzu %=, %D0" },
{ Oswtof, Ka, "scvtf %=, %W0" },
{ Ouwtof, Ka, "ucvtf %=, %W0" },
{ Osltof, Ka, "scvtf %=, %L0" },
diff --git a/doc/il.txt b/doc/il.txt
index bce938f..3ab2549 100644
--- a/doc/il.txt
+++ b/doc/il.txt
@@ -712,7 +712,9 @@ or convert a floating point into an integer and vice versa.
* `exts` -- `d(s)`
* `truncd` -- `s(d)`
* `stosi` -- `I(ss)`
+ * `stoui` -- `I(ss)`
* `dtosi` -- `I(dd)`
+ * `dtoui` -- `I(dd)`
* `swtof` -- `F(ww)`
* `uwtof` -- `F(ww)`
* `sltof` -- `F(ll)`
@@ -732,12 +734,12 @@ argument of `truncd` cannot be represented as a
single-precision floating point, it is truncated towards
zero.
- Converting between signed integers and floating points is
- done using `stosi` (single to signed integer), `dtosi`
- (double to signed integer), `swtof` (signed word to float),
- `uwtof` (unsigned word to float), `sltof` (signed long
- to float) and `ultof` (unsigned long to float). Conversion
- from unsigned types is not yet supported.
+ Converting between signed integers and floating points is done
+ using `stosi` (single to signed integer), `stoui` (single to
+ unsigned integer`, `dtosi` (double to signed integer), `dtoui`
+ (double to unsigned integer), `swtof` (signed word to float),
+ `uwtof` (unsigned word to float), `sltof` (signed long to
+ float) and `ultof` (unsigned long to float).
Because of <@ Subtyping >, there is no need to have an
instruction to lower the precision of an integer temporary.
@@ -1000,6 +1002,7 @@ instructions unless you know exactly what you are doing.
* <@ Conversions >:
* `dtosi`
+ * `dtoui`
* `exts`
* `extsb`
* `extsh`
@@ -1010,6 +1013,7 @@ instructions unless you know exactly what you are doing.
* `sltof`
* `ultof`
* `stosi`
+ * `stoui`
* `swtof`
* `uwtof`
* `truncd`
diff --git a/fold.c b/fold.c
index f7dbc4e..072051f 100644
--- a/fold.c
+++ b/fold.c
@@ -386,7 +386,9 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
case Oextsw: x = (int32_t)l.u; break;
case Oextuw: x = (uint32_t)l.u; break;
case Ostosi: x = w ? (int64_t)cl->bits.s : (int32_t)cl->bits.s; break;
+ case Ostoui: x = w ? (uint64_t)cl->bits.s : (uint32_t)cl->bits.s; break;
case Odtosi: x = w ? (int64_t)cl->bits.d : (int32_t)cl->bits.d; break;
+ case Odtoui: x = w ? (uint64_t)cl->bits.d : (uint32_t)cl->bits.d; break;
case Ocast:
x = l.u;
if (cl->type == CAddr) {
diff --git a/ops.h b/ops.h
index 809b208..04f5b7d 100644
--- a/ops.h
+++ b/ops.h
@@ -95,7 +95,9 @@ O(extuw, T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1)
O(exts, T(e,e,e,s, e,e,e,x), 1) X(0, 0, 1)
O(truncd, T(e,e,d,e, e,e,x,e), 1) X(0, 0, 1)
O(stosi, T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1)
+ O(stoui, T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1)
O(dtosi, T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1)
+ O(dtoui, T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1)
O(swtof, T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
O(uwtof, T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
O(sltof, T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1)
diff --git a/test/fpcnv.ssa b/test/fpcnv.ssa
index 4dac489..3036168 100644
--- a/test/fpcnv.ssa
+++ b/test/fpcnv.ssa
@@ -43,7 +43,37 @@ function d $ltod(l %l) {
ret %rt
}
+ export
+ function w $stow(s %f) {
+ @start
+ %rt =w stoui %f
+ ret %rt
+ }
+ export
+ function w $dtow(d %f) {
+ @start
+ %rt =w dtoui %f
+ ret %rt
+ }
+
+ export
+ function l $stol(s %f) {
+ @start
+ %rt =l stoui %f
+ ret %rt
+ }
+ export
+ function l $dtol(d %f) {
+ @start
+ %rt =l dtoui %f
+ ret %rt
+ }
+
+
+
# >>> driver
+ # #include<limits.h>
+ #
# extern float fneg(float);
# extern double ftrunc(double);
#
@@ -52,10 +82,19 @@ function d $ltod(l %l) {
# extern float ltos(long long unsigned int);
# extern double ltod(long long unsigned int);
#
+ # extern unsigned int stow(float);
+ # extern unsigned int dtow(double);
+ # extern unsigned long long stol(float);
+ # extern unsigned long long dtol(double);
+ #
# unsigned long long iin[] = { 0, 1, 16, 234987, 427386245, 0x7fff0000,
# 0xffff0000, 23602938196141, 72259248152500195, 9589010795705032704ull,
# 0xdcf5fbe299d0148aull, 0xffffffff00000000ull, -1 };
#
+ # double fin[] = { 0.17346516197824458, 442.0760005466251, 4342856.879893436,
+ # 98547543006.49626, 236003043787688.3, 9.499222733527032e+18,
+ # 1.1936266170755652e+19 };
+ #
# int main() {
# int i;
#
@@ -72,6 +111,18 @@ function d $ltod(l %l) {
# return 6;
# if (ltod(iin[i]) != (double)iin[i])
# return 7;
+ # }
+ # for (i=0; i<sizeof(fin)/sizeof(fin[0]); i++) {
+ # if (stol((float)fin[i]) != (unsigned long long)(float)fin[i])
+ # return 8;
+ # if (dtol(fin[i]) != (unsigned long long)fin[i])
+ # return 9;
+ # if((unsigned long long)fin[i] > UINT_MAX)
+ # continue;
+ # if (stow((float)fin[i]) != (unsigned int)(float)fin[i])
+ # return 10;
+ # if (dtow(fin[i]) != (unsigned int)fin[i])
+ # return 11;
# }
# return 0;
# }
--
2.34.1
Re: [PATCH qbe v2 2/2] implement float -> unsigned casts
Thanks, I applied and pushed both of your patches!
Re: [PATCH qbe v2 2/2] implement float -> unsigned casts
Bor Grošelj Simić <bor.groseljsimic@telemach.net> wrote:
> amd64 lacks instruction for this so it has to be implemented with
> float -> signed casts. The approach is borrowed from llvm.
A cproc user recently reported an issue[0] with double to unsigned
conversion. Here's a simple reproducer:
data $x = align 8 { d d_0xffffffff }
data $fmt = align 1 { b "%x\n\000" }
export
function w $main() {
@start
%dx =d loadd $x
%ux =w dtoui %dx
%r =w call $printf(l $fmt, ..., w %ux)
ret 0
}
This should print 0xffffffff since this value can be represented
in both double and unsigned. I confirmed that after reverting the
cproc commit[1] that switched to dtoui from its own routine, it
prints the correct result.
Looking into the qbe code a little bit, I think it just assumes
that for dtoui/ftoui, if the result class is Kw, it can just change
the instruction to dtosi/stosi. However, this doesn't work for any
value larger than INT_MAX but still within UINT_MAX, and I noticed
that fpcnv doesn't test any values in this range.
In cproc for these types of conversions I just did a conversion
from float/double to a temporary with class l, and then used that
as a w result, since signed 64-bit can represent all values of
unsigned 32-bit. I think the same can be done here, and below is a
patch (apply with --scissor) that seems to work. I'm not sure if
it's ok to just change i.cls like this, or if an additional copy
instruction is needed.
[0] https://github.com/oasislinux/oasis/issues/63
[1] https://git.sr.ht/~mcf/cproc/commit/5dcd9299333326013be09062029e59c51b097786
-- >8 --
From: Michael Forney <mforney@mforney.org>
Date: Fri, 25 Aug 2023 15:04:47 -0700
Subject: [PATCH] Fix conversion from float/double to unsigned int
signed int can't represent all the values of unsigned int, so we
need to do the conversion to signed long, and use the lower 32 bits
as the result.
---
amd64/isel.c | 2 ++
test/fpcnv.ssa | 2 + -
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/amd64/isel.c b/amd64/isel.c
index 0d2affc..8f16c3c 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -349,11 +349,13 @@ sel(Ins i, ANum *an, Fn *fn)
break;
case Ostoui:
i.op = Ostosi;
+ i.cls = Kl;
kc = Ks;
tmp[4] = getcon(0xdf000000, fn);
goto Oftoui;
case Odtoui:
i.op = Odtosi;
+ i.cls = Kl;
kc = Kd;
tmp[4] = getcon(0xc3e0000000000000, fn);
Oftoui:
diff --git a/test/fpcnv.ssa b/test/fpcnv.ssa
index aea67ac..3466ed2 100644
--- a/test/fpcnv.ssa
+++ b/test/fpcnv.ssa
@@ -92,7 +92,7 @@ function l $dtol(d %f) {
# 0xdcf5fbe299d0148aull, 0xffffffff00000000ull, -1 };
#
# double fin[] = { 0.17346516197824458, 442.0760005466251, 4342856.879893436,
- # 98547543006.49626, 236003043787688.3, 9.499222733527032e+18,
+ # 4294967295.0, 98547543006.49626, 236003043787688.3, 9.499222733527032e+18,
# 1.1936266170755652e+19 };
#
# int main() {
--
2.37.3
Re: [PATCH qbe v2 2/2] implement float -> unsigned casts
Thanks for root-causing that and providing a fix.
On Sat, Aug 26, 2023, at 00:18, Michael Forney wrote:
> Looking into the qbe code a little bit, I think it just assumes
> that for dtoui/ftoui, if the result class is Kw, it can just change
> the instruction to dtosi/stosi. However, this doesn't work for any
> value larger than INT_MAX but still within UINT_MAX, and I noticed
> that fpcnv doesn't test any values in this range.
Hm, yes that seems to be the issue.
> I'm not sure if
> it's ok to just change i.cls like this, or if an additional copy
> instruction is needed.
I did not really like that Tmp.cls becomes out of sync
with the defining instruction, so I just inserted a copy
to a new Kl temporary. See master's latest commit:
https://c9x.me/git/qbe.git/commit/?id=d6c9669c3c83ce9f570a9c3528110b666aba88f2