Drew DeVault: 1 Implement thread-local storage 13 files changed, 107 insertions(+), 13 deletions(-)
Thanks Michael for the review. Many of your points coincide with what I had in mind. And of course, thanks Drew for the proposal! Thread-local storage is a sensible addition to qbe.
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~mpu/qbe/patches/34327/mbox | git am -3Learn more about email & git
--- alias.c | 3 +++ all.h | 3 +++ amd64/emit.c | 7 ++++++- amd64/isel.c | 1 + arm64/emit.c | 13 ++++++++++++- doc/il.txt | 17 +++++++++++++---- gas.c | 6 ++++++ load.c | 3 ++- parse.c | 24 ++++++++++++++++++++++++ rv64/emit.c | 8 ++++++++ test/tls.ssa | 23 +++++++++++++++++++++++ tools/lexh.c | 2 +- tools/test.sh | 10 +++++----- 13 files changed, 107 insertions(+), 13 deletions(-) create mode 100644 test/tls.ssa diff --git a/alias.c b/alias.c index aa3846e..c1e05df 100644 --- a/alias.c +++ b/alias.c @@ -19,6 +19,9 @@ getalias(Alias *a, Ref r, Fn *fn) if (c->type == CAddr) { a->type = ASym; a->label = c->label; + } else if (c->type == CThreadLocal) { + a->type = ATLS; + a->label = c->label; } else a->type = ACon; a->offset = c->bits.i; diff --git a/all.h b/all.h index c8ec93c..47da8b2 100644 --- a/all.h +++ b/all.h @@ -269,6 +269,7 @@ struct Alias { AEsc = 3, /* stack escaping */ ASym = 4, AUnk = 6, + ATLS = 7, #define astack(t) ((t) & 1) } type; Ref base; @@ -309,6 +310,7 @@ struct Con { CUndef, CBits, CAddr, + CThreadLocal, } type; uint32_t label; union { @@ -334,6 +336,7 @@ struct Lnk { char align; char *sec; char *secf; + char istls; }; struct Fn { diff --git a/amd64/emit.c b/amd64/emit.c index b8e9e8e..9870e0b 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -171,6 +171,10 @@ emitcon(Con *con, FILE *f) if (con->bits.i) fprintf(f, "%+"PRId64, con->bits.i); break; + case CThreadLocal: + l = str(con->label); + fprintf(f, "%%fs:%s@tpoff", l); + break; case CBits: fprintf(f, "%"PRId64, con->bits.i); break; @@ -306,7 +310,8 @@ Next: fputc(')', f); break; case RCon: - fputc('$', f); + if (fn->con[ref.val].type != CThreadLocal) + fputc('$', f); emitcon(&fn->con[ref.val], f); break; default: diff --git a/amd64/isel.c b/amd64/isel.c index 5a64429..592f7fc 100644 --- a/amd64/isel.c +++ b/amd64/isel.c @@ -36,6 +36,7 @@ noimm(Ref r, Fn *fn) return 0; switch (fn->con[r.val].type) { case CAddr: + case CThreadLocal: /* we only support the 'small' * code model of the ABI, this * means that we can always diff --git a/arm64/emit.c b/arm64/emit.c index 2506eea..543c933 100644 --- a/arm64/emit.c +++ b/arm64/emit.c @@ -252,7 +252,8 @@ loadcon(Con *c, int r, int k, FILE *f) w = KWIDE(k); rn = rname(r, k); n = c->bits.i; - if (c->type == CAddr) { + switch (c->type) { + case CAddr: rn = rname(r, Kl); if (n) sprintf(off, "+%"PRIi64, n); @@ -264,6 +265,16 @@ loadcon(Con *c, int r, int k, FILE *f) fprintf(f, "\tadd\t%s, %s, #:lo12:%s%s%s\n", rn, rn, p, str(c->label), off); return; + case CThreadLocal: + rn = rname(r, Kl); + fprintf(f, "\tmrs\t%s, tpidr_el0\n", rn); + fprintf(f, "\tadd\t%s, %s, #:tprel_hi12:%s, lsl #12\n", + rn, rn, str(c->label)); + fprintf(f, "\tadd\t%s, %s, #:tprel_lo12_nc:%s\n", + rn, rn, str(c->label)); + return; + default: + break; } assert(c->type == CBits); if (!w) diff --git a/doc/il.txt b/doc/il.txt index 308fe45..f2b92fa 100644 --- a/doc/il.txt +++ b/doc/il.txt @@ -173,10 +173,11 @@ by zero-extension, or by sign-extension. `bnf CONST := - ['-'] NUMBER # Decimal integer - | 's_' FP # Single-precision float - | 'd_' FP # Double-precision float - | $IDENT # Global symbol + ['-'] NUMBER # Decimal integer + | 's_' FP # Single-precision float + | 'd_' FP # Double-precision float + | $IDENT # Global symbol + | threadlocal $IDENT # Thread-local global symbol Throughout the IL, constants are specified with a unified syntax and semantics. Constants are immediates, meaning
I was thinking it might be better to make this CONST := ... | LINKAGE* $IDENT rather than add a special case for threadlocal. This would also be useful for plan9 support, so we could distinguish between `$x` (static) and `export $x` (global).
@@ -212,6 +213,9 @@ Global symbols can also be used directly as constants; they will be resolved and turned into actual numeric constants by the linker. +Prefixing a global symbol with threadlocal will cause its +address to be loaded from thread-local storage. + - 4. Linkage ------------ @@ -220,6 +224,7 @@ constants by the linker. 'export' [NL] | 'section' SECNAME [NL] | 'section' SECNAME SECFLAGS [NL] + | 'threadlocal' SECNAME := '"' .... '"' SECFLAGS := '"' .... '"' @@ -253,6 +258,10 @@ The section and export linkage flags should each appear at most once in a definition. If multiple occurrences are present, QBE is free to use any. +The threadlocal flag is shorthand for loading this symbol +into the appropriate section, .tdata or .tbss, with the +appropriate flags set. + - 5. Definitions ---------------- diff --git a/gas.c b/gas.c index 865edba..92ffd7f 100644 --- a/gas.c +++ b/gas.c @@ -29,6 +29,12 @@ gasemitlnk(char *n, Lnk *l, char *s, FILE *f) fprintf(f, ".section %s", l->sec); if (l->secf) fprintf(f, ", %s", l->secf); + } else if (l->istls) { + if (strcmp(s, ".bss") == 0) { + fprintf(f, ".section .tbss,\"awT\""); + } else { + fprintf(f, ".section .tdata,\"awT\""); + } } else { fputs(s, f); }
Instead of passing the strings ".bss" and ".data" and checking which one with a strcmp, I think we should change the `char *s` argument to a boolean `int bss`, and changing the else to `bss ? ".bss" : ".data"`.
diff --git a/load.c b/load.c index 2e10a35..1a1084a 100644 --- a/load.c +++ b/load.c @@ -152,7 +152,8 @@ load(Slice sl, bits msk, Loc *l) break; case ACon: case ASym: - c.type = CAddr; + case ATLS: + c.type = a->type == ATLS ? CThreadLocal : CAddr; c.label = a->label; c.bits.i = a->offset; c.local = 0;
Elsewhere in this file there is another switch that will die("unreachable") on ATLS. Is this codepath never expected to execute?
diff --git a/parse.c b/parse.c index 1912c8b..5e09a21 100644 --- a/parse.c +++ b/parse.c @@ -42,6 +42,7 @@ enum { Ttype, Tdata, Tsection, + Tthreadlocal, Talign, Tl, Tw, @@ -92,6 +93,7 @@ static char *kwmap[Ntok] = { [Ttype] = "type", [Tdata] = "data", [Tsection] = "section", + [Tthreadlocal] = "threadlocal", [Talign] = "align", [Tl] = "l", [Tw] = "w", @@ -403,6 +405,13 @@ parseref() case Tglo: c.type = CAddr; c.label = intern(tokval.str); + goto Look; + case Tthreadlocal: + c.type = CThreadLocal; + if (next() != Tglo) + err("function name expected"); + c.label = intern(tokval.str); + goto Look; Look: return newcon(&c, curf); default: @@ -798,6 +807,9 @@ parsefn(Lnk *lnk) int i; PState ps; + if (lnk->istls) + err("TLS is not supported for function declarations");
Maybe "threadlocal is not supported in function definitions"?
+ curb = 0; nblk = 0; curi = insb; @@ -1073,6 +1085,8 @@ parselnk(Lnk *lnk) case Tsection: if (lnk->sec) err("only one section allowed"); + if (lnk->istls) + err("threadlocal and section cannot co-exist"); if (next() != Tstr) err("section \"name\" expected"); lnk->sec = tokval.str; @@ -1081,6 +1095,13 @@ parselnk(Lnk *lnk) lnk->secf = tokval.str; } break; + case Tthreadlocal: + if (lnk->istls) + err("only one TLS directive allowed");
Maybe s/TLS/threadlocal/?
+ if (lnk->sec) + err("threadlocal and section cannot co-exist"); + lnk->istls = 1; + break; default: if (haslnk) if (t != Tdata) @@ -1138,6 +1159,9 @@ printcon(Con *c, FILE *f) if (c->bits.i) fprintf(f, "%+"PRIi64, c->bits.i); break; + case CThreadLocal: + fprintf(f, "threadlocal $%s", str(c->label)); + break; case CBits: if (c->flt == 1) fprintf(f, "s_%f", c->bits.s); diff --git a/rv64/emit.c b/rv64/emit.c index 2656c60..4cf189d 100644 --- a/rv64/emit.c +++ b/rv64/emit.c @@ -243,6 +243,14 @@ loadcon(Con *c, int r, int k, FILE *f) emitaddr(c, f); fputc('\n', f); break; + case CThreadLocal: + fprintf(f, "\tlui %s, %%tprel_hi(%s)\n", + rn, str(c->label)); + fprintf(f, "\tadd %s, %s, tp, %%tprel_add(%s)\n", + rn, rn, str(c->label)); + fprintf(f, "\taddi %s, %s, %%tprel_lo(%s)\n", + rn, rn, str(c->label)); + break; case CBits: n = c->bits.i; if (!w) diff --git a/test/tls.ssa b/test/tls.ssa new file mode 100644 index 0000000..5975932 --- /dev/null +++ b/test/tls.ssa @@ -0,0 +1,23 @@ +export function w $main() { +@start + %thread =l alloc8 8 + %retval =l alloc8 8 + + storew 1337, threadlocal $i + call $pthread_create(l %thread, l 0, l $thread, l 0) + + %thread_id =l loadl %thread + call $pthread_join(l %thread_id, l %retval) + + %ret =l loadl %retval + %res =l cnel %ret, 42 + ret %res +} + +function w $thread() { +@start + %ret =l loadl threadlocal $i + ret %ret +} + +threadlocal data $i = { w 42 } diff --git a/tools/lexh.c b/tools/lexh.c index 8d0af21..5be376e 100644 --- a/tools/lexh.c +++ b/tools/lexh.c @@ -26,7 +26,7 @@ char *tok[] = { "vaarg", "vastart", "...", "env", "call", "phi", "jmp", "jnz", "ret", "export", - "function", "type", "data", "section", "align", + "function", "type", "data", "section", "threadlocal", "align", "l", "w", "h", "b", "d", "s", "z", "loadw", "loadl", "loads", "loadd", "alloc1", "alloc2", diff --git a/tools/test.sh b/tools/test.sh index 4653b83..37105df 100755 --- a/tools/test.sh +++ b/tools/test.sh @@ -22,7 +22,7 @@ init() { arm64) for p in aarch64-linux-musl aarch64-linux-gnu do - cc="$p-gcc -no-pie -static" + cc="$p-gcc -no-pie -static -lpthread" qemu="qemu-aarch64" if $cc -v >/dev/null 2>&1 && @@ -46,7 +46,7 @@ init() { rv64) for p in riscv64-linux-musl riscv64-linux-gnu do - cc="$p-gcc -no-pie -static" + cc="$p-gcc -no-pie -static -lpthread" qemu="qemu-riscv64" if $cc -v >/dev/null 2>&1 && @@ -73,13 +73,13 @@ init() { cc="cc -Wl,-no_pie" ;; *OpenBSD*) - cc="cc -nopie" + cc="cc -nopie -lpthread" ;; *FreeBSD*) - cc="cc" + cc="cc -lpthread" ;; *) - cc="${CC:-cc} -no-pie" + cc="${CC:-cc} -no-pie -lpthread" testcc "$cc" || cc="${CC:-cc}" ;; esac base-commit: c8cd2824eae0137505fe46530c3a8e9788ab9a63 -- 2.37.1
Drew DeVault <sir@cmpwn.com> wrote:
What do you think about still using CAddr and ASym here, but adding new fields to Con and Alias indicating that they are threadlocal?