~mpu/qbe

Implement thread-local storage v3 PROPOSED

Drew DeVault: 1
 Implement thread-local storage

 13 files changed, 107 insertions(+), 13 deletions(-)
Thanks Michael for the review. Many of your points coincide
with what I had in mind. And of course, thanks Drew for the
proposal! Thread-local storage is a sensible addition to
qbe.
Next
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~mpu/qbe/patches/34327/mbox | git am -3
Learn more about email & git

[PATCH v3] Implement thread-local storage Export this patch

---
 alias.c       |  3 +++
 all.h         |  3 +++
 amd64/emit.c  |  7 ++++++-
 amd64/isel.c  |  1 +
 arm64/emit.c  | 13 ++++++++++++-
 doc/il.txt    | 17 +++++++++++++----
 gas.c         |  6 ++++++
 load.c        |  3 ++-
 parse.c       | 24 ++++++++++++++++++++++++
 rv64/emit.c   |  8 ++++++++
 test/tls.ssa  | 23 +++++++++++++++++++++++
 tools/lexh.c  |  2 +-
 tools/test.sh | 10 +++++-----
 13 files changed, 107 insertions(+), 13 deletions(-)
 create mode 100644 test/tls.ssa

diff --git a/alias.c b/alias.c
index aa3846e..c1e05df 100644
--- a/alias.c
+++ b/alias.c
@@ -19,6 +19,9 @@ getalias(Alias *a, Ref r, Fn *fn)
		if (c->type == CAddr) {
			a->type = ASym;
			a->label = c->label;
		} else if (c->type == CThreadLocal) {
			a->type = ATLS;
			a->label = c->label;
		} else
			a->type = ACon;
		a->offset = c->bits.i;
diff --git a/all.h b/all.h
index c8ec93c..47da8b2 100644
--- a/all.h
+++ b/all.h
@@ -269,6 +269,7 @@ struct Alias {
		AEsc = 3, /* stack escaping */
		ASym = 4,
		AUnk = 6,
		ATLS = 7,
	#define astack(t) ((t) & 1)
	} type;
	Ref base;
@@ -309,6 +310,7 @@ struct Con {
		CUndef,
		CBits,
		CAddr,
		CThreadLocal,
	} type;
	uint32_t label;
	union {
@@ -334,6 +336,7 @@ struct Lnk {
	char align;
	char *sec;
	char *secf;
	char istls;
};

struct Fn {
diff --git a/amd64/emit.c b/amd64/emit.c
index b8e9e8e..9870e0b 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -171,6 +171,10 @@ emitcon(Con *con, FILE *f)
		if (con->bits.i)
			fprintf(f, "%+"PRId64, con->bits.i);
		break;
	case CThreadLocal:
		l = str(con->label);
		fprintf(f, "%%fs:%s@tpoff", l);
		break;
	case CBits:
		fprintf(f, "%"PRId64, con->bits.i);
		break;
@@ -306,7 +310,8 @@ Next:
			fputc(')', f);
			break;
		case RCon:
			fputc('$', f);
			if (fn->con[ref.val].type != CThreadLocal)
				fputc('$', f);
			emitcon(&fn->con[ref.val], f);
			break;
		default:
diff --git a/amd64/isel.c b/amd64/isel.c
index 5a64429..592f7fc 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -36,6 +36,7 @@ noimm(Ref r, Fn *fn)
		return 0;
	switch (fn->con[r.val].type) {
	case CAddr:
	case CThreadLocal:
		/* we only support the 'small'
		 * code model of the ABI, this
		 * means that we can always
diff --git a/arm64/emit.c b/arm64/emit.c
index 2506eea..543c933 100644
--- a/arm64/emit.c
+++ b/arm64/emit.c
@@ -252,7 +252,8 @@ loadcon(Con *c, int r, int k, FILE *f)
	w = KWIDE(k);
	rn = rname(r, k);
	n = c->bits.i;
	if (c->type == CAddr) {
	switch (c->type) {
	case CAddr:
		rn = rname(r, Kl);
		if (n)
			sprintf(off, "+%"PRIi64, n);
@@ -264,6 +265,16 @@ loadcon(Con *c, int r, int k, FILE *f)
		fprintf(f, "\tadd\t%s, %s, #:lo12:%s%s%s\n",
			rn, rn, p, str(c->label), off);
		return;
	case CThreadLocal:
		rn = rname(r, Kl);
		fprintf(f, "\tmrs\t%s, tpidr_el0\n", rn);
		fprintf(f, "\tadd\t%s, %s, #:tprel_hi12:%s, lsl #12\n",
				rn, rn, str(c->label));
		fprintf(f, "\tadd\t%s, %s, #:tprel_lo12_nc:%s\n",
				rn, rn, str(c->label));
		return;
	default:
		break;
	}
	assert(c->type == CBits);
	if (!w)
diff --git a/doc/il.txt b/doc/il.txt
index 308fe45..f2b92fa 100644
--- a/doc/il.txt
+++ b/doc/il.txt
@@ -173,10 +173,11 @@ by zero-extension, or by sign-extension.

    `bnf
    CONST :=
        ['-'] NUMBER  # Decimal integer
      | 's_' FP       # Single-precision float
      | 'd_' FP       # Double-precision float
      | $IDENT        # Global symbol
        ['-'] NUMBER       # Decimal integer
      | 's_' FP            # Single-precision float
      | 'd_' FP            # Double-precision float
      | $IDENT             # Global symbol
      | threadlocal $IDENT # Thread-local global symbol

Throughout the IL, constants are specified with a unified
syntax and semantics.  Constants are immediates, meaning
@@ -212,6 +213,9 @@ Global symbols can also be used directly as constants;
they will be resolved and turned into actual numeric
constants by the linker.

Prefixing a global symbol with threadlocal will cause its
address to be loaded from thread-local storage.

- 4. Linkage
------------

@@ -220,6 +224,7 @@ constants by the linker.
        'export' [NL]
      | 'section' SECNAME [NL]
      | 'section' SECNAME SECFLAGS [NL]
      | 'threadlocal'

    SECNAME  := '"' .... '"'
    SECFLAGS := '"' .... '"'
@@ -253,6 +258,10 @@ The section and export linkage flags should each appear
at most once in a definition.  If multiple occurrences
are present, QBE is free to use any.

The threadlocal flag is shorthand for loading this symbol
into the appropriate section, .tdata or .tbss, with the
appropriate flags set.

- 5. Definitions
----------------

diff --git a/gas.c b/gas.c
index 865edba..92ffd7f 100644
--- a/gas.c
+++ b/gas.c
@@ -29,6 +29,12 @@ gasemitlnk(char *n, Lnk *l, char *s, FILE *f)
		fprintf(f, ".section %s", l->sec);
		if (l->secf)
			fprintf(f, ", %s", l->secf);
	} else if (l->istls) {
		if (strcmp(s, ".bss") == 0) {
			fprintf(f, ".section .tbss,\"awT\"");
		} else {
			fprintf(f, ".section .tdata,\"awT\"");
		}
	} else {
		fputs(s, f);
	}
diff --git a/load.c b/load.c
index 2e10a35..1a1084a 100644
--- a/load.c
+++ b/load.c
@@ -152,7 +152,8 @@ load(Slice sl, bits msk, Loc *l)
			break;
		case ACon:
		case ASym:
			c.type = CAddr;
		case ATLS:
			c.type = a->type == ATLS ? CThreadLocal : CAddr;
			c.label = a->label;
			c.bits.i = a->offset;
			c.local = 0;
diff --git a/parse.c b/parse.c
index 1912c8b..5e09a21 100644
--- a/parse.c
+++ b/parse.c
@@ -42,6 +42,7 @@ enum {
	Ttype,
	Tdata,
	Tsection,
	Tthreadlocal,
	Talign,
	Tl,
	Tw,
@@ -92,6 +93,7 @@ static char *kwmap[Ntok] = {
	[Ttype] = "type",
	[Tdata] = "data",
	[Tsection] = "section",
	[Tthreadlocal] = "threadlocal",
	[Talign] = "align",
	[Tl] = "l",
	[Tw] = "w",
@@ -403,6 +405,13 @@ parseref()
	case Tglo:
		c.type = CAddr;
		c.label = intern(tokval.str);
		goto Look;
	case Tthreadlocal:
		c.type = CThreadLocal;
		if (next() != Tglo)
			err("function name expected");
		c.label = intern(tokval.str);
		goto Look;
	Look:
		return newcon(&c, curf);
	default:
@@ -798,6 +807,9 @@ parsefn(Lnk *lnk)
	int i;
	PState ps;

	if (lnk->istls)
		err("TLS is not supported for function declarations");

	curb = 0;
	nblk = 0;
	curi = insb;
@@ -1073,6 +1085,8 @@ parselnk(Lnk *lnk)
		case Tsection:
			if (lnk->sec)
				err("only one section allowed");
			if (lnk->istls)
				err("threadlocal and section cannot co-exist");
			if (next() != Tstr)
				err("section \"name\" expected");
			lnk->sec = tokval.str;
@@ -1081,6 +1095,13 @@ parselnk(Lnk *lnk)
				lnk->secf = tokval.str;
			}
			break;
		case Tthreadlocal:
			if (lnk->istls)
				err("only one TLS directive allowed");
			if (lnk->sec)
				err("threadlocal and section cannot co-exist");
			lnk->istls = 1;
			break;
		default:
			if (haslnk)
			if (t != Tdata)
@@ -1138,6 +1159,9 @@ printcon(Con *c, FILE *f)
		if (c->bits.i)
			fprintf(f, "%+"PRIi64, c->bits.i);
		break;
	case CThreadLocal:
		fprintf(f, "threadlocal $%s", str(c->label));
		break;
	case CBits:
		if (c->flt == 1)
			fprintf(f, "s_%f", c->bits.s);
diff --git a/rv64/emit.c b/rv64/emit.c
index 2656c60..4cf189d 100644
--- a/rv64/emit.c
+++ b/rv64/emit.c
@@ -243,6 +243,14 @@ loadcon(Con *c, int r, int k, FILE *f)
		emitaddr(c, f);
		fputc('\n', f);
		break;
	case CThreadLocal:
		fprintf(f, "\tlui %s, %%tprel_hi(%s)\n",
				rn, str(c->label));
		fprintf(f, "\tadd %s, %s, tp, %%tprel_add(%s)\n",
				rn, rn, str(c->label));
		fprintf(f, "\taddi %s, %s, %%tprel_lo(%s)\n",
				rn, rn, str(c->label));
		break;
	case CBits:
		n = c->bits.i;
		if (!w)
diff --git a/test/tls.ssa b/test/tls.ssa
new file mode 100644
index 0000000..5975932
--- /dev/null
+++ b/test/tls.ssa
@@ -0,0 +1,23 @@
export function w $main() {
@start
	%thread =l alloc8 8
	%retval =l alloc8 8

	storew 1337, threadlocal $i
	call $pthread_create(l %thread, l 0, l $thread, l 0)

	%thread_id =l loadl %thread
	call $pthread_join(l %thread_id, l %retval)

	%ret =l loadl %retval
	%res =l cnel %ret, 42
	ret %res
}

function w $thread() {
@start
	%ret =l loadl threadlocal $i
	ret %ret
}

threadlocal data $i = { w 42 }
diff --git a/tools/lexh.c b/tools/lexh.c
index 8d0af21..5be376e 100644
--- a/tools/lexh.c
+++ b/tools/lexh.c
@@ -26,7 +26,7 @@ char *tok[] = {
	"vaarg", "vastart", "...", "env",

	"call", "phi", "jmp", "jnz", "ret", "export",
	"function", "type", "data", "section", "align",
	"function", "type", "data", "section", "threadlocal", "align",
	"l", "w", "h", "b", "d", "s", "z", "loadw", "loadl",
	"loads", "loadd", "alloc1", "alloc2",

diff --git a/tools/test.sh b/tools/test.sh
index 4653b83..37105df 100755
--- a/tools/test.sh
+++ b/tools/test.sh
@@ -22,7 +22,7 @@ init() {
	arm64)
		for p in aarch64-linux-musl aarch64-linux-gnu
		do
			cc="$p-gcc -no-pie -static"
			cc="$p-gcc -no-pie -static -lpthread"
			qemu="qemu-aarch64"
			if
				$cc -v >/dev/null 2>&1 &&
@@ -46,7 +46,7 @@ init() {
	rv64)
		for p in riscv64-linux-musl riscv64-linux-gnu
		do
			cc="$p-gcc -no-pie -static"
			cc="$p-gcc -no-pie -static -lpthread"
			qemu="qemu-riscv64"
			if
				$cc -v >/dev/null 2>&1 &&
@@ -73,13 +73,13 @@ init() {
			cc="cc -Wl,-no_pie"
			;;
		*OpenBSD*)
			cc="cc -nopie"
			cc="cc -nopie -lpthread"
			;;
		*FreeBSD*)
			cc="cc"
			cc="cc -lpthread"
			;;
		*)
			cc="${CC:-cc} -no-pie"
			cc="${CC:-cc} -no-pie -lpthread"
			testcc "$cc" || cc="${CC:-cc}"
			;;
		esac

base-commit: c8cd2824eae0137505fe46530c3a8e9788ab9a63
-- 
2.37.1
Drew DeVault <sir@cmpwn.com> wrote:
What do you think about still using CAddr and ASym here, but adding
new fields to Con and Alias indicating that they are threadlocal?