~mcf/cproc

add utf.*: a subset of sbase's libutf v3 PROPOSED

Nihal Jere
Nihal Jere: 2
 add utf.*: a subset of sbase's libutf
 handle prefixed string literals

 18 files changed, 606 insertions(+), 58 deletions(-)
Thanks! This is shaping up pretty well.

On 2021-04-14, Nihal Jere <nihal@nihaljere.xyz> wrote:
Next
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~mcf/cproc/patches/22042/mbox | git am -3
Learn more about email & git
View this thread in the archives

[PATCH v3 1/2] add utf.*: a subset of sbase's libutf Export this patch

Nihal Jere
Also I wrote some functions for UTF-16.
---
 Makefile |   1 +
 utf.c    | 239 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 utf.h    |  44 ++++++++++
 3 files changed, 284 insertions(+)
 create mode 100644 utf.c
 create mode 100644 utf.h

diff --git a/Makefile b/Makefile
index f53225d..4cf0b0d 100644
--- a/Makefile
+++ b/Makefile
@@ -37,6 +37,7 @@ SRC=\
	token.c\
	tree.c\
	type.c\
	utf.c\
	util.c\
	$(BACKEND).c
OBJ=$(SRC:%.c=$(objdir)/%.o)
diff --git a/utf.c b/utf.c
new file mode 100644
index 0000000..ba9c6a6
--- /dev/null
+++ b/utf.c
@@ -0,0 +1,239 @@
/* MIT/X Consortium Copyright (c) 2012 Connor Lane Smith <cls@lubutu.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include <string.h>
#include "utf.h"

/* Smaller of x and y. NOTE: the selected argument is evaluated twice, so
 * do not pass expressions with side effects. */
#define MIN(x,y)  ((x) < (y) ? (x) : (y))

/*
 * Length in bytes of the UTF-8 sequence announced by lead byte x, judged
 * from its high bits only. Evaluates to 0 for a continuation byte
 * (10xxxxxx) and for 0xFE/0xFF, none of which can start a sequence.
 * Only bits selected by the masks are examined, so a negative (signed)
 * char argument is classified like the corresponding unsigned byte.
 * NOTE: x is evaluated several times.
 */
#define UTFSEQ(x) ((((x) & 0x80) == 0x00) ? 1 /* 0xxxxxxx */ \
                 : (((x) & 0xC0) == 0x80) ? 0 /* 10xxxxxx */ \
                 : (((x) & 0xE0) == 0xC0) ? 2 /* 110xxxxx */ \
                 : (((x) & 0xF0) == 0xE0) ? 3 /* 1110xxxx */ \
                 : (((x) & 0xF8) == 0xF0) ? 4 /* 11110xxx */ \
                 : (((x) & 0xFC) == 0xF8) ? 5 /* 111110xx */ \
                 : (((x) & 0xFE) == 0xFC) ? 6 /* 1111110x */ \
                                          : 0 )

/*
 * Nonzero when x is a value this file refuses to encode or accept:
 * negative or above Runemax, low 16 bits equal to 0xFFFE or 0xFFFF,
 * inside the surrogate range 0xD800-0xDFFF, or inside 0xFDD0-0xFDEF.
 * NOTE: x is evaluated several times.
 */
#define BADRUNE(x) ((x) < 0 || (x) > Runemax \
                || ((x) & 0xFFFE) == 0xFFFE \
                || ((x) >= 0xD800 && (x) <= 0xDFFF) \
                || ((x) >= 0xFDD0 && (x) <= 0xFDEF))

/*
 * Encode the rune *p as UTF-8 into s.
 *
 * Returns the number of bytes stored (1 to 4), or 0 without touching s
 * when runelen() reports the rune as unencodable. No terminator is
 * written.
 */
int
runetochar(char *s, const Rune *p)
{
	/* lead-byte tag per sequence length; slots 0 and 1 are never used */
	static const unsigned char tag[] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };
	Rune bits = *p;
	int n = runelen(bits);
	int k;

	if(n < 1 || n > 4)
		return 0; /* error */

	if(n == 1) { /* 0aaaaaaa */
		s[0] = bits;
		return 1;
	}

	/* continuation bytes (10xxxxxx) are filled last to first,
	 * each taking the next six low-order bits */
	for(k = n - 1; k > 0; k--) {
		s[k] = 0x80 | (bits & 0x00003F);
		bits >>= 6;
	}

	/* the lead byte carries 5, 4 or 3 payload bits for n = 2, 3, 4 */
	s[0] = tag[n] | (bits & (0x3F >> (n - 1)));
	return n;
}

/*
 * Encode the rune *p as UTF-16 code units into s.
 *
 * Returns the number of units stored: 1 for a rune that fits a single
 * unit, 2 for a surrogate pair, or 0 without touching s when runelen16()
 * rejects the rune.
 */
int
runetochar16(unsigned short *s, const Rune *p)
{
	const Rune r = *p;
	const int units = runelen16(r);

	if(units == 1) {
		s[0] = r;
		return 1;
	}
	if(units == 2) {
		/* high surrogate: top ten bits of the offset from 0x10000 */
		s[0] = 0xD800 | ((r - 0x10000) >> 10);
		/* low surrogate: bottom ten bits (unchanged by the offset) */
		s[1] = 0xDC00 | (r & 0x3FF);
		return 2;
	}
	return 0; /* error */
}

/*
 * Encode the n runes starting at p as UTF-8 into s, back to back.
 *
 * No terminator is appended. The caller must provide a buffer large
 * enough for the whole encoding; for runes that all encode successfully
 * that is runenlen(p, n) bytes.
 *
 * Returns the number of bytes written, or -1 as soon as runetochar()
 * refuses a rune. Bytes produced before the failure are left in s.
 */
int
runestochars(char *s, const Rune *p, size_t n)
{
	char *out = s;
	size_t i;
	int len;

	/* The index is a size_t like n: an int counter would be converted
	 * to unsigned for the comparison and overflow for n > INT_MAX. */
	for (i = 0; i < n; i++) {
		len = runetochar(out, &p[i]);
		if (len == 0)
			return -1;
		out += len;
	}

	return out - s;
}

/*
 * Encode the n runes starting at p as UTF-16 code units into s, back to
 * back.
 *
 * No terminator is appended. The caller must provide a buffer large
 * enough for the whole encoding; for runes that all encode successfully
 * that is runenlen16(p, n) units.
 *
 * Returns the number of units written, or -1 as soon as runetochar16()
 * refuses a rune. Units produced before the failure are left in s.
 */
int
runestochar16s(unsigned short *s, const Rune *p, size_t n)
{
	unsigned short *out = s;
	size_t i;
	int len;

	/* The index is a size_t like n: an int counter would be converted
	 * to unsigned for the comparison and overflow for n > INT_MAX. */
	for (i = 0; i < n; i++) {
		len = runetochar16(out, &p[i]);
		if (len == 0)
			return -1;
		out += len;
	}

	return out - s;
}

/*
 * Decode one rune from the start of s into *p.
 *
 * Thin wrapper around charntorune() with a length limit of UTFmax, so up
 * to UTFmax bytes of s may be examined; decoding stops at the first byte
 * that is not a continuation byte (which includes a terminating NUL).
 * Returns whatever charntorune() returns: the number of bytes consumed.
 */
int
chartorune(Rune *p, const char *s)
{
	return charntorune(p, s, UTFmax);
}

/*
 * Decode one rune from the first len bytes of s into *p.
 *
 * Returns the number of bytes consumed:
 *   - 0 when len is 0 or the sequence is cut short by len; *p is then
 *     left unmodified;
 *   - 1 with *p = Runeerror when s[0] cannot start a sequence;
 *   - the count of bytes that were well-formed, with *p = Runeerror,
 *     when a continuation byte is missing;
 *   - the full sequence length otherwise, with *p set to the decoded
 *     rune, or to Runeerror if the value is invalid or the encoding is
 *     overlong.
 */
int
charntorune(Rune *p, const char *s, size_t len)
{
	unsigned int seqlen, k;
	size_t avail;
	Rune acc;

	if(len == 0) /* s[0] itself is out of bounds */
		return 0;

	seqlen = UTFSEQ(s[0]);
	if(seqlen == 0) { /* stray continuation byte, or 0xFE/0xFF */
		*p = Runeerror;
		return 1;
	}

	/* payload of the lead byte: the whole byte for plain ASCII,
	 * otherwise the 7 - seqlen bits below the length marker */
	acc = (seqlen == 1) ? s[0] : (s[0] & (0xFF >> (seqlen + 1)));

	/* fold in as many continuation bytes as both the sequence and the
	 * caller's limit allow; six bits each, so even a 6 byte sequence
	 * tops out at 31 bits */
	avail = MIN(seqlen, len);
	for(k = 1; k < avail; k++) {
		if((s[k] & 0xC0) != 0x80) { /* not 10xxxxxx */
			*p = Runeerror;
			return k;
		}
		acc = (acc << 6) | (s[k] & 0x3F);
	}

	if(k < seqlen) /* stopped by len before the sequence ended */
		return 0;

	/* invalid values and overlong encodings both become Runeerror */
	if(BADRUNE(acc) || runelen(acc) < (int)seqlen)
		acc = Runeerror;

	*p = acc;
	return seqlen;
}

/*
 * Number of bytes the UTF-8 encoding of r occupies (1 to 4), or 0 when
 * BADRUNE() rejects r.
 */
int
runelen(Rune r)
{
	if(BADRUNE(r))
		return 0; /* error */

	/* test from the widest encoding down */
	if(r > 0xFFFF)
		return 4;
	if(r > 0x07FF)
		return 3;
	if(r > 0x7F)
		return 2;
	return 1;
}

/*
 * Number of UTF-16 code units r occupies: 1 up to 0xFFFF, 2 (a surrogate
 * pair) above that, or 0 when BADRUNE() rejects r.
 */
int
runelen16(Rune r)
{
	if(BADRUNE(r))
		return 0; /* error */

	return (r > 0xFFFF) ? 2 : 1;
}

/*
 * Sum of runelen() over the len runes at p, i.e. the size in bytes of
 * their UTF-8 encoding. Runes that runelen() rejects contribute 0.
 */
size_t
runenlen(const Rune *p, size_t len)
{
	size_t total = 0;

	while(len-- > 0)
		total += runelen(*p++);
	return total;
}

/*
 * Sum of runelen16() over the len runes at p, i.e. the size in code
 * units of their UTF-16 encoding. Runes that runelen16() rejects
 * contribute 0.
 */
size_t
runenlen16(const Rune *p, size_t len)
{
	size_t total = 0;

	while(len-- > 0)
		total += runelen16(*p++);
	return total;
}

/*
 * Report whether charntorune() can consume at least one byte from the
 * first len bytes of s. Yields 1 if so, 0 otherwise (len is 0 or the
 * sequence is cut short by len). The decoded value is discarded.
 */
int
fullrune(const char *s, size_t len)
{
	Rune scratch;
	int used;

	used = charntorune(&scratch, s, len);
	return used > 0;
}

/*
 * Count the runes in the NUL-terminated string s. Every chartorune()
 * step counts as one rune, including steps that decode to Runeerror.
 */
size_t
utflen(const char *s)
{
	size_t count = 0;
	Rune r;

	while(*s != '\0') {
		s += chartorune(&r, s);
		count++;
	}
	return count;
}

/*
 * Count the runes in the first len bytes of s. Counting stops at a NUL
 * rune, at the end of the buffer, or at a sequence that len cuts short.
 */
size_t
utfnlen(const char *s, size_t len)
{
	size_t count = 0, used = 0;
	Rune r;
	int n;

	for(;;) {
		n = charntorune(&r, s + used, len - used);
		/* r is only meaningful when something was consumed */
		if(n == 0 || r == '\0')
			break;
		used += n;
		count++;
	}
	return count;
}
diff --git a/utf.h b/utf.h
new file mode 100644
index 0000000..c69d3cd
--- /dev/null
+++ b/utf.h
@@ -0,0 +1,44 @@
/* MIT/X Consortium Copyright (c) 2012 Connor Lane Smith <cls@lubutu.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include <stdio.h>

/* A single code point. Signed, so negative values exist and are treated
 * as invalid by the encoding routines. */
typedef int Rune;

/* NOTE: UTFmax is the decoder's look-ahead limit (lead bytes announcing
 * up to 6 bytes are recognised); the encoder in utf.c never emits more
 * than 4 bytes, and anything above Runemax decodes to Runeerror. */
enum {
	UTFmax    = 6,       /* maximum bytes per rune */
	Runeself  = 0x80,    /* rune and utf are equal (<) */
	Runeerror = 0xFFFD,  /* decoding error in utf */
	Runemax   = 0x10FFFF /* maximum rune value */
};

/* Encode one rune as UTF-8 / UTF-16; return units written, 0 on an
 * invalid rune. */
int runetochar(char *, const Rune *);
int runetochar16(unsigned short *, const Rune *);
/* Encode an array of runes (third argument: rune count); return units
 * written, or -1 on the first invalid rune. No terminator is written. */
int runestochars(char *, const Rune *, size_t);
int runestochar16s(unsigned short *, const Rune *, size_t);
/* Decode one rune from UTF-8; return bytes consumed. charntorune() looks
 * at no more than the given number of bytes and returns 0 if they do not
 * hold a complete sequence; malformed input yields Runeerror. */
int chartorune(Rune *, const char *);
int charntorune(Rune *, const char *, size_t);
/* Units needed to encode one rune as UTF-8 / UTF-16; 0 if invalid. */
int runelen(Rune);
int runelen16(Rune);
/* Total units needed to encode an array of runes as UTF-8 / UTF-16. */
size_t runenlen(const Rune *, size_t);
size_t runenlen16(const Rune *, size_t);
/* Nonzero if the given bytes start with a decodable sequence, i.e.
 * charntorune() would consume at least one byte. */
int fullrune(const char *, size_t);
/* Number of runes in a NUL-terminated string / in at most the given
 * number of bytes. */
size_t utflen(const char *);
size_t utfnlen(const char *, size_t);
-- 
2.31.1

[PATCH v3 2/2] handle prefixed string literals Export this patch

Nihal Jere
---
The patch now handles initializers inside structs and functions
properly, and the tests for these have been updated for wide strings as
well.

I'm not sure my change in parseinit is correct, but it seems to work
well in my testing.

While testing, I found that gcc treats wide prefixed literals as type
int, and complains when you try to assign it to an unsigned int array,
even though C11 says that they should be unsigned. Maybe it's different
for C99?
 cc.h                                 |  6 +-
 decl.c                               |  6 +-
 expr.c                               | 83 ++++++++++++++++++++++-----
 init.c                               | 11 ++--
 qbe.c                                | 58 +++++++++++++------
 test/hello.qbe                       |  2 +-
 test/initializer-replace-local.c     | 10 ++++
 test/initializer-replace-local.qbe   | 51 +++++++++++++++--
 test/initializer-replace-static.c    | 14 +++++
 test/initializer-replace-static.qbe  |  4 +-
 test/initializer-string-array.qbe    | 34 +++++++++--
 test/initializer-string-braces.qbe   |  2 +-
 test/initializer-string.c            | 13 ++++-
 test/initializer-string.qbe          | 84 +++++++++++++++++++++++++++-
 test/initializer-unsigned-string.qbe |  2 +-
 15 files changed, 322 insertions(+), 58 deletions(-)

diff --git a/cc.h b/cc.h
index 3df0880..f10d4b6 100644
--- a/cc.h
+++ b/cc.h
@@ -332,7 +332,11 @@ struct expr {
			double f;
		} constant;
		struct {
			char *data;
			union {
				char *data8;
				unsigned short *data16;
				unsigned *data32;
			};
			size_t size;
		} string;
		struct {
diff --git a/decl.c b/decl.c
index 94e4b15..927700b 100644
--- a/decl.c
+++ b/decl.c
@@ -872,8 +872,8 @@ staticassert(struct scope *s)
		e = assignexpr(s);
		if (!e->decayed || e->base->kind != EXPRSTRING)
			error(&tok.loc, "expected string literal after static assertion expression");
		if (!c)
			error(&tok.loc, "static assertion failed: %.*s", (int)e->base->string.size, e->base->string.data);
		if (!c && e->base->type->base->size == 1)
			error(&tok.loc, "static assertion failed: %.*s", (int)e->base->string.size, e->base->string.data8);
	} else if (!c) {
		error(&tok.loc, "static assertion failed");
	}
@@ -1027,7 +1027,7 @@ struct decl *stringdecl(struct expr *expr)
	if (!strings)
		strings = mkmap(64);
	assert(expr->kind == EXPRSTRING);
	mapkey(&key, expr->string.data, expr->string.size);
	mapkey(&key, expr->string.data8, expr->string.size);
	entry = mapput(strings, &key);
	d = *entry;
	if (!d) {
diff --git a/expr.c b/expr.c
index 8b10bf4..b7682fa 100644
--- a/expr.c
+++ b/expr.c
@@ -7,6 +7,7 @@
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "utf.h"
#include "util.h"
#include "cc.h"

@@ -366,7 +367,8 @@ isodigit(int c)
static int
unescape(char **p)
{
	int c;
	Rune c;
	int ret;
	char *s = *p;

	if (*s == '\\') {
@@ -397,7 +399,9 @@ unescape(char **p)
			while (isodigit(*++s));
		}
	} else {
		c = *s++;
		if ((ret = chartorune(&c, s)) == Runeerror)
			error(&tok.loc, "invalid UTF-8 string");
		s += ret;
	}
	*p = s;
	return c;
@@ -456,8 +460,10 @@ primaryexpr(struct scope *s)
	struct expr *e;
	struct decl *d;
	struct type *t;
	char *src, *dst, *end;
	char *src, *end, p = 0, np;
	Rune *dst, *tmp;
	int base;
	size_t tmpsize, count;

	switch (tok.kind) {
	case TIDENT:
@@ -473,22 +479,71 @@ primaryexpr(struct scope *s)
		next();
		break;
	case TSTRINGLIT:
		e = mkexpr(EXPRSTRING, mkarraytype(&typechar, QUALNONE, 0));
		e->lvalue = true;
		e->string.size = 0;
		e->string.data = NULL;
		tmp = NULL;
		tmpsize = 0;
		do {
			e->string.data = xreallocarray(e->string.data, e->string.size + strlen(tok.lit), 1);
			dst = e->string.data + e->string.size;
			src = tok.lit;
			if (*src != '"')
				fatal("wide string literal not yet implemented");
			switch (*src) {
			default: error(&tok.loc, "invalid prefix for string literal");
			case 'L': ++src; np = 'w'; goto typeknown;
			case 'U': ++src; np = 'U'; goto typeknown;
			case '"':        np = 'c'; goto typeknown;
			case 'u': ++src;
			}
			switch (*src) {
			default: error(&tok.loc, "invalid prefix for string literal");
			case '"':        np = 'u'; break;
			case '8': ++src; np = '8';
			}
typeknown:
			if (p && p != np && !(p == 'c' || np == 'c'))
				error(&tok.loc, "cannot have adjacent string literals with different prefixes");
			if (!p || np != 'c')
				p = np;
			tmp = xreallocarray(tmp, tmpsize + utflen(tok.lit), sizeof(Rune));
			dst = tmp + tmpsize;
			for (++src; *src != '"'; ++dst)
				*dst = unescape(&src);
			e->string.size = dst - e->string.data;
			tmpsize = dst - tmp;
			next();
		} while (tok.kind == TSTRINGLIT);
		e->type->array.length = e->string.size + 1;
		switch (p) {
		case 'c':
		case '8': t = &typechar;       break;
		case 'w': t = targ->typewchar; break;
		case 'u': t = &typeushort;     break;
		case 'U': t = &typeuint;       break;
		}
		e = mkexpr(EXPRSTRING, mkarraytype(t, QUALNONE, 0));
		e->lvalue = true;
		e->string.data8 = NULL;
		switch (p) {
		case 'c':
		case '8': e->string.size = runenlen(tmp, tmpsize);          break;
		case 'u': e->string.size = runenlen16(tmp, tmpsize);        break;
		case 'U':
		case 'w': e->string.size = tmpsize; e->string.data32 = tmp; break;
		default: assert(0);
		}
		/* account for null terminator */
		e->string.size += 1;
		e->string.data8 = xreallocarray(e->string.data8, e->string.size, t->size);
		switch (p) {
		case 'c':
		case '8': count = runestochars(e->string.data8, tmp, tmpsize);
		          e->string.data8[e->string.size-1] = 0; break;
		case 'u': count = runestochar16s(e->string.data16, tmp, tmpsize);
		          e->string.data16[e->string.size-1] = 0; break;
		case 'U': 
		case 'w': e->string.data32[e->string.size-1] = 0; goto postconvert;
		default: assert(0);
		}
		if (count == -1)
			error(&tok.loc, "string literal contains invalid multibyte sequence");
		assert(count == e->string.size - 1);
		free(tmp);
postconvert:
		e->type->array.length = e->string.size;
		e->type->size = e->type->array.length * e->type->base->size;
		e->type->incomplete = false;
		e = decay(e);
@@ -620,7 +675,7 @@ builtinfunc(struct scope *s, enum builtinkind kind)
		break;
	case BUILTINNANF:
		e = assignexpr(s);
		if (!e->decayed || e->base->kind != EXPRSTRING || e->base->string.size > 0)
		if (!e->decayed || e->base->kind != EXPRSTRING || e->base->string.size > 1)
			error(&tok.loc, "__builtin_nanf currently only supports empty string literals");
		e = mkexpr(EXPRCONST, &typefloat);
		/* TODO: use NAN here when we can handle musl's math.h */
diff --git a/init.c b/init.c
index 0e890c0..f0d44ba 100644
--- a/init.c
+++ b/init.c
@@ -246,12 +246,15 @@ parseinit(struct scope *s, struct type *t)
				if (!expr->decayed || expr->base->kind != EXPRSTRING)
					break;
				base = t->base;
				/* XXX: wide string literals */
				if (!(base->prop & PROPCHAR))
					break;
				while (base->base)
					base = base->base;
				expr = expr->base;
				if (base->kind != expr->type->base->kind) {
					error(&tok.loc, "cannot initialize array with string literal of different width");
					break;
				}
				if (t->incomplete)
					updatearray(t, expr->string.size);
					updatearray(t, expr->string.size - 1);
				goto add;
			case TYPESTRUCT:
			case TYPEUNION:
diff --git a/qbe.c b/qbe.c
index 1c4af9e..849301c 100644
--- a/qbe.c
+++ b/qbe.c
@@ -945,7 +945,7 @@ funcinit(struct func *func, struct decl *d, struct init *init)
	struct lvalue dst;
	struct value *src, *v;
	uint64_t offset = 0, max = 0;
	size_t i;
	size_t i, w;

	funcalloc(func, d);
	if (!init)
@@ -954,13 +954,18 @@ funcinit(struct func *func, struct decl *d, struct init *init)
		zero(func, d->value, d->type->align, offset, init->start);
		dst.bits = init->bits;
		if (init->expr->kind == EXPRSTRING) {
			for (i = 0; i < init->expr->string.size && i < init->end - init->start; ++i) {
				v = mkintconst(&iptr, init->start + i);
			w = init->expr->type->base->size;
			for (i = 0; i < init->expr->string.size && i < (init->end - init->start)/w; ++i) {
				v = mkintconst(&iptr, init->start + i*w);
				dst.addr = funcinst(func, IADD, &iptr, d->value, v);
				v = mkintconst(&i8, init->expr->string.data[i]);
				funcstore(func, &typechar, QUALNONE, dst, v);
				switch (w) {
					case 1: v = mkintconst(&i8, init->expr->string.data8[i] & 0xFF); break;
					case 2: v = mkintconst(&i16, init->expr->string.data16[i] & 0xFFFF); break;
					case 4: v = mkintconst(&i32, init->expr->string.data32[i] & 0xFFFFFFFF); break;
				}
				funcstore(func, init->expr->type->base, QUALNONE, dst, v);
			}
			offset = init->start + i;
			offset = init->start + i*w;
		} else {
			if (offset < init->end && (dst.bits.before || dst.bits.after))
				zero(func, d->value, d->type->align, offset, init->end);
@@ -1251,7 +1256,7 @@ static void
dataitem(struct expr *expr, uint64_t size)
{
	struct decl *decl;
	size_t i;
	size_t i, w;
	char c;

	switch (expr->kind) {
@@ -1280,17 +1285,28 @@ dataitem(struct expr *expr, uint64_t size)
			printf("%" PRIu64, expr->constant.i);
		break;
	case EXPRSTRING:
		fputc('"', stdout);
		for (i = 0; i < expr->string.size && i < size; ++i) {
			c = expr->string.data[i];
			if (isprint(c) && c != '"' && c != '\\')
				putchar(c);
			else
				printf("\\%03hho", c);
		w = expr->type->base->size;
		if (w == 1) {
			fputc('"', stdout);
			for (i = 0; i < expr->string.size && i < size; ++i) {
				c = expr->string.data8[i];
				if (isprint(c) && c != '"' && c != '\\')
					putchar(c);
				else
					printf("\\%03hho", c);
			}
			fputc('"', stdout);
		} else {
			for (i = 0; i < expr->string.size && i*w < size; ++i) {
				switch (w) {
				case 2: printf("%hu ", expr->string.data16[i]); break;
				case 4: printf("%u ", expr->string.data32[i]);  break;
				default: assert(0);
				}
			}
		}
		fputc('"', stdout);
		if (i < size)
			printf(", z %" PRIu64, size - i);
		if (i*w < size)
			printf(", z %" PRIu64, size - i*w);
		break;
	default:
		error(&tok.loc, "initializer is not a constant expression");
@@ -1303,6 +1319,7 @@ emitdata(struct decl *d, struct init *init)
	struct init *cur;
	struct type *t;
	uint64_t offset = 0, start, end, bits = 0;
	size_t idx;

	if (!d->align)
		d->align = d->type->align;
@@ -1326,7 +1343,12 @@ emitdata(struct decl *d, struct init *init)
			*/
			assert(cur->expr->kind == EXPRSTRING);
			assert(init->expr->kind == EXPRCONST);
			cur->expr->string.data[init->start - cur->start] = init->expr->constant.i;
			idx = (init->start - cur->start) / cur->expr->type->base->size;
			switch (cur->expr->type->base->size) {
			case 1: cur->expr->string.data8[idx]  = init->expr->constant.i; break;
			case 2: cur->expr->string.data16[idx] = init->expr->constant.i; break;
			case 4: cur->expr->string.data32[idx] = init->expr->constant.i; break;
			}
		}
		start = cur->start + cur->bits.before / 8;
		end = cur->end - (cur->bits.after + 7) / 8;
diff --git a/test/hello.qbe b/test/hello.qbe
index 3e695ab..1826315 100644
--- a/test/hello.qbe
+++ b/test/hello.qbe
@@ -1,4 +1,4 @@
data $.Lstring.2 = align 1 { b "hello", z 1, }
data $.Lstring.2 = align 1 { b "hello\000", }
export
function w $main() {
@start.1
diff --git a/test/initializer-replace-local.c b/test/initializer-replace-local.c
index 8b93ef2..be6a58f 100644
--- a/test/initializer-replace-local.c
+++ b/test/initializer-replace-local.c
@@ -1,10 +1,20 @@
void f(void) {
	struct {
		char s[6];
		unsigned short t[6];
		unsigned u[6];
	} x = {
		.s[0] = 'x',
		.s[4] = 'y',
		.s = "hello",
		.s[1] = 'a',
		.t[0] = u'x',
		.t[4] = u'y',
		.t = u"hello",
		.t[1] = u'a',
		.u[0] = U'x',
		.u[4] = U'y',
		.u = U"hello",
		.u[1] = u'a',
	};
}
diff --git a/test/initializer-replace-local.qbe b/test/initializer-replace-local.qbe
index 72ad90a..480773b 100644
--- a/test/initializer-replace-local.qbe
+++ b/test/initializer-replace-local.qbe
@@ -1,7 +1,7 @@
export
function $f() {
@start.1
	%.1 =l alloc4 6
	%.1 =l alloc4 44
@body.2
	%.2 =l add %.1, 0
	storeb 104, %.2
@@ -13,10 +13,49 @@ function $f() {
	storeb 108, %.5
	%.6 =l add %.1, 4
	storeb 111, %.6
	%.7 =l add %.1, 1
	%.8 =w copy 97
	storeb %.8, %.7
	%.9 =l add %.1, 5
	storeb 0, %.9
	%.7 =l add %.1, 5
	storeb 0, %.7
	%.8 =l add %.1, 1
	%.9 =w copy 97
	storeb %.9, %.8
	%.10 =l add %.1, 2
	storeh 0, %.10
	%.11 =l add %.1, 4
	storew 0, %.11
	%.12 =l add %.1, 6
	storeh 104, %.12
	%.13 =l add %.1, 8
	storeh 101, %.13
	%.14 =l add %.1, 10
	storeh 108, %.14
	%.15 =l add %.1, 12
	storeh 108, %.15
	%.16 =l add %.1, 14
	storeh 111, %.16
	%.17 =l add %.1, 16
	storeh 0, %.17
	%.18 =l add %.1, 8
	storeh 97, %.18
	%.19 =l add %.1, 10
	storeh 0, %.19
	%.20 =l add %.1, 12
	storew 0, %.20
	%.21 =l add %.1, 16
	storew 0, %.21
	%.22 =l add %.1, 20
	storew 104, %.22
	%.23 =l add %.1, 24
	storew 101, %.23
	%.24 =l add %.1, 28
	storew 108, %.24
	%.25 =l add %.1, 32
	storew 108, %.25
	%.26 =l add %.1, 36
	storew 111, %.26
	%.27 =l add %.1, 40
	storew 0, %.27
	%.28 =l add %.1, 24
	%.29 =w extuh 97
	storew %.29, %.28
	ret
}
diff --git a/test/initializer-replace-static.c b/test/initializer-replace-static.c
index c1fa376..a6839c9 100644
--- a/test/initializer-replace-static.c
+++ b/test/initializer-replace-static.c
@@ -4,3 +4,17 @@ struct {
	.s = "hello",
	.s[1] = 'a',
};

struct {
	unsigned l[5];
} y = {
	.l = L"a😐Ϩ€",
	.l[1] = L'😃',
};

struct {
	unsigned short u[6];
} z = {
	.u = u"a😐Ϩ€",
	.u[1] = u'😃',
};
diff --git a/test/initializer-replace-static.qbe b/test/initializer-replace-static.qbe
index 18b774e..01d7435 100644
--- a/test/initializer-replace-static.qbe
+++ b/test/initializer-replace-static.qbe
@@ -1 +1,3 @@
export data $x = align 1 { b "hallo", z 1, }
export data $x = align 1 { b "hallo\000", }
export data $y = align 4 { w 97 128515 1000 8364 0 , }
export data $z = align 2 { h 97 62979 56848 1000 8364 0 , }
diff --git a/test/initializer-string-array.qbe b/test/initializer-string-array.qbe
index 87a03fd..cfae412 100644
--- a/test/initializer-string-array.qbe
+++ b/test/initializer-string-array.qbe
@@ -1,7 +1,7 @@
export
function $f() {
@start.1
	%.1 =l alloc4 8
	%.1 =l alloc4 16
@body.2
	%.2 =l add %.1, 0
	storeb 97, %.2
@@ -11,13 +11,37 @@ function $f() {
	storeb 99, %.4
	%.5 =l add %.1, 3
	storeb 0, %.5
	%.6 =l add %.1, 4
	%.6 =l add %.1, 0
	storeb 120, %.6
	%.7 =l add %.1, 5
	%.7 =l add %.1, 1
	storeb 121, %.7
	%.8 =l add %.1, 6
	%.8 =l add %.1, 2
	storeb 122, %.8
	%.9 =l add %.1, 7
	%.9 =l add %.1, 3
	storeb 0, %.9
	%.10 =l add %.1, 4
	storeb 0, %.10
	%.11 =l add %.1, 5
	storeb 0, %.11
	%.12 =l add %.1, 6
	storeb 0, %.12
	%.13 =l add %.1, 7
	storeb 0, %.13
	%.14 =l add %.1, 8
	storeb 0, %.14
	%.15 =l add %.1, 9
	storeb 0, %.15
	%.16 =l add %.1, 10
	storeb 0, %.16
	%.17 =l add %.1, 11
	storeb 0, %.17
	%.18 =l add %.1, 12
	storeb 0, %.18
	%.19 =l add %.1, 13
	storeb 0, %.19
	%.20 =l add %.1, 14
	storeb 0, %.20
	%.21 =l add %.1, 15
	storeb 0, %.21
	ret
}
diff --git a/test/initializer-string-braces.qbe b/test/initializer-string-braces.qbe
index 46be123..5d44117 100644
--- a/test/initializer-string-braces.qbe
+++ b/test/initializer-string-braces.qbe
@@ -1 +1 @@
export data $s = align 1 { b "abc", z 1, }
export data $s = align 1 { b "abc\000", }
diff --git a/test/initializer-string.c b/test/initializer-string.c
index c92f897..d45da39 100644
--- a/test/initializer-string.c
+++ b/test/initializer-string.c
@@ -1,4 +1,13 @@
char x[] = "hello";
char a[] = "hello";
char b[] = u8"سلام عليكم";
unsigned short c[] = u"नमस्ते";
unsigned d[] = U"Привет";
unsigned e[] = L"你好";

void f(void) {
	char y[] = "hello";
	char v[] = "hello";
    char w[] = u8"سلام عليكم";
    unsigned short x[] = u"नमस्ते";
    unsigned y[] = U"Привет";
    unsigned z[] = L"你好";
}
diff --git a/test/initializer-string.qbe b/test/initializer-string.qbe
index ba992da..b0e583b 100644
--- a/test/initializer-string.qbe
+++ b/test/initializer-string.qbe
@@ -1,8 +1,16 @@
export data $x = align 1 { b "hello", z 1, }
export data $a = align 1 { b "hello\000", }
export data $b = align 1 { b "\330\263\331\204\330\247\331\205 \330\271\331\204\331\212\331\203\331\205\000", }
export data $c = align 2 { h 2344 2350 2360 2381 2340 2375 0 , }
export data $d = align 4 { w 1055 1088 1080 1074 1077 1090 0 , }
export data $e = align 4 { w 20320 22909 0 , }
export
function $f() {
@start.1
	%.1 =l alloc4 6
	%.8 =l alloc4 20
	%.29 =l alloc4 14
	%.37 =l alloc4 28
	%.45 =l alloc4 12
@body.2
	%.2 =l add %.1, 0
	storeb 104, %.2
@@ -16,5 +24,79 @@ function $f() {
	storeb 111, %.6
	%.7 =l add %.1, 5
	storeb 0, %.7
	%.9 =l add %.8, 0
	storeb 216, %.9
	%.10 =l add %.8, 1
	storeb 179, %.10
	%.11 =l add %.8, 2
	storeb 217, %.11
	%.12 =l add %.8, 3
	storeb 132, %.12
	%.13 =l add %.8, 4
	storeb 216, %.13
	%.14 =l add %.8, 5
	storeb 167, %.14
	%.15 =l add %.8, 6
	storeb 217, %.15
	%.16 =l add %.8, 7
	storeb 133, %.16
	%.17 =l add %.8, 8
	storeb 32, %.17
	%.18 =l add %.8, 9
	storeb 216, %.18
	%.19 =l add %.8, 10
	storeb 185, %.19
	%.20 =l add %.8, 11
	storeb 217, %.20
	%.21 =l add %.8, 12
	storeb 132, %.21
	%.22 =l add %.8, 13
	storeb 217, %.22
	%.23 =l add %.8, 14
	storeb 138, %.23
	%.24 =l add %.8, 15
	storeb 217, %.24
	%.25 =l add %.8, 16
	storeb 131, %.25
	%.26 =l add %.8, 17
	storeb 217, %.26
	%.27 =l add %.8, 18
	storeb 133, %.27
	%.28 =l add %.8, 19
	storeb 0, %.28
	%.30 =l add %.29, 0
	storeh 2344, %.30
	%.31 =l add %.29, 2
	storeh 2350, %.31
	%.32 =l add %.29, 4
	storeh 2360, %.32
	%.33 =l add %.29, 6
	storeh 2381, %.33
	%.34 =l add %.29, 8
	storeh 2340, %.34
	%.35 =l add %.29, 10
	storeh 2375, %.35
	%.36 =l add %.29, 12
	storeh 0, %.36
	%.38 =l add %.37, 0
	storew 1055, %.38
	%.39 =l add %.37, 4
	storew 1088, %.39
	%.40 =l add %.37, 8
	storew 1080, %.40
	%.41 =l add %.37, 12
	storew 1074, %.41
	%.42 =l add %.37, 16
	storew 1077, %.42
	%.43 =l add %.37, 20
	storew 1090, %.43
	%.44 =l add %.37, 24
	storew 0, %.44
	%.46 =l add %.45, 0
	storew 20320, %.46
	%.47 =l add %.45, 4
	storew 22909, %.47
	%.48 =l add %.45, 8
	storew 0, %.48
	ret
}
diff --git a/test/initializer-unsigned-string.qbe b/test/initializer-unsigned-string.qbe
index 46be123..5d44117 100644
--- a/test/initializer-unsigned-string.qbe
+++ b/test/initializer-unsigned-string.qbe
@@ -1 +1 @@
export data $s = align 1 { b "abc", z 1, }
export data $s = align 1 { b "abc\000", }
-- 
2.31.1
Thanks! This is shaping up pretty well.

On 2021-04-14, Nihal Jere <nihal@nihaljere.xyz> wrote: