~mcf/cproc

pp: Implement token concatenation operator v1 PROPOSED

Greg Bonik: 1
 pp: Implement token concatenation operator

 18 files changed, 457 insertions(+), 69 deletions(-)
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~mcf/cproc/patches/53513/mbox | git am -3
Learn more about email & git

[PATCH] pp: Implement token concatenation operator Export this patch

---
Hi Michael,

What a great project you have here. I just randomly discovered it and
decided to have some fun with it over the weekend by implementing the
token concatenation operator in the preprocessor. 

I reused the scanner for the token concatenation logic. I
considered a direct implementation instead (i.e. handling each case
like IDENT ## IDENT -> IDENT etc.), but that approach results in a lot
of edge case handling which basically duplicates the scanner logic.

To make that possible, I changed the scanner so that it can read
either from a file or from memory. Inserting an `if` like this into
the hot path probably has a performance penalty, but I figured this
project seems to prioritize simplicity over performance.
(Alternatively, one could use fmemopen(), but it is not widely
available outside of the Linux world.)

I also moved `buf` from the scanner into a static variable, so that
all scanners share the same buffer. This makes it trivial to create
and destroy a scanner on the fly, which we do every time we
concatenate two tokens.

Best,
Greg

 cc.h                                  |   3 +
 pp.c                                  | 223 ++++++++++++++++++++++----
 scan.c                                |  83 +++++++---
 test/preprocess-macro-concat-2.c      |  17 ++
 test/preprocess-macro-concat-2.pp     |  15 ++
 test/preprocess-macro-concat-3.c      |  58 +++++++
 test/preprocess-macro-concat-3.pp     |  45 ++++++
 test/preprocess-macro-concat.c        |   3 +
 test/preprocess-macro-concat.pp       |   1 +
 test/preprocess-standard-example-2.c  |   6 +-
 test/preprocess-standard-example-2.pp |   2 +-
 test/preprocess-standard-example-3.c  |  16 ++
 test/preprocess-standard-example-3.pp |   5 +
 test/preprocess-standard-example-4.c  |   4 +
 test/preprocess-standard-example-4.pp |   2 +
 test/preprocess-standard-example-5.c  |   6 +
 test/preprocess-standard-example-5.pp |   1 +
 token.c                               |  36 +++--
 18 files changed, 457 insertions(+), 69 deletions(-)
 create mode 100644 test/preprocess-macro-concat-2.c
 create mode 100644 test/preprocess-macro-concat-2.pp
 create mode 100644 test/preprocess-macro-concat-3.c
 create mode 100644 test/preprocess-macro-concat-3.pp
 create mode 100644 test/preprocess-macro-concat.c
 create mode 100644 test/preprocess-macro-concat.pp
 create mode 100644 test/preprocess-standard-example-3.c
 create mode 100644 test/preprocess-standard-example-3.pp
 create mode 100644 test/preprocess-standard-example-4.c
 create mode 100644 test/preprocess-standard-example-4.pp
 create mode 100644 test/preprocess-standard-example-5.c
 create mode 100644 test/preprocess-standard-example-5.pp

diff --git a/cc.h b/cc.h
index cb53b0d..caca8c8 100644
--- a/cc.h
+++ b/cc.h
@@ -401,7 +401,9 @@ struct init {
extern struct token tok;
extern const char *tokstr[];

const char *tokenstring(const struct token *);
void tokenprint(const struct token *);
void tokendesc(char *, size_t, enum tokenkind, const char *);
char *tokencheck(const struct token *, enum tokenkind, const char *);
void error(const struct location *, const char *, ...);

@@ -411,6 +413,7 @@ void scanfrom(const char *, FILE *);
void scanopen(void);
void scansetloc(struct location loc);
void scan(struct token *);
int parsetoken(const char *, bool *);

/* preprocessor */

diff --git a/pp.c b/pp.c
index 99e56e8..216cbbd 100644
--- a/pp.c
+++ b/pp.c
@@ -13,6 +13,7 @@ struct macroparam {
		PARAMTOK = 1<<0,  /* the parameter is used normally */
		PARAMSTR = 1<<1,  /* the parameter is used with the '#' operator */
		PARAMVAR = 1<<2,  /* the parameter is __VA_ARGS__ */
		PARAMCAT = 1<<3,  /* the parameter is used at either side of the '##' operator */
	} flags;
};

@@ -21,6 +22,9 @@ struct macroarg {
	size_t ntoken;
	/* stringized argument */
	struct token str;
	/* raw tokens, if used with the '##' operator */
	struct token *raw;
	size_t nraw;
};

struct macro {
@@ -34,6 +38,12 @@ struct macro {
	/* parameters of function-like macro */
	struct macroparam *param;
	size_t nparam;
	/* upper bound on the number of new tokens created with ## operator */
	size_t maxnewtokens;
	/* array of length `maxnewtokens`, lazily allocated */
	struct token *newtokens;
	/* pointer into `newtokens` for bump allocation */
	struct token *nextnewtoken;
	/* argument tokens of macro invocation */
	struct macroarg *arg;
	/* replacement list */
@@ -45,6 +55,7 @@ struct frame {
	struct token *token;
	size_t ntoken;
	struct macro *macro;
	struct token *concatlhs;
};

enum ppflags ppflags;
@@ -120,6 +131,7 @@ macrodone(struct macro *m)
	m->hide = false;
	if (m->kind == MACROFUNC && m->nparam > 0) {
		free(m->arg[0].token);
		free(m->arg[0].raw);
		free(m->arg);
	}
	--macrodepth;
@@ -147,11 +159,122 @@ ctxpush(struct token *t, size_t n, struct macro *m, bool space)
	f->token = t;
	f->ntoken = n;
	f->macro = m;
	f->concatlhs = NULL;
	if (n > 0)
		t[0].space = space;
	return f;
}

/*
 * Take the next token from frame `f` and store it in `*t`.
 *
 * If the frame belongs to a function-like macro and that token is '#',
 * the parameter name following it is consumed as well and `*t` is set to
 * the stringized argument of that parameter instead.
 *
 * Returns true if a '#' operator was applied, false otherwise.
 */
static bool
stringizednext(struct frame *f, struct token **t)
{
	struct macro *mac;
	struct token *cur;
	size_t idx;

	cur = framenext(f);
	*t = cur;
	mac = f->macro;
	if (mac->kind != MACROFUNC || cur->kind != THASH)
		return false;

	/* define() only accepts '#' when it is followed by a parameter name */
	assert(f->ntoken);
	cur = framenext(f);
	*t = cur;
	idx = macroparam(mac, cur);
	assert(idx != -1);
	*t = &mac->arg[idx].str;
	return true;
}


static struct token *
tokenconcat(struct token *t1, struct token *t2, struct macro *m)
{
	const char *s1, *s2;
	size_t l1, l2;
	char desc1[64], desc2[64];
	bool needlit;
	struct token *ret;
	int kind;

	if (!t1)
		return t2;
	if (!t2)
		return t1;

	s1 = tokenstring(t1);
	s2 = tokenstring(t2);

	if (!s1 || !s2)
		goto invalid;

	assert(m->nextnewtoken < m->newtokens + m->maxnewtokens);
	ret = m->nextnewtoken++;
	ret->loc = t1->loc;
	ret->space = t1->space;
	ret->hide = false;

	l1 = strlen(s1);
	l2 = strlen(s2);
	ret->lit = xmalloc(l1 + l2 + 1);
	memcpy(ret->lit, s1, l1);
	memcpy(ret->lit + l1, s2, l2);
	ret->lit[l1 + l2] = '\0';

	kind = parsetoken(ret->lit, &needlit);
	if (kind < 0)
		goto invalid;
	ret->kind = kind;

	if (!needlit) {
		free(ret->lit);
		ret->lit = NULL;
	}

	return ret;

invalid:
	tokendesc(desc1, sizeof(desc1), t1->kind, t1->lit);
	tokendesc(desc2, sizeof(desc2), t2->kind, t2->lit);
	error(&t1->loc, "Concatenation of %s and %s is not a valid token", desc1, desc2);
	return NULL;
}

/*
 * Apply a chain of '##' operators from the replacement list in frame `f`,
 * using `t` as the initial left-hand operand (NULL for an empty argument).
 *
 * Operands are read from the frame one at a time and pasted onto the
 * running result with tokenconcat(). The loop ends when no further '##'
 * follows, or when a right-hand parameter expanded to more than one token.
 *
 * Returns the pasted token; this is NULL when every operand was empty.
 */
static struct token *
concat(struct token *t, struct frame *f) {
	struct token *rhs, *tail;
	int havemore;
	size_t i, taillen;
	struct macro *m = f->macro;

	for (;;) {
		/* next operand; a '#param' pair comes back as the stringized argument */
		stringizednext(f, &rhs);
		/* multiple ##s in a row can be treated as one */
		if (rhs->kind == THASHHASH)
			continue;

		/*
		 * A parameter operand is replaced by the first raw token of its
		 * argument, or by NULL if the argument has no raw tokens.
		 */
		i = macroparam(m, rhs);
		if (i != -1)
			rhs = m->arg[i].nraw ? m->arg[i].raw : NULL;

		t = tokenconcat(t, rhs, m);
		/* is the next token in the frame another '##'? */
		havemore = f->ntoken && f->token->kind == THASHHASH;

		if (i != -1 && m->arg[i].nraw > 1) {
			/*
			 * Only the first raw token of a multi-token argument is pasted.
			 * The remaining ones are pushed as a frame of their own, except
			 * that when another '##' follows, the last one is held back in
			 * f->concatlhs to become the left operand of that '##'.
			 */
			tail = m->arg[i].raw + 1;
			taillen = m->arg[i].nraw - 1;
			if (havemore)
				f->concatlhs = &m->arg[i].raw[taillen--];
			if (taillen)
				ctxpush(tail, taillen, NULL, tail->space);
			break;
		}

		if (!havemore)
			break;
		/* consume the '##' and continue with the next operand */
		framenext(f);
	}
	return t;
}

/* get the next token from the context */
static struct token *
ctxnext(void)
@@ -159,7 +282,7 @@ ctxnext(void)
	struct frame *f;
	struct token *t;
	struct macro *m;
	bool space;
	bool space, stringized;
	size_t i;

again:
@@ -171,32 +294,53 @@ again:
	}
	if (ctx.len == 0)
		return NULL;

	m = f->macro;
	if (m && m->kind == MACROFUNC) {
		/* try to expand macro parameter */
		space = f->token->space;
		switch (f->token->kind) {
		case THASH:
			framenext(f);
			t = framenext(f);
			assert(t);
			i = macroparam(m, t);
			assert(i != -1);
			f = ctxpush(&m->arg[i].str, 1, NULL, space);
			break;
		case TIDENT:
			i = macroparam(m, f->token);
			if (i == -1)
				break;
	if (!m)
		return framenext(f);

	space = f->token->space;
	stringized = stringizednext(f, &t);
	i = macroparam(m, t);

	/* concatenation */
	if (f->concatlhs || f->ntoken && f->token->kind == THASHHASH) {
		if (f->concatlhs) {
			/* the previous token was a parameter that expanded to multiple tokens */
			assert(t->kind == THASHHASH);
			t = f->concatlhs;
			f->concatlhs = NULL;
		} else if (i != -1 && m->arg[i].nraw > 1) {
			/* emit all but the last token from the expansion and leave the cursor at the ## */
			f->concatlhs = &m->arg[i].raw[m->arg[i].nraw - 1];
			ctxpush(m->arg[i].raw, m->arg[i].nraw - 1, NULL, space);
			goto again;
		} else {
			framenext(f);
			if (m->arg[i].ntoken == 0)
				goto again;
			f = ctxpush(m->arg[i].token, m->arg[i].ntoken, NULL, space);
			break;
			if (i != -1)
				t = m->arg[i].nraw ? m->arg[i].raw : NULL;
		}
		/* XXX: token concatenation */
		assert(f->ntoken);

		t = concat(t, f);
		if (t)
			f = ctxpush(t, 1, NULL, space);
		goto again;
	}

	if (stringized) {
		ctxpush(t, 1, NULL, space);
		goto again;
	}
	return framenext(f);

	/* expand macro parameter normally */
	if (i != -1) {
		ctxpush(m->arg[i].token, m->arg[i].ntoken, NULL, space);
		goto again;
	}

	/* no transformations applied -- return the next token from the replacement list */
	return t;
}

static void
@@ -214,6 +358,9 @@ define(void)
	m = xmalloc(sizeof(*m));
	m->name = tokencheck(&tok, TIDENT, "after #define");
	m->hide = false;
	m->maxnewtokens = 0;
	m->newtokens = NULL;
	m->nextnewtoken = NULL;
	t = arrayadd(&repl, sizeof(*t));
	scan(t);
	if (t->kind == TLPAREN && !t->space) {
@@ -246,17 +393,18 @@ define(void)
	/* read macro body */
	i = macroparam(m, t);
	while (t->kind != TNEWLINE && t->kind != TEOF) {
		if (t->kind == THASHHASH)
			error(&t->loc, "'##' operator is not yet implemented");
		prev = t->kind;
		t = arrayadd(&repl, sizeof(*t));
		scan(t);
		if (t->kind == TIDENT && strcmp(t->lit, "__VA_ARGS__") == 0 && !macrovarargs(m))
			error(&t->loc, "__VA_ARGS__ can only be used in variadic function-like macros");
		if (t->kind == THASHHASH)
			++m->maxnewtokens;
		if (m->kind != MACROFUNC)
			continue;

		if (i != -1)
			m->param[i].flags |= PARAMTOK;
			m->param[i].flags |= t->kind == THASHHASH ? PARAMCAT : PARAMTOK;
		i = macroparam(m, t);
		if (prev == THASH) {
			tokencheck(t, TIDENT, "after '#' operator");
@@ -264,12 +412,18 @@ define(void)
				error(&t->loc, "'%s' is not a macro parameter name", t->lit);
			m->param[i].flags |= PARAMSTR;
			i = -1;
		} else if (prev == THASHHASH && i != -1) {
			m->param[i].flags |= PARAMCAT;
			i = -1;
		}
	}
	m->token = repl.val;
	m->ntoken = repl.len / sizeof(*t) - 1;
	tok = *t;

	if (m->ntoken && (m->token[0].kind == THASHHASH || m->token[m->ntoken - 1].kind == THASHHASH))
		error(&t->loc, "Macro replacement list cannot start or end with '##'");

	mapkey(&k, m->name, strlen(m->name));
	entry = mapput(&macros, &k);
	if (*entry && !macroequal(m, *entry))
@@ -293,6 +447,7 @@ undef(void)
		free(name);
		free(m->param);
		free(m->token);
		free(m->newtokens);
		*entry = NULL;
	}
	scan(&tok);
@@ -457,6 +612,9 @@ expand(struct token *t)
			return false;
		expandfunc(m);
	}
	if (!m->newtokens && m->maxnewtokens)
		m->newtokens = xreallocarray(NULL, m->maxnewtokens, sizeof(*m->newtokens));
	m->nextnewtoken = m->newtokens;
	ctxpush(m->token, m->ntoken, m, space);
	m->hide = true;
	++macrodepth;
@@ -468,9 +626,9 @@ expandfunc(struct macro *m)
{
	struct macroparam *p;
	struct macroarg *arg;
	struct array str, tok;
	struct array str, tok, raw = {0};
	size_t i, depth, paren;
	struct token *t;
	struct token *t, *traw;

	/* read macro arguments */
	paren = 0;
@@ -485,6 +643,7 @@ expandfunc(struct macro *m)
			arrayaddbuf(&str, "\"", 1);
		}
		arg[i].ntoken = 0;
		arg[i].nraw = 0;
		for (;;) {
			if (t->kind == TEOF)
				error(&t->loc, "EOF when reading macro parameters");
@@ -499,6 +658,10 @@ expandfunc(struct macro *m)
				}
				if (p->flags & PARAMSTR)
					stringize(&str, t);
				if (p->flags & PARAMCAT) {
					arrayaddbuf(&raw, t, sizeof(*t));
					++arg[i].nraw;
				}
			}
			if (p->flags & PARAMTOK && !expand(t)) {
				arrayaddbuf(&tok, t, sizeof(*t));
@@ -521,9 +684,11 @@ expandfunc(struct macro *m)
		error(&t->loc, "not enough arguments for macro '%s'", m->name);
	if (t->kind != TRPAREN)
		error(&t->loc, "too many arguments for macro '%s'", m->name);
	for (i = 0, t = tok.val; i < m->nparam; ++i) {
	for (i = 0, t = tok.val, traw = raw.val; i < m->nparam; ++i) {
		arg[i].token = t;
		t += arg[i].ntoken;
		arg[i].raw = traw;
		traw += arg[i].nraw;
	}
	m->arg = arg;
}
diff --git a/scan.c b/scan.c
index 8a38e22..73f151d 100644
--- a/scan.c
+++ b/scan.c
@@ -7,18 +7,21 @@
#include "util.h"
#include "cc.h"

struct buffer {
static struct buffer {
	unsigned char *str;
	size_t len, cap;
};
} buf;

struct scanner {
	int chr;
	bool usebuf;
	bool sawspace;
	FILE *file;
	bool fromfile;
	union {
		FILE *file;
		const char *cursor;
	} src;
	struct location loc;
	struct buffer buf;
	struct scanner *next;
};

@@ -47,15 +50,23 @@ bufget(struct buffer *b)
	return s;
}

/*
 * Read one character from the scanner's input: from the FILE when the
 * scanner reads from a file, otherwise from the in-memory string.
 * For the in-memory case the terminating NUL is reported as EOF and the
 * cursor is left pointing at it.
 */
static int
readchar(struct scanner *s)
{
	unsigned char c;

	if (s->fromfile)
		return getc(s->src.file);

	if (!*s->src.cursor)
		return EOF;
	c = (unsigned char) *s->src.cursor;
	++s->src.cursor;
	return c;
}

static void
nextchar(struct scanner *s)
{
	int c;

	if (s->usebuf)
		bufadd(&s->buf, s->chr);
		bufadd(&buf, s->chr);
	for (;;) {
		s->chr = getc(s->file);
		s->chr = readchar(s);
		if (s->chr == '\n') {
			++s->loc.line, s->loc.col = 0;
			break;
@@ -63,9 +74,12 @@ nextchar(struct scanner *s)
		++s->loc.col;
		if (s->chr != '\\')
			break;
		c = getc(s->file);
		c = readchar(s);
		if (c != '\n') {
			ungetc(c, s->file);
			if (s->fromfile)
				ungetc(c, s->src.file);
			else
				--s->src.cursor;
			break;
		}
		++s->loc.line, s->loc.col = 0;
@@ -274,6 +288,8 @@ scankind(struct scanner *s, struct location *loc)
	enum tokenkind tok;
	struct location oldloc;

	buf.len = 0;

again:
	*loc = s->loc;
	switch (s->chr) {
@@ -349,7 +365,7 @@ again:
	case '.':
		nextchar(s);
		if (isdigit(s->chr)) {
			bufadd(&s->buf, '.');
			bufadd(&buf, '.');
			return number(s);
		}
		if (s->chr != '.')
@@ -357,7 +373,10 @@ again:
		oldloc = s->loc;
		nextchar(s);
		if (s->chr != '.') {
			ungetc(s->chr, s->file);
			if (s->src.file)
				ungetc(s->chr, s->src.file);
			else
				--s->src.cursor;
			s->loc = oldloc;
			s->chr = '.';
			return TPERIOD;
@@ -387,7 +406,7 @@ again:
	case 'u':
		s->usebuf = true;
		nextchar(s);
		if (s->buf.str[0] == 'u' && s->chr == '8')
		if (buf.str[0] == 'u' && s->chr == '8')
			nextchar(s);
		switch (s->chr) {
		case '\'': return charconst(s);
@@ -413,10 +432,8 @@ scanfrom(const char *name, FILE *file)
	struct scanner *s;

	s = xmalloc(sizeof(*s));
	s->file = file;
	s->buf.str = NULL;
	s->buf.len = 0;
	s->buf.cap = 0;
	s->fromfile = true;
	s->src.file = file;
	s->usebuf = false;
	s->loc.file = name;
	s->loc.line = 1;
@@ -430,9 +447,9 @@ scanfrom(const char *name, FILE *file)
void
scanopen(void)
{
	if (!scanner->file) {
		scanner->file = fopen(scanner->loc.file, "r");
		if (!scanner->file)
	if (!scanner->src.file) {
		scanner->src.file = fopen(scanner->loc.file, "r");
		if (!scanner->src.file)
			fatal("open %s:", scanner->loc.file);
		nextchar(scanner);
	}
@@ -447,8 +464,8 @@ scansetloc(struct location loc)
static void
scanclose(void)
{
	fclose(scanner->file);
	free(scanner->buf.str);
	if (scanner->fromfile)
		fclose(scanner->src.file);
	free(scanner);
}

@@ -465,7 +482,7 @@ scan(struct token *t)
		scanopen();
	}
	if (scanner->usebuf) {
		t->lit = bufget(&scanner->buf);
		t->lit = bufget(&buf);
		scanner->usebuf = false;
	} else {
		t->lit = NULL;
@@ -473,3 +490,27 @@ scan(struct token *t)
	t->space = scanner->sawspace;
	t->hide = false;
}

/*
 * Scan the NUL-terminated string `str` as exactly one token.
 *
 * A temporary in-memory scanner is set up on the stack and run once.
 * On success the token kind is returned and `*needlit` is set to whether
 * the scanner recorded the token's spelling (i.e. whether the caller must
 * keep a literal string for this token).
 *
 * Returns -1, leaving `*needlit` untouched, if `str` does not consist of
 * exactly one token: the token does not start at the first character, or
 * some input is left over after it.
 */
int
parsetoken(const char *str, bool *needlit)
{
	struct location loc;
	struct scanner s;
	int ret;

	s.usebuf = false;
	s.sawspace = false;
	s.fromfile = false;
	s.src.cursor = str;
	s.loc.file = "<memory>";
	s.loc.line = 1;
	s.loc.col = 0;
	s.next = NULL;

	/* prime the one-character lookahead */
	nextchar(&s);

	ret = scankind(&s, &loc);
	/*
	 * The scanner keeps one character of lookahead in s.chr, so the cursor
	 * alone does not tell whether everything was consumed: with exactly one
	 * trailing character (e.g. "a+" or "+a") the cursor already sits on the
	 * terminating NUL while the leftover character is still in s.chr.
	 * Require both the lookahead and the remaining string to be exhausted.
	 */
	if (loc.line != 1 || loc.col != 1 || s.chr != EOF || *s.src.cursor)
		return -1;
	*needlit = s.usebuf;
	return ret;
}
diff --git a/test/preprocess-macro-concat-2.c b/test/preprocess-macro-concat-2.c
new file mode 100644
index 0000000..a7093eb
--- /dev/null
+++ b/test/preprocess-macro-concat-2.c
@@ -0,0 +1,17 @@
/* tests for token glueing logic */
#define cat(x, y) x ## y
cat(,);
cat(,1);
cat(2,);
cat(ret, urn);
cat(a, 3);
cat(123, 456);
cat(4, u);
cat(5, e4);
cat(6, .2e3);
cat(., 78);
cat(-, -);
cat(>>, =);
cat(u8, "foo");
cat(L, "bar");

diff --git a/test/preprocess-macro-concat-2.pp b/test/preprocess-macro-concat-2.pp
new file mode 100644
index 0000000..1a5c257
--- /dev/null
+++ b/test/preprocess-macro-concat-2.pp
@@ -0,0 +1,15 @@
;
1;
2;
return;
a3;
123456;
4u;
5e4;
6.2e3;
.78;
--;
>>=;
u8"foo";
L"bar";

diff --git a/test/preprocess-macro-concat-3.c b/test/preprocess-macro-concat-3.c
new file mode 100644
index 0000000..2720e88
--- /dev/null
+++ b/test/preprocess-macro-concat-3.c
@@ -0,0 +1,58 @@
/* various test cases for the ## operator */

#define cat(x, y) x ## y
cat(a b, c);
cat(a, b c);
cat(a b, c d);
cat(, a b);
cat(a b, );
cat(x, x);
cat(y, y);

#define prepend(x) foo ## x
prepend(a);
prepend(a b);
prepend();

#define append(x) x ## foo
append(a);
append(a b);
append();

#define glue(x, y) x ## foo ## y
glue(a, b);
glue(,);
glue(a b, c d);

#define cat3(x, y, z) x ## y ## z
cat3(a, b, c);
cat3(,,);
cat3(a, , c);
cat3(a, b c, d);
cat3(a, b c d, e);
cat3(a b, c d, e f);
cat3(a b, , c d);
cat3(, , a b c);
cat3(, a b c, );

#define rep3(x) x ## x ## x
rep3(abc)

#define catcat(x, y) x ## ## y
catcat(a, b);

#define stringize_cat(x, y) #x ## y
stringize_cat(foo bar, );

#define cat_stringize(x, y) x ## #y
cat_stringize(, foo bar);
cat_stringize(u8, foo bar);
cat_stringize(abc u8, foo bar);

#define moretokens(x, y) foo x ## y bar
moretokens(a, b);
moretokens(,);

#define cat_then_stringize(x, y) x ## y #x
cat_then_stringize(a, b);
cat_then_stringize(,);
diff --git a/test/preprocess-macro-concat-3.pp b/test/preprocess-macro-concat-3.pp
new file mode 100644
index 0000000..1673591
--- /dev/null
+++ b/test/preprocess-macro-concat-3.pp
@@ -0,0 +1,45 @@
a bc;
ab c;
a bc d;
a b;
a b;
xx;
yy;

fooa;
fooa b;
foo;

afoo;
a bfoo;
foo;

afoob;
foo;
a bfooc d;

abc;
;
ac;
ab cd;
ab c de;
a bc de f;
a bc d;
a b c;
a b c;

abcabcabc

ab;

"foo bar";

"foo bar";
u8"foo bar";
abc u8"foo bar";

foo ab bar;
foo bar;

ab "a";
 "";
diff --git a/test/preprocess-macro-concat.c b/test/preprocess-macro-concat.c
new file mode 100644
index 0000000..4f7676d
--- /dev/null
+++ b/test/preprocess-macro-concat.c
@@ -0,0 +1,3 @@
/* "smoke test" for the ## operator */
#define cat(x, y) x ## y
cat(hello, world)
diff --git a/test/preprocess-macro-concat.pp b/test/preprocess-macro-concat.pp
new file mode 100644
index 0000000..31e0fce
--- /dev/null
+++ b/test/preprocess-macro-concat.pp
@@ -0,0 +1 @@
helloworld
diff --git a/test/preprocess-standard-example-2.c b/test/preprocess-standard-example-2.c
index 0deb2ad..bce1bdd 100644
--- a/test/preprocess-standard-example-2.c
+++ b/test/preprocess-standard-example-2.c
@@ -1,4 +1,4 @@
/* C11 6.10.3.5p5 with token concatenation disabled for now */
/* C11 6.10.3.5p5 */
#define    x          3
#define    f(a)       f(x * (a))
#undef     x
@@ -11,10 +11,10 @@
#define    t(a)       a
#define    p()        int
#define    q(x)       x
//#define    r(x,y)     x ## y
#define    r(x,y)     x ## y
#define    str(x)     # x
f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);
g(x+(3,4)-w) | h 5) & m
	(f)^m(m);
p() i[q()] = { q(1), /*r(2,3), r(4,), r(,5), r(,)*/ };
p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) };
char c[2][6] = { str(hello), str() };
diff --git a/test/preprocess-standard-example-2.pp b/test/preprocess-standard-example-2.pp
index de72fb1..c2de80d 100644
--- a/test/preprocess-standard-example-2.pp
+++ b/test/preprocess-standard-example-2.pp
@@ -1,4 +1,4 @@
f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);
f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1);
int i[] = { 1, };
int i[] = { 1, 23, 4, 5, };
char c[2][6] = { "hello", "" };
diff --git a/test/preprocess-standard-example-3.c b/test/preprocess-standard-example-3.c
new file mode 100644
index 0000000..28bc6fd
--- /dev/null
+++ b/test/preprocess-standard-example-3.c
@@ -0,0 +1,16 @@
/* C11 6.10.3.5p6 with file inclusion disabled for now */
#define str(s) # s
#define xstr(s) str(s)
#define debug(s, t) printf("x" # s "= %d, x" # t "= %s", \
		x ## s, x ## t)
#define INCFILE(n) vers ## n
#define glue(a, b) a ## b
#define xglue(a, b) glue(a, b)
#define HIGHLOW "hello"
#define LOW LOW ", world"
debug(1, 2);
fputs(str(strncmp("abc\0d", "abc", '\4') //
		== 0) str(: @\n), s);
/* #include */ xstr(INCFILE(2).h)
glue(HIGH, LOW);
xglue(HIGH, LOW)
diff --git a/test/preprocess-standard-example-3.pp b/test/preprocess-standard-example-3.pp
new file mode 100644
index 0000000..b7b71df
--- /dev/null
+++ b/test/preprocess-standard-example-3.pp
@@ -0,0 +1,5 @@
printf("x" "1" "= %d, x" "2" "= %s", x1, x2);
fputs("strncmp(\"abc\\0d\", \"abc\", '\\4') == 0" ": @\n", s);
 "vers2.h"
"hello";
"hello" ", world"
diff --git a/test/preprocess-standard-example-4.c b/test/preprocess-standard-example-4.c
new file mode 100644
index 0000000..5de63f2
--- /dev/null
+++ b/test/preprocess-standard-example-4.c
@@ -0,0 +1,4 @@
/* C11 6.10.3.5p7 */
#define t(x,y,z) x ## y ## z
int j[] = { t(1,2,3), t(,4,5), t(6,,7), t(8,9,),
		t(10,,), t(,11,), t(,,12), t(,,) };
diff --git a/test/preprocess-standard-example-4.pp b/test/preprocess-standard-example-4.pp
new file mode 100644
index 0000000..1b290c5
--- /dev/null
+++ b/test/preprocess-standard-example-4.pp
@@ -0,0 +1,2 @@
int j[] = { 123, 45, 67, 89,
 10, 11, 12, };
diff --git a/test/preprocess-standard-example-5.c b/test/preprocess-standard-example-5.c
new file mode 100644
index 0000000..4545fdf
--- /dev/null
+++ b/test/preprocess-standard-example-5.c
@@ -0,0 +1,6 @@
/* C11 6.10.3.3p4 */
#define hash_hash # ## #
#define mkstr(a) # a
#define in_between(a) mkstr(a)
#define join(c, d) in_between(c hash_hash d)
char p[] = join(x, y);
diff --git a/test/preprocess-standard-example-5.pp b/test/preprocess-standard-example-5.pp
new file mode 100644
index 0000000..cf5b153
--- /dev/null
+++ b/test/preprocess-standard-example-5.pp
@@ -0,0 +1 @@
char p[] = "x ## y";
diff --git a/token.c b/token.c
index b67baa6..ee7ee49 100644
--- a/token.c
+++ b/token.c
@@ -120,34 +120,40 @@ const char *tokstr[] = {
	[THASHHASH] = "##",
};

void
tokenprint(const struct token *t)
const char *
tokenstring(const struct token *t)
{
	const char *str;

	if (t->space)
		fputc(' ', stdout);
	switch (t->kind) {
	case TIDENT:
	case TNUMBER:
	case TCHARCONST:
	case TSTRINGLIT:
		str = t->lit;
		break;
		return t->lit;
	case TNEWLINE:
		str = "\n";
		break;
	case TEOF:
		return;
		return "\n";
	default:
		str = tokstr[t->kind];
		return t->kind < LEN(tokstr) ? tokstr[t->kind] : NULL;
	}
	if (!str)
}

void
tokenprint(const struct token *t)
{
	const char *str;

	if (t->space)
		fputc(' ', stdout);

	if (t->kind == TEOF)
		return;

	if (!(str = tokenstring(t)))
		fatal("cannot print token %d", t->kind);

	fputs(str, stdout);
}

static void
void
tokendesc(char *buf, size_t len, enum tokenkind kind, const char *lit)
{
	const char *class;
-- 
2.34.1