~mpu/qbe

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
2 2

[PATCH] Fix sprintf() and strcpy() warnings on OpenBSD.

Details
Message ID
<20231230174801.79177-1-w@bunny.rocks>
DKIM signature
missing
Download raw message
Patch: +42 -39
When compiling on OpenBSD, the compiler produces warning messages about the
usage of these two functions, and suggests replacing them with snprintf() and
strlcpy().

strlcpy() on Linux requires including bsd/string.h and linking libbsd. I found
it easier to just use equivalent snprintf() calls instead.
---
 amd64/emit.c  |  2 +-
 amd64/isel.c  |  2 +-
 arm64/emit.c  | 10 +++++-----
 arm64/isel.c  |  2 +-
 minic/minic.y | 21 +++++++++++----------
 minic/yacc.c  | 24 ++++++++++++------------
 parse.c       |  8 ++++----
 rv64/emit.c   |  2 +-
 rv64/isel.c   |  2 +-
 tools/pmov.c  |  2 +-
 util.c        |  6 ++++--
 11 files changed, 42 insertions(+), 39 deletions(-)

diff --git a/amd64/emit.c b/amd64/emit.c
index c949589..6449de7 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -191,7 +191,7 @@ regtoa(int reg, int sz)

	assert(reg <= XMM15);
	if (reg >= XMM0) {
		sprintf(buf, "xmm%d", reg-XMM0);
		snprintf(buf, sizeof buf, "xmm%d", reg-XMM0);
		return buf;
	} else
		return rname[reg][sz];
diff --git a/amd64/isel.c b/amd64/isel.c
index e29c8bf..2bd8ec0 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -95,7 +95,7 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
		memset(&a, 0, sizeof a);
		a.offset.type = CAddr;
		n = stashbits(&fn->con[r0.val].bits, KWIDE(k) ? 8 : 4);
		sprintf(buf, "\"%sfp%d\"", T.asloc, n);
		snprintf(buf, sizeof buf, "\"%sfp%d\"", T.asloc, n);
		a.offset.sym.id = intern(buf);
		fn->mem[fn->nmem-1] = a;
	}
diff --git a/arm64/emit.c b/arm64/emit.c
index 85b5f3d..b40ed27 100644
--- a/arm64/emit.c
+++ b/arm64/emit.c
@@ -116,21 +116,21 @@ rname(int r, int k)

	if (r == SP) {
		assert(k == Kl);
		sprintf(buf, "sp");
		snprintf(buf, sizeof buf, "sp");
	}
	else if (R0 <= r && r <= LR)
		switch (k) {
		default: die("invalid class");
		case Kw: sprintf(buf, "w%d", r-R0); break;
		case Kw: snprintf(buf, sizeof buf, "w%d", r-R0); break;
		case Kx:
		case Kl: sprintf(buf, "x%d", r-R0); break;
		case Kl: snprintf(buf, sizeof buf, "x%d", r-R0); break;
		}
	else if (V0 <= r && r <= V30)
		switch (k) {
		default: die("invalid class");
		case Ks: sprintf(buf, "s%d", r-V0); break;
		case Ks: snprintf(buf, sizeof buf, "s%d", r-V0); break;
		case Kx:
		case Kd: sprintf(buf, "d%d", r-V0); break;
		case Kd: snprintf(buf, sizeof buf, "d%d", r-V0); break;
		}
	else
		die("invalid register");
diff --git a/arm64/isel.c b/arm64/isel.c
index 062beb3..100cf7e 100644
--- a/arm64/isel.c
+++ b/arm64/isel.c
@@ -112,7 +112,7 @@ fixarg(Ref *pr, int k, int phi, Fn *fn)
			n = stashbits(&c->bits, KWIDE(k) ? 8 : 4);
			vgrow(&fn->con, ++fn->ncon);
			c = &fn->con[fn->ncon-1];
			sprintf(buf, "\"%sfp%d\"", T.asloc, n);
			snprintf(buf, sizeof buf, "\"%sfp%d\"", T.asloc, n);
			*c = (Con){.type = CAddr};
			c->sym.id = intern(buf);
			r2 = newtmp("isel", Kl, fn);
diff --git a/minic/minic.y b/minic/minic.y
index eeef900..2d14407 100644
--- a/minic/minic.y
+++ b/minic/minic.y
@@ -130,7 +130,7 @@ varadd(char *v, int glo, unsigned ctyp)
	h = h0;
	do {
		if (varh[h].v[0] == 0) {
			strcpy(varh[h].v, v);
			snprintf(varh[h].v, sizeof varh[h].v, "%s", v);
			varh[h].glo = glo;
			varh[h].ctyp = ctyp;
			return;
@@ -154,7 +154,7 @@ varget(char *v)
		if (strcmp(varh[h].v, v) == 0) {
			if (!varh[h].glo) {
				s.t = Var;
				strcpy(s.u.v, v);
				snprintf(s.u.v, sizeof s.u.v, "%s", v);
			} else {
				s.t = Glo;
				s.u.n = varh[h].glo;
@@ -409,10 +409,10 @@ expr(Node *n)
	Binop:
		sr.ctyp = prom(o, &s0, &s1);
		if (strchr("ne<l", n->op)) {
			sprintf(ty, "%c", irtyp(sr.ctyp));
			snprintf(ty, sizeof ty, "%c", irtyp(sr.ctyp));
			sr.ctyp = INT;
		} else
			strcpy(ty, "");
			ty[0] = '\0';
		fprintf(of, "\t");
		psymb(sr);
		fprintf(of, " =%c", irtyp(sr.ctyp));
@@ -605,7 +605,7 @@ param(char *v, unsigned ctyp, Node *pl)
		die("invalid void declaration");
	n = mknode(0, 0, pl);
	varadd(v, 0, ctyp);
	strcpy(n->u.v, v);
	snprintf(n->u.v, sizeof n->u.v, "%s", v);
	return n;
}

@@ -674,12 +674,13 @@ fdcl: type IDENT '(' ')' ';'

idcl: type IDENT ';'
{
	size_t sz = sizeof "{ x 0 }";
	if ($1 == NIL)
		die("invalid void declaration");
	if (nglo == NGlo)
		die("too many string literals");
	ini[nglo] = alloc(sizeof "{ x 0 }");
	sprintf(ini[nglo], "{ %c 0 }", irtyp($1));
	ini[nglo] = alloc(sz);
	snprintf(ini[nglo], sz, "{ %c 0 }", irtyp($1));
	varadd($2->u.v, nglo++, $1);
};

@@ -881,7 +882,7 @@ yylex()
			if (strcmp(v, kwds[i].s) == 0)
				return kwds[i].t;
		yylval.n = mknode('V', 0, 0);
		strcpy(yylval.n->u.v, v);
		snprintf(yylval.n->u.v, sizeof yylval.n->u.v, "%s", v);
		return IDENT;
	}

@@ -889,7 +890,7 @@ yylex()
		i = 0;
		n = 32;
		p = alloc(n);
		strcpy(p, "{ b \"");
		snprintf(p, n, "{ b \"");
		for (i=5;; i++) {
			c = getchar();
			if (c == EOF)
@@ -902,7 +903,7 @@ yylex()
			if (c == '"' && p[i-1]!='\\')
				break;
		}
		strcpy(&p[i], "\", b 0 }");
		snprintf(&p[i], n-i, "\", b 0 }");
		if (nglo == NGlo)
			die("too many globals");
		ini[nglo] = p;
diff --git a/minic/yacc.c b/minic/yacc.c
index 01f054e..922f1db 100644
--- a/minic/yacc.c
+++ b/minic/yacc.c
@@ -897,7 +897,7 @@ gettype(char *type)
	if (tk==TLangle) {
		if (nexttk()!=TIdnt)
			die("syntax error, ident expected after <");
		strcpy(type, idnt);
		snprintf(type, IdntSz, "%s", idnt);
		if (nexttk()!=TRangle)
			die("syntax error, unclosed <");
		return nexttk();
@@ -918,7 +918,7 @@ findsy(char *name, int add)
				if (ntk>=MaxTk)
					die("too many tokens");
				ntk++;
				strcpy(is[n].name, name);
				snprintf(is[n].name, sizeof is[n].name, "%s", name);
				return n;
			}
			n = MaxTk;
@@ -929,7 +929,7 @@ findsy(char *name, int add)
	if (add) {
		if (nsy>=MaxTk+MaxNt)
			die("too many non-terminals");
		strcpy(is[nsy].name, name);
		snprintf(is[nsy].name, sizeof is[nsy].name, "%s", name);
		return nsy++;
	} else
		return nsy;
@@ -942,9 +942,9 @@ getdecls()
	Info *si;
	char type[IdntSz], *s;

	strcpy(is[0].name, "$");
	snprintf(is[0].name, sizeof is[0].name, "$");
	ntk = 1;
	strcpy(is[Sym0].name, "@start");
	snprintf(is[Sym0].name, sizeof is[Sym0].name, "@start");
	nsy = MaxTk+1;
	sstart = S;
	prec = 0;
@@ -1004,8 +1004,8 @@ getdecls()
				n = ntk++;
			}
			si = &is[n];
			strcpy(si->name, idnt);
			strcpy(si->type, type);
			snprintf(si->name, sizeof si->name, "%s", idnt);
			snprintf(si->type, sizeof si->type, "%s", type);
			si->prec = p;
			si->assoc = a;
			tk = nexttk();
@@ -1024,8 +1024,8 @@ getdecls()
				nsy++;
			}
			si = &is[n];
			strcpy(si->name, idnt);
			strcpy(si->type, type);
			snprintf(si->name, sizeof si->name, "%s", idnt);
			snprintf(si->type, sizeof si->type, "%s", type);
			tk = nexttk();
		}
		break;
@@ -1258,14 +1258,14 @@ init(int ac, char *av[])
	fin = fopen(srca, "r");
	if (strlen(pref) + 10 > sizeof buf)
		die("-b prefix too long");
	sprintf(buf, "%s.tab.c", pref);
	snprintf(buf, sizeof buf, "%s.tab.c", pref);
	fout = fopen(buf, "w");
	if (vf) {
		sprintf(buf, "%s.output", pref);
		snprintf(buf, sizeof buf, "%s.output", pref);
		fgrm = fopen(buf, "w");
	}
	if (df) {
		sprintf(buf, "%s.tab.h", pref);
		snprintf(buf, sizeof buf, "%s.tab.h", pref);
		fhdr = fopen(buf, "w");
		if (fhdr) {
			fprintf(fhdr, "#ifndef Y_TAB_H_\n");
diff --git a/parse.c b/parse.c
index 33ed6ec..8659aab 100644
--- a/parse.c
+++ b/parse.c
@@ -375,7 +375,7 @@ expect(int t)
		return;
	s1 = ttoa[t] ? ttoa[t] : "??";
	s2 = ttoa[t1] ? ttoa[t1] : "??";
	sprintf(buf, "%s expected, got %s instead", s1, s2);
	snprintf(buf, sizeof buf, "%s expected, got %s instead", s1, s2);
	err(buf);
}

@@ -396,7 +396,7 @@ tmpref(char *v)
	t = curf->ntmp;
	*h = t;
	newtmp(0, Kx, curf);
	strcpy(curf->tmp[t].name, v);
	snprintf(curf->tmp[t].name, sizeof curf->tmp[t].name, "%s", v);
	return TMP(t);
}

@@ -559,7 +559,7 @@ findblk(char *name)
			return b;
	b = newblk();
	b->id = nblk++;
	strcpy(b->name, name);
	snprintf(b->name, sizeof b->name, "%s", name);
	b->dlink = blkh[h];
	blkh[h] = b;
	return b;
@@ -1006,7 +1006,7 @@ parsetyp()
	ty->size = 0;
	if (nextnl() != Ttyp ||  nextnl() != Teq)
		err("type name and then = expected");
	strcpy(ty->name, tokval.str);
	snprintf(ty->name, sizeof ty->name, "%s", tokval.str);
	t = nextnl();
	if (t == Talign) {
		if (nextnl() != Tint)
diff --git a/rv64/emit.c b/rv64/emit.c
index 23a8be8..086b992 100644
--- a/rv64/emit.c
+++ b/rv64/emit.c
@@ -233,7 +233,7 @@ loadaddr(Con *c, char *rn, FILE *f)

	if (c->sym.type == SThr) {
		if (c->bits.i)
			sprintf(off, "+%"PRIi64, c->bits.i);
			snprintf(off, sizeof off, "+%"PRIi64, c->bits.i);
		else
			off[0] = 0;
		fprintf(f, "\tlui %s, %%tprel_hi(%s)%s\n",
diff --git a/rv64/isel.c b/rv64/isel.c
index 8921a07..89fed32 100644
--- a/rv64/isel.c
+++ b/rv64/isel.c
@@ -44,7 +44,7 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
			n = stashbits(&c->bits, KWIDE(k) ? 8 : 4);
			vgrow(&fn->con, ++fn->ncon);
			c = &fn->con[fn->ncon-1];
			sprintf(buf, "\"%sfp%d\"", T.asloc, n);
			snprintf(buf, sizeof buf, "\"%sfp%d\"", T.asloc, n);
			*c = (Con){.type = CAddr};
			c->sym.id = intern(buf);
			emit(Oload, k, r1, CON(c-fn->con), R);
diff --git a/tools/pmov.c b/tools/pmov.c
index ffc38ea..d00e523 100644
--- a/tools/pmov.c
+++ b/tools/pmov.c
@@ -37,7 +37,7 @@ main()
			tmp[t].hint.r = -1;
			tmp[t].hint.m = 0;
			tmp[t].slot = -1;
			sprintf(tmp[t].name, "tmp%d", t-Tmp0+1);
			snprintf(tmp[t].name, tmp[t].name, "tmp%d", t-Tmp0+1);
		}

	bsinit_(mbeg.b, Tmp0+NIReg);
diff --git a/util.c b/util.c
index 362fa98..73755e1 100644
--- a/util.c
+++ b/util.c
@@ -170,6 +170,7 @@ intern(char *s)
	Bucket *b;
	uint32_t h;
	uint i, n;
	size_t sz;

	h = hash(s) & IMask;
	b = &itbl[h];
@@ -186,9 +187,10 @@ intern(char *s)
	else if ((n & (n-1)) == 0)
		vgrow(&b->str, n+n);

	b->str[n] = emalloc(strlen(s)+1);
	sz = strlen(s)+1;
	b->str[n] = emalloc(sz);
	b->nstr = n + 1;
	strcpy(b->str[n], s);
	snprintf(b->str[n], sz, "%s", s);
	return h + (n<<IBits);
}

-- 
2.42.0
Details
Message ID
<2f0fc251-610f-477b-9213-c9f4f0825330@app.fastmail.com>
In-Reply-To
<20231230174801.79177-1-w@bunny.rocks> (view parent)
DKIM signature
missing
Download raw message
On Sat, Dec 30, 2023, at 18:48, Chenguang Wang wrote:
> When compiling on OpenBSD, the compiler produces warning messages about the
> usage of these two functions, and suggests replacing them with snprintf() and
> strlcpy().

I have mixed feelings about that. Additionally all of the
`sizeof xxx` proposed will break silently if we move to
pointer types instead of array types.

I manually reviewed:

  - https://c9x.me/git/qbe.git/tree/amd64/emit.c#n194
    fine, the %d expands to at most 2 chars, the buf is 6 chars
    and fits "xmmNN". It's easy for the compiler to see that
    things cannot go wrong with the assert.

  - https://c9x.me/git/qbe.git/tree/amd64/isel.c#n98
    not so easy for the compiler to find out about T.asloc,
    but easy enough for a human to see that things are fine

  - https://c9x.me/git/qbe.git/tree/arm64/emit.c#n113
    easy enough to see things are fine, register names
    fit in 3 chars

  - https://c9x.me/git/qbe.git/tree/arm64/isel.c#n115
    same as in amd64/

  - https://c9x.me/git/qbe.git/tree/parse.c#n378
    128 is more than enough for the longest possible output

  - https://c9x.me/git/qbe.git/tree/parse.c#n562
    this is not obvious for a compiler, but Ttmp must
    fit in char[NString] (see label Alpha: in next())

  - https://c9x.me/git/qbe.git/tree/parse.c#n562
    same as above for Tlbl

  - https://c9x.me/git/qbe.git/tree/parse.c#n1017
    same as above for Ttyp

  - https://c9x.me/git/qbe.git/tree/rv64/emit.c#n236
    easy for the compiler to see it's fine, an int64
    will take at most 20 bytes to print

  - https://c9x.me/git/qbe.git/tree/rv64/isel.c#n47
    same as in amd64/

  - https://c9x.me/git/qbe.git/tree/util.c#n189
    obviously right

Now minic/minic.y is not compiled by most users so I think
we can leave it alone. Same for minic/minic.y and tools/pmov.c
(for which the current patch is broken).

Anyway, I think when the scale of the review of these
dangerous calls is such that it can be done in 30 minutes,
I don't think we will really benefit from the more verbose
calls.
Details
Message ID
<fdac5259-9723-48b6-adc5-c39c11eb20be@app.fastmail.com>
In-Reply-To
<2f0fc251-610f-477b-9213-c9f4f0825330@app.fastmail.com> (view parent)
DKIM signature
missing
Download raw message
Fair points. Just for the record...

1. `-Wsizeof-pointer-memaccess` catches `sizeof` misuses on pointer
types, but it only works on gcc; the clang version doesn't detect this:

    $ cat t.c
    #include <stdio.h>
    int main(void) {
        char *p = 0;
        char buf[42];
        snprintf(p, sizeof p, "hi");
        snprintf(buf, sizeof buf, "hi");
    }

    $ gcc -W -Wall t.c
    t.c: In function 'int main()':
    t.c:4:17: warning: argument to 'sizeof' in 'int snprintf(char*,
    size_t, const char*, ...)' call is the same expression as the
    destination; did you mean to provide an explicit length?
    [-Wsizeof-pointer-memaccess]
        4 |     snprintf(p, sizeof p, "hi");
          |                 ^~~~~~~~

The error message is confusing; the gcc doc [1] says it better:

    This warning triggers for example for `memset(ptr, 0, sizeof(ptr))`;
    if ptr is not an array, but a pointer, and suggests a possible fix

At least I myself do not have gcc on my dev machine; probably not a good
idea to rely on this.

2. `-Wsizeof-pointer-div` works on clang as well, but only if we use
`countof` (`#define countof(x) (sizeof(x) / sizeof((x)[0]))`) instead of
`sizeof` everywhere. Too easily misused, in my opinion.

[1]: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wsizeof-pointer-memaccess


On Tue, Jan 2, 2024, at 6:08 AM, Quentin Carbonneaux wrote:
> On Sat, Dec 30, 2023, at 18:48, Chenguang Wang wrote:
>> When compiling on OpenBSD, the compiler produces warning messages about the
>> usage of these two functions, and suggests replacing them with snprintf() and
>> strlcpy().
>
> I have mixed feelings about that. Additionally all of the
> `sizeof xxx` proposed will break silently if we move to
> pointer types instead of array types.
>
> I manually reviewed:
>
>   - https://c9x.me/git/qbe.git/tree/amd64/emit.c#n194
>     fine, the %d expands to at most 2 chars, the buf is 6 chars
>     and fits "xmmNN". It's easy for the compiler to see that
>     things cannot go wrong with the assert.
>
>   - https://c9x.me/git/qbe.git/tree/amd64/isel.c#n98
>     not so easy for the compiler to find out about T.asloc,
>     but easy enough for a human to see that things are fine
>
>   - https://c9x.me/git/qbe.git/tree/arm64/emit.c#n113
>     easy enough to see things are fine, register names
>     fit in 3 chars
>
>   - https://c9x.me/git/qbe.git/tree/arm64/isel.c#n115
>     same as in amd64/
>
>   - https://c9x.me/git/qbe.git/tree/parse.c#n378
>     128 is more than enough for the longest possible output
>
>   - https://c9x.me/git/qbe.git/tree/parse.c#n562
>     this is not obvious for a compiler, but Ttmp must
>     fit in char[NString] (see label Alpha: in next())
>
>   - https://c9x.me/git/qbe.git/tree/parse.c#n562
>     same as above for Tlbl
>
>   - https://c9x.me/git/qbe.git/tree/parse.c#n1017
>     same as above for Ttyp
>
>   - https://c9x.me/git/qbe.git/tree/rv64/emit.c#n236
>     easy for the compiler to see it's fine, an int64
>     will take at most 20 bytes to print
>
>   - https://c9x.me/git/qbe.git/tree/rv64/isel.c#n47
>     same as in amd64/
>
>   - https://c9x.me/git/qbe.git/tree/util.c#n189
>     obviously right
>
> Now minic/minic.y is not compiled by most users so I think
> we can leave it alone. Same for minic/minic.y and tools/pmov.c
> (for which the current patch is broken).
>
> Anyway, I think when the scale of the review of these
> dangerous calls is such that it can be done in 30 minutes,
> I don't think we will really benefit from the more verbose
> calls.
Reply to thread Export thread (mbox)