Chenguang Wang: 1 Fix sprintf() and strcpy() warnings on OpenBSD. 11 files changed, 42 insertions(+), 39 deletions(-)
Fair points. Just for the record... 1. `-Wsizeof-pointer-memaccess` catches `sizeof` misuses on pointer types, but it only works on gcc; the clang version doesn't detect this: $ cat t.c #include <stdio.h> int main(void) { char *p = 0; char buf[42]; snprintf(p, sizeof p, "hi"); snprintf(buf, sizeof buf, "hi"); } $ gcc -W -Wall t.c t.c: In function 'int main()': t.c:4:17: warning: argument to 'sizeof' in 'int snprintf(char*, size_t, const char*, ...)' call is the same expression as the destination; did you mean to provide an explicit length? [-Wsizeof-pointer-memaccess] 4 | snprintf(p, sizeof p, "hi"); | ^~~~~~~~ The error message is confusing; the gcc doc [1] says it better: This warning triggers for example for `memset(ptr, 0, sizeof(ptr))`; if ptr is not an array, but a pointer, and suggests a possible fix At least I myself do not have gcc on my dev machine; probably not a good idea to rely on this. 2. `-Wsizeof-pointer-div` works on clang as well, but only if we use `countof` (`#define countof(x) (sizeof(x) / sizeof((x)[0]))`) instead of `sizeof` everywhere. Too easily misused, in my opinion. [1]: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wsizeof-pointer-memaccess
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~mpu/qbe/patches/48175/mbox | git am -3Learn more about email & git
When compiling on OpenBSD, the compiler produces warning messages about the usage of these two functions, and suggests replacing them with snprintf() and strlcpy().
I have mixed feelings about that. Additionally all of the `sizeof xxx` proposed will break silently if we move to pointer types instead of array types. I manually reviewed: - https://c9x.me/git/qbe.git/tree/amd64/emit.c#n194 fine, the %d expands to at most 2 chars, the buf is 6 chars and fits "xmmNN". It's easy for the compiler to see that things cannot go wrong with the assert. - https://c9x.me/git/qbe.git/tree/amd64/isel.c#n98 not so easy for the compiler to find out about T.asloc, but easy enough for a human to see that things are fine - https://c9x.me/git/qbe.git/tree/arm64/emit.c#n113 easy enough to see things are fine, register names fit in 3 chars - https://c9x.me/git/qbe.git/tree/arm64/isel.c#n115 same as in amd64/ - https://c9x.me/git/qbe.git/tree/parse.c#n378 128 is more than enough for the longest possible output - https://c9x.me/git/qbe.git/tree/parse.c#n562 this is not obvious for a compiler, but Ttmp must fit in char[NString] (see label Alpha: in next()) - https://c9x.me/git/qbe.git/tree/parse.c#n562 same as above for Tlbl - https://c9x.me/git/qbe.git/tree/parse.c#n1017 same as above for Ttyp - https://c9x.me/git/qbe.git/tree/rv64/emit.c#n236 easy for the compiler to see it's fine, an int64 will take at most 20 bytes to print - https://c9x.me/git/qbe.git/tree/rv64/isel.c#n47 same as in amd64/ - https://c9x.me/git/qbe.git/tree/util.c#n189 obviously right Now minic/minic.y is not compiled by most users so I think we can leave it alone. Same for minic/minic.y and tools/pmov.c (for which the current patch is broken). Anyway, I think when the scale of the review of these dangerous calls is such that it can be done in 30 minutes, I don't think we will really benefit from the more verbose calls.
strlcpy() on Linux requires including bsd/string.h and linking libbsd. I found it easier to just use equivalent snprintf() calls instead. --- amd64/emit.c | 2 +- amd64/isel.c | 2 +- arm64/emit.c | 10 +++++----- arm64/isel.c | 2 +- minic/minic.y | 21 +++++++++++---------- minic/yacc.c | 24 ++++++++++++------------ parse.c | 8 ++++---- rv64/emit.c | 2 +- rv64/isel.c | 2 +- tools/pmov.c | 2 +- util.c | 6 ++++-- 11 files changed, 42 insertions(+), 39 deletions(-) diff --git a/amd64/emit.c b/amd64/emit.c index c949589..6449de7 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -191,7 +191,7 @@ regtoa(int reg, int sz) assert(reg <= XMM15); if (reg >= XMM0) { - sprintf(buf, "xmm%d", reg-XMM0); + snprintf(buf, sizeof buf, "xmm%d", reg-XMM0); return buf; } else return rname[reg][sz]; diff --git a/amd64/isel.c b/amd64/isel.c index e29c8bf..2bd8ec0 100644 --- a/amd64/isel.c +++ b/amd64/isel.c @@ -95,7 +95,7 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn) memset(&a, 0, sizeof a); a.offset.type = CAddr; n = stashbits(&fn->con[r0.val].bits, KWIDE(k) ? 8 : 4); - sprintf(buf, "\"%sfp%d\"", T.asloc, n); + snprintf(buf, sizeof buf, "\"%sfp%d\"", T.asloc, n); a.offset.sym.id = intern(buf); fn->mem[fn->nmem-1] = a; } diff --git a/arm64/emit.c b/arm64/emit.c index 85b5f3d..b40ed27 100644 --- a/arm64/emit.c +++ b/arm64/emit.c @@ -116,21 +116,21 @@ rname(int r, int k) if (r == SP) { assert(k == Kl); - sprintf(buf, "sp"); + snprintf(buf, sizeof buf, "sp"); } else if (R0 <= r && r <= LR) switch (k) { default: die("invalid class"); - case Kw: sprintf(buf, "w%d", r-R0); break; + case Kw: snprintf(buf, sizeof buf, "w%d", r-R0); break; case Kx: - case Kl: sprintf(buf, "x%d", r-R0); break; + case Kl: snprintf(buf, sizeof buf, "x%d", r-R0); break; } else if (V0 <= r && r <= V30) switch (k) { default: die("invalid class"); - case Ks: sprintf(buf, "s%d", r-V0); break; + case Ks: snprintf(buf, sizeof buf, "s%d", r-V0); break; case Kx: - case Kd: sprintf(buf, "d%d", r-V0); break; + case Kd: snprintf(buf, sizeof buf, "d%d", r-V0); break; } else die("invalid register"); diff --git a/arm64/isel.c b/arm64/isel.c index 062beb3..100cf7e 100644 --- a/arm64/isel.c +++ b/arm64/isel.c @@ -112,7 +112,7 @@ fixarg(Ref *pr, int k, int phi, Fn *fn) n = stashbits(&c->bits, KWIDE(k) ? 8 : 4); vgrow(&fn->con, ++fn->ncon); c = &fn->con[fn->ncon-1]; - sprintf(buf, "\"%sfp%d\"", T.asloc, n); + snprintf(buf, sizeof buf, "\"%sfp%d\"", T.asloc, n); *c = (Con){.type = CAddr}; c->sym.id = intern(buf); r2 = newtmp("isel", Kl, fn); diff --git a/minic/minic.y b/minic/minic.y index eeef900..2d14407 100644 --- a/minic/minic.y +++ b/minic/minic.y @@ -130,7 +130,7 @@ varadd(char *v, int glo, unsigned ctyp) h = h0; do { if (varh[h].v[0] == 0) { - strcpy(varh[h].v, v); + snprintf(varh[h].v, sizeof varh[h].v, "%s", v); varh[h].glo = glo; varh[h].ctyp = ctyp; return; @@ -154,7 +154,7 @@ varget(char *v) if (strcmp(varh[h].v, v) == 0) { if (!varh[h].glo) { s.t = Var; - strcpy(s.u.v, v); + snprintf(s.u.v, sizeof s.u.v, "%s", v); } else { s.t = Glo; s.u.n = varh[h].glo; @@ -409,10 +409,10 @@ expr(Node *n) Binop: sr.ctyp = prom(o, &s0, &s1); if (strchr("ne<l", n->op)) { - sprintf(ty, "%c", irtyp(sr.ctyp)); + snprintf(ty, sizeof ty, "%c", irtyp(sr.ctyp)); sr.ctyp = INT; } else - strcpy(ty, ""); + ty[0] = '\0'; fprintf(of, "\t"); psymb(sr); fprintf(of, " =%c", irtyp(sr.ctyp)); @@ -605,7 +605,7 @@ param(char *v, unsigned ctyp, Node *pl) die("invalid void declaration"); n = mknode(0, 0, pl); varadd(v, 0, ctyp); - strcpy(n->u.v, v); + snprintf(n->u.v, sizeof n->u.v, "%s", v); return n; } @@ -674,12 +674,13 @@ fdcl: type IDENT '(' ')' ';' idcl: type IDENT ';' { + size_t sz = sizeof "{ x 0 }"; if ($1 == NIL) die("invalid void declaration"); if (nglo == NGlo) die("too many string literals"); - ini[nglo] = alloc(sizeof "{ x 0 }"); - sprintf(ini[nglo], "{ %c 0 }", irtyp($1)); + ini[nglo] = alloc(sz); + snprintf(ini[nglo], sz, "{ %c 0 }", irtyp($1)); varadd($2->u.v, nglo++, $1); }; @@ -881,7 +882,7 @@ yylex() if (strcmp(v, kwds[i].s) == 0) return kwds[i].t; yylval.n = mknode('V', 0, 0); - strcpy(yylval.n->u.v, v); + snprintf(yylval.n->u.v, sizeof yylval.n->u.v, "%s", v); return IDENT; } @@ -889,7 +890,7 @@ yylex() i = 0; n = 32; p = alloc(n); - strcpy(p, "{ b \""); + snprintf(p, n, "{ b \""); for (i=5;; i++) { c = getchar(); if (c == EOF) @@ -902,7 +903,7 @@ yylex() if (c == '"' && p[i-1]!='\\') break; } - strcpy(&p[i], "\", b 0 }"); + snprintf(&p[i], n-i, "\", b 0 }"); if (nglo == NGlo) die("too many globals"); ini[nglo] = p; diff --git a/minic/yacc.c b/minic/yacc.c index 01f054e..922f1db 100644 --- a/minic/yacc.c +++ b/minic/yacc.c @@ -897,7 +897,7 @@ gettype(char *type) if (tk==TLangle) { if (nexttk()!=TIdnt) die("syntax error, ident expected after <"); - strcpy(type, idnt); + snprintf(type, IdntSz, "%s", idnt); if (nexttk()!=TRangle) die("syntax error, unclosed <"); return nexttk(); @@ -918,7 +918,7 @@ findsy(char *name, int add) if (ntk>=MaxTk) die("too many tokens"); ntk++; - strcpy(is[n].name, name); + snprintf(is[n].name, sizeof is[n].name, "%s", name); return n; } n = MaxTk; @@ -929,7 +929,7 @@ findsy(char *name, int add) if (add) { if (nsy>=MaxTk+MaxNt) die("too many non-terminals"); - strcpy(is[nsy].name, name); + snprintf(is[nsy].name, sizeof is[nsy].name, "%s", name); return nsy++; } else return nsy; @@ -942,9 +942,9 @@ getdecls() Info *si; char type[IdntSz], *s; - strcpy(is[0].name, "$"); + snprintf(is[0].name, sizeof is[0].name, "$"); ntk = 1; - strcpy(is[Sym0].name, "@start"); + snprintf(is[Sym0].name, sizeof is[Sym0].name, "@start"); nsy = MaxTk+1; sstart = S; prec = 0; @@ -1004,8 +1004,8 @@ getdecls() n = ntk++; } si = &is[n]; - strcpy(si->name, idnt); - strcpy(si->type, type); + snprintf(si->name, sizeof si->name, "%s", idnt); + snprintf(si->type, sizeof si->type, "%s", type); si->prec = p; si->assoc = a; tk = nexttk(); @@ -1024,8 +1024,8 @@ getdecls() nsy++; } si = &is[n]; - strcpy(si->name, idnt); - strcpy(si->type, type); + snprintf(si->name, sizeof si->name, "%s", idnt); + snprintf(si->type, sizeof si->type, "%s", type); tk = nexttk(); } break; @@ -1258,14 +1258,14 @@ init(int ac, char *av[]) fin = fopen(srca, "r"); if (strlen(pref) + 10 > sizeof buf) die("-b prefix too long"); - sprintf(buf, "%s.tab.c", pref); + snprintf(buf, sizeof buf, "%s.tab.c", pref); fout = fopen(buf, "w"); if (vf) { - sprintf(buf, "%s.output", pref); + snprintf(buf, sizeof buf, "%s.output", pref); fgrm = fopen(buf, "w"); } if (df) { - sprintf(buf, "%s.tab.h", pref); + snprintf(buf, sizeof buf, "%s.tab.h", pref); fhdr = fopen(buf, "w"); if (fhdr) { fprintf(fhdr, "#ifndef Y_TAB_H_\n"); diff --git a/parse.c b/parse.c index 33ed6ec..8659aab 100644 --- a/parse.c +++ b/parse.c @@ -375,7 +375,7 @@ expect(int t) return; s1 = ttoa[t] ? ttoa[t] : "??"; s2 = ttoa[t1] ? ttoa[t1] : "??"; - sprintf(buf, "%s expected, got %s instead", s1, s2); + snprintf(buf, sizeof buf, "%s expected, got %s instead", s1, s2); err(buf); } @@ -396,7 +396,7 @@ tmpref(char *v) t = curf->ntmp; *h = t; newtmp(0, Kx, curf); - strcpy(curf->tmp[t].name, v); + snprintf(curf->tmp[t].name, sizeof curf->tmp[t].name, "%s", v); return TMP(t); } @@ -559,7 +559,7 @@ findblk(char *name) return b; b = newblk(); b->id = nblk++; - strcpy(b->name, name); + snprintf(b->name, sizeof b->name, "%s", name); b->dlink = blkh[h]; blkh[h] = b; return b; @@ -1006,7 +1006,7 @@ parsetyp() ty->size = 0; if (nextnl() != Ttyp || nextnl() != Teq) err("type name and then = expected"); - strcpy(ty->name, tokval.str); + snprintf(ty->name, sizeof ty->name, "%s", tokval.str); t = nextnl(); if (t == Talign) { if (nextnl() != Tint) diff --git a/rv64/emit.c b/rv64/emit.c index 23a8be8..086b992 100644 --- a/rv64/emit.c +++ b/rv64/emit.c @@ -233,7 +233,7 @@ loadaddr(Con *c, char *rn, FILE *f) if (c->sym.type == SThr) { if (c->bits.i) - sprintf(off, "+%"PRIi64, c->bits.i); + snprintf(off, sizeof off, "+%"PRIi64, c->bits.i); else off[0] = 0; fprintf(f, "\tlui %s, %%tprel_hi(%s)%s\n", diff --git a/rv64/isel.c b/rv64/isel.c index 8921a07..89fed32 100644 --- a/rv64/isel.c +++ b/rv64/isel.c @@ -44,7 +44,7 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn) n = stashbits(&c->bits, KWIDE(k) ? 8 : 4); vgrow(&fn->con, ++fn->ncon); c = &fn->con[fn->ncon-1]; - sprintf(buf, "\"%sfp%d\"", T.asloc, n); + snprintf(buf, sizeof buf, "\"%sfp%d\"", T.asloc, n); *c = (Con){.type = CAddr}; c->sym.id = intern(buf); emit(Oload, k, r1, CON(c-fn->con), R); diff --git a/tools/pmov.c b/tools/pmov.c index ffc38ea..d00e523 100644 --- a/tools/pmov.c +++ b/tools/pmov.c @@ -37,7 +37,7 @@ main() tmp[t].hint.r = -1; tmp[t].hint.m = 0; tmp[t].slot = -1; - sprintf(tmp[t].name, "tmp%d", t-Tmp0+1); + snprintf(tmp[t].name, tmp[t].name, "tmp%d", t-Tmp0+1); } bsinit_(mbeg.b, Tmp0+NIReg); diff --git a/util.c b/util.c index 362fa98..73755e1 100644 --- a/util.c +++ b/util.c @@ -170,6 +170,7 @@ intern(char *s) Bucket *b; uint32_t h; uint i, n; + size_t sz; h = hash(s) & IMask; b = &itbl[h]; @@ -186,9 +187,10 @@ intern(char *s) else if ((n & (n-1)) == 0) vgrow(&b->str, n+n); - b->str[n] = emalloc(strlen(s)+1); + sz = strlen(s)+1; + b->str[n] = emalloc(sz); b->nstr = n + 1; - strcpy(b->str[n], s); + snprintf(b->str[n], sz, "%s", s); return h + (n<<IBits); } -- 2.42.0