~mpu/qbe

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
4 4

[PATCH v4] implement line number info tracking

Details
Message ID
<20230122135907.30113-1-t@laumann.xyz>
DKIM signature
missing
Download raw message
Patch: +56 -9
Co-authored-by: Bor Grošelj Simić <bgs@turminal.net>
---
v3 -> v4

 * Rebased bgs' work on current qbe master - @bgs: I changed the Author, let
   me know if you'd like your name on the Author spot, I'll add myself to
   Co-authored-by in that case.

 * drop the file number argument from "file" and "loc" - the current file number
   is kept in parse(), and added to Fn so the various emit() functions can get
   to it.

 * address mpu's comments on v3: added emitfile() and emitloc() that are called
   into from the arch-specific emit implementations

In amd64/isel.c, the v3 version would fall-through to the default emit code,
call emiti() and fixarg(). I noticed that {arm,rv}64 isel implementations took
care _not_ to call fixarg() for Oloc ops, so I've done the same for amd64. I
don't know which behaviour is correct here for amd64.

This version does not track file names it's already seen. This means a sequence
like:

    file "foo.saa"
    ...
    file "bar.saa"
    ...
    file "foo.saa"

will give this output:

    .file 1 "foo.saa"
    ...
    .file 2 "bar.saa"
    ...
    .file 3 "foo.saa"

I figured that it'd be uncommon to want to switch back and forth between the
same files, but let me know if this case needs to be supported.

 all.h        |  5 ++++-
 amd64/emit.c |  3 +++
 amd64/isel.c |  3 +++
 arm64/emit.c |  4 ++++
 arm64/isel.c |  4 ++++
 emit.c       | 12 ++++++++++++
 main.c       |  2 +-
 ops.h        |  2 ++
 parse.c      | 17 +++++++++++++----
 rv64/emit.c  |  3 +++
 rv64/isel.c  |  4 ++++
 tools/lexh.c |  6 +++---
 12 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/all.h b/all.h
index 7eba443..e65cce6 100644
--- a/all.h
+++ b/all.h
@@ -386,6 +386,7 @@ struct Fn {
	char vararg;
	char dynalloc;
	char name[NString];
	uint filenum; /* debug info: file number for .loc */
	Lnk lnk;
};

@@ -501,7 +502,7 @@ bshas(BSet *bs, uint elt)

/* parse.c */
extern Op optab[NOp];
void parse(FILE *, char *, void (Dat *), void (Fn *));
void parse(FILE *, char *, FILE *, void (Dat *), void (Fn *));
void printfn(Fn *, FILE *);
void printref(Ref, Fn *, FILE *);
void err(char *, ...) __attribute__((noreturn));
@@ -571,3 +572,5 @@ int stashbits(void *, int);
void elf_emitfnfin(char *, FILE *);
void elf_emitfin(FILE *);
void macho_emitfin(FILE *);
void emitfile(uint, char *, FILE *);
void emitloc(long int, long int, FILE *);
diff --git a/amd64/emit.c b/amd64/emit.c
index 9b8bb5d..00fb7aa 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -525,6 +525,9 @@ emitins(Ins i, Fn *fn, FILE *f)
		emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
		emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
		break;
	case Oloc:
		emitloc(fn->filenum, fn->con[i.arg[0].val].bits.i, f);
		break;
	}
}

diff --git a/amd64/isel.c b/amd64/isel.c
index 3e3fe62..caf10d2 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -371,6 +371,9 @@ sel(Ins i, ANum *an, Fn *fn)
	case_Oload:
		seladdr(&i.arg[0], an, fn);
		goto Emit;
	case Oloc:
		emiti(i);
		break;
	case Ocall:
	case Osalloc:
	case Ocopy:
diff --git a/arm64/emit.c b/arm64/emit.c
index 5113c66..8bc6f75 100644
--- a/arm64/emit.c
+++ b/arm64/emit.c
@@ -446,6 +446,10 @@ emitins(Ins *i, E *e)
		if (!req(i->to, R))
			emitf("mov %=, sp", i, e);
		break;
	case Oloc:
		/* FIXME: this should be in gas.c and called into */
		emitloc(e->fn->filenum, e->fn->con[i->arg[0].val].bits.i, e->f);
		break;
	}
}

diff --git a/arm64/isel.c b/arm64/isel.c
index 062beb3..a2c53aa 100644
--- a/arm64/isel.c
+++ b/arm64/isel.c
@@ -224,6 +224,10 @@ sel(Ins i, Fn *fn)
		emiti(i);
		return;
	}
	if (i.op == Oloc) {
		emiti(i);
		return;
	}
	if (i.op != Onop) {
		emiti(i);
		iarg = curi->arg; /* fixarg() can change curi */
diff --git a/emit.c b/emit.c
index 017c461..fa4d66b 100644
--- a/emit.c
+++ b/emit.c
@@ -207,3 +207,15 @@ macho_emitfin(FILE *f)

	emitfin(f, sec);
}

void
emitfile(uint n, char *fname, FILE *f)
{
	fprintf(f, ".file %u %s\n", n, fname);
}

void
emitloc(long int a, long int b, FILE *f)
{
	fprintf(f, "\t.loc %ld %ld\n", a, b);
}
diff --git a/main.c b/main.c
index abfe03e..15de8dd 100644
--- a/main.c
+++ b/main.c
@@ -181,7 +181,7 @@ main(int ac, char *av[])
				exit(1);
			}
		}
		parse(inf, f, data, func);
		parse(inf, f, outf, data, func);
		fclose(inf);
	} while (++optind < ac);

diff --git a/ops.h b/ops.h
index fbcc2a8..6a7fb3b 100644
--- a/ops.h
+++ b/ops.h
@@ -121,6 +121,8 @@ O(vastart, T(m,e,e,e, x,e,e,e), 0) X(0, 0, 0) V(0)

O(copy,    T(w,l,s,d, x,x,x,x), 0) X(0, 0, 1) V(0)

/* Debug info */
O(loc,     T(w,l,s,d, x,x,x,x), 0) X(0, 0, 1) V(0)

/****************************************/
/* INTERNAL OPERATIONS (keep nop first) */
diff --git a/parse.c b/parse.c
index aed9427..b438c28 100644
--- a/parse.c
+++ b/parse.c
@@ -53,6 +53,7 @@ enum Token {
	Tdata,
	Tsection,
	Talign,
	Tfile,
	Tl,
	Tw,
	Tsh,
@@ -110,6 +111,7 @@ static char *kwmap[Ntok] = {
	[Tdata] = "data",
	[Tsection] = "section",
	[Talign] = "align",
	[Tfile] = "file",
	[Tsb] = "sb",
	[Tub] = "ub",
	[Tsh] = "sh",
@@ -130,7 +132,7 @@ enum {
	TMask = 16383, /* for temps hash */
	BMask = 8191, /* for blocks hash */

	K = 9583425, /* found using tools/lexh.c */
	K = 10525445, /* found using tools/lexh.c */
	M = 23,
};

@@ -592,6 +594,7 @@ parseline(PState ps)
		case Tblit:
		case Tcall:
		case Ovastart:
		case Oloc:
			/* operations without result */
			r = R;
			k = Kw;
@@ -856,7 +859,7 @@ typecheck(Fn *fn)
}

static Fn *
parsefn(Lnk *lnk)
parsefn(Lnk *lnk, uint filenum)
{
	Blk *b;
	int i;
@@ -866,6 +869,7 @@ parsefn(Lnk *lnk)
	nblk = 0;
	curi = insb;
	curf = alloc(sizeof *curf);
	curf->filenum = filenum;
	curf->ntmp = 0;
	curf->ncon = 2;
	curf->tmp = vnew(curf->ntmp, sizeof curf->tmp[0], PFn);
@@ -1160,10 +1164,11 @@ parselnk(Lnk *lnk)
}

void
parse(FILE *f, char *path, void data(Dat *), void func(Fn *))
parse(FILE *f, char *path, FILE *outf, void data(Dat *), void func(Fn *))
{
	Lnk lnk;
	uint n;
	static uint curfile = 0;

	lexinit();
	inf = f;
@@ -1177,8 +1182,12 @@ parse(FILE *f, char *path, void data(Dat *), void func(Fn *))
		switch (parselnk(&lnk)) {
		default:
			err("top-level definition expected");
		case Tfile:
			expect(Tstr);
			emitfile(++curfile, tokval.str, outf);
			break;
		case Tfunc:
			func(parsefn(&lnk));
			func(parsefn(&lnk, curfile));
			break;
		case Tdata:
			parsedat(data, &lnk);
diff --git a/rv64/emit.c b/rv64/emit.c
index f9df146..3e09410 100644
--- a/rv64/emit.c
+++ b/rv64/emit.c
@@ -405,6 +405,9 @@ emitins(Ins *i, Fn *fn, FILE *f)
		if (!req(i->to, R))
			emitf("mv %=, sp", i, fn, f);
		break;
	case Oloc:
		emitloc(fn->filenum, fn->con[i->arg[0].val].bits.i, f);
		break;
	}
}

diff --git a/rv64/isel.c b/rv64/isel.c
index 8921a07..d9bbefe 100644
--- a/rv64/isel.c
+++ b/rv64/isel.c
@@ -187,6 +187,10 @@ sel(Ins i, Fn *fn)
		selcmp(i, ck, cc, fn);
		return;
	}
	if (i.op == Oloc) {
		emiti(i);
		return;
	}
	if (i.op != Onop) {
		emiti(i);
		i0 = curi; /* fixarg() can change curi */
diff --git a/tools/lexh.c b/tools/lexh.c
index 5ceb4ee..8883976 100644
--- a/tools/lexh.c
+++ b/tools/lexh.c
@@ -23,11 +23,11 @@ char *tok[] = {
	"ceql", "cnel", "cles", "clts", "cgts", "cges",
	"cnes", "ceqs", "cos", "cuos", "cled", "cltd",
	"cgtd", "cged", "cned", "ceqd", "cod", "cuod",
	"vaarg", "vastart", "...", "env",
	"vaarg", "vastart", "...", "env", "loc",

	"call", "phi", "jmp", "jnz", "ret", "hlt", "export",
	"function", "type", "data", "section", "align", "blit",
	"l", "w", "sh", "uh", "h", "sb", "ub", "b",
	"function", "type", "data", "section", "align", "file",
	"blit", "l", "w", "sh", "uh", "h", "sb", "ub", "b",
	"d", "s", "z", "loadw", "loadl", "loads", "loadd",
	"alloc1", "alloc2",

-- 
2.38.2
Details
Message ID
<CPYXSWC58DJE.3SL3VHELMRJP7@attila>
In-Reply-To
<20230122135907.30113-1-t@laumann.xyz> (view parent)
DKIM signature
missing
Download raw message
Hi,

really glad to see someone working on this.

>  * Rebased bgs' work on current qbe master - @bgs: I changed the Author, let
>    me know if you'd like your name on the Author spot, I'll add myself to
>    Co-authored-by in that case.

I think that's fine as it is now.


>  * drop the file number argument from "file" and "loc" - the current file number
>    is kept in parse(), and added to Fn so the various emit() functions can get
>    to it.
>
> This version does not track file names it's already seen. This means a sequence
> like:
>
>     file "foo.saa"
>     ...
>     file "bar.saa"
>     ...
>     file "foo.saa"
>
> will give this output:
>
>     .file 1 "foo.saa"
>     ...
>     .file 2 "bar.saa"
>     ...
>     .file 3 "foo.saa"
>
> I figured that it'd be uncommon to want to switch back and forth between the
> same files, but let me know if this case needs to be supported.

I don't think that's going to work. Having multiple numbers for the same file
may even be fine by the standard, the problem is that the frontend needs to
know exaclty which number is assigned to what file, because the numbers are
used to keep track of other kinds of debug information that the frontends will
eventually generate. This tracking can also be done in other ways, but letting
the frontend assign them and making QBE just pass them through seems the
easiest, at least until other parts of debug information infrastructure in QBE
and interested frontends crystallize.
Details
Message ID
<Y85dIUOEVr4ORkNT@zillasnegl>
In-Reply-To
<CPYXSWC58DJE.3SL3VHELMRJP7@attila> (view parent)
DKIM signature
missing
Download raw message
> really glad to see someone working on this.

:-)

> >  * Rebased bgs' work on current qbe master - @bgs: I changed the Author, let
> >    me know if you'd like your name on the Author spot, I'll add myself to
> >    Co-authored-by in that case.
> 
> I think that's fine as it is now.

Ok.

> >  * drop the file number argument from "file" and "loc" - the current file number
> >    is kept in parse(), and added to Fn so the various emit() functions can get
> >    to it.
> >
> > This version does not track file names it's already seen. This means a sequence
> > like:
> >
> >     file "foo.saa"
> >     ...
> >     file "bar.saa"
> >     ...
> >     file "foo.saa"
> >
> > will give this output:
> >
> >     .file 1 "foo.saa"
> >     ...
> >     .file 2 "bar.saa"
> >     ...
> >     .file 3 "foo.saa"
> >
> > I figured that it'd be uncommon to want to switch back and forth between the
> > same files, but let me know if this case needs to be supported.
> 
> I don't think that's going to work. Having multiple numbers for the same file
> may even be fine by the standard, the problem is that the frontend needs to
> know exaclty which number is assigned to what file, because the numbers are
> used to keep track of other kinds of debug information that the frontends will
> eventually generate. This tracking can also be done in other ways, but letting
> the frontend assign them and making QBE just pass them through seems the
> easiest, at least until other parts of debug information infrastructure in QBE
> and interested frontends crystallize.

I think I agree with you. I started working on your patch for harec [0] and
realised that you'd need to thread a source file reference through all AST
subunits and every generated qbe definition. Plus the way that harec generates
qbe definitions I _think_ means that you need to switch back and forth between
files in the generated ssa (which this version doesn't really support).

@mpu: are you OK with frontends having control over this arbitrary number? Or
should we try to change the numbers into something more meaningful (like file
names maybe)?

-- Thomas

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/34961
Details
Message ID
<707600ed-fb2b-46ce-bad7-bcb30e87844f@app.fastmail.com>
In-Reply-To
<CPYXSWC58DJE.3SL3VHELMRJP7@attila> (view parent)
DKIM signature
missing
Download raw message
With qbe, I want to provide abstractions to frontend writers. DWARF
is the assembly of debug information, the ambition of qbe is that
its users do not have to care about assembly much.

On Sun, Jan 22, 2023, at 19:40, Bor Grošelj Simić wrote:
> I don't think that's going to work. Having multiple numbers for the same file
> may even be fine by the standard, the problem is that the frontend needs to
> know exaclty which number is assigned to what file, because the numbers are
> used to keep track of other kinds of debug information that the frontends will
> eventually generate.

I encouraged Thomas to go down this route of hiding file numbers.
It does not make sense to me why DWARF's arbitrary choices should
leak into the IL. If we want to provision for more comprehensive
support for debug information I would like to see all the pieces
of the puzzle rather than hacking in short-term solutions.

And if we are to land a short-term hack we might as well get the
nice version of it, until it evolves further.

> This tracking can also be done in other ways, but letting
> the frontend assign them and making QBE just pass them through seems the
> easiest, at least until other parts of debug information infrastructure in QBE
> and interested frontends crystallize.

The "passthrough" strategy for debug info is going to fail sooner
or later, qbe's purpose is to generate decent code and the
transformations it performs will have to be aware of the semantics
of debug info.
Roberto E. Vargas Caballero <k0ga@shike2.com>
Details
Message ID
<Y85w4Rw8SsjXwFNL@simple-cc.org>
In-Reply-To
<707600ed-fb2b-46ce-bad7-bcb30e87844f@app.fastmail.com> (view parent)
DKIM signature
missing
Download raw message
Hi,

On Mon, Jan 23, 2023 at 12:01:22PM +0100, Quentin Carbonneaux wrote:
> The "passthrough" strategy for debug info is going to fail sooner
> or later, qbe's purpose is to generate decent code and the
> transformations it performs will have to be aware of the semantics
> of debug info.

Indeed.
Reply to thread Export thread (mbox)