~sircmpwn/hare-dev

hare: Improve +libc argv/argc/envp initialization v2 APPLIED

Ember Sawady: 1
 Improve +libc argv/argc/envp initialization

 9 files changed, 47 insertions(+), 163 deletions(-)
#1002982 alpine.yml success
#1002983 freebsd.yml success
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~sircmpwn/hare-dev/patches/41695/mbox | git am -3
Learn more about email & git

[PATCH hare v2] Improve +libc argv/argc/envp initialization Export this patch

Previously, we had libc run @init, which meant that we couldn't make use
of the argc and argv it passed into rt::start_ha and we needed to do
terrible hacks in rt::start_linux. While we're here, delete the
now-unnecessary os/+linux/environ+libc.ha, which was out of sync with
os/+linux/environ.ha anyway.

Signed-off-by: Ember Sawady <ecs@d2evs.net>
---
v2: fix +test+libc
 linux/start+libc.ha                           |   2 -
 os/+linux/environ+libc.ha                     | 126 ------------------
 rt/+linux/platformstart+libc.ha               |  23 ----
 ...platformstart.ha => platformstart-libc.ha} |   0
 rt/hare+libc.sc                               |   6 +-
 rt/start+libc.ha                              |  23 +++-
 rt/start+test+libc.ha                         |  24 +++-
 scripts/gen-stdlib                            |   2 +-
 stdlib.mk                                     |   4 +-
 9 files changed, 47 insertions(+), 163 deletions(-)
 delete mode 100644 os/+linux/environ+libc.ha
 delete mode 100644 rt/+linux/platformstart+libc.ha
 rename rt/+linux/{platformstart.ha => platformstart-libc.ha} (100%)

diff --git a/linux/start+libc.ha b/linux/start+libc.ha
index d005fef8..a1db4783 100644
--- a/linux/start+libc.ha
+++ b/linux/start+libc.ha
@@ -4,8 +4,6 @@ use rt;
use format::elf;

@init fn init_linux() void = {
	rt::start_linux();

	let i = 0;
	for (rt::envp[i] != null) {
		i += 1;
diff --git a/os/+linux/environ+libc.ha b/os/+linux/environ+libc.ha
deleted file mode 100644
index e9182d17..00000000
--- a/os/+linux/environ+libc.ha
@@ -1,126 +0,0 @@
// License: MPL-2.0
// (c) 2022 Alexey Yerin <yyp@disroot.org>
use bytes;
use rt;
use strings;
use types::c;

// The command line arguments provided to the program. By convention, the first
// member is usually the name of the program.
export let args: []str = [];

// Statically allocate arg strings if there are few enough arguments, saves a
// syscall if we don't need it.
let args_static: [32]str = [""...];

// Populates [[args]] from rt::argv when the program starts.
//
// rt::start_linux is called first so that rt::argc and rt::argv are set
// before they are read here.
@init fn init_environ() void = {
	rt::start_linux();
	if (rt::argc < len(args_static)) {
		// Few enough arguments: borrow a prefix of the static array
		// instead of allocating.
		args = args_static[..rt::argc];
		for (let i = 0z; i < rt::argc; i += 1) {
			// "!" asserts that the conversion to str did not fail.
			args[i] = c::tostr(rt::argv[i]: *const c::char)!;
		};
	} else {
		// Too many arguments for the static array: build the slice on
		// the heap (second operand of alloc is the requested capacity).
		args = alloc([], rt::argc);
		for (let i = 0z; i < rt::argc; i += 1) {
			append(args, c::tostr(rt::argv[i]: *const c::char)!);
		};
	};

};

// Releases the memory held by this module when the program finishes.
@fini fn fini_environ() void = {
	// args is only heap-allocated when argc did not fit in args_static;
	// this is the inverse of the condition used in init_environ.
	if (rt::argc >= len(args_static)) {
		free(args);
	};
	// envp is this module's slice of environment strings, filled in by
	// getenvs.
	free(envp);
};

// Looks up an environment variable and returns its value, or void if unset.
//
// The returned string is a slice of the entry in rt::envp, not a copy. Aborts
// if an entry without a '=' separator is encountered before a match is found.
export fn getenv(name: const str) (str | void) = {
	const name_b = strings::toutf8(name);
	// rt::envp is scanned up to its null terminator.
	for (let i = 0z; rt::envp[i] != null; i += 1) {
		const item = rt::envp[i]: *[*]u8;
		const ln = c::strlen(item: *c::char);
		// Offset of the '=' that separates the key from the value.
		const eq: size = match (bytes::index(item[..ln], '=')) {
		case void =>
			abort("Environment violates System-V invariants");
		// This "i" is the index found by bytes::index; it shadows the
		// loop counter only within this case.
		case let i: size =>
			yield i;
		};
		if (bytes::equal(name_b, item[..eq])) {
			// Recomputes the same length as the outer "ln".
			const ln = c::strlen(item: *const c::char);
			return strings::fromutf8(item[eq+1..ln])!;
		};
	};
	// No entry matched: the function ends here and the result is void.
};

// Looks up an environment variable and returns its value, or a default value if
// unset.
//
// This is a thin wrapper around [[getenv]]: a str result is passed through
// unchanged and a void result is replaced with the "default" parameter.
export fn tryenv(name: const str, default: str) str = match (getenv(name)) {
case let s: str =>
	yield s;
case void =>
	yield default;
};

// Cache of the environment strings, filled in by the first getenvs call that
// finds a non-empty environment. Freed in fini_environ.
let envp: []str = [];

// Returns a slice of the environment strings in the form KEY=VALUE.
//
// The returned slice is the module-level cache itself, not a copy.
export fn getenvs() []str = {
	// A non-empty cache means an earlier call already converted rt::envp.
	if (len(envp) != 0) {
		return envp;
	};
	// Convert each entry of rt::envp, up to its null terminator. If the
	// environment is empty the cache stays empty and this loop runs again
	// (doing nothing) on the next call.
	for (let i = 0z; rt::envp[i] != null; i += 1) {
		append(envp, c::tostr(rt::envp[i]: *const c::char)!);
	};
	return envp;
};

// Buffer that rt::uname fills in, shared by the functions in this file that
// report host information.
let uts: rt::utsname = rt::utsname { ... };
// Set to true once rt::uname has filled in "uts", so later calls skip it.
let uts_valid: bool = false;

// Returns the host kernel name
//
// The value is read from the module-level "uts" buffer, which is filled in by
// rt::uname the first time it is needed.
export fn sysname() const str = {
	if (!uts_valid) {
		// "as void" asserts that rt::uname did not return an error.
		rt::uname(&uts) as void;
		uts_valid = true;
	};
	return c::tostr(&uts.sysname: *const c::char)!;
};

// Returns the host system hostname
//
// The value is read from the "nodename" field of the module-level "uts"
// buffer, which is filled in by rt::uname the first time it is needed.
export fn hostname() const str = {
	if (!uts_valid) {
		// "as void" asserts that rt::uname did not return an error.
		rt::uname(&uts) as void;
		uts_valid = true;
	};
	return c::tostr(&uts.nodename: *const c::char)!;
};

// Returns the host kernel version
//
// The value is read from the "release" field of the module-level "uts"
// buffer, which is filled in by rt::uname the first time it is needed.
export fn release() const str = {
	if (!uts_valid) {
		// "as void" asserts that rt::uname did not return an error.
		rt::uname(&uts) as void;
		uts_valid = true;
	};
	return c::tostr(&uts.release: *const c::char)!;
};

// Returns the host operating system version
//
// The value is read from the "version" field of the module-level "uts"
// buffer, which is filled in by rt::uname the first time it is needed.
export fn version() const str = {
	if (!uts_valid) {
		// "as void" asserts that rt::uname did not return an error.
		rt::uname(&uts) as void;
		uts_valid = true;
	};
	return c::tostr(&uts.version: *const c::char)!;
};

// Returns the host CPU architecture
//
// The value is read from the "machine" field of the module-level "uts"
// buffer, which is filled in by rt::uname the first time it is needed.
export fn machine() const str = {
	if (!uts_valid) {
		// "as void" asserts that rt::uname did not return an error.
		rt::uname(&uts) as void;
		uts_valid = true;
	};
	return c::tostr(&uts.machine: *const c::char)!;
};
diff --git a/rt/+linux/platformstart+libc.ha b/rt/+linux/platformstart+libc.ha
deleted file mode 100644
index 240a9651..00000000
--- a/rt/+linux/platformstart+libc.ha
@@ -1,23 +0,0 @@
// License: MPL-2.0
// (c) 2021-2022 Alexey Yerin <yyp@disroot.org>

// Sets the argc, argv and envp globals using only libc's "environ" pointer as
// a starting point.
export fn start_linux() void = {
	// Here we use a cool strategy of re-constructing argv and argc without
	// knowing their original values. Since environ is placed just after
	// them, it's possible to traverse backwards calculating how many
	// entries were processed and comparing that value to the one at
	// current position.

	// Start two pointer widths below environ.
	// NOTE(review): presumably this skips the null terminator of argv and
	// lands on its last entry - confirm against the process stack layout.
	let argv_ptr = c_environ: uintptr - size(*u8): uintptr * 2;
	let i = 0z;
	// Step back one pointer per iteration until the word at argv_ptr, read
	// as a size, equals the number of steps taken so far. That word is
	// then treated as argc.
	for (*(argv_ptr: **u8): uintptr: size != i; i += 1) {
		argv_ptr -= size(*u8): uintptr;
	};

	argc = i;
	// argv begins one pointer width above the word that matched.
	argv = (argv_ptr + size(*u8): uintptr): *[*]*u8;
	envp = c_environ;
};

@init fn start_linux() void = start_linux();

let @symbol("environ") c_environ: *[*]nullable *u8;
diff --git a/rt/+linux/platformstart.ha b/rt/+linux/platformstart-libc.ha
similarity index 100%
rename from rt/+linux/platformstart.ha
rename to rt/+linux/platformstart-libc.ha
diff --git a/rt/hare+libc.sc b/rt/hare+libc.sc
index f56834be..74875d75 100644
--- a/rt/hare+libc.sc
+++ b/rt/hare+libc.sc
@@ -14,10 +14,10 @@ SECTIONS {
		*(.rela.plt)
	}

	.init_array : {
		PROVIDE_HIDDEN (__init_array_start = .);
	.libc_init_array : {
		PROVIDE_HIDDEN (__libc_init_array_start = .);
		KEEP (*(.init_array))
		PROVIDE_HIDDEN (__init_array_end = .);
		PROVIDE_HIDDEN (__libc_init_array_end = .);
	}

	.fini_array : {
diff --git a/rt/start+libc.ha b/rt/start+libc.ha
index d8331c78..60205a17 100644
--- a/rt/start+libc.ha
+++ b/rt/start+libc.ha
@@ -4,10 +4,19 @@
@symbol(".main") fn main() void;
@symbol("exit") fn c_exit(status: int) void;

const @symbol("__libc_init_array_start") init_start: [*]*fn() void;
const @symbol("__libc_init_array_end") init_end: [*]*fn() void;
const @symbol("__fini_array_start") fini_start: [*]*fn() void;
const @symbol("__fini_array_end") fini_end: [*]*fn() void;

export fn init() void = void;
// Run all global initialization functions.
//
// The functions are read from the array delimited by the init_start and
// init_end linker symbols declared in this file, and are called in array
// order.
export fn init() void = {
	// Entry count: the byte distance between the two symbols divided by
	// the size of one function pointer.
	const ninit = (&init_end: uintptr - &init_start: uintptr): size
		/ size(*fn() void);
	for (let i = 0z; i < ninit; i += 1) {
		init_start[i]();
	};
};

// Run all global finalization functions.
export fn fini() void = {
@@ -18,8 +27,16 @@ export fn fini() void = {
	};
};

export @symbol("main") @noreturn fn start_ha() void = {
	// libc runs @init and @fini for us (unless rt::exit() is called)
export @symbol("main") @noreturn fn start_ha(c_argc: int, c_argv: *[*]*u8) void = {
	argc = c_argc: size;
	argv = c_argv;
	envp = c_envp;
	// we deliberately prevent libc from running @init for us, in order to
	// be able to initialize argc/argv/envp beforehand. we can still get
	// away with just using libc for @fini though
	init();
	main();
	c_exit(0);
};

let @symbol("environ") c_envp: *[*]nullable *u8;
diff --git a/rt/start+test+libc.ha b/rt/start+test+libc.ha
index 4e1fb555..f63c1ffa 100644
--- a/rt/start+test+libc.ha
+++ b/rt/start+test+libc.ha
@@ -3,11 +3,20 @@

@symbol("__test_main") fn test_main() size;

export fn init() void = void;

const @symbol("__libc_init_array_start") init_start: [*]*fn() void;
const @symbol("__libc_init_array_end") init_end: [*]*fn() void;
const @symbol("__fini_array_start") fini_start: [*]*fn() void;
const @symbol("__fini_array_end") fini_end: [*]*fn() void;

// Run all global initialization functions.
//
// The functions are read from the array delimited by the init_start and
// init_end linker symbols declared in this file, and are called in array
// order.
export fn init() void = {
	// Entry count: the byte distance between the two symbols divided by
	// the size of one function pointer.
	const ninit = (&init_end: uintptr - &init_start: uintptr): size
		/ size(*fn() void);
	for (let i = 0z; i < ninit; i += 1) {
		init_start[i]();
	};
};

// Run all global finalization functions.
export fn fini() void = {
	const nfini = (&fini_end: uintptr - &fini_start: uintptr): size
@@ -17,7 +26,16 @@ export fn fini() void = {
	};
};

export @symbol("main") fn main() int = {
export @symbol("main") fn start_ha(c_argc: int, c_argv: *[*]*u8) int = {
	argc = c_argc: size;
	argv = c_argv;
	envp = c_envp;
	// we deliberately prevent libc from running @init for us, in order to
	// be able to initialize argc/argv/envp beforehand. we can still get
	// away with just using libc for @fini though
	init();
	const nfail = test_main();
	return if (nfail > 0) 1 else 0;
};

let @symbol("environ") c_envp: *[*]nullable *u8;
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
index d5224474..65d626b8 100755
--- a/scripts/gen-stdlib
+++ b/scripts/gen-stdlib
@@ -24,7 +24,7 @@ gensrcs_rt() {
		+linux/errno.ha \
		+linux/types.ha \
		+linux/segmalloc.ha \
		+linux/platformstart.ha \
		+linux/platformstart-libc.ha \
		+linux/prctl.ha \
		+linux/'+$(ARCH)'.ha \
		+linux/syscallno+'$(ARCH)'.ha \
diff --git a/stdlib.mk b/stdlib.mk
index 163a5a9e..f4854924 100644
--- a/stdlib.mk
+++ b/stdlib.mk
@@ -8,7 +8,7 @@ stdlib_rt_linux_srcs = \
	$(STDLIB)/rt/+linux/errno.ha \
	$(STDLIB)/rt/+linux/types.ha \
	$(STDLIB)/rt/+linux/segmalloc.ha \
	$(STDLIB)/rt/+linux/platformstart.ha \
	$(STDLIB)/rt/+linux/platformstart-libc.ha \
	$(STDLIB)/rt/+linux/prctl.ha \
	$(STDLIB)/rt/+linux/+$(ARCH).ha \
	$(STDLIB)/rt/+linux/syscallno+$(ARCH).ha \
@@ -2401,7 +2401,7 @@ testlib_rt_linux_srcs = \
	$(STDLIB)/rt/+linux/errno.ha \
	$(STDLIB)/rt/+linux/types.ha \
	$(STDLIB)/rt/+linux/segmalloc.ha \
	$(STDLIB)/rt/+linux/platformstart.ha \
	$(STDLIB)/rt/+linux/platformstart-libc.ha \
	$(STDLIB)/rt/+linux/prctl.ha \
	$(STDLIB)/rt/+linux/+$(ARCH).ha \
	$(STDLIB)/rt/+linux/syscallno+$(ARCH).ha \
-- 
2.41.0
hare/patches: SUCCESS in 1m42s

[Improve +libc argv/argc/envp initialization][0] v2 from [Ember Sawady][1]

[0]: https://lists.sr.ht/~sircmpwn/hare-dev/patches/41695
[1]: mailto:ecs@d2evs.net

✓ #1002983 SUCCESS hare/patches/freebsd.yml https://builds.sr.ht/~sircmpwn/job/1002983
✓ #1002982 SUCCESS hare/patches/alpine.yml  https://builds.sr.ht/~sircmpwn/job/1002982
Thanks!

To gitsrht:~sircmpwn/hare
   d8bd0b28..da9256b2  master -> master