Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/fromager/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@
SUPPORTS_NETWORK_ISOLATION = True
NETWORK_ISOLATION_ERROR = None

try:
external_commands.detect_build_isolation()
except Exception as e:
SUPPORTS_BUILD_ISOLATION: bool = False
BUILD_ISOLATION_ERROR: str | None = str(e)
else:
SUPPORTS_BUILD_ISOLATION = True
BUILD_ISOLATION_ERROR = None
Comment on lines +33 to +40
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Probe runs on every CLI invocation — creates and deletes a real system user for fromager --help.

detect_build_isolation() executes at module import, which means every invocation of the CLI (including --help, --version, shell-completion calls, and subcommands that never use isolation) forks useradd/userdel under root. Combined with the EXIT-trap-vs-exec leak flagged in run_build_isolation.sh, this will steadily pollute /etc/passwd on any host that runs Fromager frequently.

Consider detecting lazily only when --build-isolation is requested, or make the probe a no-op (e.g., just check that the script exists and unshare --user --map-root-user -- true works) instead of actually creating a user:

🛠 Lazy-detect sketch
-try:
-    external_commands.detect_build_isolation()
-except Exception as e:
-    SUPPORTS_BUILD_ISOLATION: bool = False
-    BUILD_ISOLATION_ERROR: str | None = str(e)
-else:
-    SUPPORTS_BUILD_ISOLATION = True
-    BUILD_ISOLATION_ERROR = None
+SUPPORTS_BUILD_ISOLATION: bool | None = None
+BUILD_ISOLATION_ERROR: str | None = None
+
+def _ensure_build_isolation_supported() -> None:
+    global SUPPORTS_BUILD_ISOLATION, BUILD_ISOLATION_ERROR
+    if SUPPORTS_BUILD_ISOLATION is not None:
+        return
+    try:
+        external_commands.detect_build_isolation()
+    except Exception as e:
+        SUPPORTS_BUILD_ISOLATION = False
+        BUILD_ISOLATION_ERROR = str(e)
+    else:
+        SUPPORTS_BUILD_ISOLATION = True

…then call _ensure_build_isolation_supported() inside the if build_isolation: branch at line 252.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/fromager/__main__.py` around lines 33 - 40, The current module-level call
to external_commands.detect_build_isolation() runs on import and creates/deletes
a system user for every CLI invocation; move this probe to be lazy by removing
the import-time call and instead implement a helper (e.g.,
_ensure_build_isolation_supported()) that calls
external_commands.detect_build_isolation() only when build isolation is actually
requested (the branch that checks a --build-isolation flag), and preserve the
exported flags SUPPORTS_BUILD_ISOLATION and BUILD_ISOLATION_ERROR by setting
them inside that helper (or returning results) so they are initialized only
on-demand; alternatively make the probe a safe no-op that simply verifies the
presence of the isolation script and that unshare --user --map-root-user -- true
succeeds (without creating users) before enabling isolation.



@click.group()
@click.version_option(
Expand Down Expand Up @@ -143,6 +152,14 @@
help="Build sdist and when with network isolation (unshare -cn)",
show_default=True,
)
@click.option(
"--build-isolation/--no-build-isolation",
default=False,
help="Sandbox build steps with mount, PID, IPC, and network namespace isolation. "
"Hides credentials, makes system directories read-only, and isolates /tmp. "
"Supersedes --network-isolation for build steps.",
show_default=True,
)
@click.pass_context
def main(
ctx: click.Context,
Expand All @@ -163,6 +180,7 @@ def main(
variant: str,
jobs: int | None,
network_isolation: bool,
build_isolation: bool,
) -> None:
# Save the debug flag so invoke_main() can use it.
global _DEBUG
Expand Down Expand Up @@ -220,6 +238,7 @@ def main(
logger.info(f"maximum concurrent jobs: {jobs}")
logger.info(f"constraints file: {constraints_file}")
logger.info(f"network isolation: {network_isolation}")
logger.info(f"build isolation: {build_isolation}")
if build_wheel_server_url:
logger.info(f"external build wheel server: {build_wheel_server_url}")
else:
Expand All @@ -230,6 +249,9 @@ def main(
if network_isolation and not SUPPORTS_NETWORK_ISOLATION:
ctx.fail(f"network isolation is not available: {NETWORK_ISOLATION_ERROR}")

if build_isolation and not SUPPORTS_BUILD_ISOLATION:
ctx.fail(f"build isolation is not available: {BUILD_ISOLATION_ERROR}")

wkctx = context.WorkContext(
active_settings=packagesettings.Settings.from_files(
settings_file=settings_file,
Expand All @@ -247,6 +269,7 @@ def main(
cleanup=cleanup,
variant=variant,
network_isolation=network_isolation,
build_isolation=build_isolation,
max_jobs=jobs,
settings_dir=settings_dir,
)
Expand Down
6 changes: 6 additions & 0 deletions src/fromager/build_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,19 +140,23 @@ def run(
cwd: str | None = None,
extra_environ: dict[str, str] | None = None,
network_isolation: bool | None = None,
build_isolation: bool | None = None,
log_filename: str | None = None,
stdin: TextIOWrapper | None = None,
) -> str:
"""Run command in a virtual environment

`network_isolation` defaults to context setting.
`build_isolation` defaults to context setting.
"""
extra_environ = extra_environ.copy() if extra_environ else {}
extra_environ.update(self.get_venv_environ(template_env=extra_environ))

# default from context
if network_isolation is None:
network_isolation = self._ctx.network_isolation
if build_isolation is None:
build_isolation = self._ctx.build_isolation
if network_isolation:
# Build Rust dependencies without network access
extra_environ.setdefault("CARGO_NET_OFFLINE", "true")
Expand All @@ -162,6 +166,7 @@ def run(
cwd=cwd,
extra_environ=extra_environ,
network_isolation=network_isolation,
build_isolation=build_isolation,
log_filename=log_filename,
stdin=stdin,
)
Expand Down Expand Up @@ -210,6 +215,7 @@ def install(self, reqs: typing.Iterable[Requirement]) -> None:
cmd,
cwd=str(self.path.parent),
network_isolation=False,
build_isolation=False,
)
logger.info(
"installed dependencies %s into build environment in %s",
Expand Down
2 changes: 2 additions & 0 deletions src/fromager/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def __init__(
cleanup: bool = True,
variant: str = "cpu",
network_isolation: bool = False,
build_isolation: bool = False,
max_jobs: int | None = None,
settings_dir: pathlib.Path | None = None,
wheel_server_url: str = "",
Expand Down Expand Up @@ -81,6 +82,7 @@ def __init__(
self.cleanup_buildenv = cleanup
self.variant = variant
self.network_isolation = network_isolation
self.build_isolation = build_isolation
self.settings_dir = settings_dir

self._constraints_filename = self.work_dir / "constraints.txt"
Expand Down
1 change: 1 addition & 0 deletions src/fromager/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,7 @@ def _run_hook_with_extra_environ(
cwd=cwd,
extra_environ=extra_environ,
network_isolation=ctx.network_isolation,
build_isolation=ctx.build_isolation,
log_filename=log_filename,
)

Expand Down
67 changes: 66 additions & 1 deletion src/fromager/external_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,28 @@
HERE = pathlib.Path(__file__).absolute().parent

NETWORK_ISOLATION: list[str] | None
BUILD_ISOLATION: list[str] | None
if sys.platform == "linux":
# runner script with `unshare -rn` + `ip link set lo up`
NETWORK_ISOLATION = [str(HERE / "run_network_isolation.sh")]
# runner script with full build sandboxing (mount, PID, IPC, net, env scrubbing)
BUILD_ISOLATION = [str(HERE / "run_build_isolation.sh")]
else:
NETWORK_ISOLATION = None
BUILD_ISOLATION = None


def _get_scrub_env_vars() -> frozenset[str]:
"""Return the set of environment variable names to remove during build isolation.

Reads from the ``FROMAGER_SCRUB_ENV_VARS`` environment variable, which
should be a comma-separated list of variable names. Returns an empty set
if the variable is not set.
"""
raw = os.environ.get("FROMAGER_SCRUB_ENV_VARS", "")
if not raw:
return frozenset()
return frozenset(v.strip() for v in raw.split(",") if v.strip())


def network_isolation_cmd() -> typing.Sequence[str]:
Expand All @@ -32,6 +49,17 @@ def network_isolation_cmd() -> typing.Sequence[str]:
raise ValueError(f"unsupported platform {sys.platform}")


def build_isolation_cmd() -> typing.Sequence[str]:
"""Return command list for full build isolation.

Raises ValueError when build isolation is not supported.
Returns: command list to run a process with build isolation
"""
if BUILD_ISOLATION:
return BUILD_ISOLATION
raise ValueError(f"unsupported platform {sys.platform}")


def detect_network_isolation() -> None:
"""Detect if network isolation is available and working

Expand All @@ -44,6 +72,17 @@ def detect_network_isolation() -> None:
subprocess.check_output(check, stderr=subprocess.STDOUT)


def detect_build_isolation() -> None:
"""Detect if build isolation is available and working.

Build isolation requires mount, PID, IPC, and network namespace support.
"""
cmd = build_isolation_cmd()
if os.name == "posix":
check = [*cmd, "true"]
subprocess.check_output(check, stderr=subprocess.STDOUT)


class NetworkIsolationError(subprocess.CalledProcessError):
pass

Expand All @@ -55,6 +94,7 @@ def run(
cwd: str | None = None,
extra_environ: dict[str, typing.Any] | None = None,
network_isolation: bool = False,
build_isolation: bool = False,
log_filename: str | None = None,
stdin: TextIOWrapper | None = None,
) -> str:
Expand All @@ -64,13 +104,38 @@ def run(
line with the current package name for easier searching. Raises
``NetworkIsolationError`` instead of ``CalledProcessError`` when the
failure output indicates a network access problem.

When build_isolation is True, the command runs as an ephemeral Unix user
with network, PID, IPC, and UTS namespace isolation. The ephemeral user
cannot read credential files (e.g. .netrc owned by root with mode 600).
This supersedes network_isolation.
"""
if extra_environ is None:
extra_environ = {}
env = os.environ.copy()
env.update(extra_environ)

if network_isolation:
if build_isolation:
# Ephemeral user + PID + IPC + network + UTS namespace isolation.
# The ephemeral user provides file-level credential protection
# (.netrc is root:root 600, unreadable by the build user).
# This supersedes network_isolation.
cmd = [
*build_isolation_cmd(),
*cmd,
]
# Tell the isolation script which directory needs to be writable
# by the ephemeral build user.
if cwd:
env["FROMAGER_BUILD_DIR"] = cwd
# Remove variables listed in FROMAGER_SCRUB_ENV_VARS from the
# environment so they are not visible to build backends.
scrub_vars = _get_scrub_env_vars()
for var in scrub_vars:
env.pop(var, None)
env.setdefault("CARGO_NET_OFFLINE", "true")
network_isolation = True # for error detection below
elif network_isolation:
# prevent network access by creating a new network namespace that
# has no routing configured.
cmd = [
Expand Down
64 changes: 64 additions & 0 deletions src/fromager/run_build_isolation.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash
#
# Run command with build isolation for untrusted build backends.
#
# Uses an ephemeral Unix user for file-level isolation:
# - Cannot read credential files like .netrc (owned by root, mode 600)
# - Gets its own /tmp entries (sticky bit prevents cross-user access)
#
# Combined with Linux namespaces for:
# - Network isolation (no routing in new net namespace)
# - PID isolation (build cannot see other processes)
# - IPC isolation (isolated shared memory, semaphores, message queues)
# - UTS isolation (separate hostname)
#
# The ephemeral user is created before entering the namespace, then
# unshare runs as that user with --map-root-user so it has enough
# privilege to bring up loopback and set hostname inside the namespace.
#
# This works in unprivileged containers (Podman/Docker) without --privileged
# or --cap-add SYS_ADMIN.
#
# Ubuntu 24.04: needs `sysctl kernel.apparmor_restrict_unprivileged_userns=0`
#

set -e
set -o pipefail

if [ "$#" -eq 0 ]; then
echo "Usage: $0 command [args...]" >&2
exit 2
fi

# --- Ephemeral user creation (before namespace entry) ---

BUILD_USER="fmr_$(head -c4 /dev/urandom | od -An -tu4 | tr -d ' ')"
useradd -r -M -d /nonexistent -s /sbin/nologin "$BUILD_USER"
trap 'userdel "$BUILD_USER" 2>/dev/null || true' EXIT

# Make build dir writable by ephemeral user if set
if [ -n "${FROMAGER_BUILD_DIR:-}" ] && [ -d "$FROMAGER_BUILD_DIR" ]; then
chmod -R o+rwX "$FROMAGER_BUILD_DIR" 2>/dev/null || true
fi
Comment on lines +40 to +42
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

chmod -R o+rwX makes the build dir world-accessible, not ephemeral-user-accessible.

o+rwX sets the "other" bits, so every user on the host/container — not just $BUILD_USER — gains read/write. With parallel builds each running under a different fmr_* user, one build's ephemeral user can read and tamper with another's build tree, defeating the isolation this PR advertises. Prefer chown/chgrp to the ephemeral user (or its primary group) instead:

🛠 Suggested fix
-if [ -n "${FROMAGER_BUILD_DIR:-}" ] && [ -d "$FROMAGER_BUILD_DIR" ]; then
-   chmod -R o+rwX "$FROMAGER_BUILD_DIR" 2>/dev/null || true
-fi
+if [ -n "${FROMAGER_BUILD_DIR:-}" ] && [ -d "$FROMAGER_BUILD_DIR" ]; then
+   chown -R "$BUILD_USER":"$BUILD_USER" "$FROMAGER_BUILD_DIR" 2>/dev/null || true
+fi
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if [ -n "${FROMAGER_BUILD_DIR:-}" ] && [ -d "$FROMAGER_BUILD_DIR" ]; then
chmod -R o+rwX "$FROMAGER_BUILD_DIR" 2>/dev/null || true
fi
if [ -n "${FROMAGER_BUILD_DIR:-}" ] && [ -d "$FROMAGER_BUILD_DIR" ]; then
chown -R "$BUILD_USER":"$BUILD_USER" "$FROMAGER_BUILD_DIR" 2>/dev/null || true
fi
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/fromager/run_build_isolation.sh` around lines 40 - 42, The current chmod
command makes the build dir world-accessible; instead change ownership to the
ephemeral build user/group so only that user (or its primary group) can access
it: locate the block that checks FROMAGER_BUILD_DIR and replace the chmod -R
o+rwX "$FROMAGER_BUILD_DIR" call with a chown/chgrp approach that sets ownership
to "$BUILD_USER" (and/or its primary group) and applies permissions for
owner/group (not "other"), e.g. chown -R "$BUILD_USER": "$FROMAGER_BUILD_DIR"
and adjust mode to u+rwX,g+rwX (with safe stderr redirection and || true to
preserve existing error handling). Ensure references to FROMAGER_BUILD_DIR and
BUILD_USER (and any fmr_* ephemeral user naming convention) are used so the
correct directory/user is targeted.


# --- Enter namespaces as ephemeral user ---
# setpriv drops to the ephemeral user, then unshare creates namespaces.
# --map-root-user maps the ephemeral user to UID 0 inside the namespace
# so it can run ip/hostname.

BUILD_UID=$(id -u "$BUILD_USER")
BUILD_GID=$(id -g "$BUILD_USER")

exec setpriv --reuid="$BUILD_UID" --regid="$BUILD_GID" --clear-groups -- \
unshare --uts --net --pid --ipc --fork --map-root-user -- \
Comment on lines +52 to +53
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🌐 Web query:

unshare --mount --map-root-user unprivileged user namespace requirements

💡 Result:

To run unshare --mount --map-root-user as an unprivileged (non-root) user, the following requirements must be met: Kernel configuration: - CONFIG_USER_NS=y (user namespaces enabled at compile time). - CONFIG_NAMESPACES=y (general namespace support). Runtime kernel settings (check with cat /proc/sys/kernel/unprivileged_userns_clone; some distros like older Debian disable it): - kernel.unprivileged_userns_clone=1 (allows unprivileged creation of user namespaces). Set with sudo sysctl kernel.unprivileged_userns_clone=1. - Sufficient user.max_user_namespaces limit (default usually allows it; check /proc/sys/user/max_user_namespaces). User setup: - Entries in /etc/subuid and /etc/subgid for the user (e.g., username:100000:65536), as --map-root-user uses subordinate IDs to map the caller's UID/GID to root (0:0) inside the namespace. The newuidmap/newgidmap setuid helpers (provided by util-linux or shadow) use these. util-linux version: - Recent enough for --map-root-user (available since ~2.26; works in modern versions). Process state: - Not in a chroot (EPERM if caller's root dir differs from mount namespace root). - Caller must have write access to /proc/self/{uid_map,gid_map,setgroups} post-unshare (handled by unshare). How it works: 1. unshare creates a user namespace (CLONE_NEWUSER, unprivileged since Linux 3.8). 2. --map-root-user writes to uid_map/gid_map (e.g., 0 1) and setgroups=deny, gaining CAP_SYS_ADMIN in the new user namespace. 3. This CAP_SYS_ADMIN (in user ns owning the mount ns) allows unsharing/using mount namespace (CLONE_NEWNS) for mounts like bind mounts, proc, tmpfs (but not block devices). Examples from man pages work unprivileged: unshare --user --map-root-user sh -c whoami # outputs root unshare --map-root-user --mount mount --bind foo bar If it fails with "Operation not permitted": - Check/enable kernel.unprivileged_userns_clone. - Verify subuid/subgid (use id -u and check files). - Ensure kernel >=3.8, CONFIG_USER_NS=y (grep CONFIG_USER_NS /boot/config-$(uname -r)). - No seccomp/container restrictions (e.g., Docker blocks by default). As of 2026, most distros (Ubuntu 22+, Fedora, etc.) enable by default; older/secure setups may need tweaks.

Citations:


🏁 Script executed:

cat -n src/fromager/run_build_isolation.sh | head -60

Repository: python-wheel-build/fromager

Length of output: 2580


🏁 Script executed:

# Search for Issue `#1019` references in the codebase
rg "1019" --type-list | head -20
rg "mount" src/fromager/run_build_isolation.sh

Repository: python-wheel-build/fromager

Length of output: 460


🏁 Script executed:

# Check the full context around lines 52-53 and look for any mount setup
sed -n '40,70p' src/fromager/run_build_isolation.sh

Repository: python-wheel-build/fromager

Length of output: 895


🏁 Script executed:

# Search for /tmp isolation or mount setup elsewhere
rg "/tmp" src/fromager/ -A 2 -B 2

Repository: python-wheel-build/fromager

Length of output: 880


🏁 Script executed:

# Search for references to Issue 1019 or mount-related isolation setup
rg "1019\|mount" src/fromager/ -i

Repository: python-wheel-build/fromager

Length of output: 53


🏁 Script executed:

# Check if there's any mount setup or planned isolation features in the codebase
rg "FROMAGER.*MOUNT\|ISOLATION.*MOUNT" -i

Repository: python-wheel-build/fromager

Length of output: 53


🏁 Script executed:

# Look for any TODOs or FIXMEs related to mount isolation
rg "TODO\|FIXME" src/fromager/run_build_isolation.sh

Repository: python-wheel-build/fromager

Length of output: 53


🏁 Script executed:

# Check the __main__.py file more thoroughly for sandbox-isolation context
rg -A 10 -B 5 "mount.*PID.*IPC" src/fromager/__main__.py

Repository: python-wheel-build/fromager

Length of output: 541


Add --mount to unshare invocation to fulfill promised isolation.

The help text (--build-isolation) explicitly promises "mount, PID, IPC, and network namespace isolation" and "makes system directories read-only, and isolates /tmp," but the unshare command at lines 52–53 omits --mount. Without it:

  • World-readable files in $HOME (.config/*, SSH known_hosts, etc.) remain accessible
  • /tmp is only isolated via sticky bit + ephemeral user, not per-build mount namespace
  • Cannot enforce read-only /usr or tmpfs overlays as promised

Add --mount after --ipc:

unshare --uts --net --pid --ipc --mount --fork --map-root-user -- \

The --map-root-user flag already grants sufficient CAP_SYS_ADMIN privilege for mount operations.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/fromager/run_build_isolation.sh` around lines 52 - 53, The unshare
invocation in run_build_isolation.sh currently omits the mount namespace flag;
update the exec line that calls setpriv/unshare (the command including "setpriv
--reuid=... --regid=... --clear-groups -- \ unshare --uts --net --pid --ipc
--fork --map-root-user -- \") to include --mount (i.e., add --mount after --ipc)
so the script actually creates a mount namespace and enables making system dirs
read-only and isolating /tmp as promised.

/bin/bash -c '
# bring loopback up
if command -v ip 2>&1 >/dev/null; then
ip link set lo up
fi
# set hostname
if command -v hostname 2>&1 >/dev/null; then
hostname localhost
fi
Comment on lines +56 to +62
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Redirection order is reversed.

2>&1 >/dev/null first points stderr at the current stdout (the terminal), then redirects stdout to /dev/null, so stderr still leaks to the caller. For command -v this is usually harmless because it only writes to stdout, but the idiom is a footgun — flip the order:

🛠 Suggested fix
-      if command -v ip 2>&1 >/dev/null; then
+      if command -v ip >/dev/null 2>&1; then
          ip link set lo up
       fi
       # set hostname
-      if command -v hostname 2>&1 >/dev/null; then
+      if command -v hostname >/dev/null 2>&1; then
          hostname localhost
       fi
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if command -v ip 2>&1 >/dev/null; then
ip link set lo up
fi
# set hostname
if command -v hostname 2>&1 >/dev/null; then
hostname localhost
fi
if command -v ip >/dev/null 2>&1; then
ip link set lo up
fi
# set hostname
if command -v hostname >/dev/null 2>&1; then
hostname localhost
fi
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/fromager/run_build_isolation.sh` around lines 56 - 62, The command
existence checks using "command -v ip" and "command -v hostname" use the wrong
redirection order ("2>&1 >/dev/null") which leaves stderr attached; change both
checks to redirect stdout then stderr (e.g., ">/dev/null 2>&1") so both streams
are discarded when checking presence of ip and hostname in the
run_build_isolation.sh script.

exec "$@"
' -- "$@"
Comment on lines +37 to +64
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Ephemeral user leaks: EXIT trap never fires after exec.

exec at line 52 replaces the current shell, so Bash never reaches its exit path and the trap ... EXIT set on line 37 is discarded. Every successful build (the common case) leaves a fmr_* entry behind in /etc/passwd//etc/shadow, and they accumulate indefinitely. Only the failure-before-exec path currently cleans up.

Run the child without exec and let the trap do the cleanup, or clean up explicitly:

🛠 Suggested fix
-exec setpriv --reuid="$BUILD_UID" --regid="$BUILD_GID" --clear-groups -- \
-   unshare --uts --net --pid --ipc --fork --map-root-user -- \
-   /bin/bash -c '
+setpriv --reuid="$BUILD_UID" --regid="$BUILD_GID" --clear-groups -- \
+   unshare --uts --net --pid --ipc --fork --map-root-user -- \
+   /bin/bash -c '
       # bring loopback up
       if command -v ip 2>&1 >/dev/null; then
          ip link set lo up
       fi
       # set hostname
       if command -v hostname 2>&1 >/dev/null; then
          hostname localhost
       fi
       exec "$@"
    ' -- "$@"
+status=$?
+exit "$status"
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
trap 'userdel "$BUILD_USER" 2>/dev/null || true' EXIT
# Make build dir writable by ephemeral user if set
if [ -n "${FROMAGER_BUILD_DIR:-}" ] && [ -d "$FROMAGER_BUILD_DIR" ]; then
chmod -R o+rwX "$FROMAGER_BUILD_DIR" 2>/dev/null || true
fi
# --- Enter namespaces as ephemeral user ---
# setpriv drops to the ephemeral user, then unshare creates namespaces.
# --map-root-user maps the ephemeral user to UID 0 inside the namespace
# so it can run ip/hostname.
BUILD_UID=$(id -u "$BUILD_USER")
BUILD_GID=$(id -g "$BUILD_USER")
exec setpriv --reuid="$BUILD_UID" --regid="$BUILD_GID" --clear-groups -- \
unshare --uts --net --pid --ipc --fork --map-root-user -- \
/bin/bash -c '
# bring loopback up
if command -v ip 2>&1 >/dev/null; then
ip link set lo up
fi
# set hostname
if command -v hostname 2>&1 >/dev/null; then
hostname localhost
fi
exec "$@"
' -- "$@"
trap 'userdel "$BUILD_USER" 2>/dev/null || true' EXIT
# Make build dir writable by ephemeral user if set
if [ -n "${FROMAGER_BUILD_DIR:-}" ] && [ -d "$FROMAGER_BUILD_DIR" ]; then
chmod -R o+rwX "$FROMAGER_BUILD_DIR" 2>/dev/null || true
fi
# --- Enter namespaces as ephemeral user ---
# setpriv drops to the ephemeral user, then unshare creates namespaces.
# --map-root-user maps the ephemeral user to UID 0 inside the namespace
# so it can run ip/hostname.
BUILD_UID=$(id -u "$BUILD_USER")
BUILD_GID=$(id -g "$BUILD_USER")
setpriv --reuid="$BUILD_UID" --regid="$BUILD_GID" --clear-groups -- \
unshare --uts --net --pid --ipc --fork --map-root-user -- \
/bin/bash -c '
# bring loopback up
if command -v ip 2>&1 >/dev/null; then
ip link set lo up
fi
# set hostname
if command -v hostname 2>&1 >/dev/null; then
hostname localhost
fi
exec "$@"
' -- "$@"
status=$?
exit "$status"
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/fromager/run_build_isolation.sh` around lines 37 - 64, The EXIT trap that
runs userdel for BUILD_USER never executes because the script uses exec to
replace the shell with setpriv; change the invocation so the script does not
exec (i.e., invoke setpriv/unshare in a child process and wait for it) or
explicitly perform cleanup after the child returns: remove the exec before
setpriv --reuid/--regid/... -- unshare ... and instead spawn the command
(preserving BUILD_USER, BUILD_UID, BUILD_GID usage), capture its exit status,
then run the existing trap/cleanup (userdel "$BUILD_USER" 2>/dev/null || true)
before exiting so ephemeral fmr_* accounts are always removed.

Loading