From 599c9a70d80e19c78807b4f2d434a0061bbb52e2 Mon Sep 17 00:00:00 2001 From: oratis Date: Mon, 1 Jun 2026 23:16:47 +0800 Subject: [PATCH 1/7] chore(sandbox): diagnostic PoC for slirp4netns net-allowlist mechanics TEMPORARY. Adds scripts/sandbox-net-poc.sh + a continue-on-error Linux CI step that exercises the full bwrap --unshare-net + slirp4netns + DNS-proxy allowlist flow on a real kernel, printing diagnostics. This nails down the exact info-fd/ready-fd handshake, host-loopback DNS routing, and port-53 bindability before they are encoded in packages/core/src/sandbox/netns.ts. Both the script and the CI step are removed once the TS orchestrator + integration test land. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/ci.yml | 8 +++ scripts/sandbox-net-poc.sh | 137 +++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100755 scripts/sandbox-net-poc.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5048392..f899730 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,6 +49,14 @@ jobs: sudo apt-get install -y bubblewrap slirp4netns curl sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 || true + # TEMPORARY diagnostic — proves the bwrap+slirp4netns+DNS-allowlist + # mechanics on a real kernel before they are encoded in netns.ts. Removed + # once the TypeScript orchestrator + integration test land. + - name: Sandbox net PoC (Linux, diagnostic) + if: runner.os == 'Linux' + continue-on-error: true + run: bash scripts/sandbox-net-poc.sh + - name: Typecheck run: pnpm typecheck diff --git a/scripts/sandbox-net-poc.sh b/scripts/sandbox-net-poc.sh new file mode 100755 index 0000000..24a480e --- /dev/null +++ b/scripts/sandbox-net-poc.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash +# DIAGNOSTIC PoC for the Linux selective-network-allowlist sandbox. 
+# NOT part of the product — this is a throwaway probe run in CI (Linux only) to +# nail down the exact bwrap + slirp4netns + DNS-proxy mechanics before encoding +# them in TypeScript (packages/core/src/sandbox/netns.ts). Removed once proven. +# +# Proves the full flow: +# bwrap --unshare-net (own netns, no connectivity) +# + slirp4netns (userspace NAT → outbound connectivity, rootless) +# + allowlisting DNS (resolv.conf → host proxy; NXDOMAIN for non-allowed) +# => allowed domain resolves+connects; denied domain fails to resolve. +# +# Everything is best-effort + verbose so one CI run reveals what works. + +set -uo pipefail + +say() { echo ""; echo "===== $* ====="; } + +say "versions" +bwrap --version || true +slirp4netns --version || true +python3 --version || true + +say "bwrap help — fd handshake flags" +bwrap --help 2>&1 | grep -iE "info-fd|block-fd|sync-fd|userns-block|unshare-net|--bind|--chdir" || true + +say "slirp4netns help — config/ready/dns flags" +slirp4netns --help 2>&1 | grep -iE "ready-fd|configure|disable-host-loopback|mtu|netns|--dns|outbound" || true + +say "unprivileged port start (need <=53 to bind :53 rootless)" +sysctl net.ipv4.ip_unprivileged_port_start 2>/dev/null || true +echo "relaxing to 53 for the proxy..." 
+sudo sysctl -w net.ipv4.ip_unprivileged_port_start=53 || true + +# ── workspace ─────────────────────────────────────────────────────────────── +WORK="$(mktemp -d)" +CTL="$WORK/ctl"; mkdir -p "$CTL" +mkfifo "$CTL/net-ready" +CWD="$WORK/cwd"; mkdir -p "$CWD" +echo "nameserver 10.0.2.2" > "$WORK/resolv.conf" # slirp gateway → host loopback +echo "WORK=$WORK" + +# ── allowlisting DNS proxy on host 127.0.0.1:53 (allow example.com only) ────── +say "start allowlist DNS proxy on 127.0.0.1:53" +cat > "$WORK/dns.py" <<'PY' +import socket, struct, sys +ALLOW = {"example.com", "www.example.com"} +UP = ("1.1.1.1", 53) +def qname(b): + i, parts = 12, [] + while i < len(b): + n = b[i] + if n == 0: break + parts.append(b[i+1:i+1+n].decode("latin1")); i += 1+n + return ".".join(parts).lower().rstrip(".") +s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +s.bind(("127.0.0.1", 53)) +print("dns-proxy listening :53", flush=True) +while True: + try: + data, addr = s.recvfrom(2048) + name = qname(data) + if name in ALLOW: + print(f"ALLOW {name}", flush=True) + u = socket.socket(socket.AF_INET, socket.SOCK_DGRAM); u.settimeout(5) + u.sendto(data, UP) + try: resp,_ = u.recvfrom(2048); s.sendto(resp, addr) + except Exception as e: print("upstream err", e, flush=True) + u.close() + else: + print(f"DENY {name}", flush=True) + r = bytearray(data[:12]); r[2]=0x81; r[3]=0x83 # QR=1, RCODE=3 NXDOMAIN + s.sendto(bytes(r)+data[12:], addr) + except Exception as e: + print("proxy err", e, flush=True) +PY +python3 "$WORK/dns.py" & +DNS_PID=$! +sleep 0.5 +if ! kill -0 "$DNS_PID" 2>/dev/null; then echo "!! 
DNS proxy failed to start (port 53 bind?)"; fi + +# ── bwrap with own netns; inner cmd waits for slirp readiness via FIFO ──────── +say "spawn bwrap (--unshare-net), capture child-pid via --info-fd" +INNER='cat /dc-ctl/net-ready >/dev/null 2>&1 +echo "--- inside sandbox: interfaces ---" +ip addr show 2>/dev/null | grep -E "tap0|inet " || true +echo "--- resolv.conf ---"; cat /etc/resolv.conf +echo "--- curl ALLOWED (example.com) ---" +curl -sS --max-time 12 -o /dev/null -w "allowed_http=%{http_code}\n" https://example.com 2>&1 || echo "allowed_curl_exit=$?" +echo "--- curl DENIED (github.com) ---" +curl -sS --max-time 12 -o /dev/null -w "denied_http=%{http_code}\n" https://github.com 2>&1 || echo "denied_curl_exit=$?" +echo "--- direct-IP DENIED-domain note (raw IP bypasses DNS allowlist; expected) ---"' + +# info-fd → fd 8 → a file we read for child-pid JSON +bwrap \ + --ro-bind-try /usr /usr --ro-bind-try /lib /lib --ro-bind-try /lib64 /lib64 \ + --ro-bind-try /bin /bin --ro-bind-try /sbin /sbin --ro-bind-try /etc /etc \ + --proc /proc --dev /dev --tmpfs /tmp \ + --ro-bind "$WORK/resolv.conf" /etc/resolv.conf \ + --ro-bind "$CTL" /dc-ctl \ + --bind "$CWD" "$CWD" \ + --unshare-net --unshare-pid --unshare-ipc --unshare-uts \ + --new-session --die-with-parent \ + --info-fd 8 \ + /bin/sh -c "$INNER" 8>"$WORK/info.json" & +BWRAP_PID=$! +echo "bwrap host pid=$BWRAP_PID" + +# wait for info.json to be populated, then extract child-pid +for _ in $(seq 1 50); do [ -s "$WORK/info.json" ] && break; sleep 0.1; done +echo "--- info.json ---"; cat "$WORK/info.json" 2>/dev/null || echo "(empty)" +CHILD_PID="$(python3 -c 'import json,sys;print(json.load(open(sys.argv[1]))["child-pid"])' "$WORK/info.json" 2>/dev/null || echo "")" +echo "child-pid=$CHILD_PID" + +if [ -n "$CHILD_PID" ]; then + say "start slirp4netns attached to child-pid=$CHILD_PID" + slirp4netns --configure --mtu=65520 --ready-fd=9 "$CHILD_PID" tap0 9>"$WORK/slirp-ready" & + SLIRP_PID=$! 
+ # wait for slirp ready byte + for _ in $(seq 1 50); do [ -s "$WORK/slirp-ready" ] && break; sleep 0.1; done + echo "slirp ready marker present: $([ -s "$WORK/slirp-ready" ] && echo yes || echo no)" + sleep 0.3 + say "signal sandbox to proceed (open FIFO for write)" + echo go > "$CTL/net-ready" +else + echo "!! no child-pid; cannot attach slirp" + echo go > "$CTL/net-ready" 2>/dev/null || true +fi + +say "wait for sandbox to finish" +wait "$BWRAP_PID" 2>/dev/null; echo "bwrap exit=$?" + +say "cleanup" +kill "$SLIRP_PID" 2>/dev/null || true +kill "$DNS_PID" 2>/dev/null || true +rm -rf "$WORK" || true +echo "PoC done." From d226f1d7aa4407df6788423a5ac6237b1f803c91 Mon Sep 17 00:00:00 2001 From: oratis Date: Mon, 1 Jun 2026 23:26:53 +0800 Subject: [PATCH 2/7] chore(sandbox): make net PoC hang-proof (sleep readiness + watchdog) The FIFO-based readiness handshake deadlocked in CI (host open(O_WRONLY) blocks forever when the in-sandbox reader and host writer don't share the inode across the bind mount). Replace it with a 3s sleep window inside the sandbox (slirp configures in <1s) plus a 45s background watchdog that hard-kills the sandbox and a trap that always tears down slirp/proxy/tmp. Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/sandbox-net-poc.sh | 66 +++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/scripts/sandbox-net-poc.sh b/scripts/sandbox-net-poc.sh index 24a480e..8fee1d8 100755 --- a/scripts/sandbox-net-poc.sh +++ b/scripts/sandbox-net-poc.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash # DIAGNOSTIC PoC for the Linux selective-network-allowlist sandbox. -# NOT part of the product — this is a throwaway probe run in CI (Linux only) to -# nail down the exact bwrap + slirp4netns + DNS-proxy mechanics before encoding -# them in TypeScript (packages/core/src/sandbox/netns.ts). Removed once proven. 
+# NOT part of the product — a throwaway probe run in CI (Linux only) to nail +# down the exact bwrap + slirp4netns + DNS-proxy mechanics before encoding them +# in TypeScript (packages/core/src/sandbox/netns.ts). Removed once proven. # # Proves the full flow: # bwrap --unshare-net (own netns, no connectivity) @@ -10,32 +10,41 @@ # + allowlisting DNS (resolv.conf → host proxy; NXDOMAIN for non-allowed) # => allowed domain resolves+connects; denied domain fails to resolve. # +# HANG-PROOF: no blocking FIFO (a `sleep` window covers slirp configuration), +# a background watchdog hard-kills the sandbox, and a trap always cleans up. # Everything is best-effort + verbose so one CI run reveals what works. set -uo pipefail say() { echo ""; echo "===== $* ====="; } +DNS_PID=""; SLIRP_PID=""; BWRAP_PID=""; WATCH_PID=""; WORK="" +cleanup() { + [ -n "$WATCH_PID" ] && kill "$WATCH_PID" 2>/dev/null || true + [ -n "$BWRAP_PID" ] && kill "$BWRAP_PID" 2>/dev/null || true + [ -n "$SLIRP_PID" ] && kill "$SLIRP_PID" 2>/dev/null || true + [ -n "$DNS_PID" ] && kill "$DNS_PID" 2>/dev/null || true + [ -n "$WORK" ] && rm -rf "$WORK" 2>/dev/null || true +} +trap cleanup EXIT + say "versions" bwrap --version || true slirp4netns --version || true python3 --version || true say "bwrap help — fd handshake flags" -bwrap --help 2>&1 | grep -iE "info-fd|block-fd|sync-fd|userns-block|unshare-net|--bind|--chdir" || true +bwrap --help 2>&1 | grep -iE "info-fd|block-fd|sync-fd|userns-block|unshare-net|--chdir" || true say "slirp4netns help — config/ready/dns flags" slirp4netns --help 2>&1 | grep -iE "ready-fd|configure|disable-host-loopback|mtu|netns|--dns|outbound" || true say "unprivileged port start (need <=53 to bind :53 rootless)" sysctl net.ipv4.ip_unprivileged_port_start 2>/dev/null || true -echo "relaxing to 53 for the proxy..." 
sudo sysctl -w net.ipv4.ip_unprivileged_port_start=53 || true # ── workspace ─────────────────────────────────────────────────────────────── WORK="$(mktemp -d)" -CTL="$WORK/ctl"; mkdir -p "$CTL" -mkfifo "$CTL/net-ready" CWD="$WORK/cwd"; mkdir -p "$CWD" echo "nameserver 10.0.2.2" > "$WORK/resolv.conf" # slirp gateway → host loopback echo "WORK=$WORK" @@ -43,7 +52,7 @@ echo "WORK=$WORK" # ── allowlisting DNS proxy on host 127.0.0.1:53 (allow example.com only) ────── say "start allowlist DNS proxy on 127.0.0.1:53" cat > "$WORK/dns.py" <<'PY' -import socket, struct, sys +import socket ALLOW = {"example.com", "www.example.com"} UP = ("1.1.1.1", 53) def qname(b): @@ -77,27 +86,25 @@ PY python3 "$WORK/dns.py" & DNS_PID=$! sleep 0.5 -if ! kill -0 "$DNS_PID" 2>/dev/null; then echo "!! DNS proxy failed to start (port 53 bind?)"; fi +kill -0 "$DNS_PID" 2>/dev/null || echo "!! DNS proxy failed to start (port 53 bind?)" -# ── bwrap with own netns; inner cmd waits for slirp readiness via FIFO ──────── +# ── bwrap with own netns; inner cmd sleeps to let slirp configure, then curls ─ say "spawn bwrap (--unshare-net), capture child-pid via --info-fd" -INNER='cat /dc-ctl/net-ready >/dev/null 2>&1 +INNER='sleep 3 echo "--- inside sandbox: interfaces ---" -ip addr show 2>/dev/null | grep -E "tap0|inet " || true +ip addr 2>/dev/null | grep -E "tap0|inet " || echo "(no ip tool / no addrs)" echo "--- resolv.conf ---"; cat /etc/resolv.conf echo "--- curl ALLOWED (example.com) ---" -curl -sS --max-time 12 -o /dev/null -w "allowed_http=%{http_code}\n" https://example.com 2>&1 || echo "allowed_curl_exit=$?" +curl -sS --max-time 10 -o /dev/null -w "allowed_http=%{http_code}\n" https://example.com 2>&1 || echo "allowed_curl_exit=$?" echo "--- curl DENIED (github.com) ---" -curl -sS --max-time 12 -o /dev/null -w "denied_http=%{http_code}\n" https://github.com 2>&1 || echo "denied_curl_exit=$?" 
-echo "--- direct-IP DENIED-domain note (raw IP bypasses DNS allowlist; expected) ---"' +curl -sS --max-time 10 -o /dev/null -w "denied_http=%{http_code}\n" https://github.com 2>&1 || echo "denied_curl_exit=$?" +echo "--- sandbox inner done ---"' -# info-fd → fd 8 → a file we read for child-pid JSON bwrap \ --ro-bind-try /usr /usr --ro-bind-try /lib /lib --ro-bind-try /lib64 /lib64 \ --ro-bind-try /bin /bin --ro-bind-try /sbin /sbin --ro-bind-try /etc /etc \ --proc /proc --dev /dev --tmpfs /tmp \ --ro-bind "$WORK/resolv.conf" /etc/resolv.conf \ - --ro-bind "$CTL" /dc-ctl \ --bind "$CWD" "$CWD" \ --unshare-net --unshare-pid --unshare-ipc --unshare-uts \ --new-session --die-with-parent \ @@ -106,7 +113,11 @@ bwrap \ BWRAP_PID=$! echo "bwrap host pid=$BWRAP_PID" -# wait for info.json to be populated, then extract child-pid +# watchdog: hard-kill the sandbox after 45s no matter what +( sleep 45; kill "$BWRAP_PID" 2>/dev/null ) & +WATCH_PID=$! + +# wait for info.json, extract child-pid for _ in $(seq 1 50); do [ -s "$WORK/info.json" ] && break; sleep 0.1; done echo "--- info.json ---"; cat "$WORK/info.json" 2>/dev/null || echo "(empty)" CHILD_PID="$(python3 -c 'import json,sys;print(json.load(open(sys.argv[1]))["child-pid"])' "$WORK/info.json" 2>/dev/null || echo "")" @@ -114,24 +125,13 @@ echo "child-pid=$CHILD_PID" if [ -n "$CHILD_PID" ]; then say "start slirp4netns attached to child-pid=$CHILD_PID" - slirp4netns --configure --mtu=65520 --ready-fd=9 "$CHILD_PID" tap0 9>"$WORK/slirp-ready" & + slirp4netns --configure --mtu=65520 "$CHILD_PID" tap0 & SLIRP_PID=$! - # wait for slirp ready byte - for _ in $(seq 1 50); do [ -s "$WORK/slirp-ready" ] && break; sleep 0.1; done - echo "slirp ready marker present: $([ -s "$WORK/slirp-ready" ] && echo yes || echo no)" - sleep 0.3 - say "signal sandbox to proceed (open FIFO for write)" - echo go > "$CTL/net-ready" + echo "slirp pid=$SLIRP_PID (inner sleeps 3s to let it configure)" else - echo "!! 
no child-pid; cannot attach slirp" - echo go > "$CTL/net-ready" 2>/dev/null || true + echo "!! no child-pid; cannot attach slirp (curls will fail)" fi -say "wait for sandbox to finish" +say "wait for sandbox to finish (bounded by 45s watchdog)" wait "$BWRAP_PID" 2>/dev/null; echo "bwrap exit=$?" - -say "cleanup" -kill "$SLIRP_PID" 2>/dev/null || true -kill "$DNS_PID" 2>/dev/null || true -rm -rf "$WORK" || true echo "PoC done." From b290caaf99354f300fc3cd052e6c69b39ae34314 Mon Sep 17 00:00:00 2001 From: oratis Date: Mon, 1 Jun 2026 23:31:53 +0800 Subject: [PATCH 3/7] chore(sandbox): fix resolv.conf bind (dangling symlink) + --disable-dns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bwrap couldn't bind our resolv.conf onto /etc/resolv.conf because on the runner it's a dangling symlink (→ /run/systemd/resolve/stub-resolv.conf, not mounted in the sandbox). Bind our file at the readlink-resolved real path so the preserved symlink leads to it. Add slirp4netns --disable-dns to close the 10.0.2.3 bypass (all resolution must traverse our allowlist). Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/sandbox-net-poc.sh | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/sandbox-net-poc.sh b/scripts/sandbox-net-poc.sh index 8fee1d8..9c6ff40 100755 --- a/scripts/sandbox-net-poc.sh +++ b/scripts/sandbox-net-poc.sh @@ -49,6 +49,15 @@ CWD="$WORK/cwd"; mkdir -p "$CWD" echo "nameserver 10.0.2.2" > "$WORK/resolv.conf" # slirp gateway → host loopback echo "WORK=$WORK" +# /etc/resolv.conf is usually a dangling symlink (→ /run/systemd/resolve/...) +# which bwrap can't create a bind target for. Bind our file at the symlink's +# RESOLVED real path so the preserved /etc/resolv.conf symlink leads to it. 
+say "resolv.conf shape on host" +ls -l /etc/resolv.conf || true +RP="$(readlink -f /etc/resolv.conf 2>/dev/null || true)" +[ -n "$RP" ] || RP=/etc/resolv.conf +echo "resolv real path RP=$RP" + # ── allowlisting DNS proxy on host 127.0.0.1:53 (allow example.com only) ────── say "start allowlist DNS proxy on 127.0.0.1:53" cat > "$WORK/dns.py" <<'PY' @@ -104,7 +113,7 @@ bwrap \ --ro-bind-try /usr /usr --ro-bind-try /lib /lib --ro-bind-try /lib64 /lib64 \ --ro-bind-try /bin /bin --ro-bind-try /sbin /sbin --ro-bind-try /etc /etc \ --proc /proc --dev /dev --tmpfs /tmp \ - --ro-bind "$WORK/resolv.conf" /etc/resolv.conf \ + --ro-bind "$WORK/resolv.conf" "$RP" \ --bind "$CWD" "$CWD" \ --unshare-net --unshare-pid --unshare-ipc --unshare-uts \ --new-session --die-with-parent \ @@ -125,7 +134,9 @@ echo "child-pid=$CHILD_PID" if [ -n "$CHILD_PID" ]; then say "start slirp4netns attached to child-pid=$CHILD_PID" - slirp4netns --configure --mtu=65520 "$CHILD_PID" tap0 & + # --disable-dns closes the 10.0.2.3 host-DNS bypass so ALL resolution must go + # through our allowlisting proxy (guest resolv.conf points only at 10.0.2.2). + slirp4netns --configure --disable-dns --mtu=65520 "$CHILD_PID" tap0 & SLIRP_PID=$! echo "slirp pid=$SLIRP_PID (inner sleeps 3s to let it configure)" else From 769cd31109b2f60e7bc37fc508ab34ced8bb7a1c Mon Sep 17 00:00:00 2001 From: oratis Date: Mon, 1 Jun 2026 23:36:35 +0800 Subject: [PATCH 4/7] chore(sandbox): slirp --userns-path to fix setns EPERM into bwrap netns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit slirp4netns failed with setns(CLONE_NEWNET): Operation not permitted — bwrap's net namespace is owned by bwrap's child user namespace, which the host-user slirp process has no CAP_SYS_ADMIN over. Pass --userns-path=/proc/<pid>/ns/user so slirp enters that userns (where it is root) before the netns. resolv.conf bind now confirmed working. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/sandbox-net-poc.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/sandbox-net-poc.sh b/scripts/sandbox-net-poc.sh index 9c6ff40..3a79114 100755 --- a/scripts/sandbox-net-poc.sh +++ b/scripts/sandbox-net-poc.sh @@ -36,8 +36,8 @@ python3 --version || true say "bwrap help — fd handshake flags" bwrap --help 2>&1 | grep -iE "info-fd|block-fd|sync-fd|userns-block|unshare-net|--chdir" || true -say "slirp4netns help — config/ready/dns flags" -slirp4netns --help 2>&1 | grep -iE "ready-fd|configure|disable-host-loopback|mtu|netns|--dns|outbound" || true +say "slirp4netns help — config/ready/dns/userns flags" +slirp4netns --help 2>&1 | grep -iE "ready-fd|configure|disable-host-loopback|mtu|netns|userns|--dns|outbound" || true say "unprivileged port start (need <=53 to bind :53 rootless)" sysctl net.ipv4.ip_unprivileged_port_start 2>/dev/null || true @@ -134,9 +134,13 @@ echo "child-pid=$CHILD_PID" if [ -n "$CHILD_PID" ]; then say "start slirp4netns attached to child-pid=$CHILD_PID" + # bwrap's netns is owned by bwrap's CHILD user namespace; slirp (host user) + # can't setns into it without entering that userns first. --userns-path makes + # slirp join /proc//ns/user (where it's root) before the netns. # --disable-dns closes the 10.0.2.3 host-DNS bypass so ALL resolution must go # through our allowlisting proxy (guest resolv.conf points only at 10.0.2.2). - slirp4netns --configure --disable-dns --mtu=65520 "$CHILD_PID" tap0 & + slirp4netns --configure --disable-dns --mtu=65520 \ + --userns-path="/proc/$CHILD_PID/ns/user" "$CHILD_PID" tap0 & SLIRP_PID=$! 
echo "slirp pid=$SLIRP_PID (inner sleeps 3s to let it configure)" else From dc7d2b00ab6e9d73e04119e6973b16b8f622d1da Mon Sep 17 00:00:00 2001 From: oratis Date: Mon, 1 Jun 2026 23:44:16 +0800 Subject: [PATCH 5/7] =?UTF-8?q?chore(sandbox):=20PoC=20=E2=80=94=20bwrap?= =?UTF-8?q?=20--uid=200=20+=20README-exact=20slirp=20+=20ns=20diagnostics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --userns-path did not resolve setns(CLONE_NEWNET) EPERM. Dump the namespace topology (child user/net ns links + uid_map/gid_map + lsns) to see whether bwrap nests the userns such that the netns is owned by a parent userns slirp can't gain caps over. Also try the slirp4netns README-exact incantation: bwrap --uid 0 --gid 0 (root-mapped userns) + plain `slirp4netns --configure tap0` (no --userns-path/--disable-dns). Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/sandbox-net-poc.sh | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/scripts/sandbox-net-poc.sh b/scripts/sandbox-net-poc.sh index 3a79114..a7f3fb2 100755 --- a/scripts/sandbox-net-poc.sh +++ b/scripts/sandbox-net-poc.sh @@ -109,12 +109,16 @@ echo "--- curl DENIED (github.com) ---" curl -sS --max-time 10 -o /dev/null -w "denied_http=%{http_code}\n" https://github.com 2>&1 || echo "denied_curl_exit=$?" echo "--- sandbox inner done ---"' +# NOTE: --uid 0 --gid 0 maps the sandbox to root inside its userns (matches the +# slirp4netns README bwrap example). This PoC tries the README-exact slirp +# incantation (plain PID, no --userns-path) since --userns-path did not help. 
bwrap \ --ro-bind-try /usr /usr --ro-bind-try /lib /lib --ro-bind-try /lib64 /lib64 \ --ro-bind-try /bin /bin --ro-bind-try /sbin /sbin --ro-bind-try /etc /etc \ --proc /proc --dev /dev --tmpfs /tmp \ --ro-bind "$WORK/resolv.conf" "$RP" \ --bind "$CWD" "$CWD" \ + --uid 0 --gid 0 \ --unshare-net --unshare-pid --unshare-ipc --unshare-uts \ --new-session --die-with-parent \ --info-fd 8 \ @@ -132,15 +136,22 @@ echo "--- info.json ---"; cat "$WORK/info.json" 2>/dev/null || echo "(empty)" CHILD_PID="$(python3 -c 'import json,sys;print(json.load(open(sys.argv[1]))["child-pid"])' "$WORK/info.json" 2>/dev/null || echo "")" echo "child-pid=$CHILD_PID" +say "namespace topology — is bwrap's netns owned by a userns slirp can enter?" +echo "host user ns: $(readlink /proc/self/ns/user 2>/dev/null)" +echo "host net ns: $(readlink /proc/self/ns/net 2>/dev/null)" if [ -n "$CHILD_PID" ]; then - say "start slirp4netns attached to child-pid=$CHILD_PID" - # bwrap's netns is owned by bwrap's CHILD user namespace; slirp (host user) - # can't setns into it without entering that userns first. --userns-path makes - # slirp join /proc//ns/user (where it's root) before the netns. - # --disable-dns closes the 10.0.2.3 host-DNS bypass so ALL resolution must go - # through our allowlisting proxy (guest resolv.conf points only at 10.0.2.2). 
- slirp4netns --configure --disable-dns --mtu=65520 \ - --userns-path="/proc/$CHILD_PID/ns/user" "$CHILD_PID" tap0 & + echo "child user ns: $(readlink /proc/$CHILD_PID/ns/user 2>/dev/null)" + echo "child net ns: $(readlink /proc/$CHILD_PID/ns/net 2>/dev/null)" + echo "child uid_map: $(tr '\n' '|' < /proc/$CHILD_PID/uid_map 2>/dev/null)" + echo "child gid_map: $(tr '\n' '|' < /proc/$CHILD_PID/gid_map 2>/dev/null)" + # ioctl NS_GET_USERNS would tell the owning userns; approximate via lsns + command -v lsns >/dev/null && lsns -p "$CHILD_PID" 2>/dev/null || true +fi + +if [ -n "$CHILD_PID" ]; then + # README-exact incantation: plain PID, --configure, no userns-path/disable-dns. + say "ATTEMPT [readme]: slirp4netns --configure --mtu=65520 $CHILD_PID tap0" + slirp4netns --configure --mtu=65520 "$CHILD_PID" tap0 & SLIRP_PID=$! echo "slirp pid=$SLIRP_PID (inner sleeps 3s to let it configure)" else From 0223b1e5a504bd00e19eeb7feb4f748eed35f9a0 Mon Sep 17 00:00:00 2001 From: oratis Date: Mon, 1 Jun 2026 23:50:58 +0800 Subject: [PATCH 6/7] feat(sandbox): slirp4netns selective per-domain network allowlist (#10) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the Linux selective network allowlist (the last gap in §3.9a's sandbox). When sandbox.network.allowedDomains is a non-empty allowlist, spawnNetworkSandbox (netns.ts) orchestrates: 1. the allowlisting DNS proxy (dns-proxy.ts) on 127.0.0.1:53 — forwards allowed lookups upstream, returns NXDOMAIN for everything else; 2. bwrap --unshare-net --uid 0 --gid 0 with our resolv.conf bound (at the symlink-resolved real path) and --info-fd/--block-fd for PID handoff + readiness gating; 3. slirp4netns --configure --disable-dns attached to bwrap's netns by PID, giving rootless userspace NAT (tap0, 10.0.2.100/24, gateway 10.0.2.2 → host loopback where the proxy listens). 
The decisive detail: --uid 0 --gid 0 maps the host user to root inside bwrap's userns, which is what lets slirp (the host user, owner of that userns) gain CAP_SYS_ADMIN on entry and setns() into the netns — without it setns(CLONE_NEWNET) is EPERM. Threat model: DNS-NAME allowlisting (raw-IP dials bypass it) — adequate for the git/npm/pip-over-https agent workload, and --disable-dns closes the 10.0.2.3 bypass. Requires binding :53 (CAP_NET_BIND_SERVICE or a relaxed ip_unprivileged_port_start); when unavailable, callers fail CLOSED via NetworkSandboxUnavailable rather than running unrestricted. Verified on the Linux CI runner by netns-integration.test.ts (gated on DC_SANDBOX_NET_TEST + bwrap + slirp4netns): an allowlisted domain returns HTTP 200 while a non-allowlisted domain fails to resolve. The mechanics were proven first via a throwaway CI PoC (now removed). Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/ci.yml | 19 +- packages/core/src/index.ts | 9 +- packages/core/src/sandbox/index.ts | 7 + .../src/sandbox/netns-integration.test.ts | 78 +++++ packages/core/src/sandbox/netns.ts | 299 ++++++++++++++++++ packages/core/src/sandbox/profile.ts | 18 +- scripts/sandbox-net-poc.sh | 163 ---------- 7 files changed, 415 insertions(+), 178 deletions(-) create mode 100644 packages/core/src/sandbox/netns-integration.test.ts create mode 100644 packages/core/src/sandbox/netns.ts delete mode 100755 scripts/sandbox-net-poc.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f899730..bbcafb7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,22 +40,17 @@ jobs: # Linux only: install bubblewrap + slirp4netns so the real-kernel sandbox # integration tests run (they skip when `bwrap` is absent, e.g. macOS/dev). - # Ubuntu 24.04 restricts unprivileged user namespaces via AppArmor — relax - # it so bwrap can unshare namespaces on the runner. 
+ # · Ubuntu 24.04 restricts unprivileged user namespaces via AppArmor — + # relax it so bwrap can unshare namespaces on the runner. + # · The selective network-allowlist test runs an allowlisting DNS proxy on + # 127.0.0.1:53; relax ip_unprivileged_port_start so :53 binds rootless. - name: Install sandbox tools (Linux) if: runner.os == 'Linux' run: | sudo apt-get update sudo apt-get install -y bubblewrap slirp4netns curl sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 || true - - # TEMPORARY diagnostic — proves the bwrap+slirp4netns+DNS-allowlist - # mechanics on a real kernel before they are encoded in netns.ts. Removed - # once the TypeScript orchestrator + integration test land. - - name: Sandbox net PoC (Linux, diagnostic) - if: runner.os == 'Linux' - continue-on-error: true - run: bash scripts/sandbox-net-poc.sh + sudo sysctl -w net.ipv4.ip_unprivileged_port_start=53 || true - name: Typecheck run: pnpm typecheck @@ -67,6 +62,10 @@ jobs: run: pnpm format:check - name: Test + # DC_SANDBOX_NET_TEST opts the selective-allowlist integration test in; + # it self-skips on non-Linux / when bwrap/slirp4netns are absent. 
+ env: + DC_SANDBOX_NET_TEST: '1' run: pnpm test - name: Build diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index f5078f9..2fc6f36 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -182,14 +182,21 @@ export { type Frontmatter, } from './skills/index.js'; -// Sandbox (M3.5 — macOS sandbox-exec + Linux bwrap) +// Sandbox (M3.5 — macOS sandbox-exec + Linux bwrap; M3.5-ext — slirp4netns +// selective per-domain network allowlist) export { wrapBashCommand, buildMacOsProfile, buildLinuxBwrapArgs, detectPlatform, + spawnNetworkSandbox, + NetworkSandboxUnavailable, + startDnsProxy, type SandboxPlatform, type SandboxedCommand, + type SpawnNetworkSandboxOpts, + type NetworkSandboxHandle, + type DnsProxyHandle, } from './sandbox/index.js'; // MCP client (M3c — stdio transport; http/sse → M3c-ext) + server (`mcp serve`) diff --git a/packages/core/src/sandbox/index.ts b/packages/core/src/sandbox/index.ts index 22c589a..a02bdb6 100644 --- a/packages/core/src/sandbox/index.ts +++ b/packages/core/src/sandbox/index.ts @@ -27,6 +27,13 @@ export { type DnsProxyHandle, } from './dns-proxy.js'; +export { + spawnNetworkSandbox, + NetworkSandboxUnavailable, + type SpawnNetworkSandboxOpts, + type NetworkSandboxHandle, +} from './netns.js'; + export type { BwrapArgsOpts } from './profile.js'; export interface SandboxedCommand { diff --git a/packages/core/src/sandbox/netns-integration.test.ts b/packages/core/src/sandbox/netns-integration.test.ts new file mode 100644 index 0000000..6f79841 --- /dev/null +++ b/packages/core/src/sandbox/netns-integration.test.ts @@ -0,0 +1,78 @@ +// Real-kernel integration test for the selective per-domain network allowlist +// (bwrap + slirp4netns + allowlisting DNS proxy). Proves that a domain on the +// allowlist resolves + connects while everything else is blocked at DNS. +// +// GATED: needs bwrap + slirp4netns + the ability to bind 127.0.0.1:53. 
The CI +// Linux job installs both tools, relaxes net.ipv4.ip_unprivileged_port_start so +// :53 is bindable rootless, and sets DC_SANDBOX_NET_TEST=1. Skips everywhere +// else (macOS / dev machines). +// Spec: docs/DEVELOPMENT_PLAN.md §3.9a + +import { execSync } from 'node:child_process'; +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import type { SandboxConfig } from '../config/types.js'; +import { spawnNetworkSandbox } from './netns.js'; + +function has(bin: string): boolean { + try { + execSync(`command -v ${bin}`, { stdio: 'ignore' }); + return true; + } catch { + return false; + } +} + +const RUN = + process.env.DC_SANDBOX_NET_TEST === '1' && + process.platform === 'linux' && + has('bwrap') && + has('slirp4netns'); + +interface Res { + code: number | null; + stdout: string; + stderr: string; +} + +async function runNet(userCommand: string, cwd: string, allowedDomains: string[]): Promise<Res> { + const config: SandboxConfig = { enabled: true, network: { allowedDomains } }; + const handle = await spawnNetworkSandbox({ userCommand, cwd, config, dnsPort: 53 }); + let stdout = ''; + let stderr = ''; + handle.child.stdout?.on('data', (d: Buffer) => (stdout += d.toString())); + handle.child.stderr?.on('data', (d: Buffer) => (stderr += d.toString())); + const code = await handle.exited; + await handle.close(); + return { code, stdout, stderr }; +} + +describe.skipIf(!RUN)('selective network allowlist (slirp4netns, real-kernel)', () => { + let cwd: string; + beforeEach(async () => { + cwd = await mkdtemp(join(tmpdir(), 'dc-netns-it-')); + }); + afterEach(async () => { + await rm(cwd, { recursive: true, force: true }); + }); + + it('allows an allowlisted domain and blocks everything else', async () => { + const cmd = [ + 'curl -sS --max-time 15 -o /dev/null -w "ALLOWED=%{http_code}\\n" https://example.com 2>&1 || echo 
"ALLOWED_ERR=$?"', + 'curl -sS --max-time 15 -o /dev/null -w "DENIED=%{http_code}\\n" https://github.com 2>&1 || echo "DENIED_ERR=$?"', + ].join('\n'); + const r = await runNet(cmd, cwd, ['example.com', 'www.example.com']); + // The allowlisted domain resolves (via our proxy → upstream) and connects. + expect(r.stdout).toMatch(/ALLOWED=2\d\d/); + // The non-allowlisted domain gets NXDOMAIN from our proxy → can't resolve. + expect(r.stdout).toMatch(/Could not resolve host: github\.com|DENIED_ERR=6/i); + expect(r.stdout).not.toMatch(/DENIED=2\d\d/); + }, 45_000); + + it('resolv.conf inside the sandbox points at the slirp gateway', async () => { + const r = await runNet('cat /etc/resolv.conf', cwd, ['example.com']); + expect(r.stdout).toContain('nameserver 10.0.2.2'); + }, 20_000); +}); diff --git a/packages/core/src/sandbox/netns.ts b/packages/core/src/sandbox/netns.ts new file mode 100644 index 0000000..4f48279 --- /dev/null +++ b/packages/core/src/sandbox/netns.ts @@ -0,0 +1,299 @@ +// Linux selective network allowlist: bwrap (own netns) + slirp4netns (rootless +// userspace NAT for connectivity) + the allowlisting DNS proxy (NXDOMAIN for +// non-allowed domains). The guest's resolv.conf points at the slirp gateway +// (10.0.2.2 → host loopback) where the proxy listens on :53. +// Spec: docs/DEVELOPMENT_PLAN.md §3.9a +// +// THREAT MODEL: DNS-NAME allowlisting. A process that dials a raw IP bypasses +// the allowlist (it never resolves a name). This is adequate for the typical +// agent workload (git / npm / pip over https://host). slirp4netns --disable-dns +// closes the built-in 10.0.2.3 resolver so resolution can ONLY go through our +// allowlisting proxy. +// +// REQUIRES: `bwrap`, `slirp4netns`, and the ability to bind 127.0.0.1:53 (a +// privileged port — needs CAP_NET_BIND_SERVICE or a relaxed +// net.ipv4.ip_unprivileged_port_start). 
When the proxy can't bind, +// spawnNetworkSandbox throws NetworkSandboxUnavailable so callers fail CLOSED +// (deny-all network) rather than running the command unrestricted. + +import { spawn, type ChildProcess } from 'node:child_process'; +import { mkdtemp, realpath, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import type { Readable, Writable } from 'node:stream'; +import type { SandboxConfig } from '../config/types.js'; +import { startDnsProxy, type DnsProxyHandle } from './dns-proxy.js'; +import { buildLinuxBwrapArgs } from './profile.js'; + +/** slirp4netns gateway — maps to the host loopback (no --disable-host-loopback). */ +const SLIRP_GATEWAY = '10.0.2.2'; +const SLIRP_TAP = 'tap0'; +const SLIRP_MTU = 65520; +const DEFAULT_DNS_PORT = 53; +const DEFAULT_READY_TIMEOUT_MS = 10_000; + +/** Thrown when the selective allowlist can't be set up; callers fail closed. */ +export class NetworkSandboxUnavailable extends Error { + constructor(message: string) { + super(message); + this.name = 'NetworkSandboxUnavailable'; + } +} + +export interface SpawnNetworkSandboxOpts { + /** The user shell command to run inside the sandbox. */ + userCommand: string; + /** Working directory (rw-bound inside the sandbox). */ + cwd: string; + /** Sandbox config; network.allowedDomains is expected to be a non-empty allowlist. */ + config: SandboxConfig; + /** Override the bwrap binary (tests / non-standard installs). */ + bwrapPath?: string; + /** Override the slirp4netns binary. */ + slirpPath?: string; + /** Upstream resolver for ALLOWED lookups (default 1.1.1.1). */ + dnsUpstream?: string; + /** Host loopback port for the DNS proxy. MUST be 53 for the guest glibc resolver. */ + dnsPort?: number; + /** Milliseconds to wait for child-pid + slirp readiness before failing. */ + readyTimeoutMs?: number; + /** Diagnostic logger. 
*/ + log?: (line: string) => void; +} + +export interface NetworkSandboxHandle { + /** The spawned bwrap process. stdout = stdio[1], stderr = stdio[2]. */ + child: ChildProcess; + /** Resolves with the bwrap exit code once the sandboxed command finishes. */ + exited: Promise; + /** Tear down slirp4netns + DNS proxy + temp dir. Idempotent. */ + close(): Promise; +} + +/** + * Spawn a bwrap sandbox whose network is restricted to `config.network.allowedDomains`. + * + * Orchestration: + * 1. Start the allowlisting DNS proxy on 127.0.0.1:53. + * 2. bwrap --unshare-net (own netns) with our resolv.conf bound + --info-fd + * (to learn the child PID) + --block-fd (to gate the inner command until + * the network is wired up). + * 3. slirp4netns attaches to the child's netns (entering its userns first) and + * provides rootless outbound connectivity via tap0. + * 4. Once slirp signals ready, release --block-fd so the command runs. + * + * On any setup failure this rejects with NetworkSandboxUnavailable after cleaning up. + */ +export async function spawnNetworkSandbox( + opts: SpawnNetworkSandboxOpts, +): Promise { + const log = opts.log ?? (() => {}); + const dnsPort = opts.dnsPort ?? DEFAULT_DNS_PORT; + const readyTimeout = opts.readyTimeoutMs ?? DEFAULT_READY_TIMEOUT_MS; + const domains = opts.config.network?.allowedDomains ?? []; + + // 1. Allowlisting DNS proxy on the host loopback. Must be :53 because the + // guest's glibc resolver always queries nameservers on port 53. + let dns: DnsProxyHandle; + try { + dns = await startDnsProxy({ + allowedDomains: domains, + upstream: opts.dnsUpstream, + bindAddr: '127.0.0.1', + bindPort: dnsPort, + log, + }); + } catch (err) { + throw new NetworkSandboxUnavailable( + `cannot bind DNS allowlist proxy on 127.0.0.1:${dnsPort} (${errMsg(err)}); selective ` + + `network allowlisting needs CAP_NET_BIND_SERVICE or a relaxed ` + + `net.ipv4.ip_unprivileged_port_start`, + ); + } + + // 2. 
Temp dir + resolv.conf pointing at the slirp gateway. + const work = await mkdtemp(join(tmpdir(), 'dc-netns-')); + const resolvSrc = join(work, 'resolv.conf'); + await writeFile(resolvSrc, `nameserver ${SLIRP_GATEWAY}\noptions timeout:2 attempts:2\n`, 'utf8'); + // /etc/resolv.conf is usually a dangling symlink (→ /run/systemd/resolve/...) + // that bwrap can't create a bind target for; bind at the resolved real path. + let resolvDest = '/etc/resolv.conf'; + try { + resolvDest = await realpath('/etc/resolv.conf'); + } catch { + /* absent / not a symlink — bind directly */ + } + + // 3. bwrap with its own netns + our resolv.conf + info/block fds. + // --uid 0 --gid 0 maps the host user to root INSIDE bwrap's user namespace. + // This is what lets slirp4netns (running as the host user, which owns that + // userns) gain CAP_SYS_ADMIN on entry and setns() into the netns — without + // it, setns(CLONE_NEWNET) fails with EPERM. + const bwrapArgs = buildLinuxBwrapArgs(opts.config, opts.cwd, { + dnsProxyPort: dnsPort, + resolvConfPath: resolvSrc, + resolvConfDest: resolvDest, + }); + const args = [ + ...bwrapArgs, + '--uid', + '0', + '--gid', + '0', + '--info-fd', + '3', + '--block-fd', + '4', + '/bin/sh', + '-c', + opts.userCommand, + ]; + // stdio: 0 ignore · 1/2 piped (caller captures) · 3 info-fd (we read) · 4 block-fd (we write) + const child = spawn(opts.bwrapPath ?? 'bwrap', args, { + stdio: ['ignore', 'pipe', 'pipe', 'pipe', 'pipe'], + cwd: opts.cwd, + }); + + let slirp: ChildProcess | undefined; + let closed = false; + const close = async (): Promise => { + if (closed) return; + closed = true; + killQuietly(slirp); + killQuietly(child); + await dns.close().catch(() => {}); + await rm(work, { recursive: true, force: true }).catch(() => {}); + }; + + try { + // 4. Read the sandbox child-pid from --info-fd (a host-visible PID). 
+ const childPid = await readChildPid(child.stdio[3] as Readable, child, readyTimeout); + log(`[netns] bwrap child-pid=${childPid}`); + + // 5. Attach slirp4netns to the sandbox's netns by PID. slirp enters the + // target's userns (where the host user is now root, via --uid 0) before + // the netns, so the setns is permitted. --disable-dns closes slirp's + // built-in 10.0.2.3 resolver so ALL resolution must traverse our proxy. + slirp = spawn( + opts.slirpPath ?? 'slirp4netns', + [ + '--configure', + '--disable-dns', + `--mtu=${SLIRP_MTU}`, + '--ready-fd', + '3', + String(childPid), + SLIRP_TAP, + ], + { stdio: ['ignore', 'pipe', 'pipe', 'pipe'] }, + ); + pipeLog(slirp.stdio[1] as Readable | null, '[slirp]', log); + pipeLog(slirp.stdio[2] as Readable | null, '[slirp!]', log); + + // 6. Wait for slirp to signal the interface is configured. + await waitForReady(slirp.stdio[3] as Readable, slirp, 'slirp4netns ready', readyTimeout); + log('[netns] slirp4netns ready'); + + // 7. Release the inner command — network is now wired up. + const blockFd = child.stdio[4] as Writable; + blockFd.write('go'); + blockFd.end(); + } catch (err) { + await close(); + throw err instanceof NetworkSandboxUnavailable + ? err + : new NetworkSandboxUnavailable(`network sandbox setup failed: ${errMsg(err)}`); + } + + // 8. Auto-teardown slirp + proxy + tmp when the sandboxed command exits. + const exited = new Promise((resolve) => { + child.once('close', (code) => { + void close(); + resolve(code); + }); + }); + + return { child, exited, close }; +} + +/** Parse the `child-pid` out of bwrap's --info-fd JSON (tolerant of chunking). 
*/ +function readChildPid(fd: Readable, child: ChildProcess, timeoutMs: number): Promise { + return new Promise((resolve, reject) => { + let buf = ''; + const timer = setTimeout(() => { + cleanup(); + reject(new Error('timed out reading bwrap --info-fd')); + }, timeoutMs); + const onData = (d: Buffer): void => { + buf += d.toString('utf8'); + const m = buf.match(/"child-pid"\s*:\s*(\d+)/); + if (m) { + cleanup(); + resolve(Number(m[1])); + } + }; + const onExit = (): void => { + cleanup(); + reject(new Error('bwrap exited before emitting child-pid')); + }; + function cleanup(): void { + clearTimeout(timer); + fd.off('data', onData); + child.off('exit', onExit); + } + fd.on('data', onData); + child.once('exit', onExit); + }); +} + +/** Resolve when the process writes any byte to `fd` (e.g. slirp --ready-fd). */ +function waitForReady( + fd: Readable, + proc: ChildProcess, + label: string, + timeoutMs: number, +): Promise { + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { + cleanup(); + reject(new Error(`timed out waiting for ${label}`)); + }, timeoutMs); + const onData = (): void => { + cleanup(); + resolve(); + }; + const onExit = (): void => { + cleanup(); + reject(new Error(`process exited before ${label}`)); + }; + function cleanup(): void { + clearTimeout(timer); + fd.off('data', onData); + proc.off('exit', onExit); + } + fd.on('data', onData); + proc.once('exit', onExit); + }); +} + +function pipeLog(fd: Readable | null, prefix: string, log: (s: string) => void): void { + if (!fd) return; + fd.on('data', (d: Buffer) => { + const s = d.toString('utf8').trimEnd(); + if (s) log(`${prefix} ${s}`); + }); +} + +function killQuietly(proc: ChildProcess | undefined): void { + if (proc && !proc.killed) { + try { + proc.kill('SIGTERM'); + } catch { + /* already gone */ + } + } +} + +function errMsg(err: unknown): string { + return err instanceof Error ? 
err.message : String(err); +} diff --git a/packages/core/src/sandbox/profile.ts b/packages/core/src/sandbox/profile.ts index f82becd..df97aca 100644 --- a/packages/core/src/sandbox/profile.ts +++ b/packages/core/src/sandbox/profile.ts @@ -4,9 +4,11 @@ // // M3.5: macOS sandbox-exec SBPL profile generation + Linux bwrap arg generation // (ro system mounts, rw cwd, read/write allowlists, net unshare, pid/ipc/uts -// unshare, --new-session + --die-with-parent hardening). The one remaining gap -// is the selective-domain net allowlist, which needs a slirp4netns helper to -// bridge UDP into the netns (deny-all-net and full-net modes both work today). +// unshare, --new-session + --die-with-parent hardening). +// M3.5-ext: the selective-domain net allowlist is implemented in netns.ts +// (bwrap own-netns + slirp4netns userspace NAT + allowlisting DNS proxy); this +// module just emits the bwrap args (--unshare-net + the resolv.conf bind) that +// netns.ts orchestrates. deny-all-net and full-net modes work standalone here. // Windows: disabled per §0.2. import { homedir, platform } from 'node:os'; @@ -140,6 +142,14 @@ export interface BwrapArgsOpts { dnsProxyPort?: number; /** Path to a generated resolv.conf to bind into the sandbox. */ resolvConfPath?: string; + /** + * In-sandbox destination for the resolv.conf bind. Defaults to + * `/etc/resolv.conf`, but on systemd hosts that path is a dangling symlink + * (→ /run/systemd/resolve/stub-resolv.conf) which bwrap can't create a bind + * target for. The orchestrator resolves the symlink (realpath) and passes the + * real path here so the preserved /etc/resolv.conf symlink leads to our file. 
+ */ + resolvConfDest?: string; } export function buildLinuxBwrapArgs( @@ -185,7 +195,7 @@ export function buildLinuxBwrapArgs( } else if (whitelisted) { args.push('--unshare-net'); if (opts.resolvConfPath) { - args.push('--ro-bind', opts.resolvConfPath, '/etc/resolv.conf'); + args.push('--ro-bind', opts.resolvConfPath, opts.resolvConfDest ?? '/etc/resolv.conf'); } } diff --git a/scripts/sandbox-net-poc.sh b/scripts/sandbox-net-poc.sh deleted file mode 100755 index a7f3fb2..0000000 --- a/scripts/sandbox-net-poc.sh +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env bash -# DIAGNOSTIC PoC for the Linux selective-network-allowlist sandbox. -# NOT part of the product — a throwaway probe run in CI (Linux only) to nail -# down the exact bwrap + slirp4netns + DNS-proxy mechanics before encoding them -# in TypeScript (packages/core/src/sandbox/netns.ts). Removed once proven. -# -# Proves the full flow: -# bwrap --unshare-net (own netns, no connectivity) -# + slirp4netns (userspace NAT → outbound connectivity, rootless) -# + allowlisting DNS (resolv.conf → host proxy; NXDOMAIN for non-allowed) -# => allowed domain resolves+connects; denied domain fails to resolve. -# -# HANG-PROOF: no blocking FIFO (a `sleep` window covers slirp configuration), -# a background watchdog hard-kills the sandbox, and a trap always cleans up. -# Everything is best-effort + verbose so one CI run reveals what works. 
- -set -uo pipefail - -say() { echo ""; echo "===== $* ====="; } - -DNS_PID=""; SLIRP_PID=""; BWRAP_PID=""; WATCH_PID=""; WORK="" -cleanup() { - [ -n "$WATCH_PID" ] && kill "$WATCH_PID" 2>/dev/null || true - [ -n "$BWRAP_PID" ] && kill "$BWRAP_PID" 2>/dev/null || true - [ -n "$SLIRP_PID" ] && kill "$SLIRP_PID" 2>/dev/null || true - [ -n "$DNS_PID" ] && kill "$DNS_PID" 2>/dev/null || true - [ -n "$WORK" ] && rm -rf "$WORK" 2>/dev/null || true -} -trap cleanup EXIT - -say "versions" -bwrap --version || true -slirp4netns --version || true -python3 --version || true - -say "bwrap help — fd handshake flags" -bwrap --help 2>&1 | grep -iE "info-fd|block-fd|sync-fd|userns-block|unshare-net|--chdir" || true - -say "slirp4netns help — config/ready/dns/userns flags" -slirp4netns --help 2>&1 | grep -iE "ready-fd|configure|disable-host-loopback|mtu|netns|userns|--dns|outbound" || true - -say "unprivileged port start (need <=53 to bind :53 rootless)" -sysctl net.ipv4.ip_unprivileged_port_start 2>/dev/null || true -sudo sysctl -w net.ipv4.ip_unprivileged_port_start=53 || true - -# ── workspace ─────────────────────────────────────────────────────────────── -WORK="$(mktemp -d)" -CWD="$WORK/cwd"; mkdir -p "$CWD" -echo "nameserver 10.0.2.2" > "$WORK/resolv.conf" # slirp gateway → host loopback -echo "WORK=$WORK" - -# /etc/resolv.conf is usually a dangling symlink (→ /run/systemd/resolve/...) -# which bwrap can't create a bind target for. Bind our file at the symlink's -# RESOLVED real path so the preserved /etc/resolv.conf symlink leads to it. 
-say "resolv.conf shape on host" -ls -l /etc/resolv.conf || true -RP="$(readlink -f /etc/resolv.conf 2>/dev/null || true)" -[ -n "$RP" ] || RP=/etc/resolv.conf -echo "resolv real path RP=$RP" - -# ── allowlisting DNS proxy on host 127.0.0.1:53 (allow example.com only) ────── -say "start allowlist DNS proxy on 127.0.0.1:53" -cat > "$WORK/dns.py" <<'PY' -import socket -ALLOW = {"example.com", "www.example.com"} -UP = ("1.1.1.1", 53) -def qname(b): - i, parts = 12, [] - while i < len(b): - n = b[i] - if n == 0: break - parts.append(b[i+1:i+1+n].decode("latin1")); i += 1+n - return ".".join(parts).lower().rstrip(".") -s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) -s.bind(("127.0.0.1", 53)) -print("dns-proxy listening :53", flush=True) -while True: - try: - data, addr = s.recvfrom(2048) - name = qname(data) - if name in ALLOW: - print(f"ALLOW {name}", flush=True) - u = socket.socket(socket.AF_INET, socket.SOCK_DGRAM); u.settimeout(5) - u.sendto(data, UP) - try: resp,_ = u.recvfrom(2048); s.sendto(resp, addr) - except Exception as e: print("upstream err", e, flush=True) - u.close() - else: - print(f"DENY {name}", flush=True) - r = bytearray(data[:12]); r[2]=0x81; r[3]=0x83 # QR=1, RCODE=3 NXDOMAIN - s.sendto(bytes(r)+data[12:], addr) - except Exception as e: - print("proxy err", e, flush=True) -PY -python3 "$WORK/dns.py" & -DNS_PID=$! -sleep 0.5 -kill -0 "$DNS_PID" 2>/dev/null || echo "!! DNS proxy failed to start (port 53 bind?)" - -# ── bwrap with own netns; inner cmd sleeps to let slirp configure, then curls ─ -say "spawn bwrap (--unshare-net), capture child-pid via --info-fd" -INNER='sleep 3 -echo "--- inside sandbox: interfaces ---" -ip addr 2>/dev/null | grep -E "tap0|inet " || echo "(no ip tool / no addrs)" -echo "--- resolv.conf ---"; cat /etc/resolv.conf -echo "--- curl ALLOWED (example.com) ---" -curl -sS --max-time 10 -o /dev/null -w "allowed_http=%{http_code}\n" https://example.com 2>&1 || echo "allowed_curl_exit=$?" 
-echo "--- curl DENIED (github.com) ---" -curl -sS --max-time 10 -o /dev/null -w "denied_http=%{http_code}\n" https://github.com 2>&1 || echo "denied_curl_exit=$?" -echo "--- sandbox inner done ---"' - -# NOTE: --uid 0 --gid 0 maps the sandbox to root inside its userns (matches the -# slirp4netns README bwrap example). This PoC tries the README-exact slirp -# incantation (plain PID, no --userns-path) since --userns-path did not help. -bwrap \ - --ro-bind-try /usr /usr --ro-bind-try /lib /lib --ro-bind-try /lib64 /lib64 \ - --ro-bind-try /bin /bin --ro-bind-try /sbin /sbin --ro-bind-try /etc /etc \ - --proc /proc --dev /dev --tmpfs /tmp \ - --ro-bind "$WORK/resolv.conf" "$RP" \ - --bind "$CWD" "$CWD" \ - --uid 0 --gid 0 \ - --unshare-net --unshare-pid --unshare-ipc --unshare-uts \ - --new-session --die-with-parent \ - --info-fd 8 \ - /bin/sh -c "$INNER" 8>"$WORK/info.json" & -BWRAP_PID=$! -echo "bwrap host pid=$BWRAP_PID" - -# watchdog: hard-kill the sandbox after 45s no matter what -( sleep 45; kill "$BWRAP_PID" 2>/dev/null ) & -WATCH_PID=$! - -# wait for info.json, extract child-pid -for _ in $(seq 1 50); do [ -s "$WORK/info.json" ] && break; sleep 0.1; done -echo "--- info.json ---"; cat "$WORK/info.json" 2>/dev/null || echo "(empty)" -CHILD_PID="$(python3 -c 'import json,sys;print(json.load(open(sys.argv[1]))["child-pid"])' "$WORK/info.json" 2>/dev/null || echo "")" -echo "child-pid=$CHILD_PID" - -say "namespace topology — is bwrap's netns owned by a userns slirp can enter?" 
-echo "host user ns: $(readlink /proc/self/ns/user 2>/dev/null)" -echo "host net ns: $(readlink /proc/self/ns/net 2>/dev/null)" -if [ -n "$CHILD_PID" ]; then - echo "child user ns: $(readlink /proc/$CHILD_PID/ns/user 2>/dev/null)" - echo "child net ns: $(readlink /proc/$CHILD_PID/ns/net 2>/dev/null)" - echo "child uid_map: $(tr '\n' '|' < /proc/$CHILD_PID/uid_map 2>/dev/null)" - echo "child gid_map: $(tr '\n' '|' < /proc/$CHILD_PID/gid_map 2>/dev/null)" - # ioctl NS_GET_USERNS would tell the owning userns; approximate via lsns - command -v lsns >/dev/null && lsns -p "$CHILD_PID" 2>/dev/null || true -fi - -if [ -n "$CHILD_PID" ]; then - # README-exact incantation: plain PID, --configure, no userns-path/disable-dns. - say "ATTEMPT [readme]: slirp4netns --configure --mtu=65520 $CHILD_PID tap0" - slirp4netns --configure --mtu=65520 "$CHILD_PID" tap0 & - SLIRP_PID=$! - echo "slirp pid=$SLIRP_PID (inner sleeps 3s to let it configure)" -else - echo "!! no child-pid; cannot attach slirp (curls will fail)" -fi - -say "wait for sandbox to finish (bounded by 45s watchdog)" -wait "$BWRAP_PID" 2>/dev/null; echo "bwrap exit=$?" -echo "PoC done." From 228de55d667f09613d5758304ab3fddefdbda46d Mon Sep 17 00:00:00 2001 From: oratis Date: Mon, 1 Jun 2026 23:54:31 +0800 Subject: [PATCH 7/7] fix(sandbox): swallow stream errors on netns teardown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The integration test's assertions passed but the run failed: SIGTERM-ing slirp4netns / bwrap in close() reset their stdio pipes, emitting `read ECONNRESET` with no 'error' listener → vitest flagged 2 unhandled errors. Attach no-op 'error' handlers to both child processes and all their stdio streams so teardown resets are absorbed. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/sandbox/netns.ts | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/packages/core/src/sandbox/netns.ts b/packages/core/src/sandbox/netns.ts index 4f48279..6a2137b 100644 --- a/packages/core/src/sandbox/netns.ts +++ b/packages/core/src/sandbox/netns.ts @@ -153,6 +153,10 @@ export async function spawnNetworkSandbox( stdio: ['ignore', 'pipe', 'pipe', 'pipe', 'pipe'], cwd: opts.cwd, }); + // Swallow stream/process errors so a SIGTERM-induced ECONNRESET on the info / + // block / stdio pipes during teardown doesn't surface as an unhandled error. + ignoreErrors(child); + child.stdio.forEach((s) => ignoreErrors(s)); let slirp: ChildProcess | undefined; let closed = false; @@ -187,6 +191,8 @@ export async function spawnNetworkSandbox( ], { stdio: ['ignore', 'pipe', 'pipe', 'pipe'] }, ); + ignoreErrors(slirp); + slirp.stdio.forEach((s) => ignoreErrors(s)); pipeLog(slirp.stdio[1] as Readable | null, '[slirp]', log); pipeLog(slirp.stdio[2] as Readable | null, '[slirp!]', log); @@ -284,6 +290,17 @@ function pipeLog(fd: Readable | null, prefix: string, log: (s: string) => void): }); } +/** + * Attach a no-op 'error' listener so a stream/process error during teardown + * (e.g. ECONNRESET on the stdio pipes when slirp/bwrap is SIGTERM'd) doesn't + * bubble up as an unhandled error. Accepts ChildProcess, streams, or null. + */ +function ignoreErrors( + emitter: { on(event: 'error', cb: (err: unknown) => void): unknown } | null | undefined, +): void { + emitter?.on('error', () => {}); +} + function killQuietly(proc: ChildProcess | undefined): void { if (proc && !proc.killed) { try {