From 12644ae31e6b313549d3b0d0b64b163371df848d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krist=C3=B3f=20T=C3=B3th?= Date: Wed, 8 Apr 2026 08:34:34 +0200 Subject: [PATCH] Apply a seccomp-BPF syscall allowlist by default Derived from Podman's default profile, stripped of capability-conditional rules (we never grant capabilities), argument filters, and the explicit EPERM block. Dangerous syscalls (mount, unshare, ptrace, bpf, perf_event_open, io_uring_*, keyctl, kexec_*, ...) fall through to the default ENOSYS action, which also keeps glibc's clone3 -> clone fallback working. x86_64 and aarch64 are supported; other archs error out. Toggle with --seccomp / --no-seccomp or seccomp = true/false in config. --- Cargo.lock | 18 ++ Cargo.toml | 3 + README.md | 4 + src/cli.rs | 8 + src/config.rs | 71 ++++++++ src/errors.rs | 7 + src/lib.rs | 2 + src/sandbox.rs | 11 ++ src/seccomp.rs | 194 ++++++++++++++++++++ src/seccomp_allowlist.in | 369 +++++++++++++++++++++++++++++++++++++++ tests/integration.rs | 85 +++++++++ 11 files changed, 772 insertions(+) create mode 100644 src/seccomp.rs create mode 100644 src/seccomp_allowlist.in diff --git a/Cargo.lock b/Cargo.lock index b2ed16d..24ac26e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,8 +8,11 @@ version = "0.1.0" dependencies = [ "clap", "glob", + "libc", + "seccompiler", "serde", "shlex", + "syscalls", "tempfile", "toml", ] @@ -315,6 +318,15 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "seccompiler" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4ae55de56877481d112a559bbc12667635fdaf5e005712fd4e2b2fa50ffc884" +dependencies = [ + "libc", +] + [[package]] name = "semver" version = "1.0.27" @@ -396,6 +408,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syscalls" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81c645a4de0d803ced6ef0388a2646aa1ef8467173b5d59a2c33c88de4ab76e7" + [[package]] name = "tempfile" version = 
"3.27.0" diff --git a/Cargo.toml b/Cargo.toml index 5305929..5e65a91 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,8 +14,11 @@ path = "src/main.rs" [dependencies] clap = { version = "4", features = ["derive"] } glob = "0.3" +libc = "0.2" +seccompiler = "0.5" serde = { version = "1", features = ["derive"] } shlex = "1.3.0" +syscalls = { version = "0.8", default-features = false, features = ["std"] } toml = "1" [dev-dependencies] diff --git a/README.md b/README.md index 8da6a8f..1390450 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,10 @@ The threat model is prompt injection and accidental damage, not a determined att **Not protected in blacklist mode:** arbitrary readable files outside the sensitive paths list, and D-Bus method calls (access control is daemon-side). +## Seccomp + +Both modes apply a seccomp-BPF syscall allowlist derived from Podman's default profile. Dangerous syscalls (`mount`, `unshare`, `ptrace`, `bpf`, `perf_event_open`, `io_uring_*`, `keyctl`, `kexec_*`, …) return `ENOSYS`. Disable with `--no-seccomp` or `seccomp = false` in the config file. + ## Configuration file Settings can be stored in a TOML config file at `$XDG_CONFIG_HOME/agent-sandbox/config.toml` (or pass `--config `). Use `--no-config` to skip loading it. The config file accepts the same options as the corresponding CLI flags. 
diff --git a/src/cli.rs b/src/cli.rs index ccbf64c..e530b95 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -34,6 +34,14 @@ pub struct Args { #[arg(long, overrides_with = "unshare_net")] pub share_net: bool, + /// Enable seccomp syscall filtering (on by default; overrides config-file `seccomp = false`) + #[arg(long, overrides_with = "no_seccomp")] + pub seccomp: bool, + + /// Disable seccomp syscall filtering (overrides config-file `seccomp = true`) + #[arg(long, overrides_with = "seccomp")] + pub no_seccomp: bool, + /// Bind an extra path read-write (repeatable) #[arg(long = "rw", value_name = "PATH", action = clap::ArgAction::Append)] pub extra_rw: Vec, diff --git a/src/config.rs b/src/config.rs index 297acea..b52e314 100644 --- a/src/config.rs +++ b/src/config.rs @@ -38,6 +38,12 @@ pub fn build(args: Args, file_config: Option) -> Result, profile: Option, globals: Option) - cli.or(profile).or(globals).unwrap_or(false) } +fn merge_flag_with_default( + cli: Option, + profile: Option, + globals: Option, + default: bool, +) -> bool { + cli.or(profile).or(globals).unwrap_or(default) +} + fn merge_flag_pair(enable: bool, disable: bool) -> Option { if enable { Some(true) @@ -244,6 +259,7 @@ pub struct Options { pub whitelist: Option, pub hardened: Option, pub unshare_net: Option, + pub seccomp: Option, pub entrypoint: Option, pub command: Option, pub dry_run: Option, @@ -380,6 +396,7 @@ mod tests { const FULL_CONFIG_TOML: &str = r#" hardened = true unshare-net = true + seccomp = false rw = ["/tmp/a", "/tmp/b"] command = "zsh" @@ -403,6 +420,7 @@ mod tests { fn globals_scalars() { assert_eq!(CONFIG.options.hardened, Some(true)); assert_eq!(CONFIG.options.unshare_net, Some(true)); + assert_eq!(CONFIG.options.seccomp, Some(false)); } #[test] @@ -522,6 +540,59 @@ mod tests { assert!(config.unshare_net); } + #[test] + fn build_seccomp_default_is_true() { + let config = build(args_with_command(), None).unwrap(); + assert!(config.seccomp); + } + + #[test] + fn 
build_seccomp_disabled_via_config() { + let file_config = FileConfig { + options: Options { + seccomp: Some(false), + ..Options::default() + }, + ..FileConfig::default() + }; + let config = build(args_with_command(), Some(file_config)).unwrap(); + assert!(!config.seccomp); + } + + #[test] + fn build_cli_seccomp_overrides_profile() { + let file_config = FileConfig { + options: Options { + seccomp: Some(false), + ..Options::default() + }, + ..FileConfig::default() + }; + let args = Args { + seccomp: true, + ..args_with_command() + }; + let config = build(args, Some(file_config)).unwrap(); + assert!(config.seccomp); + } + + #[test] + fn build_cli_no_seccomp_overrides_profile() { + let file_config = FileConfig { + options: Options { + seccomp: Some(true), + ..Options::default() + }, + ..FileConfig::default() + }; + let args = Args { + no_seccomp: true, + ..args_with_command() + }; + let config = build(args, Some(file_config)).unwrap(); + assert!(!config.seccomp); + } + #[test] fn build_cli_no_hardened_overrides_profile() { let file_config = FileConfig { diff --git a/src/errors.rs b/src/errors.rs index 530255b..98a5523 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -26,6 +26,8 @@ pub enum SandboxError { ConfigPathNotAbsolute(PathBuf), InvalidBwrapArg(String), NoCommand, + Seccomp(String), + SeccompUnsupportedArch(String), } impl std::fmt::Display for SandboxError { @@ -74,6 +76,11 @@ impl std::fmt::Display for SandboxError { f, "no command to run; specify a command via config, entrypoint, or pass one after --" ), + Self::Seccomp(msg) => write!(f, "failed to build seccomp filter: {msg}"), + Self::SeccompUnsupportedArch(arch) => write!( + f, + "seccomp filtering is not supported on this architecture: {arch} (use --no-seccomp to disable)" + ), } } } diff --git a/src/lib.rs b/src/lib.rs index 4ed6702..303d09e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,7 @@ pub mod config; mod errors; mod preflight; mod sandbox; +mod seccomp; pub use errors::SandboxError; @@ 
-23,6 +24,7 @@ pub struct SandboxConfig { pub mode: SandboxMode, pub hardened: bool, pub unshare_net: bool, + pub seccomp: bool, pub extra_rw: Vec, pub extra_ro: Vec, pub mask: Vec, diff --git a/src/sandbox.rs b/src/sandbox.rs index f28cf32..f957978 100644 --- a/src/sandbox.rs +++ b/src/sandbox.rs @@ -3,6 +3,7 @@ use std::process::Command; use crate::agents; use crate::blacklist; +use crate::seccomp; use crate::{SandboxConfig, SandboxError, SandboxMode}; pub fn build_command(config: &SandboxConfig) -> Result { @@ -41,6 +42,10 @@ pub fn build_command(config: &SandboxConfig) -> Result { apply_masks(&mut cmd, &config.mask); + if config.seccomp { + add_seccomp_filter(&mut cmd)?; + } + cmd.args(&config.bwrap_args); cmd.arg("--") @@ -194,3 +199,9 @@ fn add_ro_bind(cmd: &mut Command, path: &Path) -> Result<(), SandboxError> { fn resolve_bind_source(path: &Path) -> Result { std::fs::canonicalize(path).map_err(|_| SandboxError::PathMissing(path.to_path_buf())) } + +fn add_seccomp_filter(cmd: &mut Command) -> Result<(), SandboxError> { + let fd = seccomp::write_program_to_memfd()?; + cmd.arg("--seccomp").arg(fd.to_string()); + Ok(()) +} diff --git a/src/seccomp.rs b/src/seccomp.rs new file mode 100644 index 0000000..32fd7d5 --- /dev/null +++ b/src/seccomp.rs @@ -0,0 +1,194 @@ +//! seccomp BPF allowlist for sandboxed processes. +//! +//! Derived from Podman's containers/common default profile: +//! +//! +//! Simplifications vs upstream (intentional): +//! - No capability-conditional rules (we never grant capabilities, so all +//! of Podman's `caps` blocks collapse to "deny" — we just omit them). +//! - No argument filters. `personality` stays out of the allowlist; `socket` +//! is allowed unconditionally rather than gated on AF_VSOCK. +//! - No explicit-EPERM list — anything outside the allowlist returns ENOSYS +//! via the default action. +//! - x86_64 and aarch64 only. +//! +//! Default action is ENOSYS (errno 38), not EPERM. This matches Podman's +//! 
stance and is critical for the glibc clone3 -> clone fallback path. +//! +//! Syscall name -> number resolution uses the `syscalls` crate. Names that +//! don't exist on the host architecture (e.g. legacy 32-bit aliases like +//! `_llseek`, or 32-bit ARM private syscalls like `set_tls`) are silently +//! skipped — they would just return ENOSYS anyway under the default action. + +use std::collections::BTreeMap; +use std::io::{Seek, SeekFrom, Write}; +use std::os::fd::{FromRawFd, IntoRawFd, RawFd}; +use std::str::FromStr; + +use seccompiler::{ + BackendError, BpfProgram, SeccompAction, SeccompFilter, SeccompRule, TargetArch, sock_filter, +}; +use syscalls::Sysno; + +use crate::SandboxError; + +/// Syscall allowlist. Includes Podman's unconditional allow set (minus syscalls +/// we deny on top, see module docs) plus arch-specific syscalls for the targets +/// we support. Names absent from the host arch are skipped at filter-build time. +const ALLOWED_SYSCALLS: &[&str] = &include!("seccomp_allowlist.in"); + +/// Build a seccomp BPF program, write it to an anonymous in-memory file, and +/// return the raw fd. The fd is intentionally leaked from Rust's ownership: it +/// is created without `MFD_CLOEXEC` so it survives `exec` into bwrap, which +/// inherits and closes it after reading the filter. +pub fn write_program_to_memfd() -> Result { + let bytes = build_program_bytes()?; + + // Safety: memfd_create is a normal Linux syscall. We pass a valid C string + // and flags=0, so the fd is created without MFD_CLOEXEC and survives exec + // into bwrap. The name is only a debugging label (the /proc/<pid>/fd link target). + let raw_fd = unsafe { libc::memfd_create(c"agent-sandbox-seccomp".as_ptr(), 0) }; + if raw_fd < 0 { + return Err(SandboxError::Io(std::io::Error::last_os_error())); + } + + // Safety: raw_fd is owned by us and currently uniquely held. 
+ let mut file = unsafe { std::fs::File::from_raw_fd(raw_fd) }; + file.write_all(&bytes)?; + file.seek(SeekFrom::Start(0))?; + // into_raw_fd consumes the File without closing the underlying fd. + Ok(file.into_raw_fd()) +} + +fn build_program_bytes() -> Result, SandboxError> { + let target_arch = current_target_arch()?; + let mut rules: BTreeMap> = BTreeMap::new(); + for name in ALLOWED_SYSCALLS { + if let Ok(sysno) = Sysno::from_str(name) { + rules.insert(i64::from(sysno.id()), vec![]); + } + } + let filter = SeccompFilter::new( + rules, + SeccompAction::Errno(libc::ENOSYS as u32), + SeccompAction::Allow, + target_arch, + ) + .map_err(|e| SandboxError::Seccomp(e.to_string()))?; + let program: BpfProgram = filter + .try_into() + .map_err(|e: BackendError| SandboxError::Seccomp(e.to_string()))?; + Ok(serialize(&program)) +} + +fn current_target_arch() -> Result { + match std::env::consts::ARCH { + "x86_64" => Ok(TargetArch::x86_64), + "aarch64" => Ok(TargetArch::aarch64), + other => Err(SandboxError::SeccompUnsupportedArch(other.to_string())), + } +} + +fn serialize(program: &[sock_filter]) -> Vec { + // Flatten the in-memory BpfProgram into the raw byte stream the kernel's + // seccomp(2) interface expects. A BpfProgram is &[sock_filter], where + // sock_filter is the classic-BPF instruction format from <linux/filter.h>: + // + // struct sock_filter { + // __u16 code; // opcode + // __u8 jt; // jump-if-true offset + // __u8 jf; // jump-if-false offset + // __u32 k; // generic immediate operand + // }; + // + // Exactly 8 bytes, no padding. Native endian because producer and consumer + // are the same machine — there is no cross-host serialization. 
+ let mut out = Vec::with_capacity(program.len() * 8); + for insn in program { + out.extend_from_slice(&insn.code.to_ne_bytes()); + out.push(insn.jt); + out.push(insn.jf); + out.extend_from_slice(&insn.k.to_ne_bytes()); + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn builds_on_supported_arch() { + let bytes = build_program_bytes().expect("seccomp program should build"); + assert!(!bytes.is_empty(), "serialized BPF program is empty"); + assert_eq!(bytes.len() % 8, 0, "BPF byte stream must be 8-byte aligned"); + } + + #[test] + fn allowlist_contains_essential_syscalls() { + for needed in &[ + "read", + "write", + "openat", + "close", + "execve", + "exit_group", + "mmap", + "brk", + "clone", + ] { + assert!( + ALLOWED_SYSCALLS.contains(needed), + "allowlist missing essential syscall: {needed}" + ); + } + } + + #[test] + fn allowlist_excludes_dangerous_syscalls() { + for denied in &[ + "bpf", + "perf_event_open", + "userfaultfd", + "kexec_load", + "kexec_file_load", + "init_module", + "finit_module", + "delete_module", + "mount", + "umount", + "umount2", + "unshare", + "setns", + "pivot_root", + "ptrace", + "process_vm_readv", + "process_vm_writev", + "keyctl", + "personality", + "clone3", + "io_uring_setup", + "io_uring_register", + "io_uring_enter", + "fanotify_init", + "fanotify_mark", + "open_by_handle_at", + "name_to_handle_at", + "fsopen", + "fsconfig", + "fsmount", + "fspick", + "open_tree", + "move_mount", + "mount_setattr", + "reboot", + "swapon", + "swapoff", + ] { + assert!( + !ALLOWED_SYSCALLS.contains(denied), + "allowlist must not contain dangerous syscall: {denied}" + ); + } + } +} diff --git a/src/seccomp_allowlist.in b/src/seccomp_allowlist.in new file mode 100644 index 0000000..20c6b01 --- /dev/null +++ b/src/seccomp_allowlist.in @@ -0,0 +1,369 @@ +// Syscall allowlist included verbatim by src/seccomp.rs via include!. 
+// +// Source: Podman containers/common default profile, block 1 (the unconditional +// allow set), minus the deny list documented in src/seccomp.rs, plus `socket` +// (which Podman gates on argument filters we deliberately don't replicate) and +// the x86_64/aarch64 arch-specific syscalls Podman ships in its arch blocks. +// +// Names absent from the host architecture (e.g. legacy 32-bit aliases like +// `_llseek`, or 32-bit ARM private syscalls like `set_tls`) are skipped at +// filter-build time by the syscalls crate's name resolver. +[ + // Core syscalls (Podman allow block minus our extra denies plus socket) + "_llseek", + "_newselect", + "accept", + "accept4", + "access", + "adjtimex", + "alarm", + "bind", + "brk", + "capget", + "capset", + "chdir", + "chmod", + "chown", + "chown32", + "clock_adjtime", + "clock_adjtime64", + "clock_getres", + "clock_getres_time64", + "clock_gettime", + "clock_gettime64", + "clock_nanosleep", + "clock_nanosleep_time64", + "clone", + "close", + "close_range", + "connect", + "copy_file_range", + "creat", + "dup", + "dup2", + "dup3", + "epoll_create", + "epoll_create1", + "epoll_ctl", + "epoll_ctl_old", + "epoll_pwait", + "epoll_pwait2", + "epoll_wait", + "epoll_wait_old", + "eventfd", + "eventfd2", + "execve", + "execveat", + "exit", + "exit_group", + "faccessat", + "faccessat2", + "fadvise64", + "fadvise64_64", + "fallocate", + "fchdir", + "fchmod", + "fchmodat", + "fchmodat2", + "fchown", + "fchown32", + "fchownat", + "fcntl", + "fcntl64", + "fdatasync", + "fgetxattr", + "flistxattr", + "flock", + "fork", + "fremovexattr", + "fsetxattr", + "fstat", + "fstat64", + "fstatat64", + "fstatfs", + "fstatfs64", + "fsync", + "ftruncate", + "ftruncate64", + "futex", + "futex_time64", + "futimesat", + "get_robust_list", + "get_thread_area", + "getcpu", + "getcwd", + "getdents", + "getdents64", + "getegid", + "getegid32", + "geteuid", + "geteuid32", + "getgid", + "getgid32", + "getgroups", + "getgroups32", + "getitimer", + 
"getpeername", + "getpgid", + "getpgrp", + "getpid", + "getppid", + "getpriority", + "getrandom", + "getresgid", + "getresgid32", + "getresuid", + "getresuid32", + "getrlimit", + "getrusage", + "getsid", + "getsockname", + "getsockopt", + "gettid", + "gettimeofday", + "getuid", + "getuid32", + "getxattr", + "inotify_add_watch", + "inotify_init", + "inotify_init1", + "inotify_rm_watch", + "io_cancel", + "io_destroy", + "io_getevents", + "io_setup", + "io_submit", + "ioctl", + "ioprio_get", + "ioprio_set", + "ipc", + "kill", + "landlock_add_rule", + "landlock_create_ruleset", + "landlock_restrict_self", + "lchown", + "lchown32", + "lgetxattr", + "link", + "linkat", + "listen", + "listxattr", + "llistxattr", + "lremovexattr", + "lseek", + "lsetxattr", + "lstat", + "lstat64", + "madvise", + "membarrier", + "memfd_create", + "memfd_secret", + "mincore", + "mkdir", + "mkdirat", + "mknod", + "mknodat", + "mlock", + "mlock2", + "mlockall", + "mmap", + "mmap2", + "mprotect", + "mq_getsetattr", + "mq_notify", + "mq_open", + "mq_timedreceive", + "mq_timedreceive_time64", + "mq_timedsend", + "mq_timedsend_time64", + "mq_unlink", + "mremap", + "msgctl", + "msgget", + "msgrcv", + "msgsnd", + "msync", + "munlock", + "munlockall", + "munmap", + "nanosleep", + "newfstatat", + "open", + "openat", + "openat2", + "pause", + "pidfd_getfd", + "pidfd_open", + "pidfd_send_signal", + "pipe", + "pipe2", + "pkey_alloc", + "pkey_free", + "pkey_mprotect", + "poll", + "ppoll", + "ppoll_time64", + "prctl", + "pread64", + "preadv", + "preadv2", + "prlimit64", + "process_mrelease", + "pselect6", + "pselect6_time64", + "pwrite64", + "pwritev", + "pwritev2", + "read", + "readahead", + "readlink", + "readlinkat", + "readv", + "recv", + "recvfrom", + "recvmmsg", + "recvmmsg_time64", + "recvmsg", + "remap_file_pages", + "removexattr", + "rename", + "renameat", + "renameat2", + "restart_syscall", + "rmdir", + "rseq", + "rt_sigaction", + "rt_sigpending", + "rt_sigprocmask", + "rt_sigqueueinfo", + 
"rt_sigreturn", + "rt_sigsuspend", + "rt_sigtimedwait", + "rt_sigtimedwait_time64", + "rt_tgsigqueueinfo", + "sched_get_priority_max", + "sched_get_priority_min", + "sched_getaffinity", + "sched_getattr", + "sched_getparam", + "sched_getscheduler", + "sched_rr_get_interval", + "sched_rr_get_interval_time64", + "sched_setaffinity", + "sched_setattr", + "sched_setparam", + "sched_setscheduler", + "sched_yield", + "seccomp", + "select", + "semctl", + "semget", + "semop", + "semtimedop", + "semtimedop_time64", + "send", + "sendfile", + "sendfile64", + "sendmmsg", + "sendmsg", + "sendto", + "set_robust_list", + "set_thread_area", + "set_tid_address", + "setfsgid", + "setfsgid32", + "setfsuid", + "setfsuid32", + "setgid", + "setgid32", + "setgroups", + "setgroups32", + "setitimer", + "setpgid", + "setpriority", + "setregid", + "setregid32", + "setresgid", + "setresgid32", + "setresuid", + "setresuid32", + "setreuid", + "setreuid32", + "setrlimit", + "setsid", + "setsockopt", + "setuid", + "setuid32", + "setxattr", + "shmat", + "shmctl", + "shmdt", + "shmget", + "shutdown", + "sigaltstack", + "signal", + "signalfd", + "signalfd4", + "sigprocmask", + "sigreturn", + "socket", + "socketcall", + "socketpair", + "splice", + "stat", + "stat64", + "statfs", + "statfs64", + "statx", + "symlink", + "symlinkat", + "sync", + "sync_file_range", + "syncfs", + "sysinfo", + "syslog", + "tee", + "tgkill", + "time", + "timer_create", + "timer_delete", + "timer_getoverrun", + "timer_gettime", + "timer_gettime64", + "timer_settime", + "timer_settime64", + "timerfd_create", + "timerfd_gettime", + "timerfd_gettime64", + "timerfd_settime", + "timerfd_settime64", + "times", + "tkill", + "truncate", + "truncate64", + "ugetrlimit", + "umask", + "uname", + "unlink", + "unlinkat", + "utime", + "utimensat", + "utimensat_time64", + "utimes", + "vfork", + "wait4", + "waitid", + "waitpid", + "write", + "writev", + // x86_64-specific + "arch_prctl", + "modify_ldt", + // aarch64-specific + 
"arm_fadvise64_64", + "arm_sync_file_range", + "breakpoint", + "cacheflush", + "set_tls", + "sync_file_range2", +] diff --git a/tests/integration.rs b/tests/integration.rs index d5c6435..3c6ff21 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -879,3 +879,88 @@ fn mask_nonexistent_path_becomes_tmpfs() { "tmpfs writes should not leak to host" ); } + +#[test] +fn seccomp_on_by_default_blocks_unshare() { + let output = sandbox(&[]) + .args(["--", "unshare", "--user", "--map-root-user", "/bin/true"]) + .output() + .expect("agent-sandbox binary failed to execute"); + + assert!( + !output.status.success(), + "expected unshare(2) to be blocked by default seccomp filter, but it succeeded" + ); +} + +#[test] +fn seccomp_off_allows_blocked_syscall() { + let output = sandbox(&["--no-seccomp"]) + .args(["--", "unshare", "--user", "--map-root-user", "/bin/true"]) + .output() + .expect("agent-sandbox binary failed to execute"); + + assert!( + output.status.success(), + "expected unshare(2) to succeed without seccomp, stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn seccomp_dry_run_emits_seccomp_arg() { + let output = sandbox(&["--dry-run"]) + .args(["--", "/bin/true"]) + .output() + .expect("agent-sandbox binary failed to execute"); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("--seccomp"), + "expected --seccomp in dry-run output, got: {stdout}" + ); +} + +#[test] +fn seccomp_dry_run_no_seccomp_omits_arg() { + let output = sandbox(&["--dry-run", "--no-seccomp"]) + .args(["--", "/bin/true"]) + .output() + .expect("agent-sandbox binary failed to execute"); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + !stdout.contains("--seccomp"), + "expected no --seccomp in dry-run output with --no-seccomp, got: {stdout}" + ); +} + +#[test] +fn seccomp_normal_workload_succeeds() { + let output = sandbox(&[]) + .args(["--", "bash", "-c", "ls /etc > /dev/null && date"]) + .output() + 
.expect("agent-sandbox binary failed to execute"); + + assert!( + output.status.success(), + "expected normal workload to succeed under default seccomp, stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn seccomp_bash_pthread_fallback_works() { + // Verifies the ENOSYS-not-EPERM choice for clone3 doesn't break libc's + // clone3 -> clone fallback path that bash uses internally. + let output = sandbox(&[]) + .args(["--", "bash", "-c", "true"]) + .output() + .expect("agent-sandbox binary failed to execute"); + + assert!( + output.status.success(), + "expected bash to succeed under default seccomp (clone3 fallback), stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +}