From 0d0682b04e5ae1d7ffe6fe3111d087e912a6dbe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krist=C3=B3f=20T=C3=B3th?= Date: Sun, 12 Apr 2026 15:58:50 +0200 Subject: [PATCH] Replace --new-session with seccomp TIOCSTI/TIOCLINUX filter --- src/sandbox.rs | 1 - src/seccomp.rs | 30 ++++++++++++++++++++---- tests/integration.rs | 54 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 78 insertions(+), 7 deletions(-) diff --git a/src/sandbox.rs b/src/sandbox.rs index 6091291..277d881 100644 --- a/src/sandbox.rs +++ b/src/sandbox.rs @@ -40,7 +40,6 @@ pub fn build_command(config: &SandboxConfig) -> Result { add_user_env_overrides(&mut cmd, config); cmd.args(["--remount-ro", "/"]); - cmd.arg("--new-session"); cmd.arg("--die-with-parent"); cmd.arg("--chdir").arg(&config.chdir); diff --git a/src/seccomp.rs b/src/seccomp.rs index 32fd7d5..d3d7ab2 100644 --- a/src/seccomp.rs +++ b/src/seccomp.rs @@ -6,12 +6,15 @@ //! Simplifications vs upstream (intentional): //! - No capability-conditional rules (we never grant capabilities, so all //! of Podman's `caps` blocks collapse to "deny" — we just omit them). -//! - No argument filters. `personality` stays out of the allowlist; `socket` -//! is allowed unconditionally rather than gated on AF_VSOCK. //! - No explicit-EPERM list — anything outside the allowlist returns ENOSYS //! via the default action. //! - x86_64 and aarch64 only. //! +//! Argument filters (modelled after Flatpak's seccomp policy): +//! - `ioctl`: blocks TIOCSTI and TIOCLINUX to prevent terminal input +//! injection (CVE-2017-5226). Uses 32-bit arg comparison to avoid the +//! 64-bit bypass of Flatpak's filter (CVE-2019-10063; cf. snapd CVE-2019-7303). +//! //! Default action is ENOSYS (errno 38), not EPERM. This matches Podman's //! stance and is critical for the glibc clone3 -> clone fallback path. //!
@@ -26,7 +29,8 @@ use std::os::fd::{FromRawFd, IntoRawFd, RawFd}; use std::str::FromStr; use seccompiler::{ - BackendError, BpfProgram, SeccompAction, SeccompFilter, SeccompRule, TargetArch, sock_filter, + BackendError, BpfProgram, SeccompAction, SeccompCmpArgLen, SeccompCmpOp, SeccompCondition, + SeccompFilter, SeccompRule, TargetArch, sock_filter, }; use syscalls::Sysno; @@ -65,7 +69,12 @@ fn build_program_bytes() -> Result, SandboxError> { let mut rules: BTreeMap> = BTreeMap::new(); for name in ALLOWED_SYSCALLS { if let Ok(sysno) = Sysno::from_str(name) { - rules.insert(i64::from(sysno.id()), vec![]); + let nr = i64::from(sysno.id()); + let filtered = match *name { + "ioctl" => ioctl_rules().map_err(|e| SandboxError::Seccomp(e.to_string()))?, + _ => vec![], + }; + rules.insert(nr, filtered); } } let filter = SeccompFilter::new( @@ -81,6 +90,19 @@ fn build_program_bytes() -> Result, SandboxError> { Ok(serialize(&program)) } +const TIOCSTI: u64 = 0x5412; +const TIOCLINUX: u64 = 0x541C; + +/// Allow ioctl except for TIOCSTI and TIOCLINUX terminal injection attacks. +/// Dword (32-bit) comparison prevents the CVE-2019-10063 bypass where the +/// kernel ignores the high 32 bits of the ioctl command argument. 
+fn ioctl_rules() -> Result, BackendError> {
+    Ok(vec![SeccompRule::new(vec![
+        SeccompCondition::new(1, SeccompCmpArgLen::Dword, SeccompCmpOp::Ne, TIOCSTI)?,
+        SeccompCondition::new(1, SeccompCmpArgLen::Dword, SeccompCmpOp::Ne, TIOCLINUX)?,
+    ])?])
+}
+
 fn current_target_arch() -> Result {
     match std::env::consts::ARCH {
         "x86_64" => Ok(TargetArch::x86_64),
diff --git a/tests/integration.rs b/tests/integration.rs
index 429232e..7f69e6b 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -417,8 +417,8 @@ fn whitelist_sys_is_readable() {
 }
 
 #[test]
-fn new_session_isolates_sid() {
-    let inner_sid = read_sid_inside_sandbox(&[]);
+fn hardened_isolates_sid() {
+    let inner_sid = read_sid_inside_sandbox(&["--hardened"]);
     let outer_sid = read_sid_current_process();
 
     assert_ne!(
@@ -427,6 +427,17 @@ fn new_session_isolates_sid() {
     );
 }
 
+#[test]
+fn default_mode_shares_session() {
+    let inner_sid = read_sid_inside_sandbox(&[]);
+    let outer_sid = read_sid_current_process();
+
+    assert_eq!(
+        inner_sid, outer_sid,
+        "default-mode sandbox should share the session ID (got {inner_sid} != {outer_sid})"
+    );
+}
+
 #[test]
 fn blacklist_run_is_tmpfs() {
     let output = sandbox(&[])
@@ -1331,3 +1342,42 @@ fn seccomp_bash_pthread_fallback_works() {
         String::from_utf8_lossy(&output.stderr)
     );
 }
+
+#[test]
+fn seccomp_blocks_tiocsti() {
+    // TIOCSTI (0x5412) injects keystrokes into the terminal input queue.
+    // Without --new-session, this is the primary defense against CVE-2017-5226.
+    //
+    // Exit status alone cannot prove the filter works: the ioctl also fails when
+    // stdin is not a terminal (ENOTTY; `Command::output` does not inherit stdin)
+    // and on kernels >= 6.2 with CONFIG_LEGACY_TIOCSTI=n. The probe therefore
+    // exits 0 only for ENOSYS (the filter's default action), 1 for any other
+    // errno, and 2 if the ioctl went through.
+    const PROBE: &str = "import errno, fcntl, sys\n\
+        try: fcntl.ioctl(0, 0x5412, b'x')\n\
+        except OSError as e: sys.exit(0 if e.errno == errno.ENOSYS else 1)\n\
+        sys.exit(2)";
+
+    // Control run without the filter: if it already reports ENOSYS, something
+    // outside the sandbox rejects the ioctl and the result proves nothing.
+    let baseline = sandbox(&["--no-seccomp"])
+        .args(["--", "python3", "-c", PROBE])
+        .output()
+        .expect("agent-sandbox binary failed to execute");
+
+    if baseline.status.success() {
+        return;
+    }
+
+    let output = sandbox(&[])
+        .args(["--", "python3", "-c", PROBE])
+        .output()
+        .expect("agent-sandbox binary failed to execute");
+
+    assert!(
+        output.status.success(),
+        "expected TIOCSTI to fail with ENOSYS under the seccomp filter (status {:?}): {}",
+        output.status.code(),
+        String::from_utf8_lossy(&output.stderr)
+    );
+}