Replace --new-session with seccomp TIOCSTI/TIOCLINUX filter

This commit is contained in:
2026-04-12 15:58:50 +02:00
parent 8f30d28965
commit 0d0682b04e
3 changed files with 78 additions and 7 deletions
-1
View File
@@ -40,7 +40,6 @@ pub fn build_command(config: &SandboxConfig) -> Result<Command, SandboxError> {
add_user_env_overrides(&mut cmd, config); add_user_env_overrides(&mut cmd, config);
cmd.args(["--remount-ro", "/"]); cmd.args(["--remount-ro", "/"]);
cmd.arg("--new-session");
cmd.arg("--die-with-parent"); cmd.arg("--die-with-parent");
cmd.arg("--chdir").arg(&config.chdir); cmd.arg("--chdir").arg(&config.chdir);
+26 -4
View File
@@ -6,12 +6,15 @@
//! Simplifications vs upstream (intentional): //! Simplifications vs upstream (intentional):
//! - No capability-conditional rules (we never grant capabilities, so all //! - No capability-conditional rules (we never grant capabilities, so all
//! of Podman's `caps` blocks collapse to "deny" — we just omit them). //! of Podman's `caps` blocks collapse to "deny" — we just omit them).
//! - No argument filters. `personality` stays out of the allowlist; `socket`
//! is allowed unconditionally rather than gated on AF_VSOCK.
//! - No explicit-EPERM list — anything outside the allowlist returns ENOSYS //! - No explicit-EPERM list — anything outside the allowlist returns ENOSYS
//! via the default action. //! via the default action.
//! - x86_64 and aarch64 only. //! - x86_64 and aarch64 only.
//! //!
//! Argument filters (modelled after Flatpak's seccomp policy):
//! - `ioctl`: blocks TIOCSTI and TIOCLINUX to prevent terminal input
//! injection (CVE-2017-5226). Uses 32-bit arg comparison to avoid the
//! snapd bypass (CVE-2019-10063).
//!
//! Default action is ENOSYS (errno 38), not EPERM. This matches Podman's //! Default action is ENOSYS (errno 38), not EPERM. This matches Podman's
//! stance and is critical for the glibc clone3 -> clone fallback path. //! stance and is critical for the glibc clone3 -> clone fallback path.
//! //!
@@ -26,7 +29,8 @@ use std::os::fd::{FromRawFd, IntoRawFd, RawFd};
use std::str::FromStr; use std::str::FromStr;
use seccompiler::{ use seccompiler::{
BackendError, BpfProgram, SeccompAction, SeccompFilter, SeccompRule, TargetArch, sock_filter, BackendError, BpfProgram, SeccompAction, SeccompCmpArgLen, SeccompCmpOp, SeccompCondition,
SeccompFilter, SeccompRule, TargetArch, sock_filter,
}; };
use syscalls::Sysno; use syscalls::Sysno;
@@ -65,7 +69,12 @@ fn build_program_bytes() -> Result<Vec<u8>, SandboxError> {
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new(); let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
for name in ALLOWED_SYSCALLS { for name in ALLOWED_SYSCALLS {
if let Ok(sysno) = Sysno::from_str(name) { if let Ok(sysno) = Sysno::from_str(name) {
rules.insert(i64::from(sysno.id()), vec![]); let nr = i64::from(sysno.id());
let filtered = match *name {
"ioctl" => ioctl_rules().map_err(|e| SandboxError::Seccomp(e.to_string()))?,
_ => vec![],
};
rules.insert(nr, filtered);
} }
} }
let filter = SeccompFilter::new( let filter = SeccompFilter::new(
@@ -81,6 +90,19 @@ fn build_program_bytes() -> Result<Vec<u8>, SandboxError> {
Ok(serialize(&program)) Ok(serialize(&program))
} }
const TIOCSTI: u64 = 0x5412;
const TIOCLINUX: u64 = 0x541C;
/// Allow ioctl except for TIOCSTI and TIOCLINUX terminal injection attacks.
/// Dword (32-bit) comparison prevents the CVE-2019-10063 bypass where the
/// kernel ignores the high 32 bits of the ioctl command argument.
fn ioctl_rules() -> Result<Vec<SeccompRule>, BackendError> {
Ok(vec![SeccompRule::new(vec![
SeccompCondition::new(1, SeccompCmpArgLen::Dword, SeccompCmpOp::Ne, TIOCSTI)?,
SeccompCondition::new(1, SeccompCmpArgLen::Dword, SeccompCmpOp::Ne, TIOCLINUX)?,
])?])
}
fn current_target_arch() -> Result<TargetArch, SandboxError> { fn current_target_arch() -> Result<TargetArch, SandboxError> {
match std::env::consts::ARCH { match std::env::consts::ARCH {
"x86_64" => Ok(TargetArch::x86_64), "x86_64" => Ok(TargetArch::x86_64),
+52 -2
View File
@@ -417,8 +417,8 @@ fn whitelist_sys_is_readable() {
} }
#[test] #[test]
fn new_session_isolates_sid() { fn hardened_isolates_sid() {
let inner_sid = read_sid_inside_sandbox(&[]); let inner_sid = read_sid_inside_sandbox(&["--hardened"]);
let outer_sid = read_sid_current_process(); let outer_sid = read_sid_current_process();
assert_ne!( assert_ne!(
@@ -427,6 +427,17 @@ fn new_session_isolates_sid() {
); );
} }
#[test]
fn default_mode_shares_session() {
let inner_sid = read_sid_inside_sandbox(&[]);
let outer_sid = read_sid_current_process();
assert_eq!(
inner_sid, outer_sid,
"default-mode sandbox should share the session ID (got {inner_sid} != {outer_sid})"
);
}
#[test] #[test]
fn blacklist_run_is_tmpfs() { fn blacklist_run_is_tmpfs() {
let output = sandbox(&[]) let output = sandbox(&[])
@@ -1331,3 +1342,42 @@ fn seccomp_bash_pthread_fallback_works() {
String::from_utf8_lossy(&output.stderr) String::from_utf8_lossy(&output.stderr)
); );
} }
#[test]
fn seccomp_blocks_tiocsti() {
// TIOCSTI (0x5412) injects keystrokes into the terminal input queue.
// Without --new-session, this is the primary defense against CVE-2017-5226.
//
// On kernels >= 6.2 with CONFIG_LEGACY_TIOCSTI=n, the kernel blocks TIOCSTI
// before seccomp sees it. We test with --no-seccomp first to detect that and
// skip, so the test only asserts our filter's behaviour.
let baseline = sandbox(&["--no-seccomp"])
.args([
"--",
"python3",
"-c",
"import fcntl; fcntl.ioctl(0, 0x5412, b'x')",
])
.output()
.expect("agent-sandbox binary failed to execute");
if !baseline.status.success() {
// Kernel already blocks TIOCSTI; seccomp filter is untestable here.
return;
}
let output = sandbox(&[])
.args([
"--",
"python3",
"-c",
"import fcntl; fcntl.ioctl(0, 0x5412, b'x')",
])
.output()
.expect("agent-sandbox binary failed to execute");
assert!(
!output.status.success(),
"expected TIOCSTI to be blocked by seccomp filter"
);
}