Replace --new-session with seccomp TIOCSTI/TIOCLINUX filter

This commit is contained in:
2026-04-12 15:58:50 +02:00
parent 8f30d28965
commit 0d0682b04e
3 changed files with 78 additions and 7 deletions
-1
View File
@@ -40,7 +40,6 @@ pub fn build_command(config: &SandboxConfig) -> Result<Command, SandboxError> {
add_user_env_overrides(&mut cmd, config);
cmd.args(["--remount-ro", "/"]);
cmd.arg("--new-session");
cmd.arg("--die-with-parent");
cmd.arg("--chdir").arg(&config.chdir);
+26 -4
View File
@@ -6,12 +6,15 @@
//! Simplifications vs upstream (intentional):
//! - No capability-conditional rules (we never grant capabilities, so all
//! of Podman's `caps` blocks collapse to "deny" — we just omit them).
//! - No argument filters. `personality` stays out of the allowlist; `socket`
//! is allowed unconditionally rather than gated on AF_VSOCK.
//! - No explicit-EPERM list — anything outside the allowlist returns ENOSYS
//! via the default action.
//! - x86_64 and aarch64 only.
//!
//! Argument filters (modelled after Flatpak's seccomp policy):
//! - `ioctl`: blocks TIOCSTI and TIOCLINUX to prevent terminal input
//! injection (CVE-2017-5226). Uses 32-bit arg comparison to avoid the
//! snapd bypass (CVE-2019-10063).
//!
//! Default action is ENOSYS (errno 38), not EPERM. This matches Podman's
//! stance and is critical for the glibc clone3 -> clone fallback path.
//!
@@ -26,7 +29,8 @@ use std::os::fd::{FromRawFd, IntoRawFd, RawFd};
use std::str::FromStr;
use seccompiler::{
BackendError, BpfProgram, SeccompAction, SeccompFilter, SeccompRule, TargetArch, sock_filter,
BackendError, BpfProgram, SeccompAction, SeccompCmpArgLen, SeccompCmpOp, SeccompCondition,
SeccompFilter, SeccompRule, TargetArch, sock_filter,
};
use syscalls::Sysno;
@@ -65,7 +69,12 @@ fn build_program_bytes() -> Result<Vec<u8>, SandboxError> {
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
for name in ALLOWED_SYSCALLS {
if let Ok(sysno) = Sysno::from_str(name) {
rules.insert(i64::from(sysno.id()), vec![]);
let nr = i64::from(sysno.id());
let filtered = match *name {
"ioctl" => ioctl_rules().map_err(|e| SandboxError::Seccomp(e.to_string()))?,
_ => vec![],
};
rules.insert(nr, filtered);
}
}
let filter = SeccompFilter::new(
@@ -81,6 +90,19 @@ fn build_program_bytes() -> Result<Vec<u8>, SandboxError> {
Ok(serialize(&program))
}
/// `ioctl` request number for `TIOCSTI`, one of the two requests the `ioctl`
/// argument filter refuses. Typed `u64` because it is passed as the comparison
/// value to `SeccompCondition::new` in `ioctl_rules`.
// NOTE(review): 0x5412 is presumably the Linux value shared by the supported
// architectures (x86_64, aarch64) — confirm against the kernel headers.
const TIOCSTI: u64 = 0x5412;
/// `ioctl` request number for `TIOCLINUX`, the second request the `ioctl`
/// argument filter refuses.
// NOTE(review): 0x541C is presumably the Linux value shared by the supported
// architectures (x86_64, aarch64) — confirm against the kernel headers.
const TIOCLINUX: u64 = 0x541C;
/// Builds the argument filter installed for the `ioctl` syscall.
///
/// The result is a single rule made of two conditions, both on syscall
/// argument index 1 (the `ioctl` request number): the value must differ from
/// `TIOCSTI` and it must differ from `TIOCLINUX`. Every other request
/// satisfies the rule; those two terminal-injection requests do not.
///
/// Both comparisons use `SeccompCmpArgLen::Dword`, i.e. only the low 32 bits
/// of the argument are inspected. The kernel ignores the high 32 bits of the
/// request number, so a 64-bit comparison could be sidestepped by setting
/// junk upper bits (the CVE-2019-10063 bypass).
///
/// # Errors
///
/// Propagates any `BackendError` reported while constructing the conditions
/// or the rule.
fn ioctl_rules() -> Result<Vec<SeccompRule>, BackendError> {
    // Condition factory: "low 32 bits of argument 1 are not `request`".
    let differs_from = |request: u64| {
        SeccompCondition::new(1, SeccompCmpArgLen::Dword, SeccompCmpOp::Ne, request)
    };

    // Conditions inside one rule must all hold for the rule to match.
    let conditions = vec![differs_from(TIOCSTI)?, differs_from(TIOCLINUX)?];
    let rule = SeccompRule::new(conditions)?;

    Ok(vec![rule])
}
fn current_target_arch() -> Result<TargetArch, SandboxError> {
match std::env::consts::ARCH {
"x86_64" => Ok(TargetArch::x86_64),