Replace --new-session with seccomp TIOCSTI/TIOCLINUX filter
This commit is contained in:
@@ -40,7 +40,6 @@ pub fn build_command(config: &SandboxConfig) -> Result<Command, SandboxError> {
|
||||
add_user_env_overrides(&mut cmd, config);
|
||||
|
||||
cmd.args(["--remount-ro", "/"]);
|
||||
cmd.arg("--new-session");
|
||||
cmd.arg("--die-with-parent");
|
||||
cmd.arg("--chdir").arg(&config.chdir);
|
||||
|
||||
|
||||
+26
-4
@@ -6,12 +6,15 @@
|
||||
//! Simplifications vs upstream (intentional):
|
||||
//! - No capability-conditional rules (we never grant capabilities, so all
|
||||
//! of Podman's `caps` blocks collapse to "deny" — we just omit them).
|
||||
//! - No argument filters. `personality` stays out of the allowlist; `socket`
|
||||
//! is allowed unconditionally rather than gated on AF_VSOCK.
|
||||
//! - No explicit-EPERM list — anything outside the allowlist returns ENOSYS
|
||||
//! via the default action.
|
||||
//! - x86_64 and aarch64 only.
|
||||
//!
|
||||
//! Argument filters (modelled after Flatpak's seccomp policy):
|
||||
//! - `ioctl`: blocks TIOCSTI and TIOCLINUX to prevent terminal input
|
||||
//! injection (CVE-2017-5226). Uses 32-bit arg comparison to avoid the
|
||||
//! snapd bypass (CVE-2019-10063).
|
||||
//!
|
||||
//! Default action is ENOSYS (errno 38), not EPERM. This matches Podman's
|
||||
//! stance and is critical for the glibc clone3 -> clone fallback path.
|
||||
//!
|
||||
@@ -26,7 +29,8 @@ use std::os::fd::{FromRawFd, IntoRawFd, RawFd};
|
||||
use std::str::FromStr;
|
||||
|
||||
use seccompiler::{
|
||||
BackendError, BpfProgram, SeccompAction, SeccompFilter, SeccompRule, TargetArch, sock_filter,
|
||||
BackendError, BpfProgram, SeccompAction, SeccompCmpArgLen, SeccompCmpOp, SeccompCondition,
|
||||
SeccompFilter, SeccompRule, TargetArch, sock_filter,
|
||||
};
|
||||
use syscalls::Sysno;
|
||||
|
||||
@@ -65,7 +69,12 @@ fn build_program_bytes() -> Result<Vec<u8>, SandboxError> {
|
||||
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
|
||||
for name in ALLOWED_SYSCALLS {
|
||||
if let Ok(sysno) = Sysno::from_str(name) {
|
||||
rules.insert(i64::from(sysno.id()), vec![]);
|
||||
let nr = i64::from(sysno.id());
|
||||
let filtered = match *name {
|
||||
"ioctl" => ioctl_rules().map_err(|e| SandboxError::Seccomp(e.to_string()))?,
|
||||
_ => vec![],
|
||||
};
|
||||
rules.insert(nr, filtered);
|
||||
}
|
||||
}
|
||||
let filter = SeccompFilter::new(
|
||||
@@ -81,6 +90,19 @@ fn build_program_bytes() -> Result<Vec<u8>, SandboxError> {
|
||||
Ok(serialize(&program))
|
||||
}
|
||||
|
||||
const TIOCSTI: u64 = 0x5412;
|
||||
const TIOCLINUX: u64 = 0x541C;
|
||||
|
||||
/// Allow ioctl except for TIOCSTI and TIOCLINUX terminal injection attacks.
|
||||
/// Dword (32-bit) comparison prevents the CVE-2019-10063 bypass where the
|
||||
/// kernel ignores the high 32 bits of the ioctl command argument.
|
||||
fn ioctl_rules() -> Result<Vec<SeccompRule>, BackendError> {
|
||||
Ok(vec![SeccompRule::new(vec![
|
||||
SeccompCondition::new(1, SeccompCmpArgLen::Dword, SeccompCmpOp::Ne, TIOCSTI)?,
|
||||
SeccompCondition::new(1, SeccompCmpArgLen::Dword, SeccompCmpOp::Ne, TIOCLINUX)?,
|
||||
])?])
|
||||
}
|
||||
|
||||
fn current_target_arch() -> Result<TargetArch, SandboxError> {
|
||||
match std::env::consts::ARCH {
|
||||
"x86_64" => Ok(TargetArch::x86_64),
|
||||
|
||||
+52
-2
@@ -417,8 +417,8 @@ fn whitelist_sys_is_readable() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn new_session_isolates_sid() {
|
||||
let inner_sid = read_sid_inside_sandbox(&[]);
|
||||
fn hardened_isolates_sid() {
|
||||
let inner_sid = read_sid_inside_sandbox(&["--hardened"]);
|
||||
let outer_sid = read_sid_current_process();
|
||||
|
||||
assert_ne!(
|
||||
@@ -427,6 +427,17 @@ fn new_session_isolates_sid() {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn default_mode_shares_session() {
|
||||
let inner_sid = read_sid_inside_sandbox(&[]);
|
||||
let outer_sid = read_sid_current_process();
|
||||
|
||||
assert_eq!(
|
||||
inner_sid, outer_sid,
|
||||
"default-mode sandbox should share the session ID (got {inner_sid} != {outer_sid})"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn blacklist_run_is_tmpfs() {
|
||||
let output = sandbox(&[])
|
||||
@@ -1331,3 +1342,42 @@ fn seccomp_bash_pthread_fallback_works() {
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn seccomp_blocks_tiocsti() {
|
||||
// TIOCSTI (0x5412) injects keystrokes into the terminal input queue.
|
||||
// Without --new-session, this is the primary defense against CVE-2017-5226.
|
||||
//
|
||||
// On kernels >= 6.2 with CONFIG_LEGACY_TIOCSTI=n, the kernel blocks TIOCSTI
|
||||
// before seccomp sees it. We test with --no-seccomp first to detect that and
|
||||
// skip, so the test only asserts our filter's behaviour.
|
||||
let baseline = sandbox(&["--no-seccomp"])
|
||||
.args([
|
||||
"--",
|
||||
"python3",
|
||||
"-c",
|
||||
"import fcntl; fcntl.ioctl(0, 0x5412, b'x')",
|
||||
])
|
||||
.output()
|
||||
.expect("agent-sandbox binary failed to execute");
|
||||
|
||||
if !baseline.status.success() {
|
||||
// Kernel already blocks TIOCSTI; seccomp filter is untestable here.
|
||||
return;
|
||||
}
|
||||
|
||||
let output = sandbox(&[])
|
||||
.args([
|
||||
"--",
|
||||
"python3",
|
||||
"-c",
|
||||
"import fcntl; fcntl.ioctl(0, 0x5412, b'x')",
|
||||
])
|
||||
.output()
|
||||
.expect("agent-sandbox binary failed to execute");
|
||||
|
||||
assert!(
|
||||
!output.status.success(),
|
||||
"expected TIOCSTI to be blocked by seccomp filter"
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user