Replace --new-session with seccomp TIOCSTI/TIOCLINUX filter
This commit is contained in:
@@ -40,7 +40,6 @@ pub fn build_command(config: &SandboxConfig) -> Result<Command, SandboxError> {
|
|||||||
add_user_env_overrides(&mut cmd, config);
|
add_user_env_overrides(&mut cmd, config);
|
||||||
|
|
||||||
cmd.args(["--remount-ro", "/"]);
|
cmd.args(["--remount-ro", "/"]);
|
||||||
cmd.arg("--new-session");
|
|
||||||
cmd.arg("--die-with-parent");
|
cmd.arg("--die-with-parent");
|
||||||
cmd.arg("--chdir").arg(&config.chdir);
|
cmd.arg("--chdir").arg(&config.chdir);
|
||||||
|
|
||||||
|
|||||||
+26
-4
@@ -6,12 +6,15 @@
|
|||||||
//! Simplifications vs upstream (intentional):
|
//! Simplifications vs upstream (intentional):
|
||||||
//! - No capability-conditional rules (we never grant capabilities, so all
|
//! - No capability-conditional rules (we never grant capabilities, so all
|
||||||
//! of Podman's `caps` blocks collapse to "deny" — we just omit them).
|
//! of Podman's `caps` blocks collapse to "deny" — we just omit them).
|
||||||
//! - No argument filters. `personality` stays out of the allowlist; `socket`
|
|
||||||
//! is allowed unconditionally rather than gated on AF_VSOCK.
|
|
||||||
//! - No explicit-EPERM list — anything outside the allowlist returns ENOSYS
|
//! - No explicit-EPERM list — anything outside the allowlist returns ENOSYS
|
||||||
//! via the default action.
|
//! via the default action.
|
||||||
//! - x86_64 and aarch64 only.
|
//! - x86_64 and aarch64 only.
|
||||||
//!
|
//!
|
||||||
|
//! Argument filters (modelled after Flatpak's seccomp policy):
|
||||||
|
//! - `ioctl`: blocks TIOCSTI and TIOCLINUX to prevent terminal input
|
||||||
|
//! injection (CVE-2017-5226). Uses 32-bit arg comparison to avoid the
|
||||||
|
//! high-32-bit bypass (CVE-2019-10063 in Flatpak; snapd's analogue is CVE-2019-7303).
|
||||||
|
//!
|
||||||
//! Default action is ENOSYS (errno 38), not EPERM. This matches Podman's
|
//! Default action is ENOSYS (errno 38), not EPERM. This matches Podman's
|
||||||
//! stance and is critical for the glibc clone3 -> clone fallback path.
|
//! stance and is critical for the glibc clone3 -> clone fallback path.
|
||||||
//!
|
//!
|
||||||
@@ -26,7 +29,8 @@ use std::os::fd::{FromRawFd, IntoRawFd, RawFd};
|
|||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
use seccompiler::{
|
use seccompiler::{
|
||||||
BackendError, BpfProgram, SeccompAction, SeccompFilter, SeccompRule, TargetArch, sock_filter,
|
BackendError, BpfProgram, SeccompAction, SeccompCmpArgLen, SeccompCmpOp, SeccompCondition,
|
||||||
|
SeccompFilter, SeccompRule, TargetArch, sock_filter,
|
||||||
};
|
};
|
||||||
use syscalls::Sysno;
|
use syscalls::Sysno;
|
||||||
|
|
||||||
@@ -65,7 +69,12 @@ fn build_program_bytes() -> Result<Vec<u8>, SandboxError> {
|
|||||||
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
|
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
|
||||||
for name in ALLOWED_SYSCALLS {
|
for name in ALLOWED_SYSCALLS {
|
||||||
if let Ok(sysno) = Sysno::from_str(name) {
|
if let Ok(sysno) = Sysno::from_str(name) {
|
||||||
rules.insert(i64::from(sysno.id()), vec![]);
|
let nr = i64::from(sysno.id());
|
||||||
|
let filtered = match *name {
|
||||||
|
"ioctl" => ioctl_rules().map_err(|e| SandboxError::Seccomp(e.to_string()))?,
|
||||||
|
_ => vec![],
|
||||||
|
};
|
||||||
|
rules.insert(nr, filtered);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let filter = SeccompFilter::new(
|
let filter = SeccompFilter::new(
|
||||||
@@ -81,6 +90,19 @@ fn build_program_bytes() -> Result<Vec<u8>, SandboxError> {
|
|||||||
Ok(serialize(&program))
|
Ok(serialize(&program))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const TIOCSTI: u64 = 0x5412;
|
||||||
|
const TIOCLINUX: u64 = 0x541C;
|
||||||
|
|
||||||
|
/// Allow ioctl except for TIOCSTI and TIOCLINUX terminal injection attacks.
|
||||||
|
/// Dword (32-bit) comparison prevents the CVE-2019-10063 bypass where the
|
||||||
|
/// kernel ignores the high 32 bits of the ioctl command argument.
|
||||||
|
fn ioctl_rules() -> Result<Vec<SeccompRule>, BackendError> {
|
||||||
|
Ok(vec![SeccompRule::new(vec![
|
||||||
|
SeccompCondition::new(1, SeccompCmpArgLen::Dword, SeccompCmpOp::Ne, TIOCSTI)?,
|
||||||
|
SeccompCondition::new(1, SeccompCmpArgLen::Dword, SeccompCmpOp::Ne, TIOCLINUX)?,
|
||||||
|
])?])
|
||||||
|
}
|
||||||
|
|
||||||
fn current_target_arch() -> Result<TargetArch, SandboxError> {
|
fn current_target_arch() -> Result<TargetArch, SandboxError> {
|
||||||
match std::env::consts::ARCH {
|
match std::env::consts::ARCH {
|
||||||
"x86_64" => Ok(TargetArch::x86_64),
|
"x86_64" => Ok(TargetArch::x86_64),
|
||||||
|
|||||||
+52
-2
@@ -417,8 +417,8 @@ fn whitelist_sys_is_readable() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn new_session_isolates_sid() {
|
fn hardened_isolates_sid() {
|
||||||
let inner_sid = read_sid_inside_sandbox(&[]);
|
let inner_sid = read_sid_inside_sandbox(&["--hardened"]);
|
||||||
let outer_sid = read_sid_current_process();
|
let outer_sid = read_sid_current_process();
|
||||||
|
|
||||||
assert_ne!(
|
assert_ne!(
|
||||||
@@ -427,6 +427,17 @@ fn new_session_isolates_sid() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn default_mode_shares_session() {
|
||||||
|
let inner_sid = read_sid_inside_sandbox(&[]);
|
||||||
|
let outer_sid = read_sid_current_process();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
inner_sid, outer_sid,
|
||||||
|
"default-mode sandbox should share the session ID (got {inner_sid} != {outer_sid})"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn blacklist_run_is_tmpfs() {
|
fn blacklist_run_is_tmpfs() {
|
||||||
let output = sandbox(&[])
|
let output = sandbox(&[])
|
||||||
@@ -1331,3 +1342,42 @@ fn seccomp_bash_pthread_fallback_works() {
|
|||||||
String::from_utf8_lossy(&output.stderr)
|
String::from_utf8_lossy(&output.stderr)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn seccomp_blocks_tiocsti() {
|
||||||
|
// TIOCSTI (0x5412) injects keystrokes into the terminal input queue.
|
||||||
|
// Without --new-session, this is the primary defense against CVE-2017-5226.
|
||||||
|
//
|
||||||
|
// On kernels >= 6.2 with CONFIG_LEGACY_TIOCSTI=n, the kernel blocks TIOCSTI
|
||||||
|
// before seccomp sees it. We test with --no-seccomp first to detect that and
|
||||||
|
// skip, so the test only asserts our filter's behaviour.
|
||||||
|
let baseline = sandbox(&["--no-seccomp"])
|
||||||
|
.args([
|
||||||
|
"--",
|
||||||
|
"python3",
|
||||||
|
"-c",
|
||||||
|
"import fcntl; fcntl.ioctl(0, 0x5412, b'x')",
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.expect("agent-sandbox binary failed to execute");
|
||||||
|
|
||||||
|
if !baseline.status.success() {
|
||||||
|
// Baseline failed (kernel blocks TIOCSTI, stdin is not a TTY, or python3 is missing); seccomp filter is untestable here.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let output = sandbox(&[])
|
||||||
|
.args([
|
||||||
|
"--",
|
||||||
|
"python3",
|
||||||
|
"-c",
|
||||||
|
"import fcntl; fcntl.ioctl(0, 0x5412, b'x')",
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.expect("agent-sandbox binary failed to execute");
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
!output.status.success(),
|
||||||
|
"expected TIOCSTI to be blocked by seccomp filter"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user