Fix misinformation in seccomp comments

This commit is contained in:
2026-04-13 17:03:38 +02:00
parent 7b7294d94e
commit 76c5be0e72
+15 -8
View File
@@ -6,14 +6,15 @@
//! Simplifications vs upstream (intentional): //! Simplifications vs upstream (intentional):
//! - No capability-conditional rules (we never grant capabilities, so all //! - No capability-conditional rules (we never grant capabilities, so all
//! of Podman's `caps` blocks collapse to "deny" — we just omit them). //! of Podman's `caps` blocks collapse to "deny" — we just omit them).
-//! - No explicit-EPERM list — anything outside the allowlist returns ENOSYS
-//! via the default action.
+//! - No explicit-EPERM list — anything outside the allowlist on the host
+//! ABI returns ENOSYS via the default action; cross-ABI calls are killed
+//! by seccompiler's arch prologue before the rule lookup runs.
//! - x86_64 and aarch64 only. //! - x86_64 and aarch64 only.
//! //!
//! Argument filters (modelled after Flatpak and Docker/Podman seccomp policies): //! Argument filters (modelled after Flatpak and Docker/Podman seccomp policies):
//! - `ioctl`: blocks TIOCSTI and TIOCLINUX to prevent terminal input //! - `ioctl`: blocks TIOCSTI and TIOCLINUX to prevent terminal input
//! injection (CVE-2017-5226). Uses 32-bit arg comparison to avoid the //! injection (CVE-2017-5226). Uses 32-bit arg comparison to avoid the
-//! snapd bypass (CVE-2019-10063).
+//! Flatpak bypass (CVE-2019-10063).
//! - `clone`: blocks CLONE_NEWUSER to prevent user-namespace privilege //! - `clone`: blocks CLONE_NEWUSER to prevent user-namespace privilege
//! escalation. `clone3` remains fully denied (not in the allowlist) //! escalation. `clone3` remains fully denied (not in the allowlist)
//! because BPF cannot inspect its struct argument. //! because BPF cannot inspect its struct argument.
@@ -24,7 +25,10 @@
//! Syscall name -> number resolution uses the `syscalls` crate. Names that //! Syscall name -> number resolution uses the `syscalls` crate. Names that
//! don't exist on the host architecture (e.g. legacy 32-bit aliases like //! don't exist on the host architecture (e.g. legacy 32-bit aliases like
//! `_llseek`, or aarch64-only `set_tls` when building on x86_64) are silently //! `_llseek`, or aarch64-only `set_tls` when building on x86_64) are silently
-//! skipped — they would just return ENOSYS anyway under the default action.
+//! skipped. This is safe: such names are unreachable via the host ABI, and
+//! any cross-ABI attempt to call them (e.g. int 0x80 from a 64-bit process)
+//! is killed by seccompiler's arch-validation prologue (SECCOMP_RET_KILL_PROCESS)
+//! before the syscall table is even consulted.
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::io::{Seek, SeekFrom, Write}; use std::io::{Seek, SeekFrom, Write};
@@ -53,7 +57,8 @@ pub fn write_program_to_memfd() -> Result<RawFd, SandboxError> {
// Safety: memfd_create is a normal Linux syscall. We pass a valid C string // Safety: memfd_create is a normal Linux syscall. We pass a valid C string
// and flags=0, so the fd is created without MFD_CLOEXEC and survives exec // and flags=0, so the fd is created without MFD_CLOEXEC and survives exec
-// into bwrap. The name is only a debugging label (shows up in /proc/<pid>/maps).
+// into bwrap. The name is only a debugging label (shows up as the symlink
+// target in /proc/<pid>/fd/<n>).
let raw_fd = unsafe { libc::memfd_create(c"agent-sandbox-seccomp".as_ptr(), 0) }; let raw_fd = unsafe { libc::memfd_create(c"agent-sandbox-seccomp".as_ptr(), 0) };
if raw_fd < 0 { if raw_fd < 0 {
return Err(SandboxError::Io(std::io::Error::last_os_error())); return Err(SandboxError::Io(std::io::Error::last_os_error()));
@@ -133,9 +138,11 @@ fn current_target_arch() -> Result<TargetArch, SandboxError> {
} }
fn serialize(program: &[sock_filter]) -> Vec<u8> { fn serialize(program: &[sock_filter]) -> Vec<u8> {
-// Flatten the in-memory BpfProgram into the raw byte stream the kernel's
-// seccomp(2) interface expects. A BpfProgram is &[sock_filter], where
-// sock_filter is the classic-BPF instruction format from <linux/filter.h>:
+// Flatten the in-memory BpfProgram into the byte stream bwrap expects to
+// read from the fd passed via --seccomp. bwrap then constructs a
+// `struct sock_fprog` over that buffer and hands it to the kernel via
+// seccomp(2). A BpfProgram is &[sock_filter], where sock_filter is the
+// classic-BPF instruction format from <linux/filter.h>:
// //
// struct sock_filter { // struct sock_filter {
// __u16 code; // opcode // __u16 code; // opcode