Fix misinformation in seccomp comments
This commit is contained in:
+15
-8
@@ -6,14 +6,15 @@
|
||||
//! Simplifications vs upstream (intentional):
|
||||
//! - No capability-conditional rules (we never grant capabilities, so all
|
||||
//! of Podman's `caps` blocks collapse to "deny" — we just omit them).
|
||||
//! - No explicit-EPERM list — anything outside the allowlist returns ENOSYS
|
||||
//! via the default action.
|
||||
//! - No explicit-EPERM list — anything outside the allowlist on the host
|
||||
//! ABI returns ENOSYS via the default action; cross-ABI calls are killed
|
||||
//! by seccompiler's arch prologue before the rule lookup runs.
|
||||
//! - x86_64 and aarch64 only.
|
||||
//!
|
||||
//! Argument filters (modelled after Flatpak and Docker/Podman seccomp policies):
|
||||
//! - `ioctl`: blocks TIOCSTI and TIOCLINUX to prevent terminal input
|
||||
//! injection (CVE-2017-5226). Uses 32-bit arg comparison to avoid the
|
||||
//! snapd bypass (CVE-2019-10063).
|
||||
//! Flatpak bypass (CVE-2019-10063).
|
||||
//! - `clone`: blocks CLONE_NEWUSER to prevent user-namespace privilege
|
||||
//! escalation. `clone3` remains fully denied (not in the allowlist)
|
||||
//! because BPF cannot inspect its struct argument.
|
||||
@@ -24,7 +25,10 @@
|
||||
//! Syscall name -> number resolution uses the `syscalls` crate. Names that
|
||||
//! don't exist on the host architecture (e.g. legacy 32-bit aliases like
|
||||
//! `_llseek`, or aarch64-only `set_tls` when building on x86_64) are silently
|
||||
//! skipped — they would just return ENOSYS anyway under the default action.
|
||||
//! skipped. This is safe: such names are unreachable via the host ABI, and
|
||||
//! any cross-ABI attempt to call them (e.g. `int 0x80` from a 64-bit process)
|
||||
//! is killed by seccompiler's arch-validation prologue (SECCOMP_RET_KILL_PROCESS)
|
||||
//! before the syscall table is even consulted.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::io::{Seek, SeekFrom, Write};
|
||||
@@ -53,7 +57,8 @@ pub fn write_program_to_memfd() -> Result<RawFd, SandboxError> {
|
||||
|
||||
// SAFETY: memfd_create is a normal Linux syscall. We pass a valid C string
|
||||
// and flags=0, so the fd is created without MFD_CLOEXEC and survives exec
|
||||
// into bwrap. The name is only a debugging label (shows up in /proc/<pid>/maps).
|
||||
// into bwrap. The name is only a debugging label (shows up as the symlink
|
||||
// target in /proc/<pid>/fd/<n>).
|
||||
let raw_fd = unsafe { libc::memfd_create(c"agent-sandbox-seccomp".as_ptr(), 0) };
|
||||
if raw_fd < 0 {
|
||||
return Err(SandboxError::Io(std::io::Error::last_os_error()));
|
||||
@@ -133,9 +138,11 @@ fn current_target_arch() -> Result<TargetArch, SandboxError> {
|
||||
}
|
||||
|
||||
fn serialize(program: &[sock_filter]) -> Vec<u8> {
|
||||
// Flatten the in-memory BpfProgram into the raw byte stream the kernel's
|
||||
// seccomp(2) interface expects. A BpfProgram is &[sock_filter], where
|
||||
// sock_filter is the classic-BPF instruction format from <linux/filter.h>:
|
||||
// Flatten the in-memory BpfProgram into the byte stream bwrap expects to
|
||||
// read from the fd passed via --seccomp. bwrap then constructs a
|
||||
// `struct sock_fprog` over that buffer and hands it to the kernel via
|
||||
// seccomp(2). A BpfProgram is &[sock_filter], where sock_filter is the
|
||||
// classic-BPF instruction format from <linux/filter.h>:
|
||||
//
|
||||
// struct sock_filter {
|
||||
// __u16 code; // opcode
|
||||
|
||||
Reference in New Issue
Block a user