Apply a seccomp-BPF syscall allowlist by default
Derived from Podman's default profile, stripped of capability-conditional rules (we never grant capabilities), argument filters, and the explicit EPERM block. Dangerous syscalls (mount, unshare, ptrace, bpf, perf_event_open, io_uring_*, keyctl, kexec_*, ...) fall through to the default ENOSYS action, which also keeps glibc's clone3 -> clone fallback working. x86_64 and aarch64 are supported; other archs error out. Toggle with --seccomp / --no-seccomp or seccomp = <bool> in config.
This commit is contained in:
18
Cargo.lock
generated
18
Cargo.lock
generated
@@ -8,8 +8,11 @@ version = "0.1.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"glob",
|
"glob",
|
||||||
|
"libc",
|
||||||
|
"seccompiler",
|
||||||
"serde",
|
"serde",
|
||||||
"shlex",
|
"shlex",
|
||||||
|
"syscalls",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"toml",
|
"toml",
|
||||||
]
|
]
|
||||||
@@ -315,6 +318,15 @@ dependencies = [
|
|||||||
"windows-sys",
|
"windows-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "seccompiler"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a4ae55de56877481d112a559bbc12667635fdaf5e005712fd4e2b2fa50ffc884"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "semver"
|
name = "semver"
|
||||||
version = "1.0.27"
|
version = "1.0.27"
|
||||||
@@ -396,6 +408,12 @@ dependencies = [
|
|||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syscalls"
|
||||||
|
version = "0.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "81c645a4de0d803ced6ef0388a2646aa1ef8467173b5d59a2c33c88de4ab76e7"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tempfile"
|
name = "tempfile"
|
||||||
version = "3.27.0"
|
version = "3.27.0"
|
||||||
|
|||||||
@@ -14,8 +14,11 @@ path = "src/main.rs"
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
clap = { version = "4", features = ["derive"] }
|
clap = { version = "4", features = ["derive"] }
|
||||||
glob = "0.3"
|
glob = "0.3"
|
||||||
|
libc = "0.2"
|
||||||
|
seccompiler = "0.5"
|
||||||
serde = { version = "1", features = ["derive"] }
|
serde = { version = "1", features = ["derive"] }
|
||||||
shlex = "1.3.0"
|
shlex = "1.3.0"
|
||||||
|
syscalls = { version = "0.8", default-features = false, features = ["std"] }
|
||||||
toml = "1"
|
toml = "1"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
|||||||
@@ -16,6 +16,10 @@ The threat model is prompt injection and accidental damage, not a determined att
|
|||||||
|
|
||||||
**Not protected in blacklist mode:** arbitrary readable files outside the sensitive paths list, and D-Bus method calls (access control is daemon-side).
|
**Not protected in blacklist mode:** arbitrary readable files outside the sensitive paths list, and D-Bus method calls (access control is daemon-side).
|
||||||
|
|
||||||
|
## Seccomp
|
||||||
|
|
||||||
|
Both modes apply a seccomp-BPF syscall allowlist derived from Podman's default profile. Dangerous syscalls (`mount`, `unshare`, `ptrace`, `bpf`, `perf_event_open`, `io_uring_*`, `keyctl`, `kexec_*`, …) return `ENOSYS`. Disable with `--no-seccomp` or `seccomp = false` in the config file.
|
||||||
|
|
||||||
## Configuration file
|
## Configuration file
|
||||||
|
|
||||||
Settings can be stored in a TOML config file at `$XDG_CONFIG_HOME/agent-sandbox/config.toml` (or pass `--config <path>`). Use `--no-config` to skip loading it. The config file accepts the same options as the corresponding CLI flags.
|
Settings can be stored in a TOML config file at `$XDG_CONFIG_HOME/agent-sandbox/config.toml` (or pass `--config <path>`). Use `--no-config` to skip loading it. The config file accepts the same options as the corresponding CLI flags.
|
||||||
|
|||||||
@@ -34,6 +34,14 @@ pub struct Args {
|
|||||||
#[arg(long, overrides_with = "unshare_net")]
|
#[arg(long, overrides_with = "unshare_net")]
|
||||||
pub share_net: bool,
|
pub share_net: bool,
|
||||||
|
|
||||||
|
/// Enable seccomp syscall filtering (on by default; overrides config-file `seccomp = false`)
|
||||||
|
#[arg(long, overrides_with = "no_seccomp")]
|
||||||
|
pub seccomp: bool,
|
||||||
|
|
||||||
|
/// Disable seccomp syscall filtering (overrides config-file `seccomp = true`)
|
||||||
|
#[arg(long, overrides_with = "seccomp")]
|
||||||
|
pub no_seccomp: bool,
|
||||||
|
|
||||||
/// Bind an extra path read-write (repeatable)
|
/// Bind an extra path read-write (repeatable)
|
||||||
#[arg(long = "rw", value_name = "PATH", action = clap::ArgAction::Append)]
|
#[arg(long = "rw", value_name = "PATH", action = clap::ArgAction::Append)]
|
||||||
pub extra_rw: Vec<PathBuf>,
|
pub extra_rw: Vec<PathBuf>,
|
||||||
|
|||||||
@@ -38,6 +38,12 @@ pub fn build(args: Args, file_config: Option<FileConfig>) -> Result<SandboxConfi
|
|||||||
profile.unshare_net,
|
profile.unshare_net,
|
||||||
globals.unshare_net,
|
globals.unshare_net,
|
||||||
),
|
),
|
||||||
|
seccomp: merge_flag_with_default(
|
||||||
|
merge_flag_pair(args.seccomp, args.no_seccomp),
|
||||||
|
profile.seccomp,
|
||||||
|
globals.seccomp,
|
||||||
|
true,
|
||||||
|
),
|
||||||
dry_run: merge_flag(
|
dry_run: merge_flag(
|
||||||
merge_flag_pair(args.dry_run, args.no_dry_run),
|
merge_flag_pair(args.dry_run, args.no_dry_run),
|
||||||
profile.dry_run,
|
profile.dry_run,
|
||||||
@@ -86,6 +92,15 @@ fn merge_flag(cli: Option<bool>, profile: Option<bool>, globals: Option<bool>) -
|
|||||||
cli.or(profile).or(globals).unwrap_or(false)
|
cli.or(profile).or(globals).unwrap_or(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resolve a boolean option whose fallback is caller-chosen rather than `false`.
///
/// Precedence is CLI first, then the profile, then the config-file globals;
/// `default` is used only when none of the three sources set the option.
fn merge_flag_with_default(
    cli: Option<bool>,
    profile: Option<bool>,
    globals: Option<bool>,
    default: bool,
) -> bool {
    match (cli, profile, globals) {
        // The first source that is set wins, scanning left to right.
        (Some(flag), _, _) | (None, Some(flag), _) | (None, None, Some(flag)) => flag,
        (None, None, None) => default,
    }
}
|
||||||
|
|
||||||
fn merge_flag_pair(enable: bool, disable: bool) -> Option<bool> {
|
fn merge_flag_pair(enable: bool, disable: bool) -> Option<bool> {
|
||||||
if enable {
|
if enable {
|
||||||
Some(true)
|
Some(true)
|
||||||
@@ -244,6 +259,7 @@ pub struct Options {
|
|||||||
pub whitelist: Option<bool>,
|
pub whitelist: Option<bool>,
|
||||||
pub hardened: Option<bool>,
|
pub hardened: Option<bool>,
|
||||||
pub unshare_net: Option<bool>,
|
pub unshare_net: Option<bool>,
|
||||||
|
pub seccomp: Option<bool>,
|
||||||
pub entrypoint: Option<CommandValue>,
|
pub entrypoint: Option<CommandValue>,
|
||||||
pub command: Option<CommandValue>,
|
pub command: Option<CommandValue>,
|
||||||
pub dry_run: Option<bool>,
|
pub dry_run: Option<bool>,
|
||||||
@@ -380,6 +396,7 @@ mod tests {
|
|||||||
const FULL_CONFIG_TOML: &str = r#"
|
const FULL_CONFIG_TOML: &str = r#"
|
||||||
hardened = true
|
hardened = true
|
||||||
unshare-net = true
|
unshare-net = true
|
||||||
|
seccomp = false
|
||||||
rw = ["/tmp/a", "/tmp/b"]
|
rw = ["/tmp/a", "/tmp/b"]
|
||||||
command = "zsh"
|
command = "zsh"
|
||||||
|
|
||||||
@@ -403,6 +420,7 @@ mod tests {
|
|||||||
fn globals_scalars() {
|
fn globals_scalars() {
|
||||||
assert_eq!(CONFIG.options.hardened, Some(true));
|
assert_eq!(CONFIG.options.hardened, Some(true));
|
||||||
assert_eq!(CONFIG.options.unshare_net, Some(true));
|
assert_eq!(CONFIG.options.unshare_net, Some(true));
|
||||||
|
assert_eq!(CONFIG.options.seccomp, Some(false));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -522,6 +540,59 @@ mod tests {
|
|||||||
assert!(config.unshare_net);
|
assert!(config.unshare_net);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// With no CLI flag and no config file, the built config has seccomp enabled.
#[test]
fn build_seccomp_default_is_true() {
    let built = build(args_with_command(), None).unwrap();
    let seccomp_enabled = built.seccomp;
    assert!(seccomp_enabled);
}
|
||||||
|
|
||||||
|
/// `seccomp = false` in the config-file options turns the filter off when the
/// CLI does not say otherwise.
#[test]
fn build_seccomp_disabled_via_config() {
    let options = Options {
        seccomp: Some(false),
        ..Options::default()
    };
    let file_config = FileConfig {
        options,
        ..FileConfig::default()
    };

    let built = build(args_with_command(), Some(file_config)).unwrap();
    assert!(!built.seccomp);
}
|
||||||
|
|
||||||
|
/// `--seccomp` on the command line wins over `seccomp = false` from the file.
#[test]
fn build_cli_seccomp_overrides_profile() {
    let options = Options {
        seccomp: Some(false),
        ..Options::default()
    };
    let file_config = FileConfig {
        options,
        ..FileConfig::default()
    };
    let cli = Args {
        seccomp: true,
        ..args_with_command()
    };

    let built = build(cli, Some(file_config)).unwrap();
    assert!(built.seccomp);
}
|
||||||
|
|
||||||
|
/// `--no-seccomp` on the command line wins over `seccomp = true` from the file.
#[test]
fn build_cli_no_seccomp_overrides_profile() {
    let options = Options {
        seccomp: Some(true),
        ..Options::default()
    };
    let file_config = FileConfig {
        options,
        ..FileConfig::default()
    };
    let cli = Args {
        no_seccomp: true,
        ..args_with_command()
    };

    let built = build(cli, Some(file_config)).unwrap();
    assert!(!built.seccomp);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn build_cli_no_hardened_overrides_profile() {
|
fn build_cli_no_hardened_overrides_profile() {
|
||||||
let file_config = FileConfig {
|
let file_config = FileConfig {
|
||||||
|
|||||||
@@ -26,6 +26,8 @@ pub enum SandboxError {
|
|||||||
ConfigPathNotAbsolute(PathBuf),
|
ConfigPathNotAbsolute(PathBuf),
|
||||||
InvalidBwrapArg(String),
|
InvalidBwrapArg(String),
|
||||||
NoCommand,
|
NoCommand,
|
||||||
|
Seccomp(String),
|
||||||
|
SeccompUnsupportedArch(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for SandboxError {
|
impl std::fmt::Display for SandboxError {
|
||||||
@@ -74,6 +76,11 @@ impl std::fmt::Display for SandboxError {
|
|||||||
f,
|
f,
|
||||||
"no command to run; specify a command via config, entrypoint, or pass one after --"
|
"no command to run; specify a command via config, entrypoint, or pass one after --"
|
||||||
),
|
),
|
||||||
|
Self::Seccomp(msg) => write!(f, "failed to build seccomp filter: {msg}"),
|
||||||
|
Self::SeccompUnsupportedArch(arch) => write!(
|
||||||
|
f,
|
||||||
|
"seccomp filtering is not supported on this architecture: {arch} (use --no-seccomp to disable)"
|
||||||
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ pub mod config;
|
|||||||
mod errors;
|
mod errors;
|
||||||
mod preflight;
|
mod preflight;
|
||||||
mod sandbox;
|
mod sandbox;
|
||||||
|
mod seccomp;
|
||||||
|
|
||||||
pub use errors::SandboxError;
|
pub use errors::SandboxError;
|
||||||
|
|
||||||
@@ -23,6 +24,7 @@ pub struct SandboxConfig {
|
|||||||
pub mode: SandboxMode,
|
pub mode: SandboxMode,
|
||||||
pub hardened: bool,
|
pub hardened: bool,
|
||||||
pub unshare_net: bool,
|
pub unshare_net: bool,
|
||||||
|
pub seccomp: bool,
|
||||||
pub extra_rw: Vec<PathBuf>,
|
pub extra_rw: Vec<PathBuf>,
|
||||||
pub extra_ro: Vec<PathBuf>,
|
pub extra_ro: Vec<PathBuf>,
|
||||||
pub mask: Vec<PathBuf>,
|
pub mask: Vec<PathBuf>,
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ use std::process::Command;
|
|||||||
|
|
||||||
use crate::agents;
|
use crate::agents;
|
||||||
use crate::blacklist;
|
use crate::blacklist;
|
||||||
|
use crate::seccomp;
|
||||||
use crate::{SandboxConfig, SandboxError, SandboxMode};
|
use crate::{SandboxConfig, SandboxError, SandboxMode};
|
||||||
|
|
||||||
pub fn build_command(config: &SandboxConfig) -> Result<Command, SandboxError> {
|
pub fn build_command(config: &SandboxConfig) -> Result<Command, SandboxError> {
|
||||||
@@ -41,6 +42,10 @@ pub fn build_command(config: &SandboxConfig) -> Result<Command, SandboxError> {
|
|||||||
|
|
||||||
apply_masks(&mut cmd, &config.mask);
|
apply_masks(&mut cmd, &config.mask);
|
||||||
|
|
||||||
|
if config.seccomp {
|
||||||
|
add_seccomp_filter(&mut cmd)?;
|
||||||
|
}
|
||||||
|
|
||||||
cmd.args(&config.bwrap_args);
|
cmd.args(&config.bwrap_args);
|
||||||
|
|
||||||
cmd.arg("--")
|
cmd.arg("--")
|
||||||
@@ -194,3 +199,9 @@ fn add_ro_bind(cmd: &mut Command, path: &Path) -> Result<(), SandboxError> {
|
|||||||
fn resolve_bind_source(path: &Path) -> Result<PathBuf, SandboxError> {
|
fn resolve_bind_source(path: &Path) -> Result<PathBuf, SandboxError> {
|
||||||
std::fs::canonicalize(path).map_err(|_| SandboxError::PathMissing(path.to_path_buf()))
|
std::fs::canonicalize(path).map_err(|_| SandboxError::PathMissing(path.to_path_buf()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn add_seccomp_filter(cmd: &mut Command) -> Result<(), SandboxError> {
|
||||||
|
let fd = seccomp::write_program_to_memfd()?;
|
||||||
|
cmd.arg("--seccomp").arg(fd.to_string());
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|||||||
194
src/seccomp.rs
Normal file
194
src/seccomp.rs
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
//! seccomp BPF allowlist for sandboxed processes.
|
||||||
|
//!
|
||||||
|
//! Derived from Podman's containers/common default profile:
|
||||||
|
//! <https://github.com/containers/common/blob/main/pkg/seccomp/seccomp.json>
|
||||||
|
//!
|
||||||
|
//! Simplifications vs upstream (intentional):
|
||||||
|
//! - No capability-conditional rules (we never grant capabilities, so all
|
||||||
|
//! of Podman's `caps` blocks collapse to "deny" — we just omit them).
|
||||||
|
//! - No argument filters. `personality` stays out of the allowlist; `socket`
|
||||||
|
//! is allowed unconditionally rather than gated on AF_VSOCK.
|
||||||
|
//! - No explicit-EPERM list — anything outside the allowlist returns ENOSYS
|
||||||
|
//! via the default action.
|
||||||
|
//! - x86_64 and aarch64 only.
|
||||||
|
//!
|
||||||
|
//! Default action is ENOSYS (errno 38), not EPERM. This matches Podman's
|
||||||
|
//! stance and is critical for the glibc clone3 -> clone fallback path.
|
||||||
|
//!
|
||||||
|
//! Syscall name -> number resolution uses the `syscalls` crate. Names that
|
||||||
|
//! don't exist on the host architecture (e.g. legacy 32-bit aliases like
|
||||||
|
//! `_llseek`, or the 32-bit ARM-only `set_tls` on x86_64 and aarch64) are silently
|
||||||
|
//! skipped — they would just return ENOSYS anyway under the default action.
|
||||||
|
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::io::{Seek, SeekFrom, Write};
|
||||||
|
use std::os::fd::{FromRawFd, IntoRawFd, RawFd};
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use seccompiler::{
|
||||||
|
BackendError, BpfProgram, SeccompAction, SeccompFilter, SeccompRule, TargetArch, sock_filter,
|
||||||
|
};
|
||||||
|
use syscalls::Sysno;
|
||||||
|
|
||||||
|
use crate::SandboxError;
|
||||||
|
|
||||||
|
/// Syscall allowlist. Includes Podman's unconditional allow set (minus syscalls
|
||||||
|
/// we deny on top, see module docs) plus arch-specific syscalls for the targets
|
||||||
|
/// we support. Names absent from the host arch are skipped at filter-build time.
|
||||||
|
const ALLOWED_SYSCALLS: &[&str] = &include!("seccomp_allowlist.in");
|
||||||
|
|
||||||
|
/// Build a seccomp BPF program, write it to an anonymous in-memory file, and
|
||||||
|
/// return the raw fd. The fd is intentionally leaked from Rust's ownership: it
|
||||||
|
/// is created without `MFD_CLOEXEC` so it survives `exec` into bwrap, which
|
||||||
|
/// inherits and closes it after reading the filter.
|
||||||
|
pub fn write_program_to_memfd() -> Result<RawFd, SandboxError> {
|
||||||
|
let bytes = build_program_bytes()?;
|
||||||
|
|
||||||
|
// Safety: memfd_create is a normal Linux syscall. We pass a valid C string
|
||||||
|
// and flags=0, so the fd is created without MFD_CLOEXEC and survives exec
|
||||||
|
// into bwrap. The name is only a debugging label (shows up in /proc/<pid>/maps).
|
||||||
|
let raw_fd = unsafe { libc::memfd_create(c"agent-sandbox-seccomp".as_ptr(), 0) };
|
||||||
|
if raw_fd < 0 {
|
||||||
|
return Err(SandboxError::Io(std::io::Error::last_os_error()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Safety: raw_fd is owned by us and currently uniquely held.
|
||||||
|
let mut file = unsafe { std::fs::File::from_raw_fd(raw_fd) };
|
||||||
|
file.write_all(&bytes)?;
|
||||||
|
file.seek(SeekFrom::Start(0))?;
|
||||||
|
// into_raw_fd consumes the File without closing the underlying fd.
|
||||||
|
Ok(file.into_raw_fd())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_program_bytes() -> Result<Vec<u8>, SandboxError> {
|
||||||
|
let target_arch = current_target_arch()?;
|
||||||
|
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
|
||||||
|
for name in ALLOWED_SYSCALLS {
|
||||||
|
if let Ok(sysno) = Sysno::from_str(name) {
|
||||||
|
rules.insert(i64::from(sysno.id()), vec![]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let filter = SeccompFilter::new(
|
||||||
|
rules,
|
||||||
|
SeccompAction::Errno(libc::ENOSYS as u32),
|
||||||
|
SeccompAction::Allow,
|
||||||
|
target_arch,
|
||||||
|
)
|
||||||
|
.map_err(|e| SandboxError::Seccomp(e.to_string()))?;
|
||||||
|
let program: BpfProgram = filter
|
||||||
|
.try_into()
|
||||||
|
.map_err(|e: BackendError| SandboxError::Seccomp(e.to_string()))?;
|
||||||
|
Ok(serialize(&program))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn current_target_arch() -> Result<TargetArch, SandboxError> {
|
||||||
|
match std::env::consts::ARCH {
|
||||||
|
"x86_64" => Ok(TargetArch::x86_64),
|
||||||
|
"aarch64" => Ok(TargetArch::aarch64),
|
||||||
|
other => Err(SandboxError::SeccompUnsupportedArch(other.to_string())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize(program: &[sock_filter]) -> Vec<u8> {
|
||||||
|
// Flatten the in-memory BpfProgram into the raw byte stream the kernel's
|
||||||
|
// seccomp(2) interface expects. A BpfProgram is &[sock_filter], where
|
||||||
|
// sock_filter is the classic-BPF instruction format from <linux/filter.h>:
|
||||||
|
//
|
||||||
|
// struct sock_filter {
|
||||||
|
// __u16 code; // opcode
|
||||||
|
// __u8 jt; // jump-if-true offset
|
||||||
|
// __u8 jf; // jump-if-false offset
|
||||||
|
// __u32 k; // generic immediate operand
|
||||||
|
// };
|
||||||
|
//
|
||||||
|
// Exactly 8 bytes, no padding. Native endian because producer and consumer
|
||||||
|
// are the same machine — there is no cross-host serialization.
|
||||||
|
let mut out = Vec::with_capacity(program.len() * 8);
|
||||||
|
for insn in program {
|
||||||
|
out.extend_from_slice(&insn.code.to_ne_bytes());
|
||||||
|
out.push(insn.jt);
|
||||||
|
out.push(insn.jf);
|
||||||
|
out.extend_from_slice(&insn.k.to_ne_bytes());
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Syscalls that must appear in the allowlist.
    const ESSENTIAL: &[&str] = &[
        "read",
        "write",
        "openat",
        "close",
        "execve",
        "exit_group",
        "mmap",
        "brk",
        "clone",
    ];

    /// Syscalls that must never appear in the allowlist.
    const DANGEROUS: &[&str] = &[
        "bpf",
        "perf_event_open",
        "userfaultfd",
        "kexec_load",
        "kexec_file_load",
        "init_module",
        "finit_module",
        "delete_module",
        "mount",
        "umount",
        "umount2",
        "unshare",
        "setns",
        "pivot_root",
        "ptrace",
        "process_vm_readv",
        "process_vm_writev",
        "keyctl",
        "personality",
        "clone3",
        "io_uring_setup",
        "io_uring_register",
        "io_uring_enter",
        "fanotify_init",
        "fanotify_mark",
        "open_by_handle_at",
        "name_to_handle_at",
        "fsopen",
        "fsconfig",
        "fsmount",
        "fspick",
        "open_tree",
        "move_mount",
        "mount_setattr",
        "reboot",
        "swapon",
        "swapoff",
    ];

    /// The filter compiles on the build target and serializes to whole
    /// 8-byte instructions.
    #[test]
    fn builds_on_supported_arch() {
        let program = build_program_bytes().expect("seccomp program should build");
        assert!(!program.is_empty(), "serialized BPF program is empty");
        assert_eq!(program.len() % 8, 0, "BPF byte stream must be 8-byte aligned");
    }

    /// Every syscall in `ESSENTIAL` is listed by name.
    #[test]
    fn allowlist_contains_essential_syscalls() {
        for needed in ESSENTIAL {
            assert!(
                ALLOWED_SYSCALLS.contains(needed),
                "allowlist missing essential syscall: {needed}"
            );
        }
    }

    /// No syscall in `DANGEROUS` is listed by name.
    #[test]
    fn allowlist_excludes_dangerous_syscalls() {
        for denied in DANGEROUS {
            assert!(
                !ALLOWED_SYSCALLS.contains(denied),
                "allowlist must not contain dangerous syscall: {denied}"
            );
        }
    }
}
|
||||||
369
src/seccomp_allowlist.in
Normal file
369
src/seccomp_allowlist.in
Normal file
@@ -0,0 +1,369 @@
|
|||||||
|
// Syscall allowlist included verbatim by src/seccomp.rs via include!.
|
||||||
|
//
|
||||||
|
// Source: Podman containers/common default profile, block 1 (the unconditional
|
||||||
|
// allow set), minus the deny list documented in src/seccomp.rs, plus `socket`
|
||||||
|
// (which Podman gates on argument filters we deliberately don't replicate) and
|
||||||
|
// the x86_64/aarch64 arch-specific syscalls Podman ships in its arch blocks.
|
||||||
|
//
|
||||||
|
// Names absent from the host architecture (e.g. legacy 32-bit aliases like
|
||||||
|
// `_llseek`, or the 32-bit ARM-only `set_tls` on x86_64 and aarch64) are skipped at
|
||||||
|
// filter-build time by the syscalls crate's name resolver.
|
||||||
|
[
|
||||||
|
// Core syscalls (Podman allow block minus our extra denies plus socket)
|
||||||
|
"_llseek",
|
||||||
|
"_newselect",
|
||||||
|
"accept",
|
||||||
|
"accept4",
|
||||||
|
"access",
|
||||||
|
"adjtimex",
|
||||||
|
"alarm",
|
||||||
|
"bind",
|
||||||
|
"brk",
|
||||||
|
"capget",
|
||||||
|
"capset",
|
||||||
|
"chdir",
|
||||||
|
"chmod",
|
||||||
|
"chown",
|
||||||
|
"chown32",
|
||||||
|
"clock_adjtime",
|
||||||
|
"clock_adjtime64",
|
||||||
|
"clock_getres",
|
||||||
|
"clock_getres_time64",
|
||||||
|
"clock_gettime",
|
||||||
|
"clock_gettime64",
|
||||||
|
"clock_nanosleep",
|
||||||
|
"clock_nanosleep_time64",
|
||||||
|
"clone",
|
||||||
|
"close",
|
||||||
|
"close_range",
|
||||||
|
"connect",
|
||||||
|
"copy_file_range",
|
||||||
|
"creat",
|
||||||
|
"dup",
|
||||||
|
"dup2",
|
||||||
|
"dup3",
|
||||||
|
"epoll_create",
|
||||||
|
"epoll_create1",
|
||||||
|
"epoll_ctl",
|
||||||
|
"epoll_ctl_old",
|
||||||
|
"epoll_pwait",
|
||||||
|
"epoll_pwait2",
|
||||||
|
"epoll_wait",
|
||||||
|
"epoll_wait_old",
|
||||||
|
"eventfd",
|
||||||
|
"eventfd2",
|
||||||
|
"execve",
|
||||||
|
"execveat",
|
||||||
|
"exit",
|
||||||
|
"exit_group",
|
||||||
|
"faccessat",
|
||||||
|
"faccessat2",
|
||||||
|
"fadvise64",
|
||||||
|
"fadvise64_64",
|
||||||
|
"fallocate",
|
||||||
|
"fchdir",
|
||||||
|
"fchmod",
|
||||||
|
"fchmodat",
|
||||||
|
"fchmodat2",
|
||||||
|
"fchown",
|
||||||
|
"fchown32",
|
||||||
|
"fchownat",
|
||||||
|
"fcntl",
|
||||||
|
"fcntl64",
|
||||||
|
"fdatasync",
|
||||||
|
"fgetxattr",
|
||||||
|
"flistxattr",
|
||||||
|
"flock",
|
||||||
|
"fork",
|
||||||
|
"fremovexattr",
|
||||||
|
"fsetxattr",
|
||||||
|
"fstat",
|
||||||
|
"fstat64",
|
||||||
|
"fstatat64",
|
||||||
|
"fstatfs",
|
||||||
|
"fstatfs64",
|
||||||
|
"fsync",
|
||||||
|
"ftruncate",
|
||||||
|
"ftruncate64",
|
||||||
|
"futex",
|
||||||
|
"futex_time64",
|
||||||
|
"futimesat",
|
||||||
|
"get_robust_list",
|
||||||
|
"get_thread_area",
|
||||||
|
"getcpu",
|
||||||
|
"getcwd",
|
||||||
|
"getdents",
|
||||||
|
"getdents64",
|
||||||
|
"getegid",
|
||||||
|
"getegid32",
|
||||||
|
"geteuid",
|
||||||
|
"geteuid32",
|
||||||
|
"getgid",
|
||||||
|
"getgid32",
|
||||||
|
"getgroups",
|
||||||
|
"getgroups32",
|
||||||
|
"getitimer",
|
||||||
|
"getpeername",
|
||||||
|
"getpgid",
|
||||||
|
"getpgrp",
|
||||||
|
"getpid",
|
||||||
|
"getppid",
|
||||||
|
"getpriority",
|
||||||
|
"getrandom",
|
||||||
|
"getresgid",
|
||||||
|
"getresgid32",
|
||||||
|
"getresuid",
|
||||||
|
"getresuid32",
|
||||||
|
"getrlimit",
|
||||||
|
"getrusage",
|
||||||
|
"getsid",
|
||||||
|
"getsockname",
|
||||||
|
"getsockopt",
|
||||||
|
"gettid",
|
||||||
|
"gettimeofday",
|
||||||
|
"getuid",
|
||||||
|
"getuid32",
|
||||||
|
"getxattr",
|
||||||
|
"inotify_add_watch",
|
||||||
|
"inotify_init",
|
||||||
|
"inotify_init1",
|
||||||
|
"inotify_rm_watch",
|
||||||
|
"io_cancel",
|
||||||
|
"io_destroy",
|
||||||
|
"io_getevents",
|
||||||
|
"io_setup",
|
||||||
|
"io_submit",
|
||||||
|
"ioctl",
|
||||||
|
"ioprio_get",
|
||||||
|
"ioprio_set",
|
||||||
|
"ipc",
|
||||||
|
"kill",
|
||||||
|
"landlock_add_rule",
|
||||||
|
"landlock_create_ruleset",
|
||||||
|
"landlock_restrict_self",
|
||||||
|
"lchown",
|
||||||
|
"lchown32",
|
||||||
|
"lgetxattr",
|
||||||
|
"link",
|
||||||
|
"linkat",
|
||||||
|
"listen",
|
||||||
|
"listxattr",
|
||||||
|
"llistxattr",
|
||||||
|
"lremovexattr",
|
||||||
|
"lseek",
|
||||||
|
"lsetxattr",
|
||||||
|
"lstat",
|
||||||
|
"lstat64",
|
||||||
|
"madvise",
|
||||||
|
"membarrier",
|
||||||
|
"memfd_create",
|
||||||
|
"memfd_secret",
|
||||||
|
"mincore",
|
||||||
|
"mkdir",
|
||||||
|
"mkdirat",
|
||||||
|
"mknod",
|
||||||
|
"mknodat",
|
||||||
|
"mlock",
|
||||||
|
"mlock2",
|
||||||
|
"mlockall",
|
||||||
|
"mmap",
|
||||||
|
"mmap2",
|
||||||
|
"mprotect",
|
||||||
|
"mq_getsetattr",
|
||||||
|
"mq_notify",
|
||||||
|
"mq_open",
|
||||||
|
"mq_timedreceive",
|
||||||
|
"mq_timedreceive_time64",
|
||||||
|
"mq_timedsend",
|
||||||
|
"mq_timedsend_time64",
|
||||||
|
"mq_unlink",
|
||||||
|
"mremap",
|
||||||
|
"msgctl",
|
||||||
|
"msgget",
|
||||||
|
"msgrcv",
|
||||||
|
"msgsnd",
|
||||||
|
"msync",
|
||||||
|
"munlock",
|
||||||
|
"munlockall",
|
||||||
|
"munmap",
|
||||||
|
"nanosleep",
|
||||||
|
"newfstatat",
|
||||||
|
"open",
|
||||||
|
"openat",
|
||||||
|
"openat2",
|
||||||
|
"pause",
|
||||||
|
"pidfd_getfd",
|
||||||
|
"pidfd_open",
|
||||||
|
"pidfd_send_signal",
|
||||||
|
"pipe",
|
||||||
|
"pipe2",
|
||||||
|
"pkey_alloc",
|
||||||
|
"pkey_free",
|
||||||
|
"pkey_mprotect",
|
||||||
|
"poll",
|
||||||
|
"ppoll",
|
||||||
|
"ppoll_time64",
|
||||||
|
"prctl",
|
||||||
|
"pread64",
|
||||||
|
"preadv",
|
||||||
|
"preadv2",
|
||||||
|
"prlimit64",
|
||||||
|
"process_mrelease",
|
||||||
|
"pselect6",
|
||||||
|
"pselect6_time64",
|
||||||
|
"pwrite64",
|
||||||
|
"pwritev",
|
||||||
|
"pwritev2",
|
||||||
|
"read",
|
||||||
|
"readahead",
|
||||||
|
"readlink",
|
||||||
|
"readlinkat",
|
||||||
|
"readv",
|
||||||
|
"recv",
|
||||||
|
"recvfrom",
|
||||||
|
"recvmmsg",
|
||||||
|
"recvmmsg_time64",
|
||||||
|
"recvmsg",
|
||||||
|
"remap_file_pages",
|
||||||
|
"removexattr",
|
||||||
|
"rename",
|
||||||
|
"renameat",
|
||||||
|
"renameat2",
|
||||||
|
"restart_syscall",
|
||||||
|
"rmdir",
|
||||||
|
"rseq",
|
||||||
|
"rt_sigaction",
|
||||||
|
"rt_sigpending",
|
||||||
|
"rt_sigprocmask",
|
||||||
|
"rt_sigqueueinfo",
|
||||||
|
"rt_sigreturn",
|
||||||
|
"rt_sigsuspend",
|
||||||
|
"rt_sigtimedwait",
|
||||||
|
"rt_sigtimedwait_time64",
|
||||||
|
"rt_tgsigqueueinfo",
|
||||||
|
"sched_get_priority_max",
|
||||||
|
"sched_get_priority_min",
|
||||||
|
"sched_getaffinity",
|
||||||
|
"sched_getattr",
|
||||||
|
"sched_getparam",
|
||||||
|
"sched_getscheduler",
|
||||||
|
"sched_rr_get_interval",
|
||||||
|
"sched_rr_get_interval_time64",
|
||||||
|
"sched_setaffinity",
|
||||||
|
"sched_setattr",
|
||||||
|
"sched_setparam",
|
||||||
|
"sched_setscheduler",
|
||||||
|
"sched_yield",
|
||||||
|
"seccomp",
|
||||||
|
"select",
|
||||||
|
"semctl",
|
||||||
|
"semget",
|
||||||
|
"semop",
|
||||||
|
"semtimedop",
|
||||||
|
"semtimedop_time64",
|
||||||
|
"send",
|
||||||
|
"sendfile",
|
||||||
|
"sendfile64",
|
||||||
|
"sendmmsg",
|
||||||
|
"sendmsg",
|
||||||
|
"sendto",
|
||||||
|
"set_robust_list",
|
||||||
|
"set_thread_area",
|
||||||
|
"set_tid_address",
|
||||||
|
"setfsgid",
|
||||||
|
"setfsgid32",
|
||||||
|
"setfsuid",
|
||||||
|
"setfsuid32",
|
||||||
|
"setgid",
|
||||||
|
"setgid32",
|
||||||
|
"setgroups",
|
||||||
|
"setgroups32",
|
||||||
|
"setitimer",
|
||||||
|
"setpgid",
|
||||||
|
"setpriority",
|
||||||
|
"setregid",
|
||||||
|
"setregid32",
|
||||||
|
"setresgid",
|
||||||
|
"setresgid32",
|
||||||
|
"setresuid",
|
||||||
|
"setresuid32",
|
||||||
|
"setreuid",
|
||||||
|
"setreuid32",
|
||||||
|
"setrlimit",
|
||||||
|
"setsid",
|
||||||
|
"setsockopt",
|
||||||
|
"setuid",
|
||||||
|
"setuid32",
|
||||||
|
"setxattr",
|
||||||
|
"shmat",
|
||||||
|
"shmctl",
|
||||||
|
"shmdt",
|
||||||
|
"shmget",
|
||||||
|
"shutdown",
|
||||||
|
"sigaltstack",
|
||||||
|
"signal",
|
||||||
|
"signalfd",
|
||||||
|
"signalfd4",
|
||||||
|
"sigprocmask",
|
||||||
|
"sigreturn",
|
||||||
|
"socket",
|
||||||
|
"socketcall",
|
||||||
|
"socketpair",
|
||||||
|
"splice",
|
||||||
|
"stat",
|
||||||
|
"stat64",
|
||||||
|
"statfs",
|
||||||
|
"statfs64",
|
||||||
|
"statx",
|
||||||
|
"symlink",
|
||||||
|
"symlinkat",
|
||||||
|
"sync",
|
||||||
|
"sync_file_range",
|
||||||
|
"syncfs",
|
||||||
|
"sysinfo",
|
||||||
|
"syslog",
|
||||||
|
"tee",
|
||||||
|
"tgkill",
|
||||||
|
"time",
|
||||||
|
"timer_create",
|
||||||
|
"timer_delete",
|
||||||
|
"timer_getoverrun",
|
||||||
|
"timer_gettime",
|
||||||
|
"timer_gettime64",
|
||||||
|
"timer_settime",
|
||||||
|
"timer_settime64",
|
||||||
|
"timerfd_create",
|
||||||
|
"timerfd_gettime",
|
||||||
|
"timerfd_gettime64",
|
||||||
|
"timerfd_settime",
|
||||||
|
"timerfd_settime64",
|
||||||
|
"times",
|
||||||
|
"tkill",
|
||||||
|
"truncate",
|
||||||
|
"truncate64",
|
||||||
|
"ugetrlimit",
|
||||||
|
"umask",
|
||||||
|
"uname",
|
||||||
|
"unlink",
|
||||||
|
"unlinkat",
|
||||||
|
"utime",
|
||||||
|
"utimensat",
|
||||||
|
"utimensat_time64",
|
||||||
|
"utimes",
|
||||||
|
"vfork",
|
||||||
|
"wait4",
|
||||||
|
"waitid",
|
||||||
|
"waitpid",
|
||||||
|
"write",
|
||||||
|
"writev",
|
||||||
|
// x86_64-specific
|
||||||
|
"arch_prctl",
|
||||||
|
"modify_ldt",
|
||||||
|
// ARM-specific (from Podman's arm/arm64 block; these are 32-bit ARM syscall names)
|
||||||
|
"arm_fadvise64_64",
|
||||||
|
"arm_sync_file_range",
|
||||||
|
"breakpoint",
|
||||||
|
"cacheflush",
|
||||||
|
"set_tls",
|
||||||
|
"sync_file_range2",
|
||||||
|
]
|
||||||
@@ -879,3 +879,88 @@ fn mask_nonexistent_path_becomes_tmpfs() {
|
|||||||
"tmpfs writes should not leak to host"
|
"tmpfs writes should not leak to host"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// With no seccomp flag given, `unshare --user` inside the sandbox must fail.
#[test]
fn seccomp_on_by_default_blocks_unshare() {
    let mut cmd = sandbox(&[]);
    cmd.args(["--", "unshare", "--user", "--map-root-user", "/bin/true"]);
    let result = cmd
        .output()
        .expect("agent-sandbox binary failed to execute");

    let unshare_succeeded = result.status.success();
    assert!(
        !unshare_succeeded,
        "expected unshare(2) to be blocked by default seccomp filter, but it succeeded"
    );
}
|
||||||
|
|
||||||
|
/// With `--no-seccomp`, the same `unshare --user` invocation must succeed.
#[test]
fn seccomp_off_allows_blocked_syscall() {
    let mut cmd = sandbox(&["--no-seccomp"]);
    cmd.args(["--", "unshare", "--user", "--map-root-user", "/bin/true"]);
    let result = cmd
        .output()
        .expect("agent-sandbox binary failed to execute");

    let stderr = String::from_utf8_lossy(&result.stderr);
    assert!(
        result.status.success(),
        "expected unshare(2) to succeed without seccomp, stderr: {}",
        stderr
    );
}
|
||||||
|
|
||||||
|
/// A default `--dry-run` prints a command line that includes `--seccomp`.
#[test]
fn seccomp_dry_run_emits_seccomp_arg() {
    let mut cmd = sandbox(&["--dry-run"]);
    cmd.args(["--", "/bin/true"]);
    let result = cmd
        .output()
        .expect("agent-sandbox binary failed to execute");

    let stdout = String::from_utf8_lossy(&result.stdout);
    let mentions_seccomp = stdout.contains("--seccomp");
    assert!(
        mentions_seccomp,
        "expected --seccomp in dry-run output, got: {stdout}"
    );
}
|
||||||
|
|
||||||
|
/// `--dry-run --no-seccomp` prints a command line without `--seccomp`.
#[test]
fn seccomp_dry_run_no_seccomp_omits_arg() {
    let mut cmd = sandbox(&["--dry-run", "--no-seccomp"]);
    cmd.args(["--", "/bin/true"]);
    let result = cmd
        .output()
        .expect("agent-sandbox binary failed to execute");

    let stdout = String::from_utf8_lossy(&result.stdout);
    let mentions_seccomp = stdout.contains("--seccomp");
    assert!(
        !mentions_seccomp,
        "expected no --seccomp in dry-run output with --no-seccomp, got: {stdout}"
    );
}
|
||||||
|
|
||||||
|
/// An ordinary shell workload (directory listing plus `date`) runs cleanly
/// under the default filter.
#[test]
fn seccomp_normal_workload_succeeds() {
    let mut cmd = sandbox(&[]);
    cmd.args(["--", "bash", "-c", "ls /etc > /dev/null && date"]);
    let result = cmd
        .output()
        .expect("agent-sandbox binary failed to execute");

    let stderr = String::from_utf8_lossy(&result.stderr);
    assert!(
        result.status.success(),
        "expected normal workload to succeed under default seccomp, stderr: {}",
        stderr
    );
}
|
||||||
|
|
||||||
|
/// Bash must still start and exit cleanly under the default filter.
///
/// Intended as a guard for the ENOSYS-not-EPERM choice: libc retries with
/// `clone` when `clone3` reports ENOSYS.
// NOTE(review): `bash -c true` runs a builtin; confirm this invocation really
// exercises the clone3 -> clone fallback path on the CI libc.
#[test]
fn seccomp_bash_pthread_fallback_works() {
    let mut cmd = sandbox(&[]);
    cmd.args(["--", "bash", "-c", "true"]);
    let result = cmd
        .output()
        .expect("agent-sandbox binary failed to execute");

    let stderr = String::from_utf8_lossy(&result.stderr);
    assert!(
        result.status.success(),
        "expected bash to succeed under default seccomp (clone3 fallback), stderr: {}",
        stderr
    );
}
|
||||||
|
|||||||
Reference in New Issue
Block a user