Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions crates/sandlock-core/examples/openat_audit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
//! Audit every `openat(2)` that a sandboxed process performs.
//!
//! Demonstrates [`Sandbox::run_with_extra_handlers`]: a downstream crate
//! registers a user handler for `SYS_openat` that logs the call and falls
//! through to default (builtin) processing.
//!
//! Run:
//!
//! ```sh
//! # From the sandlock repo root.
//! cargo run --example openat_audit -- /usr/bin/python3 -c 'open("/etc/hostname").read()'
//! ```
//!
//! Expected output:
//!
//! ```text
//! [audit #1] pid=... openat
//! [audit #2] pid=... openat
//! [audit #3] pid=... openat
//! exit=Some(0) opens=... stdout=...
//! ```

use std::env;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

use sandlock_core::seccomp::dispatch::{ExtraHandler, HandlerFn};
use sandlock_core::seccomp::notif::NotifAction;
use sandlock_core::{Policy, Sandbox};

/// Runs the command given on the command line inside a sandbox and logs
/// every `openat` notification delivered to the user handler.
///
/// With no command, prints a usage line to stderr and exits with status 2.
/// Errors from building the policy or running the sandbox are returned to
/// the caller.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Everything after the program name is the command to sandbox.
    let argv: Vec<String> = env::args().skip(1).collect();
    if argv.is_empty() {
        eprintln!("usage: openat_audit <cmd> [args...]");
        std::process::exit(2);
    }
    let argv_refs: Vec<&str> = argv.iter().map(|arg| arg.as_str()).collect();

    // Small policy: read-only /usr, /lib, /lib64, /etc and /proc; writable /tmp.
    let policy = Policy::builder()
        .fs_read("/usr")
        .fs_read("/lib")
        .fs_read("/lib64")
        .fs_read("/etc")
        .fs_read("/proc")
        .fs_write("/tmp")
        .build()?;

    // Shared tally of observed opens: one handle is moved into the handler,
    // the other stays here for the final report.
    let opens = Arc::new(AtomicUsize::new(0));
    let opens_in_handler = Arc::clone(&opens);

    let audit: HandlerFn = Box::new(move |notif, _ctx, _fd| {
        // Each invocation gets its own handle so the returned future owns it.
        let opens = Arc::clone(&opens_in_handler);
        Box::pin(async move {
            let n = 1 + opens.fetch_add(1, Ordering::SeqCst);
            eprintln!("[audit #{n}] pid={} openat", notif.pid);
            // Log only: `Continue` hands the call on to the default
            // processing rather than answering it here.
            NotifAction::Continue
        })
    });
    let handlers = vec![ExtraHandler::new(libc::SYS_openat, audit)];

    let result = Sandbox::run_with_extra_handlers(&policy, &argv_refs, handlers).await?;

    let exit_code = result.code();
    let total_opens = opens.load(Ordering::SeqCst);
    let captured = result.stdout_str().unwrap_or("");
    println!(
        "exit={:?} opens={} stdout={:?}",
        exit_code, total_opens, captured,
    );
    Ok(())
}
44 changes: 42 additions & 2 deletions crates/sandlock-core/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -641,11 +641,40 @@ fn write_id_maps_overflow() {
// Child-side confinement (never returns)
// ============================================================

/// Arguments threaded from the parent's `do_spawn` into the child-side
/// `confine_child`.
///
/// Bundled into a struct because the positional form of `confine_child`
/// already took six parameters and `extra_syscalls` (for user handlers)
/// would have made it seven; named fields keep the call site readable as
/// further flags get added.
///
/// Every field except `nested` is a borrow tied to `'a`, i.e. to data owned
/// by the caller. The borrows are only needed until `confine_child` execs
/// or exits, which it always does.
pub(crate) struct ChildSpawnArgs<'a> {
/// Policy the child is confined under; `confine_child` also derives the
/// seccomp notif syscall list from it.
pub policy: &'a Policy,
/// Command to exec once confinement has been applied.
pub cmd: &'a [CString],
/// Pipe pair handed through to `confine_child`.
/// NOTE(review): its exact use is in a part of `confine_child` not shown here.
pub pipes: &'a PipePair,
/// Optional child mount configuration.
/// NOTE(review): presumably `None` means no copy-on-write mount setup — confirm.
pub cow_config: Option<&'a ChildMountConfig>,
/// NOTE(review): presumably selects the nested-sandbox path instead of the
/// first-level filter install — confirm against `confine_child`.
pub nested: bool,
/// File descriptors passed through to `confine_child`; `do_spawn` fills this
/// with the gather target fds that must survive `close_fds_above`.
pub keep_fds: &'a [RawFd],
/// Syscall numbers for which the parent registered `ExtraHandler`s.
/// Merged into the child's BPF notif list so the kernel actually
/// raises USER_NOTIF for them.
pub extra_syscalls: &'a [u32],
}

/// Apply irreversible confinement (Landlock + seccomp) then exec the command.
///
/// This function **never returns**: it calls `execvp` on success or
/// `_exit(127)` on any error.
pub(crate) fn confine_child(policy: &Policy, cmd: &[CString], pipes: &PipePair, cow_config: Option<&ChildMountConfig>, nested: bool, keep_fds: &[RawFd]) -> ! {
pub(crate) fn confine_child(args: ChildSpawnArgs<'_>) -> ! {
let ChildSpawnArgs {
policy,
cmd,
pipes,
cow_config,
nested,
keep_fds,
extra_syscalls,
} = args;
// Helper: abort child on error. Includes the OS error automatically.
macro_rules! fail {
($msg:expr) => {{
Expand Down Expand Up @@ -859,7 +888,18 @@ pub(crate) fn confine_child(policy: &Policy, cmd: &[CString], pipes: &PipePair,
}
} else {
// First-level sandbox: notif + deny filter with NEW_LISTENER.
let notif = notif_syscalls(policy);
//
// Caller-supplied extra handlers must have their syscalls registered in
// the BPF filter, otherwise the kernel never raises a notification for
// them and the handler silently never fires. We merge `extra_syscalls`
// into the notif list and dedup so each syscall produces exactly one
// JEQ in the assembled program.
let mut notif = notif_syscalls(policy);
if !extra_syscalls.is_empty() {
notif.extend_from_slice(extra_syscalls);
notif.sort_unstable();
notif.dedup();
}
let filter = bpf::assemble_filter(&notif, &deny, &args);
let notif_fd = match bpf::install_filter(&filter) {
Ok(fd) => fd,
Expand Down
99 changes: 93 additions & 6 deletions crates/sandlock-core/src/sandbox.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ pub struct Sandbox {
/// Optional callback invoked when a port bind is recorded.
#[allow(clippy::type_complexity)]
on_bind: Option<Box<dyn Fn(&std::collections::HashMap<u16, u16>) + Send + Sync>>,
/// User-supplied extra syscall handlers. Taken on spawn and
/// appended to the dispatch table after all builtin handlers.
extra_handlers: Vec<crate::seccomp::dispatch::ExtraHandler>,
}

impl Sandbox {
Expand Down Expand Up @@ -163,15 +166,14 @@ impl Sandbox {
extra_fds: Vec::new(),
http_acl_handle: None,
on_bind: None,
extra_handlers: Vec::new(),
}
}

/// One-shot: spawn a sandboxed process, wait for it to exit, and return
/// the result. Stdout and stderr are captured.
///
/// This is [`Self::run_with_extra_handlers`] with an empty handler list;
/// all spawning and waiting happens there.
///
/// # Errors
///
/// Returns whatever error [`Self::run_with_extra_handlers`] reports.
pub async fn run(policy: &Policy, cmd: &[&str]) -> Result<RunResult, SandlockError> {
    Self::run_with_extra_handlers(policy, cmd, Vec::new()).await
}

/// Run a sandboxed process with inherited stdio (interactive mode).
Expand All @@ -181,6 +183,68 @@ impl Sandbox {
sb.wait().await
}

/// One-shot run with user-supplied syscall handlers.
///
/// `extra_handlers` are registered in the dispatch table **after** all
/// builtin handlers for the same syscall. They observe the post-builtin
/// view (e.g. [`chroot`]-normalized paths on `openat`) and cannot be used
/// to bypass builtin confinement. See
/// [`crate::seccomp::dispatch::ExtraHandler`] for the ordering contract.
///
/// When called with an empty vector, this function is identical to
/// [`Self::run`].
///
/// # Errors
///
/// Returns an error before anything is spawned when
/// [`crate::seccomp::dispatch::validate_extras_against_policy`] rejects one
/// of the handlers. Errors from creating the sandbox, spawning the command,
/// or waiting for it are propagated unchanged.
///
/// # Example
///
/// ```ignore
/// use sandlock_core::{Policy, Sandbox};
/// use sandlock_core::seccomp::dispatch::{ExtraHandler, HandlerFn};
/// use sandlock_core::seccomp::notif::NotifAction;
///
/// # tokio_test::block_on(async {
/// let policy = Policy::builder().fs_read("/usr").build().unwrap();
///
/// let audit: HandlerFn = Box::new(|notif, _ctx, _fd| {
///     Box::pin(async move {
///         eprintln!("openat from pid {}", notif.pid);
///         NotifAction::Continue
///     })
/// });
///
/// let result = Sandbox::run_with_extra_handlers(
///     &policy,
///     &["/usr/bin/true"],
///     vec![ExtraHandler::new(libc::SYS_openat, audit)],
/// ).await.unwrap();
/// # });
/// ```
pub async fn run_with_extra_handlers(
    policy: &Policy,
    cmd: &[&str],
    extra_handlers: Vec<crate::seccomp::dispatch::ExtraHandler>,
) -> Result<RunResult, SandlockError> {
    // Reject extras that would weaken confinement (e.g. one registered
    // on a default-deny syscall). See
    // [`crate::seccomp::dispatch::validate_extras_against_policy`] for the
    // rationale. Done before fork so the caller gets a clear error
    // instead of a silently-broken sandbox.
    if let Err(nr) =
        crate::seccomp::dispatch::validate_extras_against_policy(&extra_handlers, policy)
    {
        return Err(SandboxError::Child(format!(
            "ExtraHandler on syscall {} conflicts with the default-deny list and \
             would let user code bypass it via SECCOMP_USER_NOTIF_FLAG_CONTINUE",
            nr
        ))
        .into());
    }

    // Same spawn-then-wait sequence as a plain run; the only difference is
    // that the handlers are stored on the sandbox before spawning.
    let mut sb = Self::new(policy)?;
    sb.extra_handlers = extra_handlers;
    sb.do_spawn(cmd, true).await?;
    sb.wait().await
}

/// Dry-run: spawn, wait, collect filesystem changes, then abort.
/// Returns the run result plus a list of changes that would have been
/// committed. The workdir is left unchanged.
Expand Down Expand Up @@ -838,8 +902,29 @@ impl Sandbox {
// Collect target fds from gather that must survive close_fds_above
let gather_keep_fds: Vec<i32> = self.extra_fds.iter().map(|&(target, _)| target).collect();

// Collect extra-handler syscall numbers for the BPF filter the child
// is about to install. This must be a plain `Vec<u32>` because the
// child does not need (and cannot use after exec) the heap-allocated
// closures stored in `self.extra_handlers` — only the registered
// syscall numbers must be added to the BPF notif list so the kernel
// raises notifications for them. The supervisor in the parent owns
// the closures themselves.
let extra_syscalls: Vec<u32> = self
.extra_handlers
.iter()
.map(|h| h.syscall_nr as u32)
.collect();

// This never returns.
context::confine_child(&self.policy, &c_cmd, &pipes, cow_config.as_ref(), nested, &gather_keep_fds);
context::confine_child(context::ChildSpawnArgs {
policy: &self.policy,
cmd: &c_cmd,
pipes: &pipes,
cow_config: cow_config.as_ref(),
nested,
keep_fds: &gather_keep_fds,
extra_syscalls: &extra_syscalls,
});
}

// ===== PARENT PROCESS =====
Expand Down Expand Up @@ -1044,9 +1129,11 @@ impl Sandbox {
notif_fd: notif_raw_fd,
});

// Spawn notif supervisor
// Spawn notif supervisor. `extra_handlers` is consumed here
// (moved into the supervisor task) because HandlerFn is not Clone.
let extra_handlers = std::mem::take(&mut self.extra_handlers);
self.notif_handle = Some(tokio::spawn(
notif::supervisor(notif_fd, ctx),
notif::supervisor(notif_fd, ctx, extra_handlers),
));

// Spawn load average sampling task (every 5s, like the kernel)
Expand Down
Loading