Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ and this project adheres to

### Added

- [#1](https://github.com/superserve-ai/firecracker/pull/1): Add opt-in
`--landlock-restrict-fs` flag to the jailer that uses the Linux Landlock LSM
(kernel >= 5.13) as a defense-in-depth mechanism. When enabled, the jailed
Firecracker process is restricted to only accessing files within the jail
directory, even if it escapes the `pivot_root` chroot via a kernel exploit.

- [#5323](https://github.com/firecracker-microvm/firecracker/pull/5323): Add
support for Vsock Unix domain socket path overriding on snapshot restore. More
information can be found in the
Expand Down
32 changes: 32 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/jailer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ bench = false
tracing = ["log-instrument", "utils/tracing"]

[dependencies]
landlock = "0.4"
libc = "0.2.183"
log-instrument = { path = "../log-instrument", optional = true }
regex = { version = "1.12.3", default-features = false, features = ["std"] }
Expand Down
26 changes: 26 additions & 0 deletions src/jailer/src/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use vmm_sys_util::syscall::SyscallReturnCode;
use crate::JailerError;
use crate::cgroup::{CgroupConfiguration, CgroupConfigurationBuilder};
use crate::chroot::chroot;
use crate::landlock;
use crate::resource_limits::{FSIZE_ARG, NO_FILE_ARG, ResourceLimits};

pub const PROC_MOUNTS: &str = "/proc/mounts";
Expand Down Expand Up @@ -124,6 +125,7 @@ pub struct Env {
netns: Option<String>,
daemonize: bool,
new_pid_ns: bool,
landlock: bool,
start_time_us: u64,
start_time_cpu_us: u64,
jailer_cpu_time_us: u64,
Expand Down Expand Up @@ -187,6 +189,8 @@ impl Env {

let new_pid_ns = arguments.flag_present("new-pid-ns");

let landlock = arguments.flag_present("landlock-restrict-fs");

// Optional arguments.
let mut cgroup_conf = None;
let parent_cgroup = match arguments.single_value("parent-cgroup") {
Expand Down Expand Up @@ -263,6 +267,7 @@ impl Env {
netns,
daemonize,
new_pid_ns,
landlock,
start_time_us,
start_time_cpu_us,
jailer_cpu_time_us: 0,
Expand Down Expand Up @@ -669,6 +674,14 @@ impl Env {
#[cfg(target_arch = "aarch64")]
self.copy_midr_el1_info()?;

// Prepare the Landlock ruleset before chrooting, while the jail directory is still
// reachable by its host path. The PathFd captures the inode and survives pivot_root.
let landlock_ruleset = if self.landlock {
Some(landlock::prepare_ruleset(self.chroot_dir())?)
} else {
None
};

// Jail self.
chroot(self.chroot_dir())?;

Expand Down Expand Up @@ -762,6 +775,12 @@ impl Env {
self.jailer_cpu_time_us += get_time_us(ClockType::ProcessCpu);
}

// Enforce the Landlock ruleset right before exec so that the restrictions are inherited
// by the jailed Firecracker process.
if let Some(ruleset) = landlock_ruleset {
landlock::enforce(ruleset)?;
}

// If specified, exec the provided binary into a new PID namespace.
if self.new_pid_ns {
self.exec_into_new_pid_ns(chroot_exec_file)
Expand Down Expand Up @@ -804,6 +823,7 @@ mod tests {
pub netns: Option<&'a str>,
pub daemonize: bool,
pub new_pid_ns: bool,
pub landlock: bool,
pub cgroups: Vec<&'a str>,
pub resource_limits: Vec<&'a str>,
pub parent_cgroup: Option<&'a str>,
Expand All @@ -823,6 +843,7 @@ mod tests {
netns: Some("zzzns"),
daemonize: true,
new_pid_ns: true,
landlock: false,
cgroups: vec!["cpu.shares=2", "cpuset.mems=0"],
resource_limits: vec!["no-file=1024", "fsize=1048575"],
parent_cgroup: None,
Expand Down Expand Up @@ -873,6 +894,10 @@ mod tests {
arg_vec.push("--new-pid-ns".to_string());
}

if arg_vals.landlock {
arg_vec.push("--landlock-restrict-fs".to_string());
}

if let Some(parent_cg) = arg_vals.parent_cgroup {
arg_vec.push("--parent-cgroup".to_string());
arg_vec.push(parent_cg.to_string());
Expand Down Expand Up @@ -1226,6 +1251,7 @@ mod tests {
netns: Some("zzzns"),
daemonize: false,
new_pid_ns: false,
landlock: false,
cgroups: Vec::new(),
resource_limits: Vec::new(),
parent_cgroup: None,
Expand Down
108 changes: 108 additions & 0 deletions src/jailer/src/landlock.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Landlock LSM integration for the Firecracker jailer.
//!
//! [Landlock](https://docs.kernel.org/userspace-api/landlock.html) is a Linux security module
//! (available since kernel 5.13) that allows a process to restrict its own file system access.
//! The jailer uses it as a defense-in-depth mechanism: even if a guest VM escapes the
//! `pivot_root` chroot, the Landlock rules (applied before the exec) prevent Firecracker from
//! accessing files outside the jail directory.
//!
//! Usage:
//! 1. Call [`prepare_ruleset`] **before** `chroot()` to open a file descriptor referencing the
//! jail directory by inode. The inode reference survives `pivot_root`.
//! 2. After all post-chroot setup is done, call [`enforce`] on the returned ruleset right before
//! `exec`. The restrictions are inherited by the exec'd process.

use std::path::Path;

use landlock::{
ABI, Access, AccessFs, PathBeneath, PathFd, Ruleset, RulesetAttr, RulesetCreated,
RulesetCreatedAttr,
};

use crate::JailerError;

/// Create a Landlock ruleset that grants all file-system access rights within `jail_dir` and
/// denies everything outside.
///
/// Must be called **before** `chroot()`/`pivot_root()` so that the `PathFd` captures the inode
/// of the jail directory while it is still reachable by its host path. The returned
/// [`RulesetCreated`] holds the open `PathFd` and can safely be passed across the `pivot_root`
/// boundary.
///
/// # Errors
///
/// Returns [`JailerError::Landlock`] if the kernel does not support Landlock,
/// if `jail_dir` cannot be opened, or if any ruleset syscall fails.
pub fn prepare_ruleset(jail_dir: &Path) -> Result<RulesetCreated, JailerError> {
let abi = ABI::V4;

let path_fd = PathFd::new(jail_dir).map_err(|err| {
JailerError::Landlock(format!(
"Failed to open Landlock path fd for {:?}: {}",
jail_dir, err
))
})?;

Ruleset::default()
.handle_access(AccessFs::from_all(abi))
.map_err(|err| JailerError::Landlock(format!("Failed to create Landlock ruleset: {err}")))?
.create()
.map_err(|err| JailerError::Landlock(format!("Failed to create Landlock ruleset: {err}")))?
.add_rule(PathBeneath::new(path_fd, AccessFs::from_all(abi)))
.map_err(|err| JailerError::Landlock(format!("Failed to add Landlock rule: {err}")))
}

/// Enforce a prepared Landlock ruleset on the current thread.
///
/// The restrictions are inherited across `exec`, so calling this right before `execve` will
/// confine the jailed Firecracker process to only the paths allowed by the ruleset.
///
/// # Errors
///
/// Returns [`JailerError::Landlock`] if `restrict_self` fails.
pub fn enforce(ruleset: RulesetCreated) -> Result<(), JailerError> {
ruleset.restrict_self().map_err(|err| {
JailerError::Landlock(format!("Failed to enforce Landlock ruleset: {err}"))
})?;
Ok(())
}

#[cfg(test)]
mod tests {
    // `Compatible` has to be in scope for `set_compatibility` to resolve as a method.
    use landlock::{CompatLevel, Compatible};
    use vmm_sys_util::tempdir::TempDir;

    use super::*;

    /// Returns true if the running kernel supports Landlock (any ABI version).
    ///
    /// Probes by creating a ruleset with the ABI v1 rights under `HardRequirement` rather than
    /// parsing the kernel version string, since Landlock may be backported to older kernels.
    fn is_landlock_supported() -> bool {
        Ruleset::default()
            .set_compatibility(CompatLevel::HardRequirement)
            .handle_access(AccessFs::from_all(ABI::V1))
            .and_then(|ruleset| ruleset.create())
            .is_ok()
    }

    /// Preparing a ruleset for an existing directory succeeds on Landlock-capable kernels.
    #[test]
    fn test_prepare_ruleset_valid_dir() {
        if !is_landlock_supported() {
            // Skip on kernels that don't support Landlock.
            return;
        }
        let tmp = TempDir::new_with_prefix("landlock_test_").unwrap();
        assert!(prepare_ruleset(tmp.as_path()).is_ok());
    }

    /// A missing jail directory is reported as a Landlock error.
    #[test]
    fn test_prepare_ruleset_nonexistent_dir() {
        let result = prepare_ruleset(Path::new("/nonexistent/path/for/landlock/test"));
        // Match on the variant rather than `unwrap_err()`, which would additionally require
        // the `Ok` type to implement `Debug`.
        assert!(matches!(result, Err(JailerError::Landlock(_))));
    }
}
8 changes: 8 additions & 0 deletions src/jailer/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use crate::env::Env;
mod cgroup;
mod chroot;
mod env;
mod landlock;
mod resource_limits;

const JAILER_VERSION: &str = env!("CARGO_PKG_VERSION");
Expand Down Expand Up @@ -152,6 +153,8 @@ pub enum JailerError {
UTF8Parsing(std::str::Utf8Error),
#[error("{}", format!("Failed to write to {:?}: {}", .0, .1).replace('\"', ""))]
Write(PathBuf, io::Error),
#[error("Landlock error: {0}")]
Landlock(String),
}

/// Create an ArgParser object which contains info about the command line argument parser and
Expand Down Expand Up @@ -231,6 +234,11 @@ pub fn build_arg_parser() -> ArgParser<'static> {
.takes_value(false)
.help("Print the binary version number."),
)
.arg(Argument::new("landlock-restrict-fs").takes_value(false).help(
"Restrict the jailed process's filesystem access to the jail directory using \
the Linux Landlock LSM. Requires kernel >= 5.13. If the kernel does not \
support Landlock, the jailer will exit with an error.",
))
}

// It's called writeln_special because we have to use this rather convoluted way of writing
Expand Down
5 changes: 5 additions & 0 deletions tests/framework/jailer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class JailerContext:
chroot_base = None
daemonize = None
new_pid_ns = None
landlock = None
extra_args = None
api_socket_name = None
cgroups = None
Expand All @@ -49,6 +50,7 @@ def __init__(
netns=None,
daemonize=True,
new_pid_ns=False,
landlock=False,
cgroups=None,
resource_limits=None,
cgroup_ver=None,
Expand All @@ -70,6 +72,7 @@ def __init__(
self.netns = netns
self.daemonize = daemonize
self.new_pid_ns = new_pid_ns
self.landlock = landlock
self.extra_args = extra_args
self.api_socket_name = DEFAULT_USOCKET_NAME
self.cgroups = cgroups or []
Expand Down Expand Up @@ -108,6 +111,8 @@ def construct_param_list(self):
jailer_param_list.append("--daemonize")
if self.new_pid_ns:
jailer_param_list.append("--new-pid-ns")
if self.landlock:
jailer_param_list.append("--landlock-restrict-fs")
if self.parent_cgroup:
jailer_param_list.extend(["--parent-cgroup", str(self.parent_cgroup)])
if self.cgroup_ver:
Expand Down