Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Commit

Permalink
Experiment with pivot_root in child workers
Browse files Browse the repository at this point in the history
This is an attempt at an improved chroot jail that doesn't require root, but
still allows us to use sockets and artifacts from the host.
  • Loading branch information
mrcnski committed Aug 23, 2023
1 parent 8807302 commit cd41603
Show file tree
Hide file tree
Showing 9 changed files with 206 additions and 100 deletions.
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions node/core/pvf/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master

[target.'cfg(target_os = "linux")'.dependencies]
landlock = "0.2.0"
rand = "0.8.5"

[dev-dependencies]
assert_matches = "1.4.0"
Expand Down
143 changes: 102 additions & 41 deletions node/core/pvf/common/src/worker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,52 +117,12 @@ pub fn worker_event_loop<F, Fut>(
let worker_pid = std::process::id();
gum::debug!(target: LOG_TARGET, %worker_pid, "starting pvf worker ({})", debug_id);

// Change root to be the artifact directory.
//
// SAFETY: TODO
#[cfg(target_os = "linux")]
{
use std::ffi::CString;

let cache_path_c = CString::new(cache_path).unwrap();
let root_relative_c = CString::new(".").unwrap();
let oldroot_relative_c = CString::new("./oldroot").unwrap();
let root_absolute_c = CString::new("/").unwrap();
let oldroot_absolute_c = CString::new("/oldroot").unwrap();

unsafe {
// 1. `unshare` the user and the mount namespaces.
libc::unshare(libc::CLONE_NEWUSER);
libc::unshare(libc::CLONE_NEWNS);

// 2. `pivot_root` to the artifact directory.
libc::chdir(cache_path_c.as_ptr());
libc::mount(
root_relative_c.as_ptr(),
root_relative_c.as_ptr(),
std::ptr::null(), // ignored when MS_BIND is used
libc::MS_BIND | libc::MS_REC | libc::MS_NOEXEC,
std::ptr::null(), // ignored when MS_BIND is used
);
libc::mkdir(oldroot_relative_c.as_ptr(), 0755);
libc::syscall(
libc::SYS_pivot_root,
root_relative_c.as_ptr(),
oldroot_relative_c.as_ptr(),
);

// 3. Change to the new root, `unmount2` and remove the old root.
libc::chdir(root_absolute_c.as_ptr());
libc::umount2(oldroot_absolute_c.as_ptr(), libc::MNT_DETACH);
libc::rmdir(oldroot_absolute_c.as_ptr());
}
}

// Check for a mismatch between the node and worker versions.
if let (Some(node_version), Some(worker_version)) = (node_version, worker_version) {
if node_version != worker_version {
gum::error!(
target: LOG_TARGET,
%debug_id,
%worker_pid,
%node_version,
%worker_version,
Expand All @@ -175,8 +135,28 @@ pub fn worker_event_loop<F, Fut>(
}
}

#[cfg(target_os = "linux")]
{
if let Err(err_ctx) = change_root(cache_path) {
let err = io::Error::last_os_error();
gum::error!(
target: LOG_TARGET,
%debug_id,
%worker_pid,
%err_ctx,
?cache_path,
"Could not change root to be the cache path: {}",
err
);
worker_shutdown_message(debug_id, worker_pid, err);
return
}
}

remove_env_vars(debug_id);

gum::info!(target: LOG_TARGET, "5. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::<Vec<PathBuf>>());

// Run the main worker loop.
let rt = Runtime::new().expect("Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it.");
let err = rt
Expand All @@ -199,6 +179,87 @@ pub fn worker_event_loop<F, Fut>(
rt.shutdown_background();
}

/// Change root to be the artifact directory.
#[cfg(target_os = "linux")]
fn change_root(cache_path: &Path) -> Result<(), &'static str> {
use rand::{distributions::Alphanumeric, Rng};
use std::{ffi::CString, os::unix::ffi::OsStrExt, ptr};

const RANDOM_LEN: usize = 10;
let mut buf = Vec::with_capacity(RANDOM_LEN);
buf.extend(rand::thread_rng().sample_iter(&Alphanumeric).take(RANDOM_LEN));
let s = std::str::from_utf8(&buf)
.expect("the string is collected from a valid utf-8 sequence; qed");

let cache_path_str = match cache_path.to_str() {
Some(s) => s,
None => return Err("cache path is not valid UTF-8")
};
let cache_path_c = CString::new(cache_path.as_os_str().as_bytes()).unwrap();
let root_absolute_c = CString::new("/").unwrap();
// Append a random string to prevent races and to avoid dealing with the dir already existing.
let oldroot_relative_c = CString::new(format!("{}/oldroot-{}", cache_path_str, s)).unwrap();
let oldroot_absolute_c = CString::new(format!("/oldroot-{}", s)).unwrap();

// SAFETY: TODO
unsafe {
// 1. `unshare` the user and the mount namespaces.
if libc::unshare(libc::CLONE_NEWUSER) < 0 {
return Err("unshare user namespace")
}
if libc::unshare(libc::CLONE_NEWNS) < 0 {
return Err("unshare mount namespace")
}

// 2. `pivot_root` to the artifact directory.
gum::info!(target: LOG_TARGET, "1. {:?}", std::env::current_dir());
gum::info!(target: LOG_TARGET, "1.5. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::<Vec<PathBuf>>());
// TODO: Ensure that 'new_root' and its parent mount don't have shared propagation.
if libc::mount(ptr::null(), root_absolute_c.as_ptr(), ptr::null(), libc::MS_REC | libc::MS_PRIVATE, ptr::null()) < 0 {
return Err("mount MS_PRIVATE")
}
if libc::mount(
cache_path_c.as_ptr(),
cache_path_c.as_ptr(),
ptr::null(), // ignored when MS_BIND is used
libc::MS_BIND | libc::MS_REC | libc::MS_NOEXEC,
ptr::null(), // ignored when MS_BIND is used
) < 0
{
return Err("mount MS_BIND")
}
if libc::mkdir(oldroot_relative_c.as_ptr(), 0755) < 0 {
return Err("mkdir oldroot")
}
if libc::syscall(
libc::SYS_pivot_root,
cache_path_c.as_ptr(),
oldroot_relative_c.as_ptr(),
) < 0
{
return Err("pivot_root")
}

// 3. Change to the new root, `unmount2` and remove the old root.
if libc::chdir(root_absolute_c.as_ptr()) < 0 {
return Err("chdir to new root")
}
gum::info!(target: LOG_TARGET, "2. {:?}", std::env::current_dir());
gum::info!(target: LOG_TARGET, "3. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::<Vec<PathBuf>>());
if libc::umount2(oldroot_absolute_c.as_ptr(), libc::MNT_DETACH) < 0 {
return Err("umount2 the oldroot")
}
if libc::rmdir(oldroot_absolute_c.as_ptr()) < 0 {
return Err("rmdir the oldroot")
}
gum::info!(target: LOG_TARGET, "4. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::<Vec<PathBuf>>());

// TODO: do some assertions
}

Ok(())
}

/// Delete all env vars to prevent malicious code from accessing them.
fn remove_env_vars(debug_id: &'static str) {
for (key, value) in std::env::vars_os() {
Expand Down
84 changes: 44 additions & 40 deletions node/core/pvf/prepare-worker/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,46 +150,50 @@ pub fn worker_entrypoint(
|mut stream| async move {
let worker_pid = std::process::id();

gum::info!(target: LOG_TARGET, "10. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::<Vec<PathBuf>>());

let Handshake { landlock_enabled } = recv_handshake(&mut stream).await?;

gum::info!(target: LOG_TARGET, "11. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::<Vec<PathBuf>>());

// Try to enable landlock.
{
#[cfg(target_os = "linux")]
let landlock_status = {
use polkadot_node_core_pvf_common::worker::security::landlock::{
path_beneath_rules, try_restrict, Access, AccessFs, LANDLOCK_ABI,
};

// Allow an exception for writing to the artifact cache, with no allowance for
// listing the directory contents. Since we prepend artifact names with a random
// hash, this means attackers can't discover artifacts apart from the current
// job.
try_restrict(path_beneath_rules(
&[cache_path],
AccessFs::from_write(LANDLOCK_ABI),
))
.map(LandlockStatus::from_ruleset_status)
.map_err(|e| e.to_string())
};
#[cfg(not(target_os = "linux"))]
let landlock_status: Result<LandlockStatus, String> = Ok(LandlockStatus::NotEnforced);

// Error if the host determined that landlock is fully enabled and we couldn't fully
// enforce it here.
if landlock_enabled && !matches!(landlock_status, Ok(LandlockStatus::FullyEnforced))
{
gum::warn!(
target: LOG_TARGET,
%worker_pid,
"could not fully enable landlock: {:?}",
landlock_status
);
return Err(io::Error::new(
io::ErrorKind::Other,
format!("could not fully enable landlock: {:?}", landlock_status),
))
}
}
// {
// #[cfg(target_os = "linux")]
// let landlock_status = {
// use polkadot_node_core_pvf_common::worker::security::landlock::{
// path_beneath_rules, try_restrict, Access, AccessFs, LANDLOCK_ABI,
// };

// // Allow an exception for writing to the artifact cache, with no allowance for
// // listing the directory contents. Since we prepend artifact names with a random
// // hash, this means attackers can't discover artifacts apart from the current
// // job.
// try_restrict(path_beneath_rules(
// &[cache_path],
// AccessFs::from_write(LANDLOCK_ABI),
// ))
// .map(LandlockStatus::from_ruleset_status)
// .map_err(|e| e.to_string())
// };
// #[cfg(not(target_os = "linux"))]
// let landlock_status: Result<LandlockStatus, String> = Ok(LandlockStatus::NotEnforced);

// // Error if the host determined that landlock is fully enabled and we couldn't fully
// // enforce it here.
// if landlock_enabled && !matches!(landlock_status, Ok(LandlockStatus::FullyEnforced))
// {
// gum::warn!(
// target: LOG_TARGET,
// %worker_pid,
// "could not fully enable landlock: {:?}",
// landlock_status
// );
// return Err(io::Error::new(
// io::ErrorKind::Other,
// format!("could not fully enable landlock: {:?}", landlock_status),
// ))
// }
// }

loop {
let (pvf, temp_artifact_dest) = recv_request(&mut stream).await?;
Expand All @@ -199,9 +203,9 @@ pub fn worker_entrypoint(
"worker: preparing artifact",
);

if !temp_artifact_dest.starts_with(cache_path) {
return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {temp_artifact_dest:?} that does not belong to expected cache path {cache_path:?}")))
}
// if !temp_artifact_dest.starts_with(cache_path) {
// return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {temp_artifact_dest:?} that does not belong to expected cache path {cache_path:?}")))
// }

let preparation_timeout = pvf.prep_timeout();
let prepare_job_kind = pvf.prep_kind();
Expand Down
6 changes: 3 additions & 3 deletions node/core/pvf/src/execute/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,17 @@ pub async fn spawn(
cache_path: &Path,
landlock_enabled: bool,
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
let cache_path = match cache_path.to_str() {
let cache_path_str = match cache_path.to_str() {
Some(a) => a,
None => return Err(SpawnErr::InvalidCachePath(cache_path.to_owned())),
};
let mut extra_args = vec!["execute-worker", "--cache-path", cache_path];
let mut extra_args = vec!["execute-worker", "--cache-path", cache_path_str];
if let Some(node_version) = node_version {
extra_args.extend_from_slice(&["--node-impl-version", node_version]);
}

let (mut idle_worker, worker_handle) =
spawn_with_program_path("execute", program_path, &extra_args, spawn_timeout).await?;
spawn_with_program_path("execute", program_path, Some(cache_path), &extra_args, spawn_timeout).await?;
send_handshake(&mut idle_worker.stream, Handshake { executor_params, landlock_enabled })
.await
.map_err(|error| {
Expand Down
20 changes: 16 additions & 4 deletions node/core/pvf/src/prepare/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,23 @@ pub async fn spawn(
cache_path: &Path,
landlock_enabled: bool,
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
let cache_path = match cache_path.to_str() {
let cache_path_str = match cache_path.to_str() {
Some(a) => a,
None => return Err(SpawnErr::InvalidCachePath(cache_path.to_owned())),
};
let mut extra_args = vec!["prepare-worker", "--cache-path", cache_path];
let mut extra_args = vec!["prepare-worker", "--cache-path", cache_path_str];
if let Some(node_version) = node_version {
extra_args.extend_from_slice(&["--node-impl-version", node_version]);
}

let (mut idle_worker, worker_handle) =
spawn_with_program_path("prepare", program_path, &extra_args, spawn_timeout).await?;
let (mut idle_worker, worker_handle) = spawn_with_program_path(
"prepare",
program_path,
Some(cache_path),
&extra_args,
spawn_timeout,
)
.await?;
send_handshake(&mut idle_worker.stream, Handshake { landlock_enabled })
.await
.map_err(|error| {
Expand Down Expand Up @@ -117,6 +123,12 @@ pub async fn start_work(
);

with_tmp_file(stream, pid, cache_path, |tmp_file, mut stream| async move {
// Linux: Pass the socket path relative to the cache_path (what the child thinks is root).
#[cfg(target_os = "linux")]
let tmp_file = Path::new(".").with_file_name(
tmp_file.file_name().expect("tmp files are created with a filename; qed"),
);

let preparation_timeout = pvf.prep_timeout();
if let Err(err) = send_request(&mut stream, pvf, &tmp_file).await {
gum::warn!(
Expand Down
Loading

0 comments on commit cd41603

Please sign in to comment.