From cd416037f7e4ecdceb7d1f87919419854a975b95 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Wed, 23 Aug 2023 16:47:25 +0200 Subject: [PATCH] Experiment with pivot_root in child workers This is an attempt at an improved chroot jail that doesn't require root, but still allows us to use sockets and artifacts from the host. --- Cargo.lock | 3 +- node/core/pvf/common/Cargo.toml | 1 + node/core/pvf/common/src/worker/mod.rs | 143 ++++++++++++++++------- node/core/pvf/prepare-worker/src/lib.rs | 84 ++++++------- node/core/pvf/src/execute/worker_intf.rs | 6 +- node/core/pvf/src/prepare/worker_intf.rs | 20 +++- node/core/pvf/src/worker_intf.rs | 37 +++++- node/core/pvf/tests/it/main.rs | 2 +- node/core/pvf/tests/it/worker_common.rs | 10 +- 9 files changed, 206 insertions(+), 100 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 033ed8d58e9f..27c1abaf8935 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6601,7 +6601,7 @@ dependencies = [ "frame-support", "frame-system", "log", - "parity-scale-codec 3.6.4", + "parity-scale-codec", "scale-info", "sp-arithmetic", "sp-core", @@ -7934,6 +7934,7 @@ dependencies = [ "parity-scale-codec", "polkadot-parachain", "polkadot-primitives", + "rand 0.8.5", "sc-executor", "sc-executor-common", "sc-executor-wasmtime", diff --git a/node/core/pvf/common/Cargo.toml b/node/core/pvf/common/Cargo.toml index dfb490455b3d..8de983180851 100644 --- a/node/core/pvf/common/Cargo.toml +++ b/node/core/pvf/common/Cargo.toml @@ -29,6 +29,7 @@ sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master [target.'cfg(target_os = "linux")'.dependencies] landlock = "0.2.0" +rand = "0.8.5" [dev-dependencies] assert_matches = "1.4.0" diff --git a/node/core/pvf/common/src/worker/mod.rs b/node/core/pvf/common/src/worker/mod.rs index 08d5fcca4c1c..e40c03dcf7ae 100644 --- a/node/core/pvf/common/src/worker/mod.rs +++ b/node/core/pvf/common/src/worker/mod.rs @@ -117,52 +117,12 @@ pub fn worker_event_loop( let worker_pid = std::process::id(); gum::debug!(target: LOG_TARGET, %worker_pid, "starting pvf worker ({})", debug_id); - // Change root to be the artifact directory. - // - // SAFETY: TODO - #[cfg(target_os = "linux")] - { - use std::ffi::CString; - - let cache_path_c = CString::new(cache_path).unwrap(); - let root_relative_c = CString::new(".").unwrap(); - let oldroot_relative_c = CString::new("./oldroot").unwrap(); - let root_absolute_c = CString::new("/").unwrap(); - let oldroot_absolute_c = CString::new("/oldroot").unwrap(); - - unsafe { - // 1. `unshare` the user and the mount namespaces. - libc::unshare(libc::CLONE_NEWUSER); - libc::unshare(libc::CLONE_NEWNS); - - // 2. `pivot_root` to the artifact directory. - libc::chdir(cache_path_c.as_ptr()); - libc::mount( - root_relative_c.as_ptr(), - root_relative_c.as_ptr(), - std::ptr::null(), // ignored when MS_BIND is used - libc::MS_BIND | libc::MS_REC | libc::MS_NOEXEC, - std::ptr::null(), // ignored when MS_BIND is used - ); - libc::mkdir(oldroot_relative_c.as_ptr(), 0755); - libc::syscall( - libc::SYS_pivot_root, - root_relative_c.as_ptr(), - oldroot_relative_c.as_ptr(), - ); - - // 3. Change to the new root, `unmount2` and remove the old root. - libc::chdir(root_absolute_c.as_ptr()); - libc::umount2(oldroot_absolute_c.as_ptr(), libc::MNT_DETACH); - libc::rmdir(oldroot_absolute_c.as_ptr()); - } - } - // Check for a mismatch between the node and worker versions. if let (Some(node_version), Some(worker_version)) = (node_version, worker_version) { if node_version != worker_version { gum::error!( target: LOG_TARGET, + %debug_id, %worker_pid, %node_version, %worker_version, @@ -175,8 +135,28 @@ pub fn worker_event_loop( } } + #[cfg(target_os = "linux")] + { + if let Err(err_ctx) = change_root(cache_path) { + let err = io::Error::last_os_error(); + gum::error!( + target: LOG_TARGET, + %debug_id, + %worker_pid, + %err_ctx, + ?cache_path, + "Could not change root to be the cache path: {}", + err + ); + worker_shutdown_message(debug_id, worker_pid, err); + return + } + } + remove_env_vars(debug_id); + gum::info!(target: LOG_TARGET, "5. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::>()); + // Run the main worker loop. let rt = Runtime::new().expect("Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it."); let err = rt @@ -199,6 +179,87 @@ pub fn worker_event_loop( rt.shutdown_background(); } +/// Change root to be the artifact directory. +#[cfg(target_os = "linux")] +fn change_root(cache_path: &Path) -> Result<(), &'static str> { + use rand::{distributions::Alphanumeric, Rng}; + use std::{ffi::CString, os::unix::ffi::OsStrExt, ptr}; + + const RANDOM_LEN: usize = 10; + let mut buf = Vec::with_capacity(RANDOM_LEN); + buf.extend(rand::thread_rng().sample_iter(&Alphanumeric).take(RANDOM_LEN)); + let s = std::str::from_utf8(&buf) + .expect("the string is collected from a valid utf-8 sequence; qed"); + + let cache_path_str = match cache_path.to_str() { + Some(s) => s, + None => return Err("cache path is not valid UTF-8") + }; + let cache_path_c = CString::new(cache_path.as_os_str().as_bytes()).unwrap(); + let root_absolute_c = CString::new("/").unwrap(); + // Append a random string to prevent races and to avoid dealing with the dir already existing. + let oldroot_relative_c = CString::new(format!("{}/oldroot-{}", cache_path_str, s)).unwrap(); + let oldroot_absolute_c = CString::new(format!("/oldroot-{}", s)).unwrap(); + + // SAFETY: TODO + unsafe { + // 1. `unshare` the user and the mount namespaces. + if libc::unshare(libc::CLONE_NEWUSER) < 0 { + return Err("unshare user namespace") + } + if libc::unshare(libc::CLONE_NEWNS) < 0 { + return Err("unshare mount namespace") + } + + // 2. `pivot_root` to the artifact directory. + gum::info!(target: LOG_TARGET, "1. {:?}", std::env::current_dir()); + gum::info!(target: LOG_TARGET, "1.5. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::>()); + // TODO: Ensure that 'new_root' and its parent mount don't have shared propagation. + if libc::mount(ptr::null(), root_absolute_c.as_ptr(), ptr::null(), libc::MS_REC | libc::MS_PRIVATE, ptr::null()) < 0 { + return Err("mount MS_PRIVATE") + } + if libc::mount( + cache_path_c.as_ptr(), + cache_path_c.as_ptr(), + ptr::null(), // ignored when MS_BIND is used + libc::MS_BIND | libc::MS_REC | libc::MS_NOEXEC, + ptr::null(), // ignored when MS_BIND is used + ) < 0 + { + return Err("mount MS_BIND") + } + if libc::mkdir(oldroot_relative_c.as_ptr(), 0755) < 0 { + return Err("mkdir oldroot") + } + if libc::syscall( + libc::SYS_pivot_root, + cache_path_c.as_ptr(), + oldroot_relative_c.as_ptr(), + ) < 0 + { + return Err("pivot_root") + } + + // 3. Change to the new root, `unmount2` and remove the old root. + if libc::chdir(root_absolute_c.as_ptr()) < 0 { + return Err("chdir to new root") + } + gum::info!(target: LOG_TARGET, "2. {:?}", std::env::current_dir()); + gum::info!(target: LOG_TARGET, "3. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::>()); + if libc::umount2(oldroot_absolute_c.as_ptr(), libc::MNT_DETACH) < 0 { + return Err("umount2 the oldroot") + } + if libc::rmdir(oldroot_absolute_c.as_ptr()) < 0 { + return Err("rmdir the oldroot") + } + gum::info!(target: LOG_TARGET, "4. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::>()); + + // TODO: do some assertions + } + + Ok(()) +} + /// Delete all env vars to prevent malicious code from accessing them. fn remove_env_vars(debug_id: &'static str) { for (key, value) in std::env::vars_os() { diff --git a/node/core/pvf/prepare-worker/src/lib.rs b/node/core/pvf/prepare-worker/src/lib.rs index b0780c5fe4bd..8eea18edb449 100644 --- a/node/core/pvf/prepare-worker/src/lib.rs +++ b/node/core/pvf/prepare-worker/src/lib.rs @@ -150,46 +150,50 @@ pub fn worker_entrypoint( |mut stream| async move { let worker_pid = std::process::id(); + gum::info!(target: LOG_TARGET, "10. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::>()); + let Handshake { landlock_enabled } = recv_handshake(&mut stream).await?; + gum::info!(target: LOG_TARGET, "11. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::>()); + // Try to enable landlock. - { - #[cfg(target_os = "linux")] - let landlock_status = { - use polkadot_node_core_pvf_common::worker::security::landlock::{ - path_beneath_rules, try_restrict, Access, AccessFs, LANDLOCK_ABI, - }; - - // Allow an exception for writing to the artifact cache, with no allowance for - // listing the directory contents. Since we prepend artifact names with a random - // hash, this means attackers can't discover artifacts apart from the current - // job. - try_restrict(path_beneath_rules( - &[cache_path], - AccessFs::from_write(LANDLOCK_ABI), - )) - .map(LandlockStatus::from_ruleset_status) - .map_err(|e| e.to_string()) - }; - #[cfg(not(target_os = "linux"))] - let landlock_status: Result = Ok(LandlockStatus::NotEnforced); - - // Error if the host determined that landlock is fully enabled and we couldn't fully - // enforce it here. - if landlock_enabled && !matches!(landlock_status, Ok(LandlockStatus::FullyEnforced)) - { - gum::warn!( - target: LOG_TARGET, - %worker_pid, - "could not fully enable landlock: {:?}", - landlock_status - ); - return Err(io::Error::new( - io::ErrorKind::Other, - format!("could not fully enable landlock: {:?}", landlock_status), - )) - } - } + // { + // #[cfg(target_os = "linux")] + // let landlock_status = { + // use polkadot_node_core_pvf_common::worker::security::landlock::{ + // path_beneath_rules, try_restrict, Access, AccessFs, LANDLOCK_ABI, + // }; + + // // Allow an exception for writing to the artifact cache, with no allowance for + // // listing the directory contents. Since we prepend artifact names with a random + // // hash, this means attackers can't discover artifacts apart from the current + // // job. + // try_restrict(path_beneath_rules( + // &[cache_path], + // AccessFs::from_write(LANDLOCK_ABI), + // )) + // .map(LandlockStatus::from_ruleset_status) + // .map_err(|e| e.to_string()) + // }; + // #[cfg(not(target_os = "linux"))] + // let landlock_status: Result = Ok(LandlockStatus::NotEnforced); + + // // Error if the host determined that landlock is fully enabled and we couldn't fully + // // enforce it here. + // if landlock_enabled && !matches!(landlock_status, Ok(LandlockStatus::FullyEnforced)) + // { + // gum::warn!( + // target: LOG_TARGET, + // %worker_pid, + // "could not fully enable landlock: {:?}", + // landlock_status + // ); + // return Err(io::Error::new( + // io::ErrorKind::Other, + // format!("could not fully enable landlock: {:?}", landlock_status), + // )) + // } + // } loop { let (pvf, temp_artifact_dest) = recv_request(&mut stream).await?; @@ -199,9 +203,9 @@ pub fn worker_entrypoint( "worker: preparing artifact", ); - if !temp_artifact_dest.starts_with(cache_path) { - return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {temp_artifact_dest:?} that does not belong to expected cache path {cache_path:?}"))) - } + // if !temp_artifact_dest.starts_with(cache_path) { + // return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {temp_artifact_dest:?} that does not belong to expected cache path {cache_path:?}"))) + // } let preparation_timeout = pvf.prep_timeout(); let prepare_job_kind = pvf.prep_kind(); diff --git a/node/core/pvf/src/execute/worker_intf.rs b/node/core/pvf/src/execute/worker_intf.rs index 77b7393fa71d..6e39c6a3eb9e 100644 --- a/node/core/pvf/src/execute/worker_intf.rs +++ b/node/core/pvf/src/execute/worker_intf.rs @@ -49,17 +49,17 @@ pub async fn spawn( cache_path: &Path, landlock_enabled: bool, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { - let cache_path = match cache_path.to_str() { + let cache_path_str = match cache_path.to_str() { Some(a) => a, None => return Err(SpawnErr::InvalidCachePath(cache_path.to_owned())), }; - let mut extra_args = vec!["execute-worker", "--cache-path", cache_path]; + let mut extra_args = vec!["execute-worker", "--cache-path", cache_path_str]; if let Some(node_version) = node_version { extra_args.extend_from_slice(&["--node-impl-version", node_version]); } let (mut idle_worker, worker_handle) = - spawn_with_program_path("execute", program_path, &extra_args, spawn_timeout).await?; + spawn_with_program_path("execute", program_path, Some(cache_path), &extra_args, spawn_timeout).await?; send_handshake(&mut idle_worker.stream, Handshake { executor_params, landlock_enabled }) .await .map_err(|error| { diff --git a/node/core/pvf/src/prepare/worker_intf.rs b/node/core/pvf/src/prepare/worker_intf.rs index 49ee145b6cbc..52fb8b75a1f5 100644 --- a/node/core/pvf/src/prepare/worker_intf.rs +++ b/node/core/pvf/src/prepare/worker_intf.rs @@ -49,17 +49,23 @@ pub async fn spawn( cache_path: &Path, landlock_enabled: bool, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { - let cache_path = match cache_path.to_str() { + let cache_path_str = match cache_path.to_str() { Some(a) => a, None => return Err(SpawnErr::InvalidCachePath(cache_path.to_owned())), }; - let mut extra_args = vec!["prepare-worker", "--cache-path", cache_path]; + let mut extra_args = vec!["prepare-worker", "--cache-path", cache_path_str]; if let Some(node_version) = node_version { extra_args.extend_from_slice(&["--node-impl-version", node_version]); } - let (mut idle_worker, worker_handle) = - spawn_with_program_path("prepare", program_path, &extra_args, spawn_timeout).await?; + let (mut idle_worker, worker_handle) = spawn_with_program_path( + "prepare", + program_path, + Some(cache_path), + &extra_args, + spawn_timeout, + ) + .await?; send_handshake(&mut idle_worker.stream, Handshake { landlock_enabled }) .await .map_err(|error| { @@ -117,6 +123,12 @@ pub async fn start_work( ); with_tmp_file(stream, pid, cache_path, |tmp_file, mut stream| async move { + // Linux: Pass the socket path relative to the cache_path (what the child thinks is root). + #[cfg(target_os = "linux")] + let tmp_file = Path::new(".").with_file_name( + tmp_file.file_name().expect("tmp files are created with a filename; qed"), + ); + let preparation_timeout = pvf.prep_timeout(); if let Err(err) = send_request(&mut stream, pvf, &tmp_file).await { gum::warn!( diff --git a/node/core/pvf/src/worker_intf.rs b/node/core/pvf/src/worker_intf.rs index c5061b3e5251..81fe79ae4349 100644 --- a/node/core/pvf/src/worker_intf.rs +++ b/node/core/pvf/src/worker_intf.rs @@ -46,6 +46,9 @@ pub const JOB_TIMEOUT_WALL_CLOCK_FACTOR: u32 = 4; /// /// - `program_path`: The path to the program. /// +/// - `socket_dir_path`: An optional path to the dir where the socket should be created, if `None` +/// use a temp dir. +/// /// - `extra_args`: Optional extra CLI arguments to the program. NOTE: Should only contain data /// required before the handshake, like node/worker versions for the version check. Other data /// should go through the handshake. @@ -55,11 +58,12 @@ pub const JOB_TIMEOUT_WALL_CLOCK_FACTOR: u32 = 4; pub async fn spawn_with_program_path( debug_id: &'static str, program_path: impl Into, + socket_dir_path: Option<&Path>, extra_args: &[&str], spawn_timeout: Duration, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { let program_path = program_path.into(); - with_transient_socket_path(debug_id, |socket_path| { + with_transient_socket_path(debug_id, socket_dir_path, |socket_path| { let socket_path = socket_path.to_owned(); let extra_args: Vec = extra_args.iter().map(|arg| arg.to_string()).collect(); @@ -121,14 +125,23 @@ pub async fn spawn_with_program_path( .await } -async fn with_transient_socket_path(debug_id: &'static str, f: F) -> Result +async fn with_transient_socket_path( + debug_id: &'static str, + socket_dir_path: Option<&Path>, + f: F, +) -> Result where F: FnOnce(&Path) -> Fut, Fut: futures::Future> + 'static, { - let socket_path = tmpfile(&format!("pvf-host-{}", debug_id)) - .await - .map_err(|_| SpawnErr::TmpFile)?; + let socket_prefix = format!("pvf-host-{}-", debug_id); + let socket_path = if let Some(socket_dir_path) = socket_dir_path { + tmpfile_in(&socket_prefix, socket_dir_path).await + } else { + tmpfile(&socket_prefix).await + } + .map_err(|_| SpawnErr::TmpFile)?; + let result = f(&socket_path).await; // Best effort to remove the socket file. Under normal circumstances the socket will be removed @@ -235,10 +248,22 @@ impl WorkerHandle { extra_args: &[String], socket_path: impl AsRef, ) -> io::Result { + // Linux: Pass the socket path relative to the cache_path (what the child thinks is root). + #[cfg(target_os = "linux")] + let socket_path = Path::new(".").with_file_name( + socket_path + .as_ref() + .file_name() + .expect("socket paths are created with a filename; qed"), + ); + // Non-Linux: We are only able to pivot-root on Linux, so pass the socket path as-is. + #[cfg(not(target_os = "linux"))] + let socket_path = socket_path.as_ref().as_os_str(); + let mut child = process::Command::new(program.as_ref()) .args(extra_args) .arg("--socket-path") - .arg(socket_path.as_ref().as_os_str()) + .arg(socket_path) .stdout(std::process::Stdio::piped()) .kill_on_drop(true) .spawn()?; diff --git a/node/core/pvf/tests/it/main.rs b/node/core/pvf/tests/it/main.rs index 72c459c2f632..0f30efefc4cd 100644 --- a/node/core/pvf/tests/it/main.rs +++ b/node/core/pvf/tests/it/main.rs @@ -258,7 +258,7 @@ async fn execute_queue_doesnt_stall_with_varying_executor_params() { #[tokio::test] async fn deleting_prepared_artifact_does_not_dispute() { let host = TestHost::new(); - let cache_dir = host.cache_dir.path().clone(); + let cache_dir = host.cache_dir.path(); let result = host .validate_candidate( diff --git a/node/core/pvf/tests/it/worker_common.rs b/node/core/pvf/tests/it/worker_common.rs index 7aa67a853ee5..9fb8be3fc08a 100644 --- a/node/core/pvf/tests/it/worker_common.rs +++ b/node/core/pvf/tests/it/worker_common.rs @@ -24,7 +24,7 @@ use crate::PUPPET_EXE; #[tokio::test] async fn spawn_immediate_exit() { let result = - spawn_with_program_path("integration-test", PUPPET_EXE, &["exit"], Duration::from_secs(2)) + spawn_with_program_path("integration-test", PUPPET_EXE, None, &["exit"], Duration::from_secs(2)) .await; assert!(matches!(result, Err(SpawnErr::AcceptTimeout))); } @@ -32,7 +32,7 @@ async fn spawn_immediate_exit() { #[tokio::test] async fn spawn_timeout() { let result = - spawn_with_program_path("integration-test", PUPPET_EXE, &["sleep"], Duration::from_secs(2)) + spawn_with_program_path("integration-test", PUPPET_EXE, None, &["sleep"], Duration::from_secs(2)) .await; assert!(matches!(result, Err(SpawnErr::AcceptTimeout))); } @@ -43,6 +43,7 @@ async fn should_fail_without_cache_path() { let result = spawn_with_program_path( "integration-test", PUPPET_EXE, + None, &["prepare-worker"], Duration::from_secs(2), ) @@ -53,12 +54,13 @@ async fn should_fail_without_cache_path() { #[tokio::test] async fn should_connect() { let cache_path = tempfile::tempdir().unwrap(); - let cache_path = cache_path.path().to_str().unwrap(); + let cache_path_str = cache_path.path().to_str().unwrap(); let _ = spawn_with_program_path( "integration-test", PUPPET_EXE, - &["prepare-worker", "--cache-path", cache_path], + Some(cache_path.path()), + &["prepare-worker", "--cache-path", cache_path_str], Duration::from_secs(2), ) .await