From 2b68ba15b7b5af2f59e955cb3c92c143c8fa2d5d Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Tue, 17 Sep 2024 08:59:59 +0900 Subject: [PATCH] feat: make chunk size user defined BREAKING CHANGE: all APIs updated to have min/max_encryptable bytes passed in by the user. This allows for varying the use of the lib. --- benches/lib.rs | 10 ++- examples/basic_encryptor.rs | 8 +- src/chunk.rs | 21 +++-- src/data_map.rs | 2 +- src/lib.rs | 158 +++++++++++++++++++++++------------- src/tests.rs | 119 +++++++++++++++++++-------- tests/lib.rs | 9 +- 7 files changed, 223 insertions(+), 104 deletions(-) diff --git a/benches/lib.rs b/benches/lib.rs index e3f67a5af..275e8471e 100644 --- a/benches/lib.rs +++ b/benches/lib.rs @@ -51,6 +51,11 @@ use std::time::Duration; // https://bheisler.github.io/criterion.rs/book/analysis.html#measurement const SAMPLE_SIZE: usize = 20; +/// The maximum size (before compression) of an individual chunk of a file, defined as 1024kiB. +const MAX_CHUNK_SIZE: usize = 1024 * 1024; +/// The minimum size (before compression) of an individual chunk of a file, defined as 1B. 
+const MIN_CHUNK_SIZE: usize = 1; + fn custom_criterion() -> Criterion { Criterion::default() .measurement_time(Duration::from_secs(40)) @@ -63,7 +68,8 @@ fn write(b: &mut Bencher<'_>, bytes_len: usize) { || random_bytes(bytes_len), // actual benchmark |bytes| { - let (_data_map, _encrypted_chunks) = encrypt(bytes).unwrap(); + let (_data_map, _encrypted_chunks) = + encrypt(bytes, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap(); }, BatchSize::SmallInput, ); @@ -72,7 +78,7 @@ fn write(b: &mut Bencher<'_>, bytes_len: usize) { fn read(b: &mut Bencher, bytes_len: usize) { b.iter_batched( // the setup - || encrypt(random_bytes(bytes_len)).unwrap(), + || encrypt(random_bytes(bytes_len), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap(), // actual benchmark |(data_map, encrypted_chunks)| { let _raw_data = decrypt_full_set(&data_map, &encrypted_chunks).unwrap(); diff --git a/examples/basic_encryptor.rs b/examples/basic_encryptor.rs index ee1fb2b49..f26d4a85b 100644 --- a/examples/basic_encryptor.rs +++ b/examples/basic_encryptor.rs @@ -89,6 +89,11 @@ fn file_name(name: XorName) -> String { string } +/// The maximum size (before compression) of an individual chunk of a file, defined as 1024kiB. +const MAX_CHUNK_SIZE: usize = 1024 * 1024; +/// The minimum size (before compression) of an individual chunk of a file, defined as 1B. +const MIN_CHUNK_SIZE: usize = 1; + #[derive(Clone)] struct DiskBasedStorage { pub(crate) storage_path: String, @@ -147,7 +152,8 @@ async fn main() { Err(error) => return println!("{}", error), } - let (data_map, encrypted_chunks) = encrypt(Bytes::from(data)).unwrap(); + let (data_map, encrypted_chunks) = + encrypt(Bytes::from(data), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap(); let result = encrypted_chunks .par_iter() diff --git a/src/chunk.rs b/src/chunk.rs index ab65225c0..af644fd9f 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -32,15 +32,20 @@ pub struct RawChunk { /// Hash all the chunks. /// Creates [num cores] batches. 
-pub(crate) fn batch_chunks(bytes: Bytes) -> (usize, Vec) { +pub(crate) fn batch_chunks( + bytes: Bytes, + min_chunk_size: usize, + max_chunk_size: usize, +) -> (usize, Vec) { let data_size = bytes.len(); - let num_chunks = get_num_chunks(data_size); + let num_chunks = get_num_chunks(data_size, min_chunk_size, max_chunk_size); let raw_chunks: Vec<_> = (0..num_chunks) .map(|index| (index, bytes.clone())) .par_bridge() .map(|(index, bytes)| { - let (start, end) = get_start_end_positions(data_size, index); + let (start, end) = + get_start_end_positions(data_size, index, min_chunk_size, max_chunk_size); let data = bytes.slice(start..end); let hash = XorName::from_content(data.as_ref()); RawChunk { index, data, hash } @@ -63,10 +68,14 @@ pub(crate) fn batch_chunks(bytes: Bytes) -> (usize, Vec) { } /// Calculate (start_position, end_position) for each chunk for the input file size -pub(crate) fn batch_positions(data_size: usize) -> Vec<(usize, usize)> { - let num_chunks = get_num_chunks(data_size); +pub(crate) fn batch_positions( + data_size: usize, + min_chunk_size: usize, + max_chunk_size: usize, +) -> Vec<(usize, usize)> { + let num_chunks = get_num_chunks(data_size, min_chunk_size, max_chunk_size); (0..num_chunks) - .map(|index| get_start_end_positions(data_size, index)) + .map(|index| get_start_end_positions(data_size, index, min_chunk_size, max_chunk_size)) .collect() } diff --git a/src/data_map.rs b/src/data_map.rs index c96019d0a..e099138d5 100644 --- a/src/data_map.rs +++ b/src/data_map.rs @@ -13,7 +13,7 @@ use xor_name::XorName; /// Holds the information that is required to recover the content of the encrypted file. /// This is held as a vector of `ChunkInfo`, i.e. a list of the file's chunk hashes. -/// Only files larger than 3072 bytes (3 * MIN_CHUNK_SIZE) can be self-encrypted. +/// Only files larger than 3072 bytes (3 * chunk size) can be self-encrypted. /// Smaller files will have to be batched together. 
#[derive(Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Clone)] pub struct DataMap(Vec); diff --git a/src/lib.rs b/src/lib.rs index 384cfd127..4b880d0e8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -123,12 +123,6 @@ use xor_name::XorName; pub use bytes; pub use xor_name; -/// The minimum size (before compression) of data to be self-encrypted, defined as 3B. -pub const MIN_ENCRYPTABLE_BYTES: usize = 3 * MIN_CHUNK_SIZE; -/// The maximum size (before compression) of an individual chunk of a file, defined as 500kiB. -pub const MAX_CHUNK_SIZE: usize = 512 * 1024; -/// The minimum size (before compression) of an individual chunk of a file, defined as 1B. -pub const MIN_CHUNK_SIZE: usize = 1; /// Controls the compression-speed vs compression-density tradeoffs. The higher the quality, the /// slower the compression. Range is 0 to 11. pub const COMPRESSION_QUALITY: i32 = 6; @@ -163,12 +157,17 @@ pub struct StreamSelfEncryptor { impl StreamSelfEncryptor { /// For encryption, return with an intialized streaming encryptor. /// If a `chunk_dir` is provided, the encrypted_chunks will be written into the specified dir as well. - pub fn encrypt_from_file(file_path: PathBuf, chunk_dir: Option) -> Result { + pub fn encrypt_from_file( + file_path: PathBuf, + chunk_dir: Option, + min_chunk_size: usize, + max_chunk_size: usize, + ) -> Result { let file = File::open(&*file_path)?; let metadata = file.metadata()?; let file_size = metadata.len(); - let batch_positions = batch_positions(file_size as usize); + let batch_positions = batch_positions(file_size as usize, min_chunk_size, max_chunk_size); Ok(StreamSelfEncryptor { file_path, @@ -350,13 +349,18 @@ impl StreamSelfDecryptor { } /// Read a file from the disk to encrypt, and output the chunks to a given output directory if presents. 
-pub fn encrypt_from_file(file_path: &Path, output_dir: &Path) -> Result<(DataMap, Vec)> { +pub fn encrypt_from_file( + file_path: &Path, + output_dir: &Path, + min_chunk_size: usize, + max_chunk_size: usize, +) -> Result<(DataMap, Vec)> { let mut file = File::open(file_path)?; let mut bytes = Vec::new(); let _ = file.read_to_end(&mut bytes)?; let bytes = Bytes::from(bytes); - let (data_map, encrypted_chunks) = encrypt(bytes)?; + let (data_map, encrypted_chunks) = encrypt(bytes, min_chunk_size, max_chunk_size)?; let mut chunk_names = Vec::new(); for chunk in encrypted_chunks { @@ -401,16 +405,21 @@ pub fn decrypt_from_chunk_files( /// Encrypts a set of bytes and returns the encrypted data together with /// the data map that is derived from the input data, and is used to later decrypt the encrypted data. /// Returns an error if the size is too small for self-encryption. -/// Only files larger than 3072 bytes (3 * MIN_CHUNK_SIZE) can be self-encrypted. +/// Only files larger than 3072 bytes (3 * min_chunk_size) can be self-encrypted. /// Smaller files will have to be batched together for self-encryption to work. -pub fn encrypt(bytes: Bytes) -> Result<(DataMap, Vec)> { - if (MIN_ENCRYPTABLE_BYTES) > bytes.len() { +pub fn encrypt( + bytes: Bytes, + min_chunk_size: usize, + max_chunk_size: usize, +) -> Result<(DataMap, Vec)> { + let min_encryptable_bytes = 3 * min_chunk_size; + if (min_encryptable_bytes) > bytes.len() { return Err(Error::Generic(format!( "Too small for self-encryption! Required size at least {}", - MIN_ENCRYPTABLE_BYTES + min_encryptable_bytes ))); } - let (num_chunks, batches) = chunk::batch_chunks(bytes); + let (num_chunks, batches) = chunk::batch_chunks(bytes, min_chunk_size, max_chunk_size); let (data_map, encrypted_chunks) = encrypt::encrypt(batches); if num_chunks > encrypted_chunks.len() { return Err(Error::Encryption); @@ -480,13 +489,20 @@ pub struct SeekInfo { /// It is used to first fetch chunks using the `index_range`. 
/// Then the chunks are passed into `self_encryption::decrypt_range` together /// with `relative_pos` from the `SeekInfo` instance, and the `len` to be read. -pub fn seek_info(file_size: usize, pos: usize, len: usize) -> SeekInfo { - let (start_index, end_index) = overlapped_chunks(file_size, pos, len); - - let relative_pos = if start_index == 2 && file_size < 3 * MAX_CHUNK_SIZE { - pos - (2 * get_chunk_size(file_size, 0)) +pub fn seek_info( + file_size: usize, + pos: usize, + len: usize, + min_chunk_size: usize, + max_chunk_size: usize, +) -> SeekInfo { + let (start_index, end_index) = + overlapped_chunks(file_size, pos, len, min_chunk_size, max_chunk_size); + + let relative_pos = if start_index == 2 && file_size < 3 * max_chunk_size { + pos - (2 * get_chunk_size(file_size, 0, min_chunk_size, max_chunk_size)) } else { - pos % get_chunk_size(file_size, start_index) + pos % get_chunk_size(file_size, start_index, min_chunk_size, max_chunk_size) }; SeekInfo { @@ -501,9 +517,15 @@ pub fn seek_info(file_size: usize, pos: usize, len: usize) -> SeekInfo { /// Returns the chunk index range [start, end) that is overlapped by the byte range defined by `pos` /// and `len`. Returns empty range if `file_size` is so small that there are no chunks. 
-fn overlapped_chunks(file_size: usize, pos: usize, len: usize) -> (usize, usize) { +fn overlapped_chunks( + file_size: usize, + pos: usize, + len: usize, + min_chunk_size: usize, + max_chunk_size: usize, +) -> (usize, usize) { // FIX THIS SHOULD NOT BE ALLOWED - if file_size < (3 * MIN_CHUNK_SIZE) || pos >= file_size || len == 0 { + if file_size < (3 * min_chunk_size) || pos >= file_size || len == 0 { return (0, 0); } @@ -513,8 +535,8 @@ fn overlapped_chunks(file_size: usize, pos: usize, len: usize) -> (usize, usize) None => file_size, }; - let start_index = get_chunk_index(file_size, pos); - let end_index = get_chunk_index(file_size, end); + let start_index = get_chunk_index(file_size, pos, min_chunk_size, max_chunk_size); + let end_index = get_chunk_index(file_size, end, min_chunk_size, max_chunk_size); (start_index, end_index) } @@ -561,26 +583,31 @@ fn get_pki(src_hash: &XorName, n_1_src_hash: &XorName, n_2_src_hash: &XorName) - } // Returns the number of chunks according to file size. -fn get_num_chunks(file_size: usize) -> usize { - if file_size < (3 * MIN_CHUNK_SIZE) { +fn get_num_chunks(file_size: usize, min_chunk_size: usize, max_chunk_size: usize) -> usize { + if file_size < (3 * min_chunk_size) { return 0; } - if file_size < (3 * MAX_CHUNK_SIZE) { + if file_size < (3 * max_chunk_size) { return 3; } - if file_size % MAX_CHUNK_SIZE == 0 { - file_size / MAX_CHUNK_SIZE + if file_size % max_chunk_size == 0 { + file_size / max_chunk_size } else { - (file_size / MAX_CHUNK_SIZE) + 1 + (file_size / max_chunk_size) + 1 } } -// Returns the size of a chunk according to file size. -fn get_chunk_size(file_size: usize, chunk_index: usize) -> usize { - if file_size < 3 * MIN_CHUNK_SIZE { +// Returns the size of a chunk according to file size and defined chunk sizes. 
+fn get_chunk_size( + file_size: usize, + chunk_index: usize, + min_chunk_size: usize, + max_chunk_size: usize, +) -> usize { + if file_size < 3 * min_chunk_size { return 0; } - if file_size < 3 * MAX_CHUNK_SIZE { + if file_size < 3 * max_chunk_size { if chunk_index < 2 { return file_size / 3; } else { @@ -588,63 +615,82 @@ fn get_chunk_size(file_size: usize, chunk_index: usize) -> usize { return file_size - (2 * (file_size / 3)); } } - let total_chunks = get_num_chunks(file_size); + let total_chunks = get_num_chunks(file_size, min_chunk_size, max_chunk_size); if chunk_index < total_chunks - 2 { - return MAX_CHUNK_SIZE; + return max_chunk_size; } - let remainder = file_size % MAX_CHUNK_SIZE; + let remainder = file_size % max_chunk_size; let penultimate = (total_chunks - 2) == chunk_index; if remainder == 0 { - return MAX_CHUNK_SIZE; + return max_chunk_size; } - if remainder < MIN_CHUNK_SIZE { + if remainder < min_chunk_size { if penultimate { - MAX_CHUNK_SIZE - MIN_CHUNK_SIZE + max_chunk_size - min_chunk_size } else { - MIN_CHUNK_SIZE + remainder + min_chunk_size + remainder } } else if penultimate { - MAX_CHUNK_SIZE + max_chunk_size } else { remainder } } // Returns the [start, end) half-open byte range of a chunk. 
-fn get_start_end_positions(file_size: usize, chunk_index: usize) -> (usize, usize) { - if get_num_chunks(file_size) == 0 { +fn get_start_end_positions( + file_size: usize, + chunk_index: usize, + min_chunk_size: usize, + max_chunk_size: usize, +) -> (usize, usize) { + if get_num_chunks(file_size, min_chunk_size, max_chunk_size) == 0 { return (0, 0); } - let start = get_start_position(file_size, chunk_index); - (start, start + get_chunk_size(file_size, chunk_index)) + let start = get_start_position(file_size, chunk_index, min_chunk_size, max_chunk_size); + ( + start, + start + get_chunk_size(file_size, chunk_index, min_chunk_size, max_chunk_size), + ) } -fn get_start_position(file_size: usize, chunk_index: usize) -> usize { - let total_chunks = get_num_chunks(file_size); +fn get_start_position( + file_size: usize, + chunk_index: usize, + min_chunk_size: usize, + max_chunk_size: usize, +) -> usize { + let total_chunks = get_num_chunks(file_size, min_chunk_size, max_chunk_size); if total_chunks == 0 { return 0; } let last = (total_chunks - 1) == chunk_index; - let first_chunk_size = get_chunk_size(file_size, 0); + let first_chunk_size = get_chunk_size(file_size, 0, min_chunk_size, max_chunk_size); if last { - first_chunk_size * (chunk_index - 1) + get_chunk_size(file_size, chunk_index - 1) + first_chunk_size * (chunk_index - 1) + + get_chunk_size(file_size, chunk_index - 1, min_chunk_size, max_chunk_size) } else { first_chunk_size * chunk_index } } -fn get_chunk_index(file_size: usize, position: usize) -> usize { - let num_chunks = get_num_chunks(file_size); +fn get_chunk_index( + file_size: usize, + position: usize, + min_chunk_size: usize, + max_chunk_size: usize, +) -> usize { + let num_chunks = get_num_chunks(file_size, min_chunk_size, max_chunk_size); if num_chunks == 0 { return 0; // FIX THIS SHOULD NOT BE ALLOWED } - let chunk_size = get_chunk_size(file_size, 0); + let chunk_size = get_chunk_size(file_size, 0, min_chunk_size, max_chunk_size); let remainder = 
file_size % chunk_size; if remainder == 0 - || remainder >= MIN_CHUNK_SIZE - || position < file_size - remainder - MIN_CHUNK_SIZE + || remainder >= min_chunk_size + || position < file_size - remainder - min_chunk_size { usize::min(position / chunk_size, num_chunks - 1) } else { diff --git a/src/tests.rs b/src/tests.rs index deaa519e0..8467103ee 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -9,7 +9,7 @@ use crate::{ decrypt_full_set, decrypt_range, encrypt, get_chunk_size, get_num_chunks, overlapped_chunks, seek_info, test_helpers::random_bytes, DataMap, EncryptedChunk, Error, StreamSelfDecryptor, - StreamSelfEncryptor, MIN_ENCRYPTABLE_BYTES, + StreamSelfEncryptor, }; use bytes::Bytes; use itertools::Itertools; @@ -20,6 +20,13 @@ use std::{ }; use tempfile::tempdir; +/// The maximum size (before compression) of an individual chunk of a file, defined as 1024kiB. +const MAX_CHUNK_SIZE: usize = 1024 * 1024; +/// The minimum size (before compression) of an individual chunk of a file, defined as 1B. 
+const MIN_CHUNK_SIZE: usize = 1; + +const MIN_ENCRYPTABLE_BYTES: usize = 3 * MAX_CHUNK_SIZE; + #[test] fn test_stream_self_encryptor() -> Result<(), Error> { // Create a 10MB temporary file @@ -34,8 +41,12 @@ fn test_stream_self_encryptor() -> Result<(), Error> { create_dir_all(chunk_path.clone())?; // Encrypt the file using StreamSelfEncryptor - let mut encryptor = - StreamSelfEncryptor::encrypt_from_file(file_path, Some(chunk_path.clone()))?; + let mut encryptor = StreamSelfEncryptor::encrypt_from_file( + file_path, + Some(chunk_path.clone()), + MIN_CHUNK_SIZE, + MAX_CHUNK_SIZE, + )?; let mut encrypted_chunks = Vec::new(); let mut data_map = None; while let Ok((chunk, map)) = encryptor.next_encryption() { @@ -100,7 +111,7 @@ fn write_and_read() -> Result<(), Error> { let file_size = 10_000_000; let bytes = random_bytes(file_size); - let (data_map, encrypted_chunks) = encrypt_chunks(bytes.clone())?; + let (data_map, encrypted_chunks) = test_encrypt_chunks(bytes.clone())?; let raw_data = decrypt_full_set(&data_map, &encrypted_chunks)?; compare(bytes, raw_data) @@ -112,20 +123,20 @@ fn seek_indices() -> Result<(), Error> { let pos = 0; let len = file_size / 2; - let info = seek_info(file_size, pos, len); + let info = seek_info(file_size, pos, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(0, info.relative_pos); assert_eq!(0, info.index_range.start); assert_eq!(1, info.index_range.end); let pos = len; - let info = seek_info(file_size, pos, len); + let info = seek_info(file_size, pos, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(512, info.relative_pos); assert_eq!(1, info.index_range.start); assert_eq!(2, info.index_range.end); - let info = seek_info(file_size, pos, len + 1); + let info = seek_info(file_size, pos, len + 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(512, info.relative_pos); assert_eq!(1, info.index_range.start); @@ -140,25 +151,25 @@ fn seek_indices_on_medium_size_file() -> Result<(), Error> { let pos = 0; let len = 131072; - let info = 
seek_info(file_size, pos, len); + let info = seek_info(file_size, pos, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(0, info.relative_pos); assert_eq!(0, info.index_range.start); assert_eq!(0, info.index_range.end); - let info = seek_info(file_size, 131072, len); + let info = seek_info(file_size, 131072, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(131072, info.relative_pos); assert_eq!(0, info.index_range.start); assert_eq!(0, info.index_range.end); - let info = seek_info(file_size, 393216, len); + let info = seek_info(file_size, 393216, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(70128, info.relative_pos); assert_eq!(1, info.index_range.start); assert_eq!(1, info.index_range.end); - let info = seek_info(file_size, 655360, len); + let info = seek_info(file_size, 655360, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(9184, info.relative_pos); assert_eq!(2, info.index_range.start); @@ -172,42 +183,42 @@ fn seek_indices_on_small_size_file() -> Result<(), Error> { let file_size = 1024; // first byte of index 0 - let info = seek_info(file_size, 0, 340); + let info = seek_info(file_size, 0, 340, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(0, info.relative_pos); assert_eq!(0, info.index_range.start); assert_eq!(0, info.index_range.end); // first byte of index 1 - let info = seek_info(file_size, 341, 340); + let info = seek_info(file_size, 341, 340, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(0, info.relative_pos); assert_eq!(1, info.index_range.start); assert_eq!(1, info.index_range.end); // first byte of index 2 - let info = seek_info(file_size, 682, 340); + let info = seek_info(file_size, 682, 340, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(0, info.relative_pos); assert_eq!(2, info.index_range.start); assert_eq!(2, info.index_range.end); // last byte of index 2 - let info = seek_info(file_size, file_size - 1, 1); + let info = seek_info(file_size, file_size - 1, 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(341, info.relative_pos); assert_eq!(2, 
info.index_range.start); assert_eq!(2, info.index_range.end); // overflow - should this error? - let info = seek_info(file_size, file_size, 1); + let info = seek_info(file_size, file_size, 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(1, info.relative_pos); assert_eq!(0, info.index_range.start); assert_eq!(0, info.index_range.end); // last byte of index 2 (as 2 remainders in last chunk) - let info = seek_info(file_size + 1, file_size, 1); + let info = seek_info(file_size + 1, file_size, 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(342, info.relative_pos); assert_eq!(2, info.index_range.start); @@ -220,21 +231,48 @@ fn seek_indices_on_small_size_file() -> Result<(), Error> { fn get_chunk_sizes() -> Result<(), Error> { let file_size = 969_265; - assert_eq!(323088, get_chunk_size(file_size, 0)); - assert_eq!(323088, get_chunk_size(file_size, 1)); - assert_eq!(323089, get_chunk_size(file_size, 2)); + assert_eq!( + 323088, + get_chunk_size(file_size, 0, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); + assert_eq!( + 323088, + get_chunk_size(file_size, 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); + assert_eq!( + 323089, + get_chunk_size(file_size, 2, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); let file_size = 1024; - assert_eq!(341, get_chunk_size(file_size, 0)); - assert_eq!(341, get_chunk_size(file_size, 1)); - assert_eq!(342, get_chunk_size(file_size, 2)); + assert_eq!( + 341, + get_chunk_size(file_size, 0, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); + assert_eq!( + 341, + get_chunk_size(file_size, 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); + assert_eq!( + 342, + get_chunk_size(file_size, 2, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); let file_size = 1025; - assert_eq!(341, get_chunk_size(file_size, 0)); - assert_eq!(341, get_chunk_size(file_size, 1)); - assert_eq!(343, get_chunk_size(file_size, 2)); + assert_eq!( + 341, + get_chunk_size(file_size, 0, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); + assert_eq!( + 341, + get_chunk_size(file_size, 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); + assert_eq!( + 343, + 
get_chunk_size(file_size, 2, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); Ok(()) } @@ -247,7 +285,7 @@ fn seek_and_join() -> Result<(), Error> { for divisor in 2..15 { let len = file_size / divisor; let data = random_bytes(file_size); - let (data_map, encrypted_chunks) = encrypt_chunks(data.clone())?; + let (data_map, encrypted_chunks) = test_encrypt_chunks(data.clone())?; // Read first part let read_data_1 = { @@ -282,7 +320,13 @@ fn seek( len: usize, ) -> Result { let expected_data = bytes.slice(pos..(pos + len)); - let info = seek_info(data_map.file_size(), pos, len); + let info = seek_info( + data_map.file_size(), + pos, + len, + MIN_CHUNK_SIZE, + MAX_CHUNK_SIZE, + ); // select a subset of chunks; the ones covering the bytes we want to read let subset: Vec<_> = encrypted_chunks @@ -314,10 +358,11 @@ fn seek_over_chunk_limit() -> Result<(), Error> { let expected_data = bytes.slice(pos..(pos + len)); // the chunks covering the bytes we want to read - let (start_index, end_index) = overlapped_chunks(file_size, pos, len); + let (start_index, end_index) = + overlapped_chunks(file_size, pos, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); // first encrypt the whole file - let (data_map, encrypted_chunks) = encrypt_chunks(bytes.clone())?; + let (data_map, encrypted_chunks) = test_encrypt_chunks(bytes.clone())?; // select a subset of chunks; the ones covering the bytes we want to read let subset: Vec<_> = encrypted_chunks @@ -327,7 +372,8 @@ fn seek_over_chunk_limit() -> Result<(), Error> { .collect(); // the start position within the first chunk (thus `relative`..) 
- let relative_pos = pos % get_chunk_size(file_size, start_index); + let relative_pos = + pos % get_chunk_size(file_size, start_index, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); let read_data = decrypt_range(&data_map, &subset, relative_pos, len)?; compare(expected_data, read_data)?; @@ -345,10 +391,11 @@ fn seek_with_length_over_data_size() -> Result<(), Error> { let len = bytes.len() - start_pos + 1; // the chunks covering the bytes we want to read - let (start_index, end_index) = overlapped_chunks(file_size, start_pos, len); + let (start_index, end_index) = + overlapped_chunks(file_size, start_pos, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); // first encrypt the whole file - let (data_map, encrypted_chunks) = encrypt_chunks(bytes.clone())?; + let (data_map, encrypted_chunks) = test_encrypt_chunks(bytes.clone())?; // select a subset of chunks; the ones covering the bytes we want to read let subset: Vec<_> = encrypted_chunks @@ -380,9 +427,9 @@ fn compare(original: Bytes, result: Bytes) -> Result<(), Error> { Ok(()) } -fn encrypt_chunks(bytes: Bytes) -> Result<(DataMap, Vec), Error> { - let num_chunks = get_num_chunks(bytes.len()); - let (data_map, encrypted_chunks) = encrypt(bytes)?; +fn test_encrypt_chunks(bytes: Bytes) -> Result<(DataMap, Vec), Error> { + let num_chunks = get_num_chunks(bytes.len(), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); + let (data_map, encrypted_chunks) = encrypt(bytes, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE)?; assert_eq!(num_chunks, encrypted_chunks.len()); diff --git a/tests/lib.rs b/tests/lib.rs index b033649ad..38519a377 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -51,9 +51,14 @@ )] use bytes::Bytes; -use self_encryption::{encrypt, ChunkInfo, Result, MAX_CHUNK_SIZE}; +use self_encryption::{encrypt, ChunkInfo, Result}; use xor_name::XorName; +/// The maximum size (before compression) of an individual chunk of a file, defined as 1024kiB. 
+const MAX_CHUNK_SIZE: usize = 1024 * 1024; +/// The minimum size (before compression) of an individual chunk of a file, defined as 1B. +const MIN_CHUNK_SIZE: usize = 1; + #[tokio::test] async fn cross_platform_check() -> Result<()> { let content_size: usize = 20 * MAX_CHUNK_SIZE + 100; @@ -62,7 +67,7 @@ async fn cross_platform_check() -> Result<()> { *c = (i % 17) as u8; } - let (data_map, _) = encrypt(Bytes::from(content))?; + let (data_map, _) = encrypt(Bytes::from(content), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE)?; // (NB: this hard-coded ref needs update if algorithm changes) let ref_data_map = vec![