Skip to content

Commit

Permalink
feat: EXC-1567: Add charging for take and load canister snapshot (#811)
Browse files Browse the repository at this point in the history
This PR updates the canister manager to charge canisters that are
executing `take_canister_snapshot` or `load_canister_snapshot`.

Closes EXC-1567

---------

Co-authored-by: Andriy Berestovskyy <[email protected]>
  • Loading branch information
AlexandraZapuc and berestovskyy authored Aug 12, 2024
1 parent 9356151 commit 85f58e9
Show file tree
Hide file tree
Showing 10 changed files with 399 additions and 71 deletions.
13 changes: 13 additions & 0 deletions rs/config/src/subnet_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ const DEFAULT_RESERVED_BALANCE_LIMIT: Cycles = Cycles::new(5 * T);
/// 1/10th of a round.
pub const DEFAULT_UPLOAD_CHUNK_INSTRUCTIONS: NumInstructions = NumInstructions::new(200_000_000);

/// Baseline cost for creating or loading a canister snapshot (2B instructions).
/// The cost is based on the benchmarks: rs/execution_environment/benches/management_canister/
pub const DEFAULT_CANISTERS_SNAPSHOT_BASELINE_INSTRUCTIONS: NumInstructions =
NumInstructions::new(2_000_000_000);

/// The per subnet type configuration for the scheduler component
#[derive(Clone, Serialize, Deserialize)]
pub struct SchedulerConfig {
Expand Down Expand Up @@ -259,6 +264,9 @@ pub struct SchedulerConfig {

/// Number of instructions to count when uploading a chunk to the wasm store.
pub upload_wasm_chunk_instructions: NumInstructions,

/// Number of instructions to count when creating or loading a canister snapshot.
pub canister_snapshot_baseline_instructions: NumInstructions,
}

impl SchedulerConfig {
Expand Down Expand Up @@ -286,6 +294,8 @@ impl SchedulerConfig {
dirty_page_overhead: DEFAULT_DIRTY_PAGE_OVERHEAD,
accumulated_priority_reset_interval: ACCUMULATED_PRIORITY_RESET_INTERVAL,
upload_wasm_chunk_instructions: DEFAULT_UPLOAD_CHUNK_INSTRUCTIONS,
canister_snapshot_baseline_instructions:
DEFAULT_CANISTERS_SNAPSHOT_BASELINE_INSTRUCTIONS,
}
}

Expand Down Expand Up @@ -328,6 +338,7 @@ impl SchedulerConfig {
dirty_page_overhead: SYSTEM_SUBNET_DIRTY_PAGE_OVERHEAD,
accumulated_priority_reset_interval: ACCUMULATED_PRIORITY_RESET_INTERVAL,
upload_wasm_chunk_instructions: NumInstructions::from(0),
canister_snapshot_baseline_instructions: NumInstructions::from(0),
}
}

Expand Down Expand Up @@ -358,6 +369,8 @@ impl SchedulerConfig {
dirty_page_overhead: DEFAULT_DIRTY_PAGE_OVERHEAD,
accumulated_priority_reset_interval: ACCUMULATED_PRIORITY_RESET_INTERVAL,
upload_wasm_chunk_instructions: DEFAULT_UPLOAD_CHUNK_INSTRUCTIONS,
canister_snapshot_baseline_instructions:
DEFAULT_CANISTERS_SNAPSHOT_BASELINE_INSTRUCTIONS,
}
}

Expand Down
3 changes: 3 additions & 0 deletions rs/execution_environment/benches/lib/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,9 @@ where
subnet_configs
.scheduler_config
.upload_wasm_chunk_instructions,
subnet_configs
.scheduler_config
.canister_snapshot_baseline_instructions,
);
for Benchmark(id, wat, expected_ops) in benchmarks {
run_benchmark(
Expand Down
197 changes: 161 additions & 36 deletions rs/execution_environment/src/canister_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ pub(crate) struct CanisterMgrConfig {
heap_delta_rate_limit: NumBytes,
upload_wasm_chunk_instructions: NumInstructions,
wasm_chunk_store_max_size: NumBytes,
canister_snapshot_baseline_instructions: NumInstructions,
}

impl CanisterMgrConfig {
Expand All @@ -143,6 +144,7 @@ impl CanisterMgrConfig {
heap_delta_rate_limit: NumBytes,
upload_wasm_chunk_instructions: NumInstructions,
wasm_chunk_store_max_size: NumBytes,
canister_snapshot_baseline_instructions: NumInstructions,
) -> Self {
Self {
subnet_memory_capacity,
Expand All @@ -159,6 +161,7 @@ impl CanisterMgrConfig {
heap_delta_rate_limit,
upload_wasm_chunk_instructions,
wasm_chunk_store_max_size,
canister_snapshot_baseline_instructions,
}
}
}
Expand Down Expand Up @@ -1767,7 +1770,7 @@ impl CanisterManager {
/// and delete it before creating a new one.
/// Failure to do so will result in the creation of a new snapshot being unsuccessful.
///
/// If the new snapshot cannot be created, an appropiate error will be returned.
/// If the new snapshot cannot be created, an appropriate error will be returned.
pub(crate) fn take_canister_snapshot(
&self,
subnet_size: usize,
Expand All @@ -1777,28 +1780,39 @@ impl CanisterManager {
state: &mut ReplicatedState,
round_limits: &mut RoundLimits,
resource_saturation: &ResourceSaturation,
) -> Result<CanisterSnapshotResponse, CanisterManagerError> {
) -> (
Result<CanisterSnapshotResponse, CanisterManagerError>,
NumInstructions,
) {
// Check sender is a controller.
validate_controller(canister, &sender)?;
if let Err(err) = validate_controller(canister, &sender) {
return (Err(err), NumInstructions::new(0));
};

match replace_snapshot {
// Check that replace snapshot ID exists if provided.
Some(replace_snapshot) => {
match state.canister_snapshots.get(replace_snapshot) {
None => {
// If not found, the operation fails due to invalid parameters.
return Err(CanisterManagerError::CanisterSnapshotNotFound {
canister_id: canister.canister_id(),
snapshot_id: replace_snapshot,
});
return (
Err(CanisterManagerError::CanisterSnapshotNotFound {
canister_id: canister.canister_id(),
snapshot_id: replace_snapshot,
}),
NumInstructions::new(0),
);
}
Some(snapshot) => {
// Verify the provided replacement snapshot belongs to this canister.
if snapshot.canister_id() != canister.canister_id() {
return Err(CanisterManagerError::CanisterSnapshotInvalidOwnership {
canister_id: canister.canister_id(),
snapshot_id: replace_snapshot,
});
return (
Err(CanisterManagerError::CanisterSnapshotInvalidOwnership {
canister_id: canister.canister_id(),
snapshot_id: replace_snapshot,
}),
NumInstructions::new(0),
);
}
}
}
Expand All @@ -1811,22 +1825,28 @@ impl CanisterManager {
.snapshots_count(&canister.canister_id())
>= MAX_NUMBER_OF_SNAPSHOTS_PER_CANISTER
{
return Err(CanisterManagerError::CanisterSnapshotLimitExceeded {
canister_id: canister.canister_id(),
limit: MAX_NUMBER_OF_SNAPSHOTS_PER_CANISTER,
});
return (
Err(CanisterManagerError::CanisterSnapshotLimitExceeded {
canister_id: canister.canister_id(),
limit: MAX_NUMBER_OF_SNAPSHOTS_PER_CANISTER,
}),
NumInstructions::new(0),
);
}
}
}

if self.config.rate_limiting_of_heap_delta == FlagStatus::Enabled
&& canister.scheduler_state.heap_delta_debit >= self.config.heap_delta_rate_limit
{
return Err(CanisterManagerError::CanisterHeapDeltaRateLimited {
canister_id: canister.canister_id(),
value: canister.scheduler_state.heap_delta_debit,
limit: self.config.heap_delta_rate_limit,
});
return (
Err(CanisterManagerError::CanisterHeapDeltaRateLimited {
canister_id: canister.canister_id(),
value: canister.scheduler_state.heap_delta_debit,
limit: self.config.heap_delta_rate_limit,
}),
NumInstructions::new(0),
);
}

let new_snapshot_size = canister.snapshot_size_bytes();
Expand Down Expand Up @@ -1859,14 +1879,17 @@ impl CanisterManager {
);

if canister.system_state.balance() < threshold + reservation_cycles {
return Err(CanisterManagerError::InsufficientCyclesInMemoryGrow {
bytes: new_snapshot_size,
available: canister.system_state.balance(),
threshold,
});
return (
Err(CanisterManagerError::InsufficientCyclesInMemoryGrow {
bytes: new_snapshot_size,
available: canister.system_state.balance(),
threshold,
}),
NumInstructions::new(0),
);
}
// Verify that the subnet has enough memory.
round_limits
if let Err(err) = round_limits
.subnet_available_memory
.check_available_memory(new_snapshot_size, NumBytes::from(0), NumBytes::from(0))
.map_err(
Expand All @@ -1879,9 +1902,12 @@ impl CanisterManager {
.max(0) as u64,
),
},
)?;
)
{
return (Err(err), NumInstructions::new(0));
};
// Reserve needed cycles if the subnet is becoming saturated.
canister
if let Err(err) = canister
.system_state
.reserve_cycles(reservation_cycles)
.map_err(|err| match err {
Expand All @@ -1900,17 +1926,64 @@ impl CanisterManager {
limit,
}
}
})?;
})
{
return (Err(err), NumInstructions::new(0));
};
// Actually deduct memory from the subnet. It's safe to unwrap
// here because we already checked the available memory above.
round_limits.subnet_available_memory
.try_decrement(new_snapshot_size, NumBytes::from(0), NumBytes::from(0))
.expect("Error: Cannot fail to decrement SubnetAvailableMemory after checking for availability");
}

let current_memory_usage = canister.memory_usage() + new_snapshot_size;
let message_memory = canister.message_memory_usage();
let compute_allocation = canister.compute_allocation();
let reveal_top_up = canister.controllers().contains(&sender);
let instructions = self.config.canister_snapshot_baseline_instructions
+ NumInstructions::new(new_snapshot_size.get());

// Charge for the take snapshot of the canister.
let prepaid_cycles = match self
.cycles_account_manager
.prepay_execution_cycles(
&mut canister.system_state,
current_memory_usage,
message_memory,
compute_allocation,
instructions,
subnet_size,
reveal_top_up,
)
.map_err(CanisterManagerError::CanisterSnapshotNotEnoughCycles)
{
Ok(c) => c,
Err(err) => return (Err(err), NumInstructions::new(0)),
};

// To keep the invariant that `prepay_execution_cycles` is always paired
// with `refund_unused_execution_cycles` we refund zero immediately.
self.cycles_account_manager.refund_unused_execution_cycles(
&mut canister.system_state,
NumInstructions::from(0),
instructions,
prepaid_cycles,
// This counter is incremented if we refund more
// instructions than initially charged, which is impossible
// here.
&IntCounter::new("no_op", "no_op").unwrap(),
subnet_size,
&self.log,
);

// Create new snapshot.
let new_snapshot = CanisterSnapshot::from_canister(canister, state.time())
.map_err(CanisterManagerError::from)?;
let new_snapshot = match CanisterSnapshot::from_canister(canister, state.time())
.map_err(CanisterManagerError::from)
{
Ok(s) => s,
Err(err) => return (Err(err), instructions),
};

// Delete old snapshot identified by `replace_snapshot` ID.
if let Some(replace_snapshot) = replace_snapshot {
Expand Down Expand Up @@ -1943,15 +2016,19 @@ impl CanisterManager {
.canister_snapshots
.push(snapshot_id, Arc::new(new_snapshot));
canister.system_state.snapshots_memory_usage += new_snapshot_size;
Ok(CanisterSnapshotResponse::new(
&snapshot_id,
state.time().as_nanos_since_unix_epoch(),
new_snapshot_size,
))
(
Ok(CanisterSnapshotResponse::new(
&snapshot_id,
state.time().as_nanos_since_unix_epoch(),
new_snapshot_size,
)),
instructions,
)
}

pub(crate) fn load_canister_snapshot(
&self,
subnet_size: usize,
sender: PrincipalId,
canister: &CanisterState,
snapshot_id: SnapshotId,
Expand Down Expand Up @@ -2080,6 +2157,46 @@ impl CanisterManager {
);
}

let compute_allocation = new_canister.compute_allocation();
let message_memory = canister.message_memory_usage();
let reveal_top_up = canister.controllers().contains(&sender);
let instructions = self.config.canister_snapshot_baseline_instructions
+ instructions_used
+ NumInstructions::new(snapshot.size().get());

// Charge for loading the snapshot of the canister.
let prepaid_cycles = match self.cycles_account_manager.prepay_execution_cycles(

This comment has been minimized.

Copy link
@ilbertt

ilbertt Aug 18, 2024

Maybe I don't have the full context of what's happening in this function.

It looks a bit weird to me that we are executing all the code to load the canister snapshot into the canister state, including creating the new canister from the execution state obtained from the snapshot, and only then paying/checking if the canister has enough cycles.

This means, correct me if I'm wrong, that we are spending the compute power we account for even if the canister doesn't have enough cycles for it.

@AlexandraZapuc @berestovskyy am I missing something here?

This comment has been minimized.

Copy link
@AleDema

AleDema Aug 18, 2024

I think it comes down to the fact those instructions are needed to calculate the amount of cycles to charge. A potential mitigation could be to check whether the caller has at least the amount of cycles to pay for canister_snapshot_baseline_instructions before running the highlighted block of code.

This comment has been minimized.

Copy link
@berestovskyy

berestovskyy Aug 18, 2024

Author Contributor

Yes, we plan a refactor this, but it's not critical for this release.

&mut new_canister.system_state,
new_memory_usage,
message_memory,
compute_allocation,
instructions,
subnet_size,
reveal_top_up,
) {
Ok(prepaid_cycles) => prepaid_cycles,
Err(err) => {
return (
instructions_used,
Err(CanisterManagerError::CanisterSnapshotNotEnoughCycles(err)),
)
}
};

// To keep the invariant that `prepay_execution_cycles` is always paired
// with `refund_unused_execution_cycles` we refund zero immediately.
self.cycles_account_manager.refund_unused_execution_cycles(
&mut new_canister.system_state,
NumInstructions::from(0),
instructions,
prepaid_cycles,
// This counter is incremented if we refund more
// instructions than initially charged, which is impossible
// here.
&IntCounter::new("no_op", "no_op").unwrap(),
subnet_size,
&self.log,
);
// Increment canister version.
new_canister.system_state.canister_version += 1;
new_canister.system_state.add_canister_change(
Expand Down Expand Up @@ -2286,6 +2403,7 @@ pub(crate) enum CanisterManagerError {
canister_id: CanisterId,
limit: usize,
},
CanisterSnapshotNotEnoughCycles(CanisterOutOfCyclesError),
LongExecutionAlreadyInProgress {
canister_id: CanisterId,
},
Expand Down Expand Up @@ -2334,6 +2452,7 @@ impl AsErrorHelp for CanisterManagerError {
| CanisterManagerError::CanisterSnapshotInvalidOwnership { .. }
| CanisterManagerError::CanisterSnapshotExecutionStateNotFound { .. }
| CanisterManagerError::CanisterSnapshotLimitExceeded { .. }
| CanisterManagerError::CanisterSnapshotNotEnoughCycles { .. }
| CanisterManagerError::LongExecutionAlreadyInProgress { .. }
| CanisterManagerError::MissingUpgradeOptionError { .. }
| CanisterManagerError::InvalidUpgradeOptionError { .. } => ErrorHelp::UserError {
Expand Down Expand Up @@ -2633,6 +2752,12 @@ impl From<CanisterManagerError> for UserError {
)
)
}
CanisterSnapshotNotEnoughCycles(err) => {
Self::new(
ErrorCode::CanisterOutOfCycles,
format!("Canister snapshotting failed with `{}`{additional_help}", err),
)
}
LongExecutionAlreadyInProgress { canister_id } => {
Self::new(
ErrorCode::CanisterRejectedMessage,
Expand Down
1 change: 1 addition & 0 deletions rs/execution_environment/src/canister_manager/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ fn canister_manager_config(
NumBytes::from(10 * 1024 * 1024),
SchedulerConfig::application_subnet().upload_wasm_chunk_instructions,
ic_config::embedders::Config::default().wasm_max_size,
SchedulerConfig::application_subnet().canister_snapshot_baseline_instructions,
)
}

Expand Down
Loading

0 comments on commit 85f58e9

Please sign in to comment.