Skip to content

Commit

Permalink
fix: Cap ingress induction debit for cleanup callback (#1777)
Browse files Browse the repository at this point in the history
This fixes an edge case where in a cleanup callback that has produced
some cycles balance change and when there's an accumulated ingress
induction debit (due to ingress messages inducted while the cleanup
callback was running DTS), the canister's balance might end up with an
amount that wouldn't respect the canister's freezing threshold.

Typically, for other message executions, the DTS execution would fail if
there were concurrent cycles changes that could not be applied. The
cleanup callback is a bit different and failing it is not a good option;
canisters depend on it running successfully to perform critical tasks
(such as releasing locks or reverting state changes in case something
goes wrong). Therefore, we should ensure that the cleanup callback can
still complete successfully.

The solution (which the PR implements) is to cap the ingress induction
debit by the amount removed from the cycles balance during execution.
This way we can ensure completion of the cleanup callback at the expense
of some cycles debit that is applied partially in some cases.
  • Loading branch information
dsarlis authored Oct 2, 2024
1 parent 9fe63e2 commit 60f1d55
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 1 deletion.
25 changes: 25 additions & 0 deletions rs/execution_environment/src/execution/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,31 @@ impl ResponseHelper {
.system_state
.canister_log
.append(&mut output.canister_log);

// The ingress induction debit can interfere with cycles changes that happened concurrently
// during the cleanup callback execution. If the balance of the canister is not enough to
// cover the debit + the amount of removed cycles during execution, the canister might end
// up with an incorrect balance. To avoid this, we check if the balance is enough to cover
// the debit + the removed cycles to ensure that the cycles change can be performed.
//
// This allows the cleanup callback to always succeed at the expense of some ingress
// messages being inducted for free in this edge case. This is acceptable because the cleanup
// callback is expected to always run and allow the canister to perform important cleanup tasks,
// like releasing locks or undoing other state changes.
if let Some(state_changes) = &canister_state_changes {
let ingress_induction_cycles_debit =
self.canister.system_state.ingress_induction_cycles_debit();
let removed_cycles = state_changes.system_state_changes.removed_cycles();
if self.canister.system_state.balance()
< ingress_induction_cycles_debit + removed_cycles
{
self.canister
.system_state
.remove_charge_from_ingress_induction_cycles_debit(
ingress_induction_cycles_debit - removed_cycles,
);
}
}
self.canister
.system_state
.apply_ingress_induction_cycles_debit(
Expand Down
100 changes: 99 additions & 1 deletion rs/execution_environment/src/execution/response/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1477,7 +1477,7 @@ fn dts_response_concurrent_cycles_change_fails() {
}

#[test]
fn dts_response_with_cleanup_concurrent_cycles_change_fails() {
fn dts_response_with_cleanup_concurrent_cycles_change_succeeds() {
// Test steps:
// 1. Canister A calls canister B.
// 2. Canister B replies to canister A.
Expand Down Expand Up @@ -1618,6 +1618,104 @@ fn dts_response_with_cleanup_concurrent_cycles_change_fails() {
);
}

#[test]
fn dts_response_with_cleanup_concurrent_cycles_change_is_capped() {
// Test steps:
// 1. Canister A calls canister B.
// 2. Canister B replies to canister A.
// 3. The response callback of canister A traps.
// 4. The cleanup callback of canister A runs in multiple slices.
// 5. While canister A is paused, we emulate more postponed charges.
// 6. The cleanup callback resumes and succeeds because it cannot change the
// cycles balance of the canister.
let instruction_limit = 100_000_000;
let mut test = ExecutionTestBuilder::new()
.with_instruction_limit(instruction_limit)
.with_slice_instruction_limit(1_000_000)
.with_subnet_memory_threshold(100 * 1024 * 1024)
.with_manual_execution()
.build();

let a_id = test.universal_canister().unwrap();
let b_id = test.universal_canister().unwrap();

let transferred_cycles = Cycles::new(1_000);

let b = wasm()
.accept_cycles(transferred_cycles)
.message_payload()
.append_and_reply()
.build();

let a = wasm()
.inter_update(
b_id,
call_args()
.other_side(b.clone())
.on_reply(
wasm()
// Fail the response callback to trigger the cleanup callback.
.trap(),
)
.on_cleanup(
wasm()
// Grow by enough pages to trigger a cycles reservation for the extra storage.
.stable64_grow(1_300)
.instruction_counter_is_at_least(1_000_000),
),
)
.build();

let (ingress_id, _) = test.ingress_raw(a_id, "update", a);
test.execute_message(a_id);
test.induct_messages();
test.execute_message(b_id);
test.induct_messages();

test.update_freezing_threshold(a_id, NumSeconds::from(1))
.unwrap();
test.canister_update_allocations_settings(a_id, Some(1), None)
.unwrap();

// The test setup is done by this point.

// Execute one slice of the canister. This will run the response callback in full as
// it immediately traps and will start the first slice of the cleanup callback.
test.execute_slice(a_id);

assert_eq!(
test.canister_state(a_id).next_execution(),
NextExecution::ContinueLong,
);

// Emulate a postponed charge that drives the cycles balance of the canister to zero.
let cycles_debit = test.canister_state(a_id).system_state.balance();
test.canister_state_mut(a_id)
.system_state
.add_postponed_charge_to_ingress_induction_cycles_debit(cycles_debit);

// Keep running the cleanup callback until it finishes.
test.execute_slice(a_id);
while test.canister_state(a_id).next_execution() != NextExecution::None {
test.execute_slice(a_id);
}

let err = check_ingress_status(test.ingress_status(&ingress_id)).unwrap_err();
assert_eq!(err.code(), ErrorCode::CanisterCalledTrap);

// Check that the cleanup callback did run.
assert_eq!(
test.execution_state(a_id).stable_memory.size,
NumWasmPages::from(1300)
);

// Even though the emulated ingress induction debit was set to be equal to the
// canister's balance, it's going to be capped by the amount removed from the
// balance during Wasm execution, so the canister will maintain a positive
// balance.
assert!(test.canister_state(a_id).system_state.balance() > Cycles::zero());
}

#[test]
fn cleanup_callback_cannot_accept_cycles() {
let mut test = ExecutionTestBuilder::new().build();
Expand Down
9 changes: 9 additions & 0 deletions rs/replicated_state/src/canister_state/system_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,15 @@ impl SystemState {
self.ingress_induction_cycles_debit += charge;
}

/// Removes a previously postponed charge for ingress messages from the balance
/// of the canister.
///
/// Note that this will saturate the balance to zero if the charge to remove is
/// larger than the current debit.
pub fn remove_charge_from_ingress_induction_cycles_debit(&mut self, charge: Cycles) {
self.ingress_induction_cycles_debit -= charge;
}

/// Charges the pending 'ingress_induction_cycles_debit' from the balance.
///
/// Precondition:
Expand Down

0 comments on commit 60f1d55

Please sign in to comment.