Remove timer-based pruning
This removes the timer-based pruning that is no longer needed due to #2489.
The prune event aggregations will be removed once II reports their value
to be 0 (i.e., in a subsequent release after the next one).
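
For context, the sketch below illustrates the bounded-pruning approach that makes the timer unnecessary: each event insertion also prunes at most a fixed number of expired events, so the backlog stays bounded without any periodic task or self-call. This is an illustrative approximation only; aside from the MAX_EVENTS_TO_PROCESS limit mentioned in the event_stats.rs doc comment further down, all names, types, and values here are hypothetical and not taken from the actual II implementation.

// Hypothetical sketch (not II code): amortized pruning bounded per insertion.
use std::collections::BTreeMap;

// Limit referenced in the event_stats.rs diff below; the concrete value is assumed.
const MAX_EVENTS_TO_PROCESS: usize = 100;
const NANOS_PER_DAY: u64 = 24 * 60 * 60 * 1_000_000_000;

struct EventStore {
    // events keyed by timestamp in nanoseconds
    events: BTreeMap<u64, String>,
    // how long events are retained before they may be pruned
    retention_ns: u64,
}

impl EventStore {
    // Record a new event and prune at most MAX_EVENTS_TO_PROCESS expired events.
    // Because every write does a bounded amount of pruning, no timer is needed
    // to keep the store from growing without limit.
    fn insert(&mut self, now_ns: u64, event: String) {
        self.events.insert(now_ns, event);
        let cutoff = now_ns.saturating_sub(self.retention_ns);
        let expired: Vec<u64> = self
            .events
            .range(..cutoff)
            .take(MAX_EVENTS_TO_PROCESS)
            .map(|(ts, _)| *ts)
            .collect();
        for ts in expired {
            self.events.remove(&ts);
        }
    }
}

fn main() {
    let mut store = EventStore {
        events: BTreeMap::new(),
        retention_ns: 30 * NANOS_PER_DAY, // 30-day window, matching the "30d" aggregations
    };
    store.insert(1, "prepare_delegation".to_string());
    store.insert(40 * NANOS_PER_DAY, "prepare_delegation".to_string());
    // the first event fell out of the 30-day window and was pruned by the second insert
    assert_eq!(store.events.len(), 1);
}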
frederikrothenberger committed Jun 5, 2024
1 parent f7875b8 commit de36873
Showing 9 changed files with 56 additions and 173 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/canister-tests.yml
@@ -965,7 +965,22 @@ jobs:
steps:
  - uses: actions/checkout@v4
  - uses: ./.github/actions/setup-didc
  - name: 'Get latest release'
    uses: actions/github-script@v7
    id: latest-release-tag
    with:
      result-encoding: string
      script: return (await github.rest.repos.getLatestRelease({owner:"dfinity", repo:"internet-identity"})).data.tag_name;
  - name: "Check canister interface compatibility"
    run: |
      release="release-2024-05-13"
      # undo the breaking changes that we _explicitly_ made
      # remove after the next release
      # if we accidentally introduced other breaking changes, the patch would no longer apply / fix them
      # making this job fail.
      if [ "${{ steps.latest-release-tag.outputs.result }}" == "$release" ]; then
        echo "Rolling back intentionally made breaking changes $release"
        git apply allowed_breaking_change.patch
      fi
      curl -sSL https://github.com/dfinity/internet-identity/releases/latest/download/internet_identity.did -o internet_identity_previous.did
      didc check src/internet_identity/internet_identity.did internet_identity_previous.did
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default.

16 changes: 16 additions & 0 deletions allowed_breaking_change.patch
@@ -0,0 +1,16 @@
diff --git a/src/internet_identity/internet_identity.did b/src/internet_identity/internet_identity.did
index 0164427a..f8aa40a1 100644
--- a/src/internet_identity/internet_identity.did
+++ b/src/internet_identity/internet_identity.did
@@ -542,6 +542,11 @@ service : (opt InternetIdentityInit) -> {
fetch_entries: () -> (vec BufferedArchiveEntry);
acknowledge_entries: (sequence_number: nat64) -> ();

+ // Calls used for event stats housekeeping.
+ // Only callable by the canister itself.
+ prune_events_if_necessary: () -> ();
+ inject_prune_event: (timestamp: Timestamp) -> ();
+
// V2 API
// WARNING: The following methods are experimental and may change in the future.

1 change: 0 additions & 1 deletion src/internet_identity/Cargo.toml
@@ -29,7 +29,6 @@ captcha = { git = "https://github.com/dfinity/captcha", rev = "9c0d2dd9bf519e255
# All IC deps
candid.workspace = true
ic-cdk.workspace = true
ic-cdk-timers.workspace = true
ic-cdk-macros.workspace = true
ic-certification.workspace = true
ic-metrics-encoder.workspace = true
5 changes: 0 additions & 5 deletions src/internet_identity/internet_identity.did
@@ -542,11 +542,6 @@ service : (opt InternetIdentityInit) -> {
fetch_entries: () -> (vec BufferedArchiveEntry);
acknowledge_entries: (sequence_number: nat64) -> ();

// Calls used for event stats housekeeping.
// Only callable by the canister itself.
prune_events_if_necessary: () -> ();
inject_prune_event: (timestamp: Timestamp) -> ();

// V2 API
// WARNING: The following methods are experimental and may change in the future.

27 changes: 0 additions & 27 deletions src/internet_identity/src/main.rs
@@ -13,7 +13,6 @@ use canister_sig_util::signature_map::LABEL_SIG;
use ic_cdk::api::{caller, set_certified_data, trap};
use ic_cdk::call;
use ic_cdk_macros::{init, post_upgrade, pre_upgrade, query, update};
use ic_cdk_timers::{set_timer, set_timer_interval};
use internet_identity_interface::archive::types::BufferedEntry;
use internet_identity_interface::http_gateway::{HttpRequest, HttpResponse};
use internet_identity_interface::internet_identity::types::vc_mvp::{
Expand All @@ -23,7 +22,6 @@ use internet_identity_interface::internet_identity::types::vc_mvp::{
use internet_identity_interface::internet_identity::types::*;
use serde_bytes::ByteBuf;
use std::collections::HashMap;
use std::time::Duration;
use storage::{Salt, Storage};

mod anchor_management;
@@ -374,22 +372,6 @@ fn acknowledge_entries(sequence_number: u64) {
archive::acknowledge_entries(sequence_number)
}

/// Check for backlogged event data and prune if necessary.
/// Only callable by this canister.
#[update]
#[candid_method]
async fn prune_events_if_necessary() {
stats::event_stats::prune_events_if_necessary().await
}

/// Inject pruning event at the given timestamp.
/// Only callable by this canister.
#[update]
#[candid_method]
fn inject_prune_event(timestamp: Timestamp) {
stats::event_stats::inject_prune_event(timestamp);
}

#[init]
#[candid_method(init)]
fn init(maybe_arg: Option<InternetIdentityInit>) {
@@ -410,15 +392,6 @@ fn initialize(maybe_arg: Option<InternetIdentityInit>) {
    init_assets();
    apply_install_arg(maybe_arg);
    update_root_hash();

    // Immediately prune events if necessary and also set a timer to do so every 5 minutes.
    // We need to use a timer here, because we cannot call canisters directly in init.
    set_timer(Duration::from_nanos(0), || {
        ic_cdk::spawn(prune_events_if_necessary());
    });
    set_timer_interval(Duration::from_nanos(5 * MINUTE_NS), || {
        ic_cdk::spawn(prune_events_if_necessary());
    });
}

fn apply_install_arg(maybe_arg: Option<InternetIdentityInit>) {
94 changes: 5 additions & 89 deletions src/internet_identity/src/stats/event_stats.rs
@@ -54,23 +54,18 @@
//! ```

use crate::ii_domain::IIDomain;
use crate::state::{storage_borrow, storage_borrow_mut};
use crate::state::storage_borrow_mut;
use crate::stats::event_stats::event_aggregations::AGGREGATIONS;
use crate::stats::event_stats::Event::PruneEvent;
use crate::storage::Storage;
use crate::{state, DAY_NS, MINUTE_NS};
use crate::{state, DAY_NS};
use candid::CandidType;
use ic_cdk::api::call::CallResult;
use ic_cdk::api::time;
use ic_cdk::{caller, id, trap};
use ic_cdk_timers::set_timer;
use ic_stable_structures::storable::Bound;
use ic_stable_structures::{Memory, StableBTreeMap, Storable};
use internet_identity_interface::internet_identity::types::{FrontendHostname, Timestamp};
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::collections::HashMap;
use std::time::Duration;

/// This module defines the aggregations over the events.
mod event_aggregations;
@@ -133,12 +128,9 @@ pub struct EventData {
#[derive(Deserialize, Serialize, Clone, Eq, PartialEq, Debug)]
pub enum Event {
PrepareDelegation(PrepareDelegationEvent),
/// This event is injected when the backlog of events to be pruned gets too large to be handled
/// in an amortized fashion. This should not happen during regular operations, but might happen
/// as a second order effect to another incident causing II to stop processing `prepare_delegation`
/// calls.
/// By making it an event, we have control over the time window that should be pruned, and we
/// can have aggregations over it to know how often such an event was injected.
/// No longer used, but kept for backwards compatibility.
/// Used to be injected by a timer to artificially trigger regular pruning. No longer needed
/// due to the explicit [MAX_EVENTS_TO_PROCESS] limit.
PruneEvent,
}

@@ -208,82 +200,6 @@ pub fn update_event_based_stats(event: EventData) {
})
}

/// Checks if the event data has been pruned recently. If not, injects an artificial event to trigger
/// the pruning.
/// If the pruning fails, the timestamp delta is shrunk exponentially until the pruning succeeds.
///
/// This is useful to recover from a situation where II has not pruned events for a long time and
/// would need to prune a large backlog of events immediately. Such a situation should not happen
/// in regular operation, but might happen as a second order effect to another incident causing II
/// to stop processing `prepare_delegation` calls.
pub async fn prune_events_if_necessary() {
    if caller() != id() {
        // ignore if this is not a self-call
        return;
    }
    let last = storage_borrow(|s| s.event_data.last_key_value());
    let Some((key, _)) = last else {
        return;
    };

    let now = time();
    if now - key.time <= 5 * MINUTE_NS {
        // the last event is recent, no need to prune
        return;
    }

    // the last event is older than 5 minutes, which indicates that the event data is backlogged
    // and needs to be pruned
    let mut delta = now - key.time;

    // The loop is explicitly bounded since it contains a self call.
    // If the loop logic is accidentally changed in the future to not terminate, putting an explicit
    // limit ensures termination
    for _ in 0..64 {
        let injected_pruning_time = key.time + delta;

        // Use a self-call, so that we can retry in case of failure
        let result: CallResult<()> =
            ic_cdk::call(id(), "inject_prune_event", (injected_pruning_time,)).await;
        match result {
            Ok(()) => break, // success, we managed to prune once
            Err(err) => {
                // failure, try again with a smaller delta
                delta /= 2;
                if delta == 0 {
                    trap("Prune event time delta reduced to 0, giving up!");
                }
                ic_cdk::println!(
                    "Failed to inject prune event: {:?}, retrying with smaller delta {}",
                    err,
                    delta
                );
            }
        }
    }

    if delta < now - key.time {
        // we needed to back off at least once, so immediately try again to prune the rest
        // use a timer instead of a self-call to avoid creating a call context
        set_timer(Duration::from_nanos(0), || {
            ic_cdk::spawn(crate::prune_events_if_necessary());
        });
    }
}

/// Retrieve the aggregation data for the given window and II domain.
/// The data is returned as a map from a human-readable label to the aggregation weight.
pub fn inject_prune_event(timestamp: Timestamp) {
    if caller() != id() {
        // ignore if this is not a self-call
        return;
    }
    ic_cdk::println!("Injecting prune event at timestamp {}", timestamp);
    storage_borrow_mut(|s| {
        update_events_internal(EventData { event: PruneEvent }, timestamp, s);
    })
}

/// Internal helper useful for testing.
fn update_events_internal<M: Memory>(event: EventData, now: Timestamp, s: &mut Storage<M>) {
let current_key = EventKey::new(now);
57 changes: 9 additions & 48 deletions src/internet_identity/tests/integration/aggregation_stats.rs
@@ -18,7 +18,6 @@ use std::time::Duration;
const SESSION_LENGTH: u64 = 900; // 900 seconds
const PD_COUNT: &str = "prepare_delegation_count";
const PD_SESS_SEC: &str = "prepare_delegation_session_seconds";
const PRUNE_EVENT_COUNT: &str = "prune_event_count";

#[test]
fn should_report_expected_aggregations() -> Result<(), CallError> {
@@ -60,6 +59,7 @@ fn should_not_report_empty_aggregations() -> Result<(), CallError> {
let env = env();
let canister_id = install_ii_canister(&env, II_WASM.clone());
let ii_origin = "ic0.app";
let ii_origin2 = "internetcomputer.org";
let identity_nr = create_identity(&env, canister_id, ii_origin);

delegation_for_origin(&env, canister_id, identity_nr, "https://some-dapp.com")?;
@@ -78,14 +78,18 @@
assert_eq!(keys, expected_keys);

env.advance_time(Duration::from_secs(60 * 60 * 24 * 30)); // 30 days
env.tick(); // execute timers

let identity_nr2 = create_identity(&env, canister_id, ii_origin2);
delegation_for_origin(&env, canister_id, identity_nr2, "https://some-dapp.com")?;

let aggregations = api::stats(&env, canister_id)?.event_aggregations;

// set of keys is now entirely different due to automatic pruning
// set of keys is now entirely different, because the other origin has been pruned after 30 days
let mut expected_keys = vec![
aggregation_key(PRUNE_EVENT_COUNT, "24h", "other"),
aggregation_key(PRUNE_EVENT_COUNT, "30d", "other"),
aggregation_key(PD_COUNT, "24h", ii_origin2),
aggregation_key(PD_COUNT, "30d", ii_origin2),
aggregation_key(PD_SESS_SEC, "24h", ii_origin2),
aggregation_key(PD_SESS_SEC, "30d", ii_origin2),
];
let mut keys = aggregations.into_keys().collect::<Vec<_>>();
// sort for stable comparison
@@ -260,49 +264,6 @@ fn should_keep_aggregations_across_upgrades() -> Result<(), CallError> {
Ok(())
}

#[test]
fn should_prune_automatically_after_upgrade() -> Result<(), CallError> {
const II_ORIGIN: &str = "ic0.app";
let env = env();
let canister_id = install_ii_canister(&env, II_WASM.clone());
let identity_nr = create_identity(&env, canister_id, II_ORIGIN);

delegation_for_origin(&env, canister_id, identity_nr, "https://some-dapp.com")?;

let aggregations = api::stats(&env, canister_id)?.event_aggregations;
let mut expected_keys = vec![
aggregation_key(PD_COUNT, "24h", II_ORIGIN),
aggregation_key(PD_COUNT, "30d", II_ORIGIN),
aggregation_key(PD_SESS_SEC, "24h", II_ORIGIN),
aggregation_key(PD_SESS_SEC, "30d", II_ORIGIN),
];
let mut keys = aggregations.into_keys().collect::<Vec<_>>();
// sort for stable comparison
expected_keys.sort();
keys.sort();
assert_eq!(keys, expected_keys);

// upgrade then advance time to trigger pruning
upgrade_ii_canister(&env, canister_id, II_WASM.clone());
env.advance_time(Duration::from_secs(60 * 60 * 24 * 30)); // 30 days
env.tick(); // execute timers

let aggregations = api::stats(&env, canister_id)?.event_aggregations;

// set of keys only reflects pruning now
let mut expected_keys = vec![
aggregation_key(PRUNE_EVENT_COUNT, "24h", "other"),
aggregation_key(PRUNE_EVENT_COUNT, "30d", "other"),
];
let mut keys = aggregations.into_keys().collect::<Vec<_>>();
// sort for stable comparison
expected_keys.sort();
keys.sort();
assert_eq!(keys, expected_keys);

Ok(())
}

#[test]
fn crash_mem_backup() -> Result<(), CallError> {
let env = env();
13 changes: 11 additions & 2 deletions src/internet_identity/tests/integration/http.rs
@@ -678,7 +678,16 @@ fn should_list_aggregated_session_seconds_and_event_data_counters() -> Result<()

// advance time one day to see it reflected on the daily stats
env.advance_time(Duration::from_secs(60 * 60 * 24));
env.tick(); // trigger automatic pruning
// call prepare delegation again to trigger stats update
api::prepare_delegation(
&env,
canister_id,
authn_method_internetcomputer.principal(),
user_number_1,
"https://some-dapp-4.com",
&pub_session_key,
None,
)?;

let metrics = get_metrics(&env, canister_id);
// make sure aggregations that have no data anymore get removed from stats endpoint
@@ -716,7 +725,7 @@ fn should_list_aggregated_session_seconds_and_event_data_counters() -> Result<()
assert_metric(
&get_metrics(&env, canister_id),
"internet_identity_event_aggregations_count",
8f64,
10f64,
);
Ok(())
}
