Skip to content

Commit

Permalink
add env var for configuring event log pruning
Browse files Browse the repository at this point in the history
  • Loading branch information
ezekg committed Oct 18, 2023
1 parent f1f4fdf commit f0cb353
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 7 deletions.
26 changes: 22 additions & 4 deletions app/workers/prune_event_logs_worker.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,25 @@
class PruneEventLogsWorker < BaseWorker
BACKLOG_DAYS = ENV.fetch('KEYGEN_PRUNE_EVENT_BACKLOG_DAYS') { 90 }.to_i
BATCH_SIZE = ENV.fetch('KEYGEN_PRUNE_BATCH_SIZE') { 1_000 }.to_i
BATCH_WAIT = ENV.fetch('KEYGEN_PRUNE_BATCH_WAIT') { 1 }.to_f
# Number of days to keep event logs in backlog.
BACKLOG_DAYS = ENV.fetch('KEYGEN_PRUNE_EVENT_BACKLOG_DAYS') { 90 }.to_i

# Number of days from backlog to target for pruning. The lower the
# number, the better the performance. Use a higher number when, e.g.,
# catching up after reducing the backlog from 90 days to 30. For normal
# non-catch-up workloads, this should be set to 1.
TARGET_DAYS = ENV.fetch('KEYGEN_PRUNE_EVENT_TARGET_DAYS') { 1 }.to_i

# Number of event logs to delete per batch. The larger the number,
# the higher the impact on the database.
BATCH_SIZE = ENV.fetch('KEYGEN_PRUNE_BATCH_SIZE') { 1_000 }.to_i

# Number of seconds to wait in between batches.
BATCH_WAIT = ENV.fetch('KEYGEN_PRUNE_BATCH_WAIT') { 1 }.to_f

# High-volume events eligible for pruning. Essentially, this
# reduces the storage burden for noisy events. For example,
# a license could be validated hundreds of times a day, or
# a machine could send thousands of heartbeat pings every
# day. This job prunes those superfluous event logs.
HIGH_VOLUME_EVENTS = %w[
license.validation.succeeded
license.validation.failed
Expand All @@ -20,7 +38,7 @@ def perform
BACKLOG_DAYS <= 0

end_date = BACKLOG_DAYS.days.ago.beginning_of_day
start_date = (end_date - 1.day).beginning_of_day
start_date = (end_date - TARGET_DAYS.day).beginning_of_day

# FIXME(ezekg) Update this to use created_date after we've backfilled old logs
accounts = Account.where(<<~SQL.squish, start_date:, end_date:)
Expand Down
6 changes: 3 additions & 3 deletions spec/workers/prune_event_logs_worker_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@
)
end

it 'should not prune high-volume event logs before/after backlog' do
it 'should not prune high-volume event logs not in target batch' do
license = create(:license, account:)

create_list(:event_log, 50, :license_validation_succeeded, account:, resource: license, created_at: worker::BACKLOG_DAYS + 2)
create_list(:event_log, 50, :license_validation_succeeded, account:, resource: license, created_at: (worker::BACKLOG_DAYS + worker::TARGET_DAYS + 1).days.ago)
create_list(:event_log, 50, :license_validation_succeeded, account:, resource: license)
create_list(:event_log, 50, :license_validation_failed, account:, resource: license, created_at: worker::BACKLOG_DAYS)
create_list(:event_log, 50, :license_validation_failed, account:, resource: license, created_at: worker::BACKLOG_DAYS.days.ago)
create_list(:event_log, 50, :license_validation_failed, account:, resource: license)

expect { worker.perform_async }.to_not(
Expand Down

0 comments on commit f0cb353

Please sign in to comment.