Skip to content

Commit

Permalink
Add quarterly pruning strategy. (#8337)
Browse files Browse the repository at this point in the history
Add 13weekly and 3monthly quarterly pruning strategies.
  • Loading branch information
cr1901 authored and ThomasWaldmann committed Oct 5, 2024
1 parent ea619e7 commit d54eefd
Show file tree
Hide file tree
Showing 3 changed files with 189 additions and 16 deletions.
24 changes: 24 additions & 0 deletions docs/misc/prune-example.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,27 @@ example simple. They all work in basically the same way.

The weekly rule is easy to understand roughly, but hard to understand in all
details. If interested, read "ISO 8601:2000 standard week-based year".

The 13weekly and 3monthly rules are two different strategies for keeping one
every quarter of a year. There are `multiple ways` to define a quarter-year;
borg prune recognizes two:

* --keep-13weekly keeps one backup every 13 weeks using ISO 8601:2000's
definition of the week-based year. January 4th is always included in the
first week of a year, and January 1st to 3rd may be in week 52 or 53 of the
previous year. Week 53 is also in the fourth quarter of the year.
* --keep-3monthly keeps one backup every 3 months. January 1st to
March 31st, April 1st to June 30th, July 1st to September 30th, and October 1st
to December 31st form the quarters.

If the subtleties of the definition of a quarter year don't matter to you, a
short summary of behavior is:

* --keep-13weekly favors keeping backups at the beginning of Jan, Apr, Jul,
and Oct.
* --keep-3monthly favors keeping backups at the end of Dec, Mar, Jun, and Sep.
* Both strategies will have some overlap in which backups are kept.
* The differences are negligible unless backups considered for deletion were
created weekly or more frequently.

.. _multiple ways: https://en.wikipedia.org/wiki/Calendar_year#Quarter_year
110 changes: 94 additions & 16 deletions src/borg/archiver/prune_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,32 +30,78 @@ def prune_within(archives, hours, kept_because):
return result


def default_period_func(pattern):
    """Build a period function based on a ``strftime`` pattern.

    :param pattern: ``strftime`` format string identifying a period
        (e.g. ``"%Y-%m"`` for monthly).
    :return: a callable taking an object ``a`` with a datetime attribute
        ``ts`` and returning ``a.ts``, converted to the local timezone,
        formatted with *pattern*.
    """

    def period_of(a):
        # Periods are computed in the local timezone.
        local_ts = a.ts.astimezone()
        return local_ts.strftime(pattern)

    return period_of


def quarterly_13weekly_period_func(a):
    """Return the ``(ISO year, quarter)`` period of *a* using 13-week quarters.

    The timestamp ``a.ts`` is converted to the local timezone and mapped to
    its ISO 8601 week-based year and week number. Quarters are consecutive
    runs of 13 ISO weeks:

    * weeks 1-13  -> quarter 1
    * weeks 14-26 -> quarter 2
    * weeks 27-39 -> quarter 3
    * weeks 40-52 (and week 53, when the ISO year has one) -> quarter 4

    Because the ISO year is used, the first days of January that still belong
    to week 52/53 of the previous ISO year land in that year's quarter 4.

    :param a: object with a datetime attribute ``ts``.
    :return: tuple ``(iso_year, quarter)`` with ``quarter`` in 1..4.
    """
    iso_year, iso_week, _weekday = a.ts.astimezone().isocalendar()  # local time
    # Integer division yields 1..5 for weeks 1..53; the cap folds a possible
    # week 53 into the fourth quarter.
    quarter = min((iso_week - 1) // 13 + 1, 4)
    return (iso_year, quarter)


def quarterly_3monthly_period_func(a):
    """Return the ``(year, quarter)`` period of *a* using calendar quarters.

    The timestamp ``a.ts`` is converted to the local timezone; months 1-3 form
    quarter 1, months 4-6 quarter 2, months 7-9 quarter 3 and months 10-12
    quarter 4.

    :param a: object with a datetime attribute ``ts``.
    :return: tuple ``(year, quarter)`` with ``quarter`` in 1..4.
    """
    local_ts = a.ts.astimezone()  # local time
    # Months 1..12 map onto quarters 1..4 in groups of three.
    quarter = (local_ts.month - 1) // 3 + 1
    return (local_ts.year, quarter)


# Maps each prune rule name to its period function: a callable that takes an
# archive and returns a comparable period key (see prune_split, which looks the
# function up by rule name and calls it for every archive).
#
# Every rule is listed exactly once. Listing a key twice in the OrderedDict
# initializer would keep the position of its first occurrence, which would
# move the quarterly rules behind "yearly".
PRUNING_PATTERNS = OrderedDict(
    [
        ("secondly", default_period_func("%Y-%m-%d %H:%M:%S")),
        ("minutely", default_period_func("%Y-%m-%d %H:%M")),
        ("hourly", default_period_func("%Y-%m-%d %H")),
        ("daily", default_period_func("%Y-%m-%d")),
        ("weekly", default_period_func("%G-%V")),
        ("monthly", default_period_func("%Y-%m")),
        ("quarterly_13weekly", quarterly_13weekly_period_func),
        ("quarterly_3monthly", quarterly_3monthly_period_func),
        ("yearly", default_period_func("%Y")),
    ]
)


def prune_split(archives, rule, n, kept_because=None):
last = None
keep = []
pattern = PRUNING_PATTERNS[rule]
period_func = PRUNING_PATTERNS[rule]
if kept_because is None:
kept_because = {}
if n == 0:
return keep

a = None
for a in sorted(archives, key=attrgetter("ts"), reverse=True):
# we compute the pruning in local time zone
period = a.ts.astimezone().strftime(pattern)
period = period_func(a)
if period != last:
last = period
if a.id not in kept_because:
Expand All @@ -75,12 +121,24 @@ class PruneMixIn:
def do_prune(self, args, repository, manifest):
"""Prune repository archives according to specified rules"""
if not any(
(args.secondly, args.minutely, args.hourly, args.daily, args.weekly, args.monthly, args.yearly, args.within)
(
args.secondly,
args.minutely,
args.hourly,
args.daily,
args.weekly,
args.monthly,
args.quarterly_13weekly,
args.quarterly_3monthly,
args.yearly,
args.within,
)
):
raise CommandError(
'At least one of the "keep-within", "keep-last", '
'"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
'"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.'
'"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", '
'or "keep-yearly" settings must be specified.'
)

if args.format is not None:
Expand Down Expand Up @@ -190,10 +248,15 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
starts is used for pruning purposes. Dates and times are interpreted in the local
timezone of the system where borg prune runs, and weeks go from Monday to Sunday.
Specifying a negative number of archives to keep means that there is no limit.
As of borg 1.2.0, borg will retain the oldest archive if any of the secondly,
minutely, hourly, daily, weekly, monthly, or yearly rules was not otherwise able to
meet its retention target. This enables the first chronological archive to continue
aging until it is replaced by a newer archive that meets the retention criteria.
Borg will retain the oldest archive if any of the secondly, minutely, hourly,
daily, weekly, monthly, quarterly, or yearly rules was not otherwise able to
meet its retention target. This enables the first chronological archive to
continue aging until it is replaced by a newer archive that meets the retention
criteria.
The ``--keep-13weekly`` and ``--keep-3monthly`` rules are two different
strategies for keeping archives every quarter year.
The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will
keep the last N archives under the assumption that you do not create more than one
Expand Down Expand Up @@ -293,6 +356,21 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
action=Highlander,
help="number of monthly archives to keep",
)
quarterly_group = subparser.add_mutually_exclusive_group()
quarterly_group.add_argument(
"--keep-13weekly",
dest="quarterly_13weekly",
type=int,
default=0,
help="number of quarterly archives to keep (13 week strategy)",
)
quarterly_group.add_argument(
"--keep-3monthly",
dest="quarterly_3monthly",
type=int,
default=0,
help="number of quarterly archives to keep (3 month strategy)",
)
subparser.add_argument(
"-y",
"--keep-yearly",
Expand Down
71 changes: 71 additions & 0 deletions src/borg/testsuite/archiver/prune_cmd_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,77 @@ def test_prune_repository_example(archivers, request):
assert "test%02d" % i not in output


def test_prune_quarterly(archivers, request):
    """Check ``--keep-13weekly`` and ``--keep-3monthly`` against a hand-worked example.

    For each strategy a fresh repository is created, one archive per test date
    is added, and prune is run first as a dry run (nothing may be deleted) and
    then for real. Afterwards exactly the expected archives must remain.
    """
    # Example worked through by hand when developing quarterly
    # strategy, based upon existing backups where quarterly strategy
    # is desired. Weekly/monthly backups that don't affect results were
    # trimmed to speed up the test.
    #
    # Week number is shown in comment for every row in the below list.
    # Year is also shown when it doesn't match the year given in the
    # date tuple.
    archiver = request.getfixturevalue(archivers)
    test_dates = [
        (2020, 12, 6),
        (2021, 1, 3),  # 49, 2020-53
        (2021, 3, 28),
        (2021, 4, 25),  # 12, 16
        (2021, 6, 27),
        (2021, 7, 4),  # 25, 26
        (2021, 9, 26),
        (2021, 10, 3),  # 38, 39
        (2021, 12, 26),
        (2022, 1, 2),  # 51, 2021-52
    ]

    def mk_name(tup):
        """Return the archive name used for a ``(year, month, day)`` tuple."""
        (y, m, d) = tup
        suff = datetime(y, m, d).strftime("%Y-%m-%d")
        return f"test-{suff}"

    # The kept repos are based on working on an example by hand,
    # archives made on the following dates should be kept:
    EXPECTED_KEPT = {
        "13weekly": [(2020, 12, 6), (2021, 1, 3), (2021, 3, 28), (2021, 7, 4), (2021, 10, 3), (2022, 1, 2)],
        "3monthly": [(2020, 12, 6), (2021, 3, 28), (2021, 6, 27), (2021, 9, 26), (2021, 12, 26), (2022, 1, 2)],
    }

    for strat, to_keep in EXPECTED_KEPT.items():
        # Initialize our repo.
        cmd(archiver, "repo-create", RK_ENCRYPTION)
        for a, (y, m, d) in zip(map(mk_name, test_dates), test_dates):
            _create_archive_ts(archiver, a, y, m, d)

        to_prune = list(set(test_dates) - set(to_keep))

        # Use 99 instead of -1 to test that oldest backup is kept.
        output = cmd(archiver, "prune", "--list", "--dry-run", f"--keep-{strat}=99")
        for a in map(mk_name, to_prune):
            assert re.search(rf"Would prune:\s+{a}", output)

        # With 13weekly, 2020-12-06 shares its quarter (ISO 2020, weeks 40-53)
        # with the newer 2021-01-03, so it only survives as the oldest archive.
        # With 3monthly it is the newest archive of Oct-Dec 2020 and is kept by
        # the rule itself.
        # An equality test is used on purpose: `strat in ("13weekly")` would be
        # a substring test against a plain string, not tuple membership.
        oldest = r"\[oldest\]" if strat == "13weekly" else ""
        assert re.search(rf"Keeping archive \(rule: quarterly_{strat}{oldest} #\d+\):\s+test-2020-12-06", output)
        for a in map(mk_name, to_keep[1:]):
            assert re.search(rf"Keeping archive \(rule: quarterly_{strat} #\d+\):\s+{a}", output)

        output = cmd(archiver, "repo-list")
        # Nothing pruned after dry run
        for a in map(mk_name, test_dates):
            assert a in output

        cmd(archiver, "prune", f"--keep-{strat}=99")
        output = cmd(archiver, "repo-list")
        # All matching backups plus oldest kept
        for a in map(mk_name, to_keep):
            assert a in output
        # Other backups have been pruned
        for a in map(mk_name, to_prune):
            assert a not in output

        # Delete repo and begin anew
        cmd(archiver, "repo-delete")


# With an initial and daily backup, prune daily until oldest is replaced by a monthly backup
def test_prune_retain_and_expire_oldest(archivers, request):
archiver = request.getfixturevalue(archivers)
Expand Down

0 comments on commit d54eefd

Please sign in to comment.