Skip to content

Commit

Permalink
Merge pull request borgbackup#8429 from ThomasWaldmann/improve-matching
Browse files Browse the repository at this point in the history
AND-matching also on host, user, tags
  • Loading branch information
ThomasWaldmann authored Sep 28, 2024
2 parents 2a01d29 + f082df7 commit 156d33e
Show file tree
Hide file tree
Showing 11 changed files with 102 additions and 45 deletions.
6 changes: 5 additions & 1 deletion src/borg/archiver/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,10 +418,14 @@ def parse_args(self, args=None):
replace_placeholders.override("now", DatetimeWrapper(args.timestamp))
replace_placeholders.override("utcnow", DatetimeWrapper(args.timestamp.astimezone(timezone.utc)))
args.location = args.location.with_timestamp(args.timestamp)
for name in "name", "other_name", "newname", "match_archives", "comment":
for name in "name", "other_name", "newname", "comment":
value = getattr(args, name, None)
if value is not None:
setattr(args, name, replace_placeholders(value))
for name in ("match_archives",): # lists
value = getattr(args, name, None)
if value:
setattr(args, name, [replace_placeholders(elem) for elem in value])

return args

Expand Down
6 changes: 3 additions & 3 deletions src/borg/archiver/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def with_archive(method):
def wrapper(self, args, repository, manifest, **kwargs):
archive_name = getattr(args, "name", None)
assert archive_name is not None
archive_info = manifest.archives.get_one(archive_name)
archive_info = manifest.archives.get_one([archive_name])
archive = Archive(
manifest,
archive_info.id,
Expand Down Expand Up @@ -379,8 +379,8 @@ def define_archive_filters_group(subparser, *, sort_by=True, first_last=True, ol
"--match-archives",
metavar="PATTERN",
dest="match_archives",
action=Highlander,
help='only consider archive names matching the pattern. see "borg help match-archives".',
action="append",
help='only consider archives matching all patterns. see "borg help match-archives".',
)

if sort_by:
Expand Down
4 changes: 2 additions & 2 deletions src/borg/archiver/debug_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def do_debug_info(self, args):
def do_debug_dump_archive_items(self, args, repository, manifest):
"""dump (decrypted, decompressed) archive items metadata (not: data)"""
repo_objs = manifest.repo_objs
archive_info = manifest.archives.get_one(args.name)
archive_info = manifest.archives.get_one([args.name])
archive = Archive(manifest, archive_info.id)
for i, item_id in enumerate(archive.metadata.items):
_, data = repo_objs.parse(item_id, repository.get(item_id), ro_type=ROBJ_ARCHIVE_STREAM)
Expand All @@ -45,7 +45,7 @@ def do_debug_dump_archive_items(self, args, repository, manifest):
@with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
def do_debug_dump_archive(self, args, repository, manifest):
"""dump decoded archive metadata (not: data)"""
archive_info = manifest.archives.get_one(args.name)
archive_info = manifest.archives.get_one([args.name])
repo_objs = manifest.repo_objs
try:
archive_meta_orig = manifest.archives.get_by_id(archive_info.id, raw=True)
Expand Down
4 changes: 2 additions & 2 deletions src/borg/archiver/delete_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ def do_delete(self, args, repository):
dry_run = args.dry_run
manifest = Manifest.load(repository, (Manifest.Operation.DELETE,))
if args.name:
archive_infos = [manifest.archives.get_one(args.name)]
archive_infos = [manifest.archives.get_one([args.name])]
else:
archive_infos = manifest.archives.list_considering(args)
count = len(archive_infos)
if count == 0:
return
if not args.name and args.match_archives is None and args.first == 0 and args.last == 0:
if not args.name and not args.match_archives and args.first == 0 and args.last == 0:
raise CommandError(
"Aborting: if you really want to delete all archives, please use -a 'sh:*' "
"or just delete the whole repository (might be much faster)."
Expand Down
4 changes: 2 additions & 2 deletions src/borg/archiver/diff_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def do_diff(self, args, repository, manifest):
else:
format = os.environ.get("BORG_DIFF_FORMAT", "{change} {path}{NL}")

archive1_info = manifest.archives.get_one(args.name)
archive2_info = manifest.archives.get_one(args.other_name)
archive1_info = manifest.archives.get_one([args.name])
archive2_info = manifest.archives.get_one([args.other_name])
archive1 = Archive(manifest, archive1_info.id)
archive2 = Archive(manifest, archive2_info.id)

Expand Down
33 changes: 26 additions & 7 deletions src/borg/archiver/help_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,10 +264,19 @@ class HelpMixIn:
)
helptext["match-archives"] = textwrap.dedent(
"""
The ``--match-archives`` option matches a given pattern against the list of all archive
names in the repository.
The ``--match-archives`` option matches a given pattern against the list of all archives
in the repository. It can be given multiple times.
It uses pattern styles similar to the ones described by ``borg help patterns``:
The patterns can have a prefix of:
- name: pattern match on the archive name (default)
- aid: prefix match on the archive id (only one result allowed)
- user: exact match on the username who created the archive
- host: exact match on the hostname where the archive was created
- tags: match on the archive tags
In case of a name pattern match,
it uses pattern styles similar to the ones described by ``borg help patterns``:
Identical match pattern, selector ``id:`` (default)
Simple string match, must fully match exactly as given.
Expand All @@ -281,16 +290,26 @@ class HelpMixIn:
Examples::
# id: style
# name match, id: style
borg delete --match-archives 'id:archive-with-crap'
borg delete -a 'id:archive-with-crap' # same, using short option
borg delete -a 'archive-with-crap' # same, because 'id:' is the default
# sh: style
# name match, sh: style
borg delete -a 'sh:home-kenny-*'
# re: style
borg delete -a 're:pc[123]-home-(user1|user2)-2022-09-.*'\n\n"""
# name match, re: style
borg delete -a 're:pc[123]-home-(user1|user2)-2022-09-.*'
# archive id prefix match:
borg delete -a 'aid:d34db33f'
# host or user match
borg delete -a 'user:kenny'
borg delete -a 'host:kenny-pc'
# tags match
borg delete -a 'tags:TAG1' -a 'tags:TAG2'\n\n"""
)
helptext["placeholders"] = textwrap.dedent(
"""
Expand Down
2 changes: 1 addition & 1 deletion src/borg/archiver/info_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def do_info(self, args, repository, manifest, cache):
"""Show archive details such as disk space used"""

if args.name:
archive_infos = [manifest.archives.get_one(args.name)]
archive_infos = [manifest.archives.get_one([args.name])]
else:
archive_infos = manifest.archives.list_considering(args)

Expand Down
2 changes: 1 addition & 1 deletion src/borg/archiver/list_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def do_list(self, args, repository, manifest):
else:
format = os.environ.get("BORG_LIST_FORMAT", "{mode} {user:6} {group:6} {size:8} {mtime} {path}{extra}{NL}")

archive_info = manifest.archives.get_one(args.name)
archive_info = manifest.archives.get_one([args.name])

def _list_inner(cache):
archive = Archive(manifest, archive_info.id, cache=cache)
Expand Down
2 changes: 1 addition & 1 deletion src/borg/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ def _build_files_cache(self):
from .archive import Archive

# get the latest archive with the IDENTICAL name, supporting archive series:
archives = self.manifest.archives.list(match=self.archive_name, sort_by=["ts"], last=1)
archives = self.manifest.archives.list(match=[self.archive_name], sort_by=["ts"], last=1)
if not archives:
# nothing found
return
Expand Down
82 changes: 58 additions & 24 deletions src/borg/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class NoManifestError(Error):
exit_mcode = 26


ArchiveInfo = namedtuple("ArchiveInfo", "name id ts")
ArchiveInfo = namedtuple("ArchiveInfo", "name id ts tags host user", defaults=[(), None, None])

# timestamp is a replacement for ts, archive is an alias for name (see SortBySpec)
AI_HUMAN_SORT_KEYS = ["timestamp", "archive"] + list(ArchiveInfo._fields)
Expand Down Expand Up @@ -129,6 +129,9 @@ def _get_archive_meta(self, id: bytes) -> dict:
time="1970-01-01T00:00:00.000000",
# new:
exists=False, # we have the pointer, but the repo does not have an archive item
username="",
hostname="",
tags=(),
)
else:
_, data = self.manifest.repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_META)
Expand All @@ -149,6 +152,7 @@ def _get_archive_meta(self, id: bytes) -> dict:
size=archive_item.size,
nfiles=archive_item.nfiles,
comment=archive_item.comment, # not always present?
tags=tuple(sorted(getattr(archive_item, "tags", []))), # must be hashable
)
return metadata

Expand All @@ -159,7 +163,41 @@ def _infos(self):

def _info_tuples(self):
for info in self._infos():
yield ArchiveInfo(name=info["name"], id=info["id"], ts=parse_timestamp(info["time"]))
yield ArchiveInfo(
name=info["name"],
id=info["id"],
ts=parse_timestamp(info["time"]),
tags=info["tags"],
user=info["username"],
host=info["hostname"],
)

def _matching_info_tuples(self, match_patterns, match_end):
    """Return the archive infos that satisfy ALL of the given match patterns.

    Starts from every tuple yielded by ``self._info_tuples()`` and narrows that
    list once per pattern, so the patterns are AND-combined.

    A pattern is dispatched on its prefix:

    - ``aid:``  prefix match on the hex form of the archive id; the narrowed
      list must then contain exactly one archive.
    - ``tags:`` comma-separated tags (empty entries are dropped); an archive
      matches if it carries all of them.
    - ``user:`` / ``host:`` exact comparison with the archive's user / host.
    - anything else is a name pattern (an optional ``name:`` prefix is removed),
      converted by ``get_regex_from_pattern`` and extended with *match_end*.

    :param match_patterns: list of pattern strings; a falsy value means "no filtering".
    :param match_end: regex fragment appended to the regex of every name pattern.
    :return: list of the ArchiveInfo tuples that passed every pattern.
    :raises CommandError: if an ``aid:`` pattern does not leave exactly one archive.
    """
    selected = list(self._info_tuples())
    if not match_patterns:
        # nothing to filter by: hand back everything
        return selected

    assert isinstance(match_patterns, list), f"match_pattern is a {type(match_patterns)}"

    for pattern in match_patterns:
        head, colon, rest = pattern.partition(":")
        # only "<something>:" counts as a prefix, a pattern without a colon has none
        prefix = head if colon else None

        if prefix == "aid":
            # match on the archive ID (prefix)
            selected = [info for info in selected if bin_to_hex(info.id).startswith(rest)]
            if len(selected) != 1:
                raise CommandError("archive ID based match needs to match precisely one archive ID")
        elif prefix == "tags":
            # empty entries (e.g. from "a,,b" or a trailing comma) are ignored
            required_tags = {tag for tag in rest.split(",") if tag}
            selected = [info for info in selected if required_tags <= set(info.tags)]
        elif prefix == "user":
            selected = [info for info in selected if info.user == rest]
        elif prefix == "host":
            selected = [info for info in selected if info.host == rest]
        else:
            # match on the name; the "name:" prefix is optional
            name_pattern = rest if prefix == "name" else pattern
            name_regex = re.compile(get_regex_from_pattern(name_pattern) + match_end)
            selected = [info for info in selected if name_regex.match(info.name) is not None]

    return selected

def count(self):
# return the count of archives in the repo
Expand Down Expand Up @@ -211,7 +249,14 @@ def _lookup_name(self, name, raw=False):
if archive_info["exists"] and archive_info["name"] == name:
if not raw:
ts = parse_timestamp(archive_info["time"])
return ArchiveInfo(name=archive_info["name"], id=archive_info["id"], ts=ts)
return ArchiveInfo(
name=archive_info["name"],
id=archive_info["id"],
ts=ts,
tags=archive_info["tags"],
user=archive_info["username"],
host=archive_info["hostname"],
)
else:
return archive_info
else:
Expand Down Expand Up @@ -243,7 +288,14 @@ def get_by_id(self, id, raw=False):
if archive_info["exists"]:
if not raw:
ts = parse_timestamp(archive_info["time"])
archive_info = ArchiveInfo(name=archive_info["name"], id=archive_info["id"], ts=ts)
archive_info = ArchiveInfo(
name=archive_info["name"],
id=archive_info["id"],
ts=ts,
tags=archive_info["tags"],
user=archive_info["username"],
host=archive_info["hostname"],
)
return archive_info
else:
for name, values in self._archives.items():
Expand Down Expand Up @@ -311,18 +363,7 @@ def list(
if isinstance(sort_by, (str, bytes)):
raise TypeError("sort_by must be a sequence of str")

archive_infos = self._info_tuples()
if match is None:
archive_infos = list(archive_infos)
elif match.startswith("aid:"): # do a match on the archive ID (prefix)
wanted_id = match.removeprefix("aid:")
archive_infos = [x for x in archive_infos if bin_to_hex(x.id).startswith(wanted_id)]
if len(archive_infos) != 1:
raise CommandError("archive ID based match needs to match precisely one archive ID")
else: # do a match on the name
regex = get_regex_from_pattern(match)
regex = re.compile(regex + match_end)
archive_infos = [x for x in archive_infos if regex.match(x.name) is not None]
archive_infos = self._matching_info_tuples(match, match_end)

if any([oldest, newest, older, newer]):
archive_infos = filter_archives_by_date(
Expand Down Expand Up @@ -361,14 +402,7 @@ def list_considering(self, args):
def get_one(self, match, *, match_end=r"\Z"):
"""get exactly one archive matching <match>"""
assert match is not None
archive_infos = self._info_tuples()
if match.startswith("aid:"): # do a match on the archive ID (prefix)
wanted_id = match.removeprefix("aid:")
archive_infos = [i for i in archive_infos if bin_to_hex(i.id).startswith(wanted_id)]
else: # do a match on the name
regex = get_regex_from_pattern(match)
regex = re.compile(regex + match_end)
archive_infos = [i for i in archive_infos if regex.match(i.name) is not None]
archive_infos = self._matching_info_tuples(match, match_end)
if len(archive_infos) != 1:
raise CommandError(f"{match} needed to match precisely one archive, but matched {len(archive_infos)}.")
return archive_infos[0]
Expand Down
2 changes: 1 addition & 1 deletion src/borg/testsuite/archiver/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def open_archive(repo_path, name):
repository = Repository(repo_path, exclusive=True)
with repository:
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
archive_info = manifest.archives.get_one(name)
archive_info = manifest.archives.get_one([name])
archive = Archive(manifest, archive_info.id)
return archive, repository

Expand Down

0 comments on commit 156d33e

Please sign in to comment.