From 10d9fef2eebfab9f7c152dba0e603d18b7cfbe6e Mon Sep 17 00:00:00 2001 From: Ralf Hubert Date: Fri, 29 Jul 2022 07:31:30 +0200 Subject: [PATCH] archive: add plugin interface Extend the archive command so that it can scan and clean up a non-local archive through a custom plugin. To provide one, the plugin manifest must contain an 'archiveAccessors' dictionary. Each entry maps an accessor name to an instance of a class inheriting from 'BaseArchiveAccess' that implements the methods needed to access the custom archive. The accessor is selected with the new '-a/--accessor' option of the archive command. Fixes #340. --- contrib/plugins/artifactoryArchiveAccess.py | 73 +++++++++++++++++++++ pym/bob/archive_access.py | 22 +++++++ pym/bob/cmds/archive.py | 71 +++++++++++++++----- pym/bob/input.py | 10 +++ 4 files changed, 161 insertions(+), 15 deletions(-) create mode 100644 contrib/plugins/artifactoryArchiveAccess.py create mode 100644 pym/bob/archive_access.py diff --git a/contrib/plugins/artifactoryArchiveAccess.py b/contrib/plugins/artifactoryArchiveAccess.py new file mode 100644 index 000000000..1191fc5ff --- /dev/null +++ b/contrib/plugins/artifactoryArchiveAccess.py @@ -0,0 +1,73 @@ +from bob.archive_access import BaseArchiveAccess + +from artifactory import ArtifactoryPath +import os +import tempfile +import datetime +import calendar +import struct + +import urllib3 +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + +class Artifactory(BaseArchiveAccess): + def __init__(self): + self.__url = "https://artifactory/bobs_cache" + print("Using Artifactory Archive @ " + self.__url) + + def get(self, path): + out = tempfile.NamedTemporaryFile("wb", delete=False) + try: + archive = ArtifactoryPath(self.__url + path, verify=False) + with archive.open() as fd: + out.write(fd.read()) + + except Exception as e: + logging.error(traceback.format_exc()) + out.close() + return out.name + + def removeTmp(self, tmp): + # remove the tmp file + if tmp is not None and os.path.exists(tmp): + os.unlink(tmp) + + def listdir(self, path): + if path != ".": + base = self.__url + path + else: + base = self.__url + if not 
base.endswith("/"): + base += "/" + self.__path = ArtifactoryPath(base, verify=False) + ret = [ str(p).replace(base, "") for p in self.__path ] + return ret + + def binStat(self, path): + archive = ArtifactoryPath(self.__url + path, verify=False) + # Get FileStat + stat = archive.stat() + ctime = calendar.timegm(stat.ctime.timetuple()) + mtime = calendar.timegm(stat.mtime.timetuple()) + size = stat.size + archive = ArtifactoryPath(self.__url + path, verify=False) + return struct.pack('=qqQ64s', ctime, mtime, stat.size, bytes(stat.sha256, 'utf-8')) + + def unlink(self, path): + archive = ArtifactoryPath(self.__url + path, verify=False) + if archive.exists(): + archive.unlink() + + def getSize(self,path): + archive = ArtifactoryPath(self.__url + path, verify=False) + if archive.exists(): + return archive.stat().size + +ArtifactoryAccess = Artifactory() + +manifest = { + 'apiVersion' : "0.21", + 'archiveAccessors' : { + 'Artifactory' : ArtifactoryAccess + } +} diff --git a/pym/bob/archive_access.py b/pym/bob/archive_access.py new file mode 100644 index 000000000..035f1d6a3 --- /dev/null +++ b/pym/bob/archive_access.py @@ -0,0 +1,22 @@ +class BaseArchiveAccess: + """Base class for Archive Access handlers. + """ + def get(self, path): + """Get the package 'path' from the archive. 
+ Return the path to a locally accessible archive file.""" + return "" + def removeTmp(self, path): + """Remove the temporary file returned by 'get'""" + return None + def listdir(self, path): + """Return a list of directory entries""" + return None + def getSize(self,path): + """Return the file size (in bytes) for 'path'""" + return None + def unlink(self, path): + """Unlink 'path' from archive""" + return None + def binStat(self, path): + """Return binary stat for 'path'""" + return None diff --git a/pym/bob/cmds/archive.py b/pym/bob/cmds/archive.py index 89c1350ac..cceb28fc1 100644 --- a/pym/bob/cmds/archive.py +++ b/pym/bob/cmds/archive.py @@ -5,7 +5,9 @@ from ..audit import Audit from ..errors import BobError +from ..input import RecipeSet from ..utils import binStat, asHexStr, infixBinaryOp +from ..archive_access import BaseArchiveAccess import argparse import gzip import json @@ -20,14 +22,29 @@ # need to enable this for nested expression parsing performance pyparsing.ParserElement.enablePackrat() +class LocalAccessor(BaseArchiveAccess): + def get(self, path): + return path + def removeTmp(self, path): + return None + def listdir(self, path): + return os.listdir(path) + def getSize(self,path): + return os.stat(path).st_size + def unlink(self, path): + os.unlink(path) + def binStat(self, path): + return binStat(path) + class ArchiveScanner: CUR_VERSION = 2 - def __init__(self): + def __init__(self, accessor): self.__dirSchema = re.compile(r'[0-9a-zA-Z]{2}') self.__archiveSchema = re.compile(r'[0-9a-zA-Z]{36,}-1.tgz') self.__db = None self.__cleanup = False + self.__accessor = accessor def __enter__(self): try: @@ -80,18 +97,20 @@ def scan(self, verbose): try: found = False self.__db.execute("BEGIN") - for l1 in os.listdir("."): + for l1 in self.__accessor.listdir("."): if not self.__dirSchema.fullmatch(l1): continue - for l2 in os.listdir(l1): + for l2 in self.__accessor.listdir(l1): if not self.__dirSchema.fullmatch(l2): continue l2 = os.path.join(l1, l2) - 
for l3 in os.listdir(l2): + for l3 in self.__accessor.listdir(l2): m = self.__archiveSchema.fullmatch(l3) if not m: continue found = True self.__scan(os.path.join(l2, l3), verbose) except OSError as e: raise BobError("Error scanning archive: " + str(e)) + except Exception as e: + raise BobError("Error: " + str(e)) finally: self.__db.execute("END") if verbose and not found: @@ -101,8 +120,9 @@ def scan(self, verbose): return found def __scan(self, fileName, verbose): + tmpFileName = None try: - st = binStat(fileName) + st = self.__accessor.binStat(fileName) bidHex, sep, suffix = fileName.partition("-") bid = bytes.fromhex(bidHex[0:2] + bidHex[3:5] + bidHex[6:]) @@ -116,9 +136,10 @@ def __scan(self, fileName, verbose): self.__db.execute("DELETE FROM files WHERE bid=?", (bid,)) + tmpFileName = self.__accessor.get(fileName) # read audit trail if verbose: print("scan", fileName) - with tarfile.open(fileName, errorlevel=1) as tar: + with tarfile.open(tmpFileName, errorlevel=1) as tar: # validate if tar.pax_headers.get('bob-archive-vsn') != "1": print("Not a Bob archive:", fileName, "Ignored!") @@ -135,7 +156,7 @@ def __scan(self, fileName, verbose): # read audit trail auditJsonGz = tar.extractfile(f) auditJson = gzip.GzipFile(fileobj=auditJsonGz) - audit = Audit.fromByteStream(auditJson, fileName) + audit = Audit.fromByteStream(auditJson, tmpFileName) # import data artifact = audit.getArtifact() @@ -152,6 +173,10 @@ def __scan(self, fileName, verbose): raise BobError("Cannot read {}: {}".format(fileName, str(e))) except OSError as e: raise BobError(str(e)) + except Exception as e: + raise BobError("Error: " + str(e)) + finally: + self.__accessor.removeTmp(tmpFileName) def remove(self, bid): self.__cleanup = True @@ -386,7 +411,7 @@ def query(scanner, expressions): return retained -def doArchiveScan(argv): +def doArchiveScan(accessor, argv): parser = argparse.ArgumentParser(prog="bob archive scan") parser.add_argument("-v", "--verbose", action='store_true', help="Verbose 
operation") @@ -394,14 +419,14 @@ def doArchiveScan(argv): help="Return a non-zero error code in case of errors") args = parser.parse_args(argv) - scanner = ArchiveScanner() + scanner = ArchiveScanner(accessor) with scanner: if not scanner.scan(args.verbose) and args.fail: sys.exit(1) # meta.package == "root" && build.date > "2017-06-19" LIMIT 5 ORDER BY build.date ASC -def doArchiveClean(argv): +def doArchiveClean(accessor, argv): parser = argparse.ArgumentParser(prog="bob archive clean") parser.add_argument('expression', nargs='+', help="Expression of artifacts that shall be kept") @@ -415,7 +440,7 @@ def doArchiveClean(argv): help="Return a non-zero error code in case of errors") args = parser.parse_args(argv) - scanner = ArchiveScanner() + scanner = ArchiveScanner(accessor) with scanner: if not args.noscan: if not scanner.scan(args.verbose) and args.fail: @@ -435,24 +460,29 @@ def doArchiveClean(argv): todo.update(scanner.getReferencedBuildIds(n)) # Third pass: remove everything that is *not* retained + totalRemoved = 0 for bid in scanner.getBuildIds(): if bid in retained: continue victim = asHexStr(bid) victim = os.path.join(victim[0:2], victim[2:4], victim[4:] + "-1.tgz") if args.dry_run: print(victim) + totalRemoved += accessor.getSize(victim) else: try: if args.verbose: print("rm", victim) - os.unlink(victim) + totalRemoved += accessor.getSize(victim) + accessor.unlink(victim) except FileNotFoundError: pass except OSError as e: raise BobError("Cannot remove {}: {}".format(victim, str(e))) scanner.remove(bid) + print("{} {} Bytes from archive".format ("Would remove " if args.dry_run else "Removed", + totalRemoved)) -def doArchiveFind(argv): +def doArchiveFind(accessor, argv): parser = argparse.ArgumentParser(prog="bob archive find") parser.add_argument('expression', nargs='+', help="Expression that artifacts need to match") @@ -464,7 +494,7 @@ def doArchiveFind(argv): help="Return a non-zero error code in case of errors") args = parser.parse_args(argv) - 
scanner = ArchiveScanner() + scanner = ArchiveScanner(accessor) with scanner: if not args.noscan: if not scanner.scan(args.verbose) and args.fail: @@ -492,14 +522,25 @@ def doArchive(argv, bobRoot): bob archive {} """.format(subHelp)) + parser.add_argument('-a', '--accessor', nargs='?', default=None, help="Archive Accessor (plugin)") parser.add_argument('subcommand', help="Subcommand") parser.add_argument('args', nargs=argparse.REMAINDER, help="Arguments for subcommand") args = parser.parse_args(argv) + if args.accessor: + recipes = RecipeSet() + recipes.parse() + accessors = recipes.getArchiveAccessors() + if not args.accessor in accessors: + parser.error("Unknown archive accessor '{}'".format(args.accessor)) + accessor = accessors[args.accessor] + else: + accessor = LocalAccessor() + if args.subcommand in availableArchiveCmds: - availableArchiveCmds[args.subcommand][0](args.args) + availableArchiveCmds[args.subcommand][0](accessor, args.args) else: parser.error("Unknown subcommand '{}'".format(args.subcommand)) diff --git a/pym/bob/input.py b/pym/bob/input.py index 175b4afad..e376a5e40 100644 --- a/pym/bob/input.py +++ b/pym/bob/input.py @@ -2897,6 +2897,7 @@ def __init__(self): self.__scmOverrides = [] self.__hooks = {} self.__projectGenerators = {} + self.__archiveAccessors = {} self.__configFiles = [] self.__properties = {} self.__states = {} @@ -3157,6 +3158,12 @@ def __loadPlugin(self, mangledName, fileName, name): } self.__projectGenerators.update(projectGenerators) + archiveAccessors = manifest.get('archiveAccessors', {}) + if not isinstance(archiveAccessors, dict): + raise ParseError("Plugin '"+fileName+"': 'archiveAccessor' has wrong type!") + if archiveAccessors: + self.__archiveAccessors.update(archiveAccessors) + properties = manifest.get('properties', {}) if not isinstance(properties, dict): raise ParseError("Plugin '"+fileName+"': 'properties' has wrong type!") @@ -3225,6 +3232,9 @@ def defineHook(self, name, value): def setConfigFiles(self, 
configFiles): self.__configFiles = configFiles + def getArchiveAccessors (self): + return self.__archiveAccessors + def getCommandConfig(self): return self.__commandConfig