diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 27fe9c66a..896da4dff 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -20,7 +20,6 @@ from vulnerabilities.importers import gentoo from vulnerabilities.importers import github from vulnerabilities.importers import github_osv -from vulnerabilities.importers import gitlab from vulnerabilities.importers import istio from vulnerabilities.importers import mozilla from vulnerabilities.importers import nginx @@ -39,12 +38,12 @@ from vulnerabilities.importers import ubuntu_usn from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen +from vulnerabilities.pipelines import gitlab_importer from vulnerabilities.pipelines import pypa_importer IMPORTERS_REGISTRY = [ nvd.NVDImporter, github.GitHubAPIImporter, - gitlab.GitLabAPIImporter, npm.NpmImporter, nginx.NginxImporter, pysec.PyPIImporter, @@ -75,6 +74,7 @@ epss.EPSSImporter, vulnrichment.VulnrichImporter, pypa_importer.PyPaImporterPipeline, + gitlab_importer.GitLabImporterPipeline, ] IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY} diff --git a/vulnerabilities/improvers/valid_versions.py b/vulnerabilities/improvers/valid_versions.py index d23508bea..17cb0a05d 100644 --- a/vulnerabilities/improvers/valid_versions.py +++ b/vulnerabilities/improvers/valid_versions.py @@ -33,7 +33,6 @@ from vulnerabilities.importers.elixir_security import ElixirSecurityImporter from vulnerabilities.importers.github import GitHubAPIImporter from vulnerabilities.importers.github_osv import GithubOSVImporter -from vulnerabilities.importers.gitlab import GitLabAPIImporter from vulnerabilities.importers.istio import IstioImporter from vulnerabilities.importers.nginx import NginxImporter from vulnerabilities.importers.npm import NpmImporter @@ -44,6 +43,7 @@ from vulnerabilities.improver import Improver from vulnerabilities.improver import Inference from vulnerabilities.models import Advisory +from vulnerabilities.pipelines.gitlab_importer import GitLabImporterPipeline from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage from vulnerabilities.utils import clean_nginx_git_tag from vulnerabilities.utils import get_affected_packages_by_patched_package @@ -363,7 +363,7 @@ class DebianBasicImprover(ValidVersionImprover): class GitLabBasicImprover(ValidVersionImprover): - importer = GitLabAPIImporter + importer = GitLabImporterPipeline ignorable_versions = [] diff --git a/vulnerabilities/importers/gitlab.py b/vulnerabilities/pipelines/gitlab_importer.py similarity index 68% rename from vulnerabilities/importers/gitlab.py rename to vulnerabilities/pipelines/gitlab_importer.py index cd42b24ed..604ba7194 100644 --- a/vulnerabilities/importers/gitlab.py +++ b/vulnerabilities/pipelines/gitlab_importer.py @@ -12,11 +12,12 @@ from pathlib import Path from typing import Iterable from typing import List -from typing import Optional +from typing import Tuple import pytz import saneyaml from dateutil import parser as dateparser +from fetchcode.vcs import fetch_via_vcs from packageurl import PackageURL from univers.version_range import RANGE_CLASS_BY_SCHEMES from univers.version_range import VersionRange @@ -25,58 +26,84 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import build_description from vulnerabilities.utils import get_advisory_url from vulnerabilities.utils import get_cwe_id -logger = logging.getLogger(__name__) -PURL_TYPE_BY_GITLAB_SCHEME = { - "conan": "conan", - "gem": "gem", - # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742 - # "go": "golang", - "maven": "maven", - "npm": "npm", - "nuget": "nuget", - "packagist": "composer", - "pypi": "pypi", -} - -GITLAB_SCHEME_BY_PURL_TYPE = {v: k for k, v in PURL_TYPE_BY_GITLAB_SCHEME.items()} - - -class GitLabAPIImporter(Importer): +class GitLabImporterPipeline(VulnerableCodeBaseImporterPipeline): spdx_license_expression = "MIT" license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE" importer_name = "GitLab Importer" repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/" - def advisory_data(self, _keep_clone=False) -> Iterable[AdvisoryData]: - try: - self.clone(repo_url=self.repo_url) - base_path = Path(self.vcs_response.dest_dir) + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.import_new_advisories, + cls.clean_downloads, + ) - for file_path in base_path.glob("**/*.yml"): - gitlab_type, package_slug, vuln_id = parse_advisory_path( - base_path=base_path, - file_path=file_path, - ) + purl_type_by_gitlab_scheme = { + "conan": "conan", + "gem": "gem", + # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742 + # "go": "golang", + "maven": "maven", + "npm": "npm", + "nuget": "nuget", + "packagist": "composer", + "pypi": "pypi", + } + + gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()} + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self): + root = Path(self.vcs_response.dest_dir) + return sum(1 for _ in root.rglob("*.yml")) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + base_path = Path(self.vcs_response.dest_dir) + + for file_path in base_path.rglob("*.yml"): + if file_path.parent == base_path: + continue + + gitlab_type, _, _ = parse_advisory_path( + base_path=base_path, + file_path=file_path, + ) - if gitlab_type in PURL_TYPE_BY_GITLAB_SCHEME: - yield parse_gitlab_advisory(file=file_path, base_path=base_path) + if gitlab_type not in self.purl_type_by_gitlab_scheme: + # self.log( + # f"Unknown package type {gitlab_type!r} in {file_path!r}", + # level=logging.ERROR, + # ) + continue + + yield parse_gitlab_advisory( + file=file_path, + base_path=base_path, + gitlab_scheme_by_purl_type=self.gitlab_scheme_by_purl_type, + purl_type_by_gitlab_scheme=self.purl_type_by_gitlab_scheme, + logger=self.log, + ) - else: - logger.error(f"Unknow package type {gitlab_type!r} in {file_path!r}") - continue - finally: - if self.vcs_response and not _keep_clone: - self.vcs_response.delete() + def clean_downloads(self): + if self.vcs_response: + self.log(f"Removing cloned repository") + self.vcs_response.delete() -def parse_advisory_path(base_path: Path, file_path: Path) -> Optional[AdvisoryData]: +def parse_advisory_path(base_path: Path, file_path: Path) -> Tuple[str, str, str]: """ Parse a gitlab advisory file and return a 3-tuple of: (gitlab_type, package_slug, vulnerability_id) @@ -96,21 +123,21 @@ def parse_advisory_path(base_path: Path, file_path: Path) -> Optional[AdvisoryDa >>> parse_advisory_path(base_path=base_path, file_path=file_path) ('npm', '@express/beego/beego/v2', 'CVE-2021-43831') """ - relative_path_segments = str(file_path.relative_to(base_path)).strip("/").split("/") + relative_path_segments = file_path.relative_to(base_path).parts gitlab_type = relative_path_segments[0] - vuln_id = relative_path_segments[-1].replace(".yml", "") + vuln_id = file_path.stem package_slug = "/".join(relative_path_segments[1:-1]) return gitlab_type, package_slug, vuln_id -def get_purl(package_slug): +def get_purl(package_slug, purl_type_by_gitlab_scheme, logger): """ Return a PackageURL object from a package slug """ parts = [p for p in package_slug.strip("/").split("/") if p] gitlab_scheme = parts[0] - purl_type = PURL_TYPE_BY_GITLAB_SCHEME[gitlab_scheme] + purl_type = purl_type_by_gitlab_scheme[gitlab_scheme] if gitlab_scheme == "go": name = "/".join(parts[1:]) return PackageURL(type=purl_type, namespace=None, name=name) @@ -125,7 +152,7 @@ def get_purl(package_slug): name = parts[-1] namespace = "/".join(parts[1:-1]) return PackageURL(type=purl_type, namespace=namespace, name=name) - logger.error(f"get_purl: package_slug can not be parsed: {package_slug!r}") + logger(f"get_purl: package_slug can not be parsed: {package_slug!r}", level=logging.ERROR) return @@ -140,7 +167,7 @@ def extract_affected_packages( In case of gitlab advisory data we get a list of fixed_versions and a affected_version_range. Since we can not determine which package fixes which range. We store the all the fixed_versions with the same affected_version_range in the advisory. - Later the advisory data is used to be infered in the GitLabBasicImprover. + Later the advisory data is used to be inferred in the GitLabBasicImprover. """ for fixed_version in fixed_versions: yield AffectedPackage( @@ -150,7 +177,9 @@ def extract_affected_packages( ) -def parse_gitlab_advisory(file, base_path): +def parse_gitlab_advisory( + file, base_path, gitlab_scheme_by_purl_type, purl_type_by_gitlab_scheme, logger +): """ Parse a Gitlab advisory file and return an AdvisoryData or None. These files are YAML. There is a JSON schema documented at @@ -177,8 +206,9 @@ def parse_gitlab_advisory(file, base_path): with open(file) as f: gitlab_advisory = saneyaml.load(f) if not isinstance(gitlab_advisory, dict): - logger.error( - f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}" + logger( + f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}", + level=logging.ERROR, ) return @@ -199,9 +229,15 @@ def parse_gitlab_advisory(file, base_path): base_path=base_path, url="https://gitlab.com/gitlab-org/advisories-community/-/blob/main/", ) - purl: PackageURL = get_purl(package_slug=package_slug) + purl: PackageURL = get_purl( + package_slug=package_slug, + purl_type_by_gitlab_scheme=purl_type_by_gitlab_scheme, + logger=logger, + ) if not purl: - logger.error(f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}") + logger( + f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}", level=logging.ERROR + ) return AdvisoryData( aliases=aliases, summary=summary, @@ -214,7 +250,7 @@ def parse_gitlab_advisory(file, base_path): affected_range = gitlab_advisory.get("affected_range") gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist", "conan"]) vrc: VersionRange = RANGE_CLASS_BY_SCHEMES[purl.type] - gitlab_scheme = GITLAB_SCHEME_BY_PURL_TYPE[purl.type] + gitlab_scheme = gitlab_scheme_by_purl_type[purl.type] try: if affected_range: if gitlab_scheme in gitlab_native_schemes: @@ -224,8 +260,9 @@ def parse_gitlab_advisory(file, base_path): else: affected_version_range = vrc.from_native(affected_range) except Exception as e: - logger.error( - f"parse_yaml_file: affected_range is not parsable: {affected_range!r} type:{purl.type!r} error: {e!r}\n {traceback.format_exc()}" + logger( + f"parse_yaml_file: affected_range is not parsable: {affected_range!r} for: {purl!s} error: {e!r}\n {traceback.format_exc()}", + level=logging.ERROR, ) parsed_fixed_versions = [] @@ -234,8 +271,9 @@ def parse_gitlab_advisory(file, base_path): fixed_version = vrc.version_class(fixed_version) parsed_fixed_versions.append(fixed_version) except Exception as e: - logger.error( - f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}" + logger( + f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}", + level=logging.ERROR, ) if parsed_fixed_versions: diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index 4b264481c..1ad5be2fc 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -90,7 +90,7 @@ def import_advisory( if not vulnerability: if logger: - logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.WARNING) + logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.ERROR) return for ref in advisory_data.references: diff --git a/vulnerabilities/tests/test_gitlab.py b/vulnerabilities/tests/pipelines/test_gitlab_importer_pipeline.py similarity index 66% rename from vulnerabilities/tests/test_gitlab.py rename to vulnerabilities/tests/pipelines/test_gitlab_importer_pipeline.py index bc2bfcaea..d10413a8b 100644 --- a/vulnerabilities/tests/test_gitlab.py +++ b/vulnerabilities/tests/pipelines/test_gitlab_importer_pipeline.py @@ -8,27 +8,34 @@ # import json -import os from pathlib import Path from unittest import mock import pytest from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importers.gitlab import parse_gitlab_advisory from vulnerabilities.improvers.default import DefaultImprover from vulnerabilities.improvers.valid_versions import GitLabBasicImprover +from vulnerabilities.pipelines import gitlab_importer from vulnerabilities.tests import util_tests +from vulnerabilities.tests.pipelines import TestLogger -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -TEST_DATA = os.path.join(BASE_DIR, "test_data", "gitlab") +TEST_DATA = Path(__file__).parent.parent / "test_data" / "gitlab" @pytest.mark.parametrize("pkg_type", ["maven", "nuget", "gem", "composer", "pypi", "npm"]) def test_parse_yaml_file(pkg_type): - response_file = os.path.join(TEST_DATA, f"{pkg_type}.yaml") - expected_file = os.path.join(TEST_DATA, f"{pkg_type}-expected.json") - advisory = parse_gitlab_advisory(Path(response_file), Path(response_file).parent) + response_file = TEST_DATA / f"{pkg_type}.yaml" + expected_file = TEST_DATA / f"{pkg_type}-expected.json" + test_pipeline = gitlab_importer.GitLabImporterPipeline() + logger = TestLogger() + advisory = gitlab_importer.parse_gitlab_advisory( + response_file, + response_file.parent, + test_pipeline.gitlab_scheme_by_purl_type, + test_pipeline.purl_type_by_gitlab_scheme, + logger.write, + ) util_tests.check_results_against_json(advisory.to_dict(), expected_file) @@ -45,27 +52,11 @@ def valid_versions(pkg_type): "9.1.6", "10.0.0", ], - "gem": [ - "4.2.0.beta1", - "4.2.0.beta2", - "4.2.0.beta3", - ], - "golang": [ - "3.7.0", - "3.7.1", - ], + "gem": ["4.2.0.beta1", "4.2.0.beta2", "4.2.0.beta3"], + "golang": ["3.7.0", "3.7.1"], "nuget": ["1.11.0", "1.11.1", "1.11.2", "1.09.1"], - "npm": [ - "2.14.2", - "2.13.2", - "2.11.2", - ], - "pypi": [ - "1.0", - "0.9", - "0.8", - "1.1", - ], + "npm": ["2.14.2", "2.13.2", "2.11.2"], + "pypi": ["1.0", "0.9", "0.8", "1.1"], "composer": [], } return valid_versions_by_package_type[pkg_type] @@ -74,9 +65,9 @@ def valid_versions(pkg_type): @mock.patch("vulnerabilities.improvers.valid_versions.GitLabBasicImprover.get_package_versions") @pytest.mark.parametrize("pkg_type", ["maven", "nuget", "gem", "composer", "pypi", "npm"]) def test_gitlab_improver(mock_response, pkg_type): - advisory_file = os.path.join(TEST_DATA, f"{pkg_type}-expected.json") - expected_file = os.path.join(TEST_DATA, f"{pkg_type}-improver-expected.json") - with open(advisory_file) as exp: + advisory_file = TEST_DATA / f"{pkg_type}-expected.json" + expected_file = TEST_DATA / f"{pkg_type}-improver-expected.json" + with advisory_file.open() as exp: advisory = AdvisoryData.from_dict(json.load(exp)) mock_response.return_value = list(valid_versions(pkg_type)) improvers = [GitLabBasicImprover(), DefaultImprover()] diff --git a/vulnerabilities/tests/test_data_source.py b/vulnerabilities/tests/test_data_source.py index 50f31caaf..5351bedf7 100644 --- a/vulnerabilities/tests/test_data_source.py +++ b/vulnerabilities/tests/test_data_source.py @@ -22,7 +22,6 @@ from vulnerabilities.importers.fireeye import FireyeImporter from vulnerabilities.importers.gentoo import GentooImporter from vulnerabilities.importers.github_osv import GithubOSVImporter -from vulnerabilities.importers.gitlab import GitLabAPIImporter from vulnerabilities.importers.istio import IstioImporter from vulnerabilities.importers.mozilla import MozillaImporter from vulnerabilities.importers.npm import NpmImporter @@ -118,7 +117,6 @@ def test_git_importer(mock_clone): ElixirSecurityImporter, FireyeImporter, GentooImporter, - GitLabAPIImporter, IstioImporter, MozillaImporter, NpmImporter,