Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate GitLab importer to aboutcode pipeline #1580

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from vulnerabilities.importers import gentoo
from vulnerabilities.importers import github
from vulnerabilities.importers import github_osv
from vulnerabilities.importers import gitlab
from vulnerabilities.importers import istio
from vulnerabilities.importers import mozilla
from vulnerabilities.importers import nginx
Expand All @@ -39,12 +38,12 @@
from vulnerabilities.importers import ubuntu_usn
from vulnerabilities.importers import vulnrichment
from vulnerabilities.importers import xen
from vulnerabilities.pipelines import gitlab_importer
from vulnerabilities.pipelines import pypa_importer

IMPORTERS_REGISTRY = [
nvd.NVDImporter,
github.GitHubAPIImporter,
gitlab.GitLabAPIImporter,
npm.NpmImporter,
nginx.NginxImporter,
pysec.PyPIImporter,
Expand Down Expand Up @@ -75,6 +74,7 @@
epss.EPSSImporter,
vulnrichment.VulnrichImporter,
pypa_importer.PyPaImporterPipeline,
gitlab_importer.GitLabImporterPipeline,
]

IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}
4 changes: 2 additions & 2 deletions vulnerabilities/improvers/valid_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
from vulnerabilities.importers.elixir_security import ElixirSecurityImporter
from vulnerabilities.importers.github import GitHubAPIImporter
from vulnerabilities.importers.github_osv import GithubOSVImporter
from vulnerabilities.importers.gitlab import GitLabAPIImporter
from vulnerabilities.importers.istio import IstioImporter
from vulnerabilities.importers.nginx import NginxImporter
from vulnerabilities.importers.npm import NpmImporter
Expand All @@ -44,6 +43,7 @@
from vulnerabilities.improver import Improver
from vulnerabilities.improver import Inference
from vulnerabilities.models import Advisory
from vulnerabilities.pipelines.gitlab_importer import GitLabImporterPipeline
from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage
from vulnerabilities.utils import clean_nginx_git_tag
from vulnerabilities.utils import get_affected_packages_by_patched_package
Expand Down Expand Up @@ -363,7 +363,7 @@ class DebianBasicImprover(ValidVersionImprover):


class GitLabBasicImprover(ValidVersionImprover):
importer = GitLabAPIImporter
importer = GitLabImporterPipeline
ignorable_versions = []


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
from pathlib import Path
from typing import Iterable
from typing import List
from typing import Optional
from typing import Tuple

import pytz
import saneyaml
from dateutil import parser as dateparser
from fetchcode.vcs import fetch_via_vcs
from packageurl import PackageURL
from univers.version_range import RANGE_CLASS_BY_SCHEMES
from univers.version_range import VersionRange
Expand All @@ -25,58 +26,84 @@

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Importer
from vulnerabilities.importer import Reference
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.utils import build_description
from vulnerabilities.utils import get_advisory_url
from vulnerabilities.utils import get_cwe_id

logger = logging.getLogger(__name__)

PURL_TYPE_BY_GITLAB_SCHEME = {
"conan": "conan",
"gem": "gem",
# Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742
# "go": "golang",
"maven": "maven",
"npm": "npm",
"nuget": "nuget",
"packagist": "composer",
"pypi": "pypi",
}

GITLAB_SCHEME_BY_PURL_TYPE = {v: k for k, v in PURL_TYPE_BY_GITLAB_SCHEME.items()}


class GitLabAPIImporter(Importer):
class GitLabImporterPipeline(VulnerableCodeBaseImporterPipeline):
spdx_license_expression = "MIT"
license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE"
importer_name = "GitLab Importer"
repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/"

def advisory_data(self, _keep_clone=False) -> Iterable[AdvisoryData]:
try:
self.clone(repo_url=self.repo_url)
base_path = Path(self.vcs_response.dest_dir)
@classmethod
def steps(cls):
    """
    Return the ordered tuple of steps run by this pipeline: clone the
    repository, collect and store advisories, import new advisories, and
    finally clean the downloads.
    """
    pipeline_steps = (
        cls.clone,
        cls.collect_and_store_advisories,
        cls.import_new_advisories,
        cls.clean_downloads,
    )
    return pipeline_steps

for file_path in base_path.glob("**/*.yml"):
gitlab_type, package_slug, vuln_id = parse_advisory_path(
base_path=base_path,
file_path=file_path,
)
purl_type_by_gitlab_scheme = {
"conan": "conan",
"gem": "gem",
# "go" stays disabled until go package names can be parsed, see https://github.com/nexB/vulnerablecode/issues/742
# "go": "golang",
"maven": "maven",
"npm": "npm",
"nuget": "nuget",
"packagist": "composer",
"pypi": "pypi",
}

gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()}

def clone(self):
    """
    Fetch the repository at ``repo_url`` and keep the VCS response on the
    pipeline as ``vcs_response`` for the following steps.
    """
    repo_url = self.repo_url
    self.log(f"Cloning `{repo_url}`")
    self.vcs_response = fetch_via_vcs(repo_url)

def advisories_count(self):
    """
    Return the number of advisory YAML files in the cloned repository.

    Only files that ``collect_advisories`` would process are counted:
    ``*.yml`` files sitting directly in the repository root are ignored, as
    are files whose top-level directory is not a supported GitLab package
    type (a key of ``purl_type_by_gitlab_scheme``).
    """
    root = Path(self.vcs_response.dest_dir)
    supported_types = self.purl_type_by_gitlab_scheme
    return sum(
        1
        for yml_path in root.rglob("*.yml")
        # Root-level YAML files are repository metadata, not advisories.
        if yml_path.parent != root
        # The first path segment below the root is the GitLab package type.
        and yml_path.relative_to(root).parts[0] in supported_types
    )

def collect_advisories(self) -> Iterable[AdvisoryData]:
base_path = Path(self.vcs_response.dest_dir)

for file_path in base_path.rglob("*.yml"):
if file_path.parent == base_path:
continue

gitlab_type, _, _ = parse_advisory_path(
base_path=base_path,
file_path=file_path,
)

if gitlab_type in PURL_TYPE_BY_GITLAB_SCHEME:
yield parse_gitlab_advisory(file=file_path, base_path=base_path)
if gitlab_type not in self.purl_type_by_gitlab_scheme:
# self.log(
# f"Unknown package type {gitlab_type!r} in {file_path!r}",
# level=logging.ERROR,
# )
continue

yield parse_gitlab_advisory(
file=file_path,
base_path=base_path,
gitlab_scheme_by_purl_type=self.gitlab_scheme_by_purl_type,
purl_type_by_gitlab_scheme=self.purl_type_by_gitlab_scheme,
logger=self.log,
)

else:
logger.error(f"Unknow package type {gitlab_type!r} in {file_path!r}")
continue
finally:
if self.vcs_response and not _keep_clone:
self.vcs_response.delete()
def clean_downloads(self):
    """
    Delete the cloned repository, if there is one.

    Does nothing when ``vcs_response`` was never set (for example when the
    clone did not complete) or is empty, so this cleanup can always be
    called safely.
    """
    # ``vcs_response`` is only assigned by ``clone``; do not assume it exists.
    vcs_response = getattr(self, "vcs_response", None)
    if not vcs_response:
        return

    self.log("Removing cloned repository")
    vcs_response.delete()


def parse_advisory_path(base_path: Path, file_path: Path) -> Optional[AdvisoryData]:
def parse_advisory_path(base_path: Path, file_path: Path) -> Tuple[str, str, str]:
"""
Parse a gitlab advisory file path and return a 3-tuple of:
(gitlab_type, package_slug, vulnerability_id)
Expand All @@ -96,21 +123,21 @@ def parse_advisory_path(base_path: Path, file_path: Path) -> Optional[AdvisoryDa
>>> parse_advisory_path(base_path=base_path, file_path=file_path)
('npm', '@express/beego/beego/v2', 'CVE-2021-43831')
"""
relative_path_segments = str(file_path.relative_to(base_path)).strip("/").split("/")
relative_path_segments = file_path.relative_to(base_path).parts
gitlab_type = relative_path_segments[0]
vuln_id = relative_path_segments[-1].replace(".yml", "")
vuln_id = file_path.stem
package_slug = "/".join(relative_path_segments[1:-1])

return gitlab_type, package_slug, vuln_id


def get_purl(package_slug):
def get_purl(package_slug, purl_type_by_gitlab_scheme, logger):
"""
Return a PackageURL object from a package slug
"""
parts = [p for p in package_slug.strip("/").split("/") if p]
gitlab_scheme = parts[0]
purl_type = PURL_TYPE_BY_GITLAB_SCHEME[gitlab_scheme]
purl_type = purl_type_by_gitlab_scheme[gitlab_scheme]
if gitlab_scheme == "go":
name = "/".join(parts[1:])
return PackageURL(type=purl_type, namespace=None, name=name)
Expand All @@ -125,7 +152,7 @@ def get_purl(package_slug):
name = parts[-1]
namespace = "/".join(parts[1:-1])
return PackageURL(type=purl_type, namespace=namespace, name=name)
logger.error(f"get_purl: package_slug can not be parsed: {package_slug!r}")
logger(f"get_purl: package_slug can not be parsed: {package_slug!r}", level=logging.ERROR)
return


Expand All @@ -140,7 +167,7 @@ def extract_affected_packages(
In case of gitlab advisory data we get a list of fixed_versions and a affected_version_range.
Since we can not determine which package fixes which range.
We store the all the fixed_versions with the same affected_version_range in the advisory.
Later the advisory data is used to be infered in the GitLabBasicImprover.
Later the advisory data is used to be inferred in the GitLabBasicImprover.
"""
for fixed_version in fixed_versions:
yield AffectedPackage(
Expand All @@ -150,7 +177,9 @@ def extract_affected_packages(
)


def parse_gitlab_advisory(file, base_path):
def parse_gitlab_advisory(
file, base_path, gitlab_scheme_by_purl_type, purl_type_by_gitlab_scheme, logger
):
"""
Parse a Gitlab advisory file and return an AdvisoryData or None.
These files are YAML. There is a JSON schema documented at
Expand All @@ -177,8 +206,9 @@ def parse_gitlab_advisory(file, base_path):
with open(file) as f:
gitlab_advisory = saneyaml.load(f)
if not isinstance(gitlab_advisory, dict):
logger.error(
f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}"
logger(
f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}",
level=logging.ERROR,
)
return

Expand All @@ -199,9 +229,15 @@ def parse_gitlab_advisory(file, base_path):
base_path=base_path,
url="https://gitlab.com/gitlab-org/advisories-community/-/blob/main/",
)
purl: PackageURL = get_purl(package_slug=package_slug)
purl: PackageURL = get_purl(
package_slug=package_slug,
purl_type_by_gitlab_scheme=purl_type_by_gitlab_scheme,
logger=logger,
)
if not purl:
logger.error(f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}")
logger(
f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}", level=logging.ERROR
)
return AdvisoryData(
aliases=aliases,
summary=summary,
Expand All @@ -214,7 +250,7 @@ def parse_gitlab_advisory(file, base_path):
affected_range = gitlab_advisory.get("affected_range")
gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist", "conan"])
vrc: VersionRange = RANGE_CLASS_BY_SCHEMES[purl.type]
gitlab_scheme = GITLAB_SCHEME_BY_PURL_TYPE[purl.type]
gitlab_scheme = gitlab_scheme_by_purl_type[purl.type]
try:
if affected_range:
if gitlab_scheme in gitlab_native_schemes:
Expand All @@ -224,8 +260,9 @@ def parse_gitlab_advisory(file, base_path):
else:
affected_version_range = vrc.from_native(affected_range)
except Exception as e:
logger.error(
f"parse_yaml_file: affected_range is not parsable: {affected_range!r} type:{purl.type!r} error: {e!r}\n {traceback.format_exc()}"
logger(
f"parse_yaml_file: affected_range is not parsable: {affected_range!r} for: {purl!s} error: {e!r}\n {traceback.format_exc()}",
level=logging.ERROR,
)

parsed_fixed_versions = []
Expand All @@ -234,8 +271,9 @@ def parse_gitlab_advisory(file, base_path):
fixed_version = vrc.version_class(fixed_version)
parsed_fixed_versions.append(fixed_version)
except Exception as e:
logger.error(
f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}"
logger(
f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}",
level=logging.ERROR,
)

if parsed_fixed_versions:
Expand Down
2 changes: 1 addition & 1 deletion vulnerabilities/pipes/advisory.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def import_advisory(

if not vulnerability:
if logger:
logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.WARNING)
logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.ERROR)
return

for ref in advisory_data.references:
Expand Down
Loading
Loading