Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add commands to help with license compliance #11

Merged
merged 10 commits into from
Aug 13, 2024
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ jobs:
- uses: actions/checkout@v4
- uses: ./.github/actions/poetrybuild
- name: Lint with pylint
run: poetry run pylint complassist/
run: poetry run pylint --disable=fixme complassist/

formatting:
runs-on: ubuntu-22.04
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ SPDX-License-Identifier: Apache-2.0
- **SBOM Enrichment**: Enhance an existing SBOM with detailed licensing and copyright information using ClearlyDefined data.
- **SBOM Parsing**: Extract specific information from a CycloneDX SBOM.
- **License and Copyright Information Retrieval**: Fetch licensing and copyright details for a single package from ClearlyDefined.
- **License compliance support**: Extract and unify the licenses found in an SBOM, and suggest possible outbound license candidates.

Some of these features are made possible by excellent programs such as [flict](https://github.com/vinland-technology/flict) and [cdxgen](https://github.com/CycloneDX/cdxgen).

## Requirements

Expand Down Expand Up @@ -110,6 +112,7 @@ For each command, you can get detailed options, e.g. `compliance-assistant sbom-
* Enrich an SBOM with ClearlyDefined data: `compliance-assistant sbom-enrich -f /tmp/my-sbom.json -o /tmp/my-enriched-sbom.json`
* Extract certain data from an SBOM: `compliance-assistant sbom-parse -f /tmp/my-enriched-sbom.json -e purl,copyright,name`
* Gather ClearlyDefined licensing/copyright information for one package: `compliance-assistant clearlydefined -p pkg:pypi/[email protected]`
* Get outbound license candidates based on the licenses found in an SBOM: `compliance-assistant licensing outbound -f /tmp/my-enriched-sbom.json`

### Run as GitHub workflow

Expand Down
54 changes: 46 additions & 8 deletions complassist/_flict.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,62 @@

# We need to run flict as a subprocess because using it as a library is too complicated
def _run_flict(
command: str, *arguments, options: list | None = None, warn_on_error: bool = True
) -> str:
command: str,
*arguments,
options: list | None = None,
warn_on_error: bool = True,
) -> tuple[int, str, str]:
"""
Run flict with a command (e.g. 'verify') and a list of arguments
(e.g. '-il', 'GPL-2.0-only', '-ol', 'MIT'), and a list of general options (e.g. ["-ip"])
Return output as str
Return: exit code, stdout, stderr
"""
if options is None:
options = []
cmd = ["flict", *options, command, *arguments]
logging.debug("Running flict: %s", cmd)
ret = subprocess.run(cmd, capture_output=True, check=False)
if ret.returncode != 0:
code = ret.returncode
stderr = ret.stderr.decode("UTF-8").strip()
stdout = ret.stdout.decode("UTF-8").strip()
if code != 0:
# If only warning requested, only log error, return normal output
if warn_on_error:
logging.warning("flict exited with an error (%s): %s", ret.returncode, ret.stderr)
logging.warning(
"flict exited with an error (%s): %s",
code,
stderr,
)

return ret.stdout.decode("UTF-8").strip()
return code, stdout, stderr


def flict_simplify(expression: str, output_format: str) -> str:
def flict_simplify(expression: str, output_format: str, no_relicensing: bool = True) -> str:
"""Simplify a license expression using flict"""
return _run_flict("simplify", expression, options=["-of", output_format])
options = ["-of", output_format]
if no_relicensing:
options.append("-nr")
_, simplified, _ = _run_flict("simplify", expression, options=options)

logging.debug("Simplified '%s' to '%s' using flict", expression, simplified)

return simplified


def flict_simplify_list(expressions: list[str]) -> list[str]:
    """Simplify a list of license expressions

    Each expression is simplified on its own via flict (text output format).
    Identical results are collapsed into one entry.

    Returns the unique simplified expressions in sorted order, so the result
    is stable across runs.
    """
    unique_simplified = {flict_simplify(lic, output_format="text") for lic in expressions}

    # Sort explicitly: a bare list(set(...)) has an arbitrary, hash-dependent order
    return sorted(unique_simplified)


def flict_outbound_candidate(expression: str, output_format: str) -> str:
    """Ask flict for possible outbound license candidates of an expression

    Only flict's stdout is returned; exit code and stderr are discarded here.
    """
    # TODO: `-el` would make this command more helpful but it has an error:
    # https://github.com/vinland-technology/flict/issues/391
    flict_options = ["-nr", "-of", output_format]
    _code, candidates, _stderr = _run_flict(
        "outbound-candidate", expression, options=flict_options
    )
    return candidates
110 changes: 110 additions & 0 deletions complassist/_licensing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# SPDX-FileCopyrightText: 2024 DB Systel GmbH
#
# SPDX-License-Identifier: Apache-2.0

"""Open Source License Compliance helpers"""

import logging

from license_expression import ExpressionError, Licensing, get_spdx_licensing

from ._flict import flict_outbound_candidate, flict_simplify, flict_simplify_list
from ._sbom_parse import extract_items_from_cdx_sbom


def _extract_license_expression_and_names_from_sbom(
    sbom_path: str, use_flict: bool = False
) -> tuple[list[str], list[str]]:
    """Extract all SPDX expressions and license names from an SBOM

    Walks the `licenses-short` data of every item extracted from the SBOM. An
    entry contributes its `expression` if it has a non-empty one, otherwise
    the `name` of its `license` dict (if any).

    Returns a tuple (expressions, names); both lists are unique and sorted.
    """
    lic_expressions: set[str] = set()
    lic_names: set[str] = set()

    for item in extract_items_from_cdx_sbom(
        sbom_path, information=["name", "purl", "licenses-short"], use_flict=use_flict
    ):
        licenses_short: list[dict] = item.get("licenses-short", [])

        for entry in licenses_short:
            if lic_expression := entry.get("expression", ""):
                lic_expressions.add(lic_expression)
            # No expression available: use the license name instead
            elif lic_name := entry.get("license", {}).get("name", ""):
                lic_names.add(lic_name)

    expressions = sorted(lic_expressions)
    # If using flict, simplify these found licenses. Will reduce possible
    # duplicates and fix problematic SPDX expressions (e.g. MPL-2.0+)
    # That's far more performant than doing that for each license in the SBOM
    if use_flict:
        # Unify and sort again: simplification may merge entries, and the
        # returned order is not relied upon here
        expressions = sorted(set(flict_simplify_list(expressions)))

    return expressions, sorted(lic_names)


def list_all_licenses(sbom_path: str, use_flict: bool = False) -> list[str]:
    """Return every license detected in an SBOM as one unique, sorted list

    SPDX expressions and plain license names are merged into a single list.
    """
    expressions, names = _extract_license_expression_and_names_from_sbom(sbom_path, use_flict)

    # Merge both kinds of entries; the set removes entries present in both
    merged = {*expressions, *names}
    return sorted(merged)


def _validate_spdx_licenses(licenses: list[str]) -> list[str]:
    """Filter a list of licenses down to those that parse as valid SPDX.

    Entries that fail validation are logged as errors and left out of the
    returned list; the input order of the remaining entries is preserved."""
    accepted: list[str] = []
    licensing: Licensing = get_spdx_licensing()

    for candidate in licenses:
        try:
            licensing.parse(candidate, validate=True)
        except ExpressionError as exc:
            logging.error(
                "The license expression/name '%s' found in the given SBOM is no valid SPDX "
                "expression. Therefore, it cannot be taken into consideration for the evaluation. "
                "Error message: %s",
                candidate,
                exc,
            )
        else:
            accepted.append(candidate)

    return accepted


def _craft_single_spdx_expression(licenses: list[str]):
"""Convert multiple SPDX licenses and expressions into one large expression"""
# Put all licenses into brackets
licenses = [f"({lic})" for lic in licenses]

return " AND ".join(licenses)


def get_outbound_candidate(sbom_path: str, simplify: bool = True) -> dict[str, str | list[str]]:
    """Get license outbound candidates from an SBOM

    Steps: list all licenses of the SBOM, keep only those that are valid SPDX,
    combine them into one AND-joined expression, optionally simplify that
    expression with flict, and ask flict for outbound candidates.

    Returns a dict with the keys:
      - "licenses_in_sbom": all licenses/names detected in the SBOM
      - "considered_licenses": the subset that passed SPDX validation
      - "checked_expression": the expression handed to flict
      - "outbound_candidate": flict's text output

    If no license passes validation, flict is not invoked and the last two
    values are empty strings.
    """
    logging.info("Extracting, simplifying and validating found licenses. This can take a while")
    licenses_in_sbom = list_all_licenses(sbom_path, use_flict=simplify)

    # Check whether all licenses are valid SPDX expressions
    licenses = _validate_spdx_licenses(licenses_in_sbom)

    # Without any valid license the crafted expression would be empty, and
    # there is nothing meaningful flict could simplify or evaluate
    if not licenses:
        logging.warning(
            "No valid SPDX license found in the SBOM, cannot calculate outbound candidates"
        )
        return {
            "licenses_in_sbom": licenses_in_sbom,
            "considered_licenses": licenses,
            "checked_expression": "",
            "outbound_candidate": "",
        }

    # Combine single licenses into one large SPDX license expression
    expression = _craft_single_spdx_expression(licenses)
    if simplify:
        logging.debug("Simplify crafted license expression %s", expression)
        expression = flict_simplify(expression, output_format="text")
        logging.debug("Simplified licenses expression: %s", expression)

    # Get outbound candidate
    logging.info("Calculating possible outbound candidates")
    outbound_candidate: str = flict_outbound_candidate(expression, output_format="text")

    return {
        "licenses_in_sbom": licenses_in_sbom,
        "considered_licenses": licenses,
        "checked_expression": expression,
        "outbound_candidate": outbound_candidate,
    }
2 changes: 1 addition & 1 deletion complassist/_sbom_enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def _enrich_component_with_cd_data(component: dict) -> None:
"""
# Get purl, original licenses, and short/simplified licenses data from component
raw_data = extract_items_from_component(
component, ["purl", "licenses", "licenses-short", "copyright"], True
component, ["purl", "licenses", "licenses-short", "copyright"], use_flict=True
)
# Put raw data into separate variables, slightly adapted
purl = raw_data["purl"]
Expand Down
13 changes: 7 additions & 6 deletions complassist/_sbom_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from ._helpers import read_json_file


def _simplify_licenses_data(licenses_data: list[dict], use_flict: bool = True) -> list[dict]:
"""Simplify a list of license ids/expressions/names to a single string,
def _unify_licenses_data(licenses_data: list[dict], use_flict: bool = True) -> list[dict]:
"""Convert a list of license ids/expressions/names to a single string,
either an expression or a name"""

# Case 1: no data
Expand Down Expand Up @@ -92,15 +92,16 @@ def _shorten_cdx_licenses_item(licenses: list, use_flict: bool = True) -> list:
licdata,
)

simplified_license_data = _simplify_licenses_data(collection, use_flict=use_flict)
simplified_license_data = _unify_licenses_data(collection, use_flict=use_flict)
return _license_short_to_valid_cdx_item(simplified_license_data)


def extract_items_from_component(component: dict, items: list, use_flict: bool) -> dict:
"""Extract certain items from a single component of a CycloneDX SBOM"""
logging.debug(
"Handling component: purl = %s, name = %s", component.get("purl"), component.get("name")
)
# Very noisy logging, disabled
# logging.debug(
# "Handling component: purl = %s, name = %s", component.get("purl"), component.get("name")
# )
extraction = {}
# Loop requested data points for extraction
for item in items:
Expand Down
Loading