diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b845d26..274f0d1 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -63,7 +63,7 @@ jobs: - uses: actions/checkout@v4 - uses: ./.github/actions/poetrybuild - name: Lint with pylint - run: poetry run pylint complassist/ + run: poetry run pylint --disable=fixme complassist/ formatting: runs-on: ubuntu-22.04 diff --git a/README.md b/README.md index 154e4a7..5939499 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,9 @@ SPDX-License-Identifier: Apache-2.0 - **SBOM Enrichment**: Enhance an existing SBOM with detailed licensing and copyright information using ClearlyDefined data. - **SBOM Parsing**: Extract specific information from a CycloneDX SBOM. - **License and Copyright Information Retrieval**: Fetch licensing and copyright details for a single package from ClearlyDefined. +- **License Compliance Support**: Extract and unify licenses from an SBOM, and suggest possible outbound license candidates. +Some of these features are made possible by excellent programs such as [flict](https://github.com/vinland-technology/flict) and [cdxgen](https://github.com/CycloneDX/cdxgen). ## Requirements @@ -110,6 +112,7 @@ For each command, you can get detailed options, e.g. 
`compliance-assistant sbom- * Enrich an SBOM with ClearlyDefined data: `compliance-assistant sbom-enrich -f /tmp/my-sbom.json -o /tmp/my-enriched-sbom.json` * Extract certain data from an SBOM: `compliance-assistant sbom-parse -f /tmp/my-enriched-sbom.json -e purl,copyright,name` * Gather ClearlyDefined licensing/copyright information for one package: `compliance-assistant clearlydefined -p pkg:pypi/inwx-dns-recordmaster@0.3.1` +* Get license outbound candidate based on licenses from SBOM: `compliance-assistant licensing outbound -f /tmp/my-enriched-sbom.json` ### Run as GitHub workflow diff --git a/complassist/_flict.py b/complassist/_flict.py index 177ce31..fab8e75 100644 --- a/complassist/_flict.py +++ b/complassist/_flict.py @@ -10,24 +10,62 @@ # We need to run flict as subprocess as usage as library is too complicated def _run_flict( - command: str, *arguments, options: list | None = None, warn_on_error: bool = True -) -> str: + command: str, + *arguments, + options: list | None = None, + warn_on_error: bool = True, +) -> tuple[int, str, str]: """ Run flict with a command (e.g. 'verify') and a list of arguments (e.g. '-il', 'GPL-2.0-only', '-ol', 'MIT'), and a list of general options (e.g. 
["-ip"]) - Return output as str + Return: exit code, stdout, stderr """ if options is None: options = [] cmd = ["flict", *options, command, *arguments] + logging.debug("Running flict: %s", cmd) ret = subprocess.run(cmd, capture_output=True, check=False) - if ret.returncode != 0: + code = ret.returncode + stderr = ret.stderr.decode("UTF-8").strip() + stdout = ret.stdout.decode("UTF-8").strip() + if code != 0: + # If only warning requested, only log error, return normal output if warn_on_error: - logging.warning("flict exited with an error (%s): %s", ret.returncode, ret.stderr) + logging.warning( + "flict exited with an error (%s): %s", + code, + stderr, + ) - return ret.stdout.decode("UTF-8").strip() + return code, stdout, stderr -def flict_simplify(expression: str, output_format: str) -> str: +def flict_simplify(expression: str, output_format: str, no_relicensing: bool = True) -> str: """Simplify a license expression using flict""" - return _run_flict("simplify", expression, options=["-of", output_format]) + options = ["-of", output_format] + if no_relicensing: + options.append("-nr") + _, simplified, _ = _run_flict("simplify", expression, options=options) + + logging.debug("Simplified '%s' to '%s' using flict", expression, simplified) + + return simplified + + +def flict_simplify_list(expressions: list[str]) -> list[str]: + """Simplify a list of license expressions""" + simplified = [] + for lic in expressions: + simplified.append(flict_simplify(lic, output_format="text")) + + return list(set(simplified)) + + +def flict_outbound_candidate(expression: str, output_format: str) -> str: + """Get possible outbound license candidates using flict""" + # TODO: `-el` would make this command more helpful but it has an error: + # https://github.com/vinland-technology/flict/issues/391 + _, outbound_candidate, _ = _run_flict( + "outbound-candidate", expression, options=["-nr", "-of", output_format] + ) + return outbound_candidate diff --git a/complassist/_licensing.py 
# SPDX-FileCopyrightText: 2024 DB Systel GmbH
#
# SPDX-License-Identifier: Apache-2.0

"""Open Source License Compliance helpers"""

import logging

from license_expression import ExpressionError, Licensing, get_spdx_licensing

from ._flict import flict_outbound_candidate, flict_simplify, flict_simplify_list
from ._sbom_parse import extract_items_from_cdx_sbom


def _extract_license_expression_and_names_from_sbom(
    sbom_path: str, use_flict: bool = False
) -> tuple[list[str], list[str]]:
    """Extract all SPDX expressions and license names from an SBOM

    Args:
        sbom_path: path of the CycloneDX SBOM handed to the SBOM parser
        use_flict: simplify the found expressions using flict

    Return: unique and sorted expressions, unique and sorted license names
    """
    lic_expressions: set[str] = set()
    lic_names: set[str] = set()

    for item in extract_items_from_cdx_sbom(
        sbom_path, information=["name", "purl", "licenses-short"], use_flict=use_flict
    ):
        licenses_short: list[dict] = item.get("licenses-short", [])

        for entry in licenses_short:
            if lic_expression := entry.get("expression", ""):
                lic_expressions.add(lic_expression)
            # Use license name instead
            else:
                lic_dict: dict = entry.get("license", {})
                if lic_name := lic_dict.get("name", ""):
                    lic_names.add(lic_name)

    # Make expressions and names unique, and sort them
    expressions = sorted(lic_expressions)
    # If using flict, simplify these found licenses. Will reduce possible
    # duplicates and fix problematic SPDX expressions (e.g. MPL-2.0+)
    # That's far more performant than doing that for each license in the SBOM
    if use_flict:
        # Sort and unify again, simplification may merge or reorder entries
        expressions = sorted(set(flict_simplify_list(expressions)))
    names = sorted(lic_names)

    return expressions, names


def list_all_licenses(sbom_path: str, use_flict: bool = False) -> list[str]:
    """List all detected licenses of an SBOM, unified and sorted

    Args:
        sbom_path: path of the CycloneDX SBOM
        use_flict: simplify the found expressions using flict

    Return: SPDX expressions and license names combined in one sorted list
    """
    expressions, names = _extract_license_expression_and_names_from_sbom(sbom_path, use_flict)

    # Combine both SPDX expressions and names, sort and unify again
    return sorted(set(expressions + names))


def _validate_spdx_licenses(licenses: list[str]) -> list[str]:
    """Check a list of licenses for whether they are valid SPDX. Only return
    valid licenses, warn on bad expression"""
    valid_licenses: list[str] = []
    spdx: Licensing = get_spdx_licensing()

    for lic in licenses:
        # A blank entry does not make the parser raise, but it would end up
        # as "()" in the combined expression. Skip it explicitly.
        if not lic or not lic.strip():
            logging.debug("Skipping empty license entry")
            continue
        try:
            spdx.parse(lic, validate=True)
        except ExpressionError as exc:
            logging.error(
                "The license expression/name '%s' found in the given SBOM is no valid SPDX "
                "expression. Therefore, it cannot be taken into consideration for the evaluation. "
                "Error message: %s",
                lic,
                exc,
            )
        else:
            valid_licenses.append(lic)

    return valid_licenses


def _craft_single_spdx_expression(licenses: list[str]) -> str:
    """Convert multiple SPDX licenses and expressions into one large expression.
    Returns an empty string for an empty list"""
    # Put all licenses into brackets so that the operators inside the single
    # expressions cannot mix with the joining AND
    return " AND ".join(f"({lic})" for lic in licenses)


def get_outbound_candidate(sbom_path: str, simplify: bool = True) -> dict[str, str | list[str]]:
    """Get license outbound candidates from an SBOM

    Args:
        sbom_path: path of the CycloneDX SBOM
        simplify: simplify the found licenses and the crafted expression
            using flict

    Return: dict with the keys "licenses_in_sbom" (all found licenses),
    "considered_licenses" (those which are valid SPDX), "checked_expression"
    (expression handed to flict) and "outbound_candidate" (output of flict)
    """
    logging.info("Extracting, simplifying and validating found licenses. This can take a while")
    licenses_in_sbom = list_all_licenses(sbom_path, use_flict=simplify)

    # Check whether all licenses are valid SPDX expressions
    licenses = _validate_spdx_licenses(licenses_in_sbom)

    # Without any valid license there is nothing flict could evaluate
    if not licenses:
        logging.warning(
            "No valid SPDX license expression found in the given SBOM, "
            "cannot calculate outbound candidates"
        )
        return {
            "licenses_in_sbom": licenses_in_sbom,
            "considered_licenses": licenses,
            "checked_expression": "",
            "outbound_candidate": "",
        }

    # Combine single licenses into one large SPDX license expression
    expression = _craft_single_spdx_expression(licenses)
    if simplify:
        logging.debug("Simplify crafted license expression %s", expression)
        expression = flict_simplify(expression, output_format="text")
        logging.debug("Simplified licenses expression: %s", expression)

    # Get outbound candidate
    logging.info("Calculating possible outbound candidates")
    outbound_candidate: str = flict_outbound_candidate(expression, output_format="text")

    return {
        "licenses_in_sbom": licenses_in_sbom,
        "considered_licenses": licenses,
        "checked_expression": expression,
        "outbound_candidate": outbound_candidate,
    }
a list of license ids/expressions/names to a single string, either an expression or a name""" # Case 1: no data @@ -92,15 +92,16 @@ def _shorten_cdx_licenses_item(licenses: list, use_flict: bool = True) -> list: licdata, ) - simplified_license_data = _simplify_licenses_data(collection, use_flict=use_flict) + simplified_license_data = _unify_licenses_data(collection, use_flict=use_flict) return _license_short_to_valid_cdx_item(simplified_license_data) def extract_items_from_component(component: dict, items: list, use_flict: bool) -> dict: """Extract certain items from a single component of a CycloneDX SBOM""" - logging.debug( - "Handling component: purl = %s, name = %s", component.get("purl"), component.get("name") - ) + # Very noisy logging, disabled + # logging.debug( + # "Handling component: purl = %s, name = %s", component.get("purl"), component.get("name") + # ) extraction = {} # Loop requested data points for extraction for item in items: diff --git a/complassist/main.py b/complassist/main.py index 8f1a024..109f309 100644 --- a/complassist/main.py +++ b/complassist/main.py @@ -19,6 +19,7 @@ purl_to_cd_coordinates, ) from ._helpers import dict_to_json +from ._licensing import get_outbound_candidate, list_all_licenses from ._sbom_enrich import enrich_sbom_with_clearlydefined from ._sbom_generate import generate_cdx_sbom from ._sbom_parse import extract_items_from_cdx_sbom @@ -89,6 +90,11 @@ choices=["json", "dict", "none"], help="Desired output format.", ) +parser_sbom_read.add_argument( + "--no-simplify", + help="Do not simplify SPDX license expression using flict. 
May increase speed", + action="store_true", +) # ClearlyDefined parser_cd = subparsers.add_parser( @@ -119,6 +125,65 @@ ), ) +# License Compliance +parser_licensing = subparsers.add_parser( + "licensing", + help="Help with checking and reaching Open Source license compliance", +) +licensing_subparser = parser_licensing.add_subparsers( + dest="licensing_command", + help="Available licensing commands", +) + +# List licenses +licensing_list = licensing_subparser.add_parser( + "list", + help="List all detected licenses", +) +licensing_list.add_argument( + "-f", + "--file", + help="Path to the CycloneDX SBOM (JSON format) from which licenses are read", + required=True, +) +licensing_list.add_argument( + "-o", + "--output", + default="json", + choices=["json", "dict", "plain", "none"], + help="Desired output format.", +) +licensing_list.add_argument( + "--no-simplify", + help="Do not simplify SPDX license expression using flict. May increase speed", + action="store_true", +) + +# License outbound candidate +licensing_outbound = licensing_subparser.add_parser( + "outbound", + help="Suggest possible outbound licenses based on found licenses in an SBOM", +) +licensing_outbound.add_argument( + "-f", + "--file", + help="Path to the CycloneDX SBOM (JSON format) from which licenses are read", + required=True, +) +licensing_outbound.add_argument( + "-o", + "--output", + default="json", + choices=["json", "dict", "plain", "none"], + help="Desired output format. json and dict contain the most helpful output", +) +licensing_outbound.add_argument( + "--no-simplify", + help="Do not simplify SPDX license expression using flict. 
May increase speed", + action="store_true", +) + + # General flags parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output") parser.add_argument("--version", action="version", version="%(prog)s " + __version__) @@ -136,7 +201,7 @@ def configure_logger(args) -> logging.Logger: return log -def main(): +def main(): # pylint: disable=too-many-branches, too-many-statements """Main function""" args = parser.parse_args() @@ -144,6 +209,9 @@ def main(): # Set logger configure_logger(args=args) + # Debug arguments + logging.debug(args) + # Generate SBOM with cdxgen if args.command == "sbom-generate": generate_cdx_sbom(args.directory, args.output) @@ -156,7 +224,9 @@ def main(): elif args.command == "sbom-parse": # Convert comma-separated information to list info = args.extract.split(",") - extraction = extract_items_from_cdx_sbom(args.file, information=info, use_flict=True) + extraction = extract_items_from_cdx_sbom( + args.file, information=info, use_flict=not args.no_simplify + ) if args.output == "json": print(dict_to_json(extraction)) elif args.output == "dict": @@ -177,6 +247,38 @@ def main(): print_clearlydefined_result(get_clearlydefined_license_and_copyright(coordinates)) + # License compliance commands + elif args.command == "licensing": + # List all detected licenses in an SBOM, unified and sorted + if args.licensing_command == "list": + all_licenses = list_all_licenses(sbom_path=args.file, use_flict=not args.no_simplify) + if args.output == "json": + print(dict_to_json(all_licenses)) + elif args.output == "dict": + print(all_licenses) + elif args.output == "plain": + print("\n".join(all_licenses)) + elif args.output == "none": + pass + + # Suggest possible outbound licenses based on detected licenses in an SBOM + elif args.licensing_command == "outbound": + outbound_candidates = get_outbound_candidate( + sbom_path=args.file, simplify=not args.no_simplify + ) + if args.output == "json": + print(dict_to_json(outbound_candidates)) + elif 
args.output == "dict": + print(outbound_candidates) + elif args.output == "plain": + print("\n".join(outbound_candidates.get("outbound_candidate"))) + elif args.output == "none": + pass + + # No subcommand given, show help + else: + parser_licensing.print_help() + else: logging.critical("No valid command provided!") sys.exit(1) diff --git a/poetry.lock b/poetry.lock index f877944..6eda923 100644 --- a/poetry.lock +++ b/poetry.lock @@ -837,4 +837,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "944a254eca682828e6e5b20204fd8dc66b48328e56024727a512ddd2c0fc90e6" +content-hash = "6f2eedd2daa6338a0e07076c7bd3f48f148d9bc03fec7af6d8c15daef4e90690" diff --git a/pyproject.toml b/pyproject.toml index 48fef37..df9b53d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ packageurl-python = "^0.15.1" requests = "^2.32.3" flict = "^1.2.14" docker = "^7.1.0" +license-expression = "^30.3.0" [tool.poetry.group.dev.dependencies]