From 222be97e33d070730ab380175fcbf378451a76ca Mon Sep 17 00:00:00 2001 From: klmcadams <58492561+klmcadams@users.noreply.github.com> Date: Wed, 31 Jul 2024 17:20:28 -0400 Subject: [PATCH 01/18] lint specific file(s) --- .pre-commit-hooks.yaml | 2 +- src/reuse/_main.py | 9 +++++++ src/reuse/lint_file.py | 61 ++++++++++++++++++++++++++++++++++++++++++ src/reuse/project.py | 34 +++++++++++++++++++++++ src/reuse/report.py | 12 +++++++-- 5 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 src/reuse/lint_file.py diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 2dd0cccb..d1878041 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -5,7 +5,7 @@ - id: reuse name: reuse entry: reuse - args: ["lint"] + args: ["lint", "lint-file"] language: python pass_filenames: false description: diff --git a/src/reuse/_main.py b/src/reuse/_main.py index fec1f731..7c69129f 100644 --- a/src/reuse/_main.py +++ b/src/reuse/_main.py @@ -23,6 +23,7 @@ convert_dep5, download, lint, + lint_file, spdx, supported_licenses, ) @@ -173,6 +174,14 @@ def parser() -> argparse.ArgumentParser: ), ) + add_command( + subparsers, + "lint-file", + lint_file.add_arguments, + lint_file.run, + help=_("list non-compliant files from specified list of files"), + ) + add_command( subparsers, "spdx", diff --git a/src/reuse/lint_file.py b/src/reuse/lint_file.py new file mode 100644 index 00000000..ce63f418 --- /dev/null +++ b/src/reuse/lint_file.py @@ -0,0 +1,61 @@ +# SPDX-FileCopyrightText: 2024 Kerry McAdams +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Linting specific files happens here. The linting here is nothing more than +reading the reports and printing some conclusions. 
+""" + +import sys +from argparse import ArgumentParser, Namespace +from gettext import gettext as _ +from typing import IO + +from .lint import format_json, format_lines, format_plain +from .project import Project +from .report import ProjectReport + + +def add_arguments(parser: ArgumentParser) -> None: + """Add arguments to parser.""" + mutex_group = parser.add_mutually_exclusive_group() + mutex_group.add_argument( + "-q", "--quiet", action="store_true", help=_("prevents output") + ) + mutex_group.add_argument( + "-j", "--json", action="store_true", help=_("formats output as JSON") + ) + mutex_group.add_argument( + "-p", + "--plain", + action="store_true", + help=_("formats output as plain text"), + ) + mutex_group.add_argument( + "-l", + "--lines", + action="store_true", + help=_("formats output as errors per line"), + ) + parser.add_argument("files", nargs="*") + + +def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: + """List all non-compliant files from specified file list.""" + report = ProjectReport.generate( + project, + do_checksum=False, + file_list=args.files, + multiprocessing=not args.no_multiprocessing, + ) + + if args.quiet: + pass + elif args.json: + out.write(format_json(report)) + elif args.lines: + out.write(format_lines(report)) + else: + out.write(format_plain(report)) + + return 0 if report.is_compliant else 1 diff --git a/src/reuse/project.py b/src/reuse/project.py index 9fc8b48d..a793c286 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -157,6 +157,40 @@ def from_directory( return project + def specific_files( + self, files: Optional[List], directory: Optional[StrPath] = None + ) -> Iterator[Path]: + """Yield all files in the specified file list within a directory. + + The files that are not yielded are: + + - Files ignored by VCS (e.g., see .gitignore) + + - Files matching IGNORE_*_PATTERNS. + """ + if directory is None: + directory = self.root + directory = Path(directory) + + # Filter files. 
+ for file_ in files: + the_file = directory / file_ + if self._is_path_ignored(the_file): + _LOGGER.debug("ignoring '%s'", the_file) + continue + if the_file.is_symlink(): + _LOGGER.debug("skipping symlink '%s'", the_file) + continue + # Suppressing this error because I simply don't want to deal + # with that here. + with contextlib.suppress(OSError): + if the_file.stat().st_size == 0: + _LOGGER.debug("skipping 0-sized file '%s'", the_file) + continue + + _LOGGER.debug("yielding '%s'", the_file) + yield the_file + def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]: """Yield all files in *directory* and its subdirectories. diff --git a/src/reuse/report.py b/src/reuse/report.py index 4c7eec31..71b86ee0 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -103,7 +103,7 @@ class _MultiprocessingResult(NamedTuple): class ProjectReport: # pylint: disable=too-many-instance-attributes """Object that holds linting report about the project.""" - def __init__(self, do_checksum: bool = True): + def __init__(self, do_checksum: bool = True, file_list: list = list[Any]): self.path: StrPath = "" self.licenses: Dict[str, Path] = {} self.missing_licenses: Dict[str, Set[Path]] = {} @@ -114,6 +114,7 @@ def __init__(self, do_checksum: bool = True): self.licenses_without_extension: Dict[str, Path] = {} self.do_checksum = do_checksum + self.file_list = file_list self._unused_licenses: Optional[Set[str]] = None self._used_licenses: Optional[Set[str]] = None @@ -276,6 +277,7 @@ def generate( cls, project: Project, do_checksum: bool = True, + file_list: list = list[Any], multiprocessing: bool = cpu_count() > 1, # type: ignore add_license_concluded: bool = False, ) -> "ProjectReport": @@ -298,7 +300,13 @@ def generate( ) pool.join() else: - results = map(container, project.all_files()) + # Search specific file list if files are provided with + # `reuse lint-file`. 
Otherwise, lint all files + results = ( + map(container, project.specific_files(file_list)) + if file_list + else map(container, project.all_files()) + ) for result in results: if result.error: From d6aca8568aeddf74a696ece06567eb232cbed1cb Mon Sep 17 00:00:00 2001 From: Sebastien Morais Date: Thu, 1 Aug 2024 11:53:10 +0200 Subject: [PATCH 02/18] fix: Methods signature --- src/reuse/report.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reuse/report.py b/src/reuse/report.py index 71b86ee0..f1a06c50 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -103,7 +103,7 @@ class _MultiprocessingResult(NamedTuple): class ProjectReport: # pylint: disable=too-many-instance-attributes """Object that holds linting report about the project.""" - def __init__(self, do_checksum: bool = True, file_list: list = list[Any]): + def __init__(self, do_checksum: bool = True, file_list: list[Any] = None): self.path: StrPath = "" self.licenses: Dict[str, Path] = {} self.missing_licenses: Dict[str, Set[Path]] = {} @@ -277,7 +277,7 @@ def generate( cls, project: Project, do_checksum: bool = True, - file_list: list = list[Any], + file_list: list[Any] = None, multiprocessing: bool = cpu_count() > 1, # type: ignore add_license_concluded: bool = False, ) -> "ProjectReport": From d5e4b8e9b4ed0b567c246fc6523f50ce660b40a1 Mon Sep 17 00:00:00 2001 From: Sebastien Morais Date: Thu, 1 Aug 2024 11:54:31 +0200 Subject: [PATCH 03/18] refactor: Iterating over files --- src/reuse/report.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/reuse/report.py b/src/reuse/report.py index f1a06c50..a05096c4 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -293,20 +293,17 @@ def generate( project, do_checksum, add_license_concluded ) + # Iterate over specific file list if files are provided with + # `reuse lint-file`. Otherwise, lint all files. 
+ iter_files = project.specific_files(file_list) if file_list else project.all_files() if multiprocessing: with mp.Pool() as pool: results: Iterable[_MultiprocessingResult] = pool.map( - container, project.all_files() + container, iter_files ) pool.join() else: - # Search specific file list if files are provided with - # `reuse lint-file`. Otherwise, lint all files - results = ( - map(container, project.specific_files(file_list)) - if file_list - else map(container, project.all_files()) - ) + results = (map(container, iter_files)) for result in results: if result.error: From 39f0f52ea267186f2839bf16664b9e05a41bd1de Mon Sep 17 00:00:00 2001 From: Sebastien Morais Date: Thu, 1 Aug 2024 12:19:14 +0200 Subject: [PATCH 04/18] fix: Methods signature --- src/reuse/report.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reuse/report.py b/src/reuse/report.py index a05096c4..8edfe30c 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -103,7 +103,7 @@ class _MultiprocessingResult(NamedTuple): class ProjectReport: # pylint: disable=too-many-instance-attributes """Object that holds linting report about the project.""" - def __init__(self, do_checksum: bool = True, file_list: list[Any] = None): + def __init__(self, do_checksum: bool = True, file_list: Optional[List[str]] = None): self.path: StrPath = "" self.licenses: Dict[str, Path] = {} self.missing_licenses: Dict[str, Set[Path]] = {} @@ -277,7 +277,7 @@ def generate( cls, project: Project, do_checksum: bool = True, - file_list: list[Any] = None, + file_list: Optional[List[str]] = None, multiprocessing: bool = cpu_count() > 1, # type: ignore add_license_concluded: bool = False, ) -> "ProjectReport": From 701fafb9cbcbfd5d50ffe2659214f72faaede2c8 Mon Sep 17 00:00:00 2001 From: Sebastien Morais Date: Thu, 1 Aug 2024 12:32:21 +0200 Subject: [PATCH 05/18] tests: Check lint-file subcommand --- tests/test_lint.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_lint.py 
b/tests/test_lint.py index 6ca93f9f..131e1a0d 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -270,5 +270,18 @@ def test_lint_lines_read_errors(fake_repository): assert "restricted.py" in result assert "read error" in result +def test_lint_specific_files(fake_repository): + """Check lint-file subcommand.""" + (fake_repository / "foo.py").write_text("foo") + (fake_repository / "bar.py").write_text("bar") + + project = Project.from_directory(fake_repository) + report = ProjectReport.generate(project, file_list=["foo.py"]) + result = format_plain(report) + + assert ":-(" in result + assert "# UNUSED LICENSES" in result + assert "bar.py" not in result + # REUSE-IgnoreEnd From 7f86cc8c556ae243d81691528d6e4ef6aee936a9 Mon Sep 17 00:00:00 2001 From: klmcadams <58492561+klmcadams@users.noreply.github.com> Date: Thu, 1 Aug 2024 15:44:26 -0400 Subject: [PATCH 06/18] add copyright lines & adjust code to work with pre-commit --- src/reuse/_main.py | 1 + src/reuse/project.py | 34 ++++++++++++++++++---------------- src/reuse/report.py | 14 +++++++++++--- tests/test_lint.py | 5 +++-- 4 files changed, 33 insertions(+), 21 deletions(-) diff --git a/src/reuse/_main.py b/src/reuse/_main.py index 7c69129f..0327d85e 100644 --- a/src/reuse/_main.py +++ b/src/reuse/_main.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2024 Carmen Bianca BAKKER # SPDX-FileCopyrightText: © 2020 Liferay, Inc. 
+# SPDX-FileCopyrightText: 2024 Kerry McAdams # # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/src/reuse/project.py b/src/reuse/project.py index a793c286..d1e5d687 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -3,6 +3,7 @@ # SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER # SPDX-FileCopyrightText: 2023 Matthias Riße # SPDX-FileCopyrightText: 2023 DB Systel GmbH +# SPDX-FileCopyrightText: 2024 Kerry McAdams # # SPDX-License-Identifier: GPL-3.0-or-later @@ -172,24 +173,25 @@ def specific_files( directory = self.root directory = Path(directory) - # Filter files. - for file_ in files: - the_file = directory / file_ - if self._is_path_ignored(the_file): - _LOGGER.debug("ignoring '%s'", the_file) - continue - if the_file.is_symlink(): - _LOGGER.debug("skipping symlink '%s'", the_file) - continue - # Suppressing this error because I simply don't want to deal - # with that here. - with contextlib.suppress(OSError): - if the_file.stat().st_size == 0: - _LOGGER.debug("skipping 0-sized file '%s'", the_file) + if files is not None: + # Filter files. + for file_ in files: + the_file = directory / file_ + if self._is_path_ignored(the_file): + _LOGGER.debug("ignoring '%s'", the_file) + continue + if the_file.is_symlink(): + _LOGGER.debug("skipping symlink '%s'", the_file) continue + # Suppressing this error because I simply don't want to deal + # with that here. + with contextlib.suppress(OSError): + if the_file.stat().st_size == 0: + _LOGGER.debug("skipping 0-sized file '%s'", the_file) + continue - _LOGGER.debug("yielding '%s'", the_file) - yield the_file + _LOGGER.debug("yielding '%s'", the_file) + yield the_file def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]: """Yield all files in *directory* and its subdirectories. 
diff --git a/src/reuse/report.py b/src/reuse/report.py index 8edfe30c..8a93457a 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -3,6 +3,8 @@ # SPDX-FileCopyrightText: 2022 Pietro Albini # SPDX-FileCopyrightText: 2023 DB Systel GmbH # SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER +# SPDX-FileCopyrightText: 2024 Kerry McAdams +# SPDX-FileCopyrightText: 2024 Sebastien Morais # # SPDX-License-Identifier: GPL-3.0-or-later @@ -103,7 +105,9 @@ class _MultiprocessingResult(NamedTuple): class ProjectReport: # pylint: disable=too-many-instance-attributes """Object that holds linting report about the project.""" - def __init__(self, do_checksum: bool = True, file_list: Optional[List[str]] = None): + def __init__( + self, do_checksum: bool = True, file_list: Optional[List[str]] = None + ): self.path: StrPath = "" self.licenses: Dict[str, Path] = {} self.missing_licenses: Dict[str, Set[Path]] = {} @@ -295,7 +299,11 @@ def generate( # Iterate over specific file list if files are provided with # `reuse lint-file`. Otherwise, lint all files. - iter_files = project.specific_files(file_list) if file_list else project.all_files() + iter_files = ( + project.specific_files(file_list) + if file_list + else project.all_files() + ) if multiprocessing: with mp.Pool() as pool: results: Iterable[_MultiprocessingResult] = pool.map( @@ -303,7 +311,7 @@ def generate( ) pool.join() else: - results = (map(container, iter_files)) + results = map(container, iter_files) for result in results: if result.error: diff --git a/tests/test_lint.py b/tests/test_lint.py index 131e1a0d..d9b353d6 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -1,6 +1,6 @@ -# SPDX-FileCopyrightText: 2019 Free Software Foundation Europe e.V. 
# SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2024 Nico Rikken +# SPDX-FileCopyrightText: 2024 Sebastien Morais # # SPDX-License-Identifier: GPL-3.0-or-later @@ -270,11 +270,12 @@ def test_lint_lines_read_errors(fake_repository): assert "restricted.py" in result assert "read error" in result + def test_lint_specific_files(fake_repository): """Check lint-file subcommand.""" (fake_repository / "foo.py").write_text("foo") (fake_repository / "bar.py").write_text("bar") - + project = Project.from_directory(fake_repository) report = ProjectReport.generate(project, file_list=["foo.py"]) result = format_plain(report) From 1862aacb6b35a4f5ce7fd134dcfdc298da122fcc Mon Sep 17 00:00:00 2001 From: klmcadams <58492561+klmcadams@users.noreply.github.com> Date: Thu, 1 Aug 2024 16:32:30 -0400 Subject: [PATCH 07/18] updated contact info & added files for PR --- AUTHORS.rst | 2 + changelog.d/added/lint-file.md | 1 + docs/man/reuse-lint-file.rst | 105 +++++++++++++++++++++++++++++++++ src/reuse/_main.py | 2 +- src/reuse/lint_file.py | 2 +- src/reuse/project.py | 2 +- src/reuse/report.py | 4 +- tests/test_lint.py | 4 +- 8 files changed, 115 insertions(+), 7 deletions(-) create mode 100644 changelog.d/added/lint-file.md create mode 100644 docs/man/reuse-lint-file.rst diff --git a/AUTHORS.rst b/AUTHORS.rst index 1a40a48b..7b46b576 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -120,6 +120,7 @@ Contributors - Jon Burdo - Josef Andersson - José Vieira +- Kerry McAdams - Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> - Lars Francke - Libor Pechacek @@ -139,6 +140,7 @@ Contributors - Romain Tartière - Ryan Schmidt - Sebastian Crane +- Sebastien Morais - T. E. Kalaycı - Vishesh Handa - Vlad-Stefan Harbuz diff --git a/changelog.d/added/lint-file.md b/changelog.d/added/lint-file.md new file mode 100644 index 00000000..a349d03c --- /dev/null +++ b/changelog.d/added/lint-file.md @@ -0,0 +1 @@ +- Add lint-file subcommand to enable running lint on specific files. 
diff --git a/docs/man/reuse-lint-file.rst b/docs/man/reuse-lint-file.rst new file mode 100644 index 00000000..f6030c64 --- /dev/null +++ b/docs/man/reuse-lint-file.rst @@ -0,0 +1,105 @@ +.. + SPDX-FileCopyrightText: 2019 Free Software Foundation Europe e.V. + SPDX-FileCopyrightText: © 2020 Liferay, Inc. + + SPDX-License-Identifier: CC-BY-SA-4.0 + +reuse-lint-file +================ + +Synopsis +-------- + +**reuse lint-file** [*options*] + +Description +----------- + +:program:`reuse-lint-file` verifies whether a file in a project is compliant with the REUSE +Specification located at `<https://reuse.software/spec>`_. + +Criteria +-------- + +These are the criteria that the linter checks against. + +Bad licenses +~~~~~~~~~~~~ + +Licenses that are found in ``LICENSES/`` that are not found in the SPDX License +List or do not start with ``LicenseRef-`` are bad licenses. + +Deprecated licenses +~~~~~~~~~~~~~~~~~~~ + +Licenses whose SPDX License Identifier has been deprecated by SPDX. + +Licenses without file extension +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These are licenses whose file names are a valid SPDX License Identifier, but +which do not have a file extension. + +Missing licenses +~~~~~~~~~~~~~~~~ + +A license which is referred to in a comment header, but which is not found in +the ``LICENSES/`` directory. + +Unused licenses +~~~~~~~~~~~~~~~ + +A license found in the ``LICENSES/`` directory, but which is not referred to in +any comment header. + +Read errors +~~~~~~~~~~~ + +Not technically a criterion, but files that cannot be read by the operating +system are read errors, and need to be fixed. + +Files without copyright and license information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Every file needs to have copyright and licensing information associated with it. +The REUSE Specification details several ways of doing it. By and large, these +are the methods: + +- Placing tags in the header of the file. +- Placing tags in a ``.license`` file adjacent to the file. 
+- Putting the information in the ``REUSE.toml`` file. +- Putting the information in the ``.reuse/dep5`` file. (Deprecated) + +If a file is found that does not have copyright and/or license information +associated with it, then the project is not compliant. + +Options +------- + +.. option:: <file> + + File(s) that are linted. For example, ``reuse lint-file src/reuse/lint_file.py src/reuse/download.py``. + +.. option:: -q, --quiet + + Do not print anything to STDOUT. + +.. + TODO: specify the JSON output. + +.. option:: -j, --json + + Output the results of the lint as JSON. + +.. option:: -p, --plain + + Output the results of the lint as descriptive text. The text is valid + Markdown. + +.. option:: -l, --lines + + Output one line per error, prefixed by the file path. + +.. option:: -h, --help + + Display help and exit. diff --git a/src/reuse/_main.py b/src/reuse/_main.py index 0327d85e..6e52b474 100644 --- a/src/reuse/_main.py +++ b/src/reuse/_main.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2024 Carmen Bianca BAKKER # SPDX-FileCopyrightText: © 2020 Liferay, Inc. 
-# SPDX-FileCopyrightText: 2024 Kerry McAdams +# SPDX-FileCopyrightText: 2024 Kerry McAdams # # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/src/reuse/lint_file.py b/src/reuse/lint_file.py index ce63f418..00307697 100644 --- a/src/reuse/lint_file.py +++ b/src/reuse/lint_file.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2024 Kerry McAdams +# SPDX-FileCopyrightText: 2024 Kerry McAdams # # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/src/reuse/project.py b/src/reuse/project.py index d1e5d687..a002320a 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -3,7 +3,7 @@ # SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER # SPDX-FileCopyrightText: 2023 Matthias Riße # SPDX-FileCopyrightText: 2023 DB Systel GmbH -# SPDX-FileCopyrightText: 2024 Kerry McAdams +# SPDX-FileCopyrightText: 2024 Kerry McAdams # # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/src/reuse/report.py b/src/reuse/report.py index 8a93457a..7c6288f8 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -3,8 +3,8 @@ # SPDX-FileCopyrightText: 2022 Pietro Albini # SPDX-FileCopyrightText: 2023 DB Systel GmbH # SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER -# SPDX-FileCopyrightText: 2024 Kerry McAdams -# SPDX-FileCopyrightText: 2024 Sebastien Morais +# SPDX-FileCopyrightText: 2024 Kerry McAdams +# SPDX-FileCopyrightText: 2024 Sebastien Morais # # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/tests/test_lint.py b/tests/test_lint.py index d9b353d6..1b89084f 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -1,10 +1,10 @@ # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2024 Nico Rikken -# SPDX-FileCopyrightText: 2024 Sebastien Morais +# SPDX-FileCopyrightText: 2024 Sebastien Morais # # SPDX-License-Identifier: GPL-3.0-or-later -"""All tests for reuse.lint""" +"""All tests for reuse.lint and reuse.lint_files""" import re import shutil From fbef82d5d3d18f1f82e598fd6b7ba6e6ca480420 Mon Sep 17 00:00:00 2001 From: klmcadams 
<58492561+klmcadams@users.noreply.github.com> Date: Fri, 2 Aug 2024 10:42:52 -0400 Subject: [PATCH 08/18] fix pylint failure --- src/reuse/report.py | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/src/reuse/report.py b/src/reuse/report.py index 7c6288f8..36440847 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -277,22 +277,15 @@ def bill_of_materials( return out.getvalue() @classmethod - def generate( + def get_lint_results( cls, project: Project, do_checksum: bool = True, file_list: Optional[List[str]] = None, multiprocessing: bool = cpu_count() > 1, # type: ignore add_license_concluded: bool = False, - ) -> "ProjectReport": - """Generate a ProjectReport from a Project.""" - project_report = cls(do_checksum=do_checksum) - project_report.path = project.root - project_report.licenses = project.licenses - project_report.licenses_without_extension = ( - project.licenses_without_extension - ) - + ) -> list | Iterable[_MultiprocessingResult]: + """Get lint results based on multiprocessing and file_list.""" container = _MultiprocessingContainer( project, do_checksum, add_license_concluded ) @@ -313,6 +306,33 @@ def generate( else: results = map(container, iter_files) + return results + + @classmethod + def generate( + cls, + project: Project, + do_checksum: bool = True, + file_list: Optional[List[str]] = None, + multiprocessing: bool = cpu_count() > 1, # type: ignore + add_license_concluded: bool = False, + ) -> "ProjectReport": + """Generate a ProjectReport from a Project.""" + project_report = cls(do_checksum=do_checksum) + project_report.path = project.root + project_report.licenses = project.licenses + project_report.licenses_without_extension = ( + project.licenses_without_extension + ) + + results = cls.get_lint_results( + project, + do_checksum, + file_list, + multiprocessing, # type: ignore + add_license_concluded, + ) + for result in results: if result.error: # Facilitate better 
debugging by being able to quit the program. @@ -333,6 +353,7 @@ def generate( ) project_report.read_errors.add(Path(result.path)) continue + file_report = cast(FileReport, result.report) # File report. From d36748fb2abcc2bfb49c3cac74484f8e2f793486 Mon Sep 17 00:00:00 2001 From: klmcadams <58492561+klmcadams@users.noreply.github.com> Date: Fri, 2 Aug 2024 10:49:31 -0400 Subject: [PATCH 09/18] use Union instead of pipe for multiple return types --- src/reuse/report.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/reuse/report.py b/src/reuse/report.py index 36440847..508eb51c 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -21,7 +21,17 @@ from io import StringIO from os import cpu_count from pathlib import Path, PurePath -from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Set, cast +from typing import ( + Any, + Dict, + Iterable, + List, + NamedTuple, + Optional, + Set, + Union, + cast, +) from uuid import uuid4 from . 
import __REUSE_version__, __version__ @@ -284,7 +294,7 @@ def get_lint_results( file_list: Optional[List[str]] = None, multiprocessing: bool = cpu_count() > 1, # type: ignore add_license_concluded: bool = False, - ) -> list | Iterable[_MultiprocessingResult]: + ) -> Union[list, Iterable[_MultiprocessingResult]]: """Get lint results based on multiprocessing and file_list.""" container = _MultiprocessingContainer( project, do_checksum, add_license_concluded From 5bba598c2cc13e0c10bbcb9f02d163ce2c865be2 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Fri, 6 Sep 2024 14:10:07 +0200 Subject: [PATCH 10/18] Move ProjectReport.generate tests into class Signed-off-by: Carmen Bianca BAKKER --- tests/test_report.py | 359 ++++++++++++++++++++++--------------------- 1 file changed, 184 insertions(+), 175 deletions(-) diff --git a/tests/test_report.py b/tests/test_report.py index 084d591e..b1c01ed3 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -260,213 +260,222 @@ def test_generate_file_report_to_dict_lint_source_information( assert expression["value"] == "MIT OR 0BSD" -def test_generate_project_report_simple(fake_repository, multiprocessing): - """Simple generate test, just to see if it sort of works.""" - project = Project.from_directory(fake_repository) - result = ProjectReport.generate(project, multiprocessing=multiprocessing) - - assert not result.bad_licenses - assert not result.licenses_without_extension - assert not result.missing_licenses - assert not result.unused_licenses - assert result.used_licenses - assert not result.read_errors - assert result.file_reports - - -def test_generate_project_report_licenses_without_extension( - fake_repository, multiprocessing -): - """Licenses without extension are detected.""" - (fake_repository / "LICENSES/CC0-1.0.txt").rename( - fake_repository / "LICENSES/CC0-1.0" - ) - - project = Project.from_directory(fake_repository) - result = ProjectReport.generate(project, multiprocessing=multiprocessing) - - 
assert "CC0-1.0" in result.licenses_without_extension - - -def test_generate_project_report_missing_license( - fake_repository, multiprocessing -): - """Missing licenses are detected.""" - (fake_repository / "LICENSES/GPL-3.0-or-later.txt").unlink() - - project = Project.from_directory(fake_repository) - result = ProjectReport.generate(project, multiprocessing=multiprocessing) - - assert "GPL-3.0-or-later" in result.missing_licenses - assert not result.bad_licenses - - -def test_generate_project_report_bad_license(fake_repository, multiprocessing): - """Bad licenses are detected.""" - (fake_repository / "LICENSES/bad.txt").write_text("foo") - - project = Project.from_directory(fake_repository) - result = ProjectReport.generate(project, multiprocessing=multiprocessing) - - assert result.bad_licenses - assert not result.missing_licenses - - -def test_generate_project_report_unused_license( - fake_repository, multiprocessing -): - """Unused licenses are detected.""" - (fake_repository / "LICENSES/MIT.txt").write_text("foo") - - project = Project.from_directory(fake_repository) - result = ProjectReport.generate(project, multiprocessing=multiprocessing) - - assert result.unused_licenses == {"MIT"} - +class TestGenerateProjectReport: + """Tests for ProjectReport.generate.""" + + def test_simple(self, fake_repository, multiprocessing): + """Simple generate test, just to see if it sort of works.""" + project = Project.from_directory(fake_repository) + result = ProjectReport.generate( + project, multiprocessing=multiprocessing + ) -def test_generate_project_report_unused_license_plus( - fake_repository, multiprocessing -): - """Apache-1.0+ is not an unused license if LICENSES/Apache-1.0.txt - exists. 
+ assert not result.bad_licenses + assert not result.licenses_without_extension + assert not result.missing_licenses + assert not result.unused_licenses + assert result.used_licenses + assert not result.read_errors + assert result.file_reports + + def test__licenses_without_extension( + self, fake_repository, multiprocessing + ): + """Licenses without extension are detected.""" + (fake_repository / "LICENSES/CC0-1.0.txt").rename( + fake_repository / "LICENSES/CC0-1.0" + ) - Furthermore, Apache-1.0+ is separately identified as a used license. - """ - (fake_repository / "foo.py").write_text( - "SPDX-License-Identifier: Apache-1.0+" - ) - (fake_repository / "bar.py").write_text( - "SPDX-License-Identifier: Apache-1.0" - ) - (fake_repository / "LICENSES/Apache-1.0.txt").touch() + project = Project.from_directory(fake_repository) + result = ProjectReport.generate( + project, multiprocessing=multiprocessing + ) - project = Project.from_directory(fake_repository) - result = ProjectReport.generate(project, multiprocessing=multiprocessing) + assert "CC0-1.0" in result.licenses_without_extension - assert not result.unused_licenses - assert {"Apache-1.0", "Apache-1.0+"}.issubset(result.used_licenses) + def test_missing_license(self, fake_repository, multiprocessing): + """Missing licenses are detected.""" + (fake_repository / "LICENSES/GPL-3.0-or-later.txt").unlink() + project = Project.from_directory(fake_repository) + result = ProjectReport.generate( + project, multiprocessing=multiprocessing + ) -def test_generate_project_report_unused_license_plus_only_plus( - fake_repository, multiprocessing -): - """If Apache-1.0+ is the only declared license in the project, - LICENSES/Apache-1.0.txt should not be an unused license. 
- """ - (fake_repository / "foo.py").write_text( - "SPDX-License-Identifier: Apache-1.0+" - ) - (fake_repository / "LICENSES/Apache-1.0.txt").touch() + assert "GPL-3.0-or-later" in result.missing_licenses + assert not result.bad_licenses - project = Project.from_directory(fake_repository) - result = ProjectReport.generate(project, multiprocessing=multiprocessing) + def test_bad_license(self, fake_repository, multiprocessing): + """Bad licenses are detected.""" + (fake_repository / "LICENSES/bad.txt").write_text("foo") - assert not result.unused_licenses - assert "Apache-1.0+" in result.used_licenses - assert "Apache-1.0" not in result.used_licenses + project = Project.from_directory(fake_repository) + result = ProjectReport.generate( + project, multiprocessing=multiprocessing + ) + assert result.bad_licenses + assert not result.missing_licenses -def test_generate_project_report_bad_license_in_file( - fake_repository, multiprocessing -): - """Bad licenses in files are detected.""" - (fake_repository / "foo.py").write_text("SPDX-License-Identifier: bad") + def test_unused_license(self, fake_repository, multiprocessing): + """Unused licenses are detected.""" + (fake_repository / "LICENSES/MIT.txt").write_text("foo") - project = Project.from_directory(fake_repository) - result = ProjectReport.generate(project, multiprocessing=multiprocessing) + project = Project.from_directory(fake_repository) + result = ProjectReport.generate( + project, multiprocessing=multiprocessing + ) - assert "bad" in result.bad_licenses + assert result.unused_licenses == {"MIT"} + def test_unused_license_plus(self, fake_repository, multiprocessing): + """Apache-1.0+ is not an unused license if LICENSES/Apache-1.0.txt + exists. 
-def test_generate_project_report_bad_license_can_also_be_missing( - fake_repository, multiprocessing -): - """Bad licenses can also be missing licenses.""" - (fake_repository / "foo.py").write_text("SPDX-License-Identifier: bad") + Furthermore, Apache-1.0+ is separately identified as a used license. + """ + (fake_repository / "foo.py").write_text( + "SPDX-License-Identifier: Apache-1.0+" + ) + (fake_repository / "bar.py").write_text( + "SPDX-License-Identifier: Apache-1.0" + ) + (fake_repository / "LICENSES/Apache-1.0.txt").touch() - project = Project.from_directory(fake_repository) - result = ProjectReport.generate(project, multiprocessing=multiprocessing) + project = Project.from_directory(fake_repository) + result = ProjectReport.generate( + project, multiprocessing=multiprocessing + ) - assert "bad" in result.bad_licenses - assert "bad" in result.missing_licenses + assert not result.unused_licenses + assert {"Apache-1.0", "Apache-1.0+"}.issubset(result.used_licenses) + + def test_unused_license_plus_only_plus( + self, fake_repository, multiprocessing + ): + """If Apache-1.0+ is the only declared license in the project, + LICENSES/Apache-1.0.txt should not be an unused license. 
+ """ + (fake_repository / "foo.py").write_text( + "SPDX-License-Identifier: Apache-1.0+" + ) + (fake_repository / "LICENSES/Apache-1.0.txt").touch() + project = Project.from_directory(fake_repository) + result = ProjectReport.generate( + project, multiprocessing=multiprocessing + ) -def test_generate_project_report_deprecated_license( - fake_repository, multiprocessing -): - """Deprecated licenses are detected.""" - (fake_repository / "LICENSES/GPL-3.0-or-later.txt").rename( - fake_repository / "LICENSES/GPL-3.0.txt" - ) + assert not result.unused_licenses + assert "Apache-1.0+" in result.used_licenses + assert "Apache-1.0" not in result.used_licenses - project = Project.from_directory(fake_repository) - result = ProjectReport.generate(project, multiprocessing=multiprocessing) + def test_bad_license_in_file(self, fake_repository, multiprocessing): + """Bad licenses in files are detected.""" + (fake_repository / "foo.py").write_text("SPDX-License-Identifier: bad") - assert "GPL-3.0" in result.deprecated_licenses + project = Project.from_directory(fake_repository) + result = ProjectReport.generate( + project, multiprocessing=multiprocessing + ) + assert "bad" in result.bad_licenses -@cpython -@posix -def test_generate_project_report_read_error(fake_repository, multiprocessing): - """Files that cannot be read are added to the read error list.""" - (fake_repository / "bad").write_text("foo") - (fake_repository / "bad").chmod(0o000) + def test_bad_license_can_also_be_missing( + self, fake_repository, multiprocessing + ): + """Bad licenses can also be missing licenses.""" + (fake_repository / "foo.py").write_text("SPDX-License-Identifier: bad") - project = Project.from_directory(fake_repository) - result = ProjectReport.generate(project, multiprocessing=multiprocessing) + project = Project.from_directory(fake_repository) + result = ProjectReport.generate( + project, multiprocessing=multiprocessing + ) - # pylint: disable=superfluous-parens - assert (fake_repository / 
"bad") in result.read_errors + assert "bad" in result.bad_licenses + assert "bad" in result.missing_licenses + def test_deprecated_license(self, fake_repository, multiprocessing): + """Deprecated licenses are detected.""" + (fake_repository / "LICENSES/GPL-3.0-or-later.txt").rename( + fake_repository / "LICENSES/GPL-3.0.txt" + ) -def test_generate_project_report_to_dict_lint(fake_repository, multiprocessing): - """Generate dictionary output and verify correct ordering.""" - project = Project.from_directory(fake_repository) - report = ProjectReport.generate(project, multiprocessing=multiprocessing) - result = report.to_dict_lint() + project = Project.from_directory(fake_repository) + result = ProjectReport.generate( + project, multiprocessing=multiprocessing + ) - # Check if the top three keys are at the beginning of the dictionary - assert list(result.keys())[:3] == [ - "lint_version", - "reuse_spec_version", - "reuse_tool_version", - ] + assert "GPL-3.0" in result.deprecated_licenses - # Check if the recommendation key is at the bottom of the dictionary - assert list(result.keys())[-1] == "recommendations" + @cpython + @posix + def test_read_error(self, fake_repository, multiprocessing): + """Files that cannot be read are added to the read error list.""" + (fake_repository / "bad").write_text("foo") + (fake_repository / "bad").chmod(0o000) - # Check if the rest of the keys are sorted alphabetically - assert list(result.keys())[3:-1] == sorted(list(result.keys())[3:-1]) + project = Project.from_directory(fake_repository) + result = ProjectReport.generate( + project, multiprocessing=multiprocessing + ) + # pylint: disable=superfluous-parens + assert (fake_repository / "bad") in result.read_errors -def test_generate_project_partial_info_in_toml( - empty_directory, multiprocessing -): - """Some information is in REUSE.toml, and some is inside of the file.""" - (empty_directory / "REUSE.toml").write_text( - cleandoc( - """ - version = 1 - - [[annotations]] - path = 
"foo.py" - precedence = "closest" - SPDX-FileCopyrightText = "Jane Doe" - # This is ignored because it's in the file! - SPDX-License-Identifier = "MIT" - """ + def test_to_dict_lint(self, fake_repository, multiprocessing): + """Generate dictionary output and verify correct ordering.""" + project = Project.from_directory(fake_repository) + report = ProjectReport.generate( + project, multiprocessing=multiprocessing ) - ) - (empty_directory / "foo.py").write_text("# SPDX-License-Identifier: 0BSD") - project = Project.from_directory(empty_directory) - report = ProjectReport.generate(project, multiprocessing=multiprocessing) - file_report = next( - report for report in report.file_reports if report.path.name == "foo.py" - ) - infos = file_report.reuse_infos - assert len(infos) == 2 - assert file_report.copyright == "Jane Doe" - assert file_report.licenses_in_file == ["0BSD"] + result = report.to_dict_lint() + + # Check if the top three keys are at the beginning of the dictionary + assert list(result.keys())[:3] == [ + "lint_version", + "reuse_spec_version", + "reuse_tool_version", + ] + + # Check if the recommendation key is at the bottom of the dictionary + assert list(result.keys())[-1] == "recommendations" + + # Check if the rest of the keys are sorted alphabetically + assert list(result.keys())[3:-1] == sorted(list(result.keys())[3:-1]) + + def test_partial_info_in_toml(self, empty_directory, multiprocessing): + """Some information is in REUSE.toml, and some is inside of the file.""" + (empty_directory / "REUSE.toml").write_text( + cleandoc( + """ + version = 1 + + [[annotations]] + path = "foo.py" + precedence = "closest" + SPDX-FileCopyrightText = "Jane Doe" + # This is ignored because it's in the file! 
+ SPDX-License-Identifier = "MIT" + """ + ) + ) + (empty_directory / "foo.py").write_text( + "# SPDX-License-Identifier: 0BSD" + ) + project = Project.from_directory(empty_directory) + report = ProjectReport.generate( + project, multiprocessing=multiprocessing + ) + file_report = next( + report + for report in report.file_reports + if report.path.name == "foo.py" + ) + infos = file_report.reuse_infos + assert len(infos) == 2 + assert file_report.copyright == "Jane Doe" + assert file_report.licenses_in_file == ["0BSD"] def test_bill_of_materials(fake_repository, multiprocessing): From f7f65865b70f50bf2625faf391f5a91cd23525c9 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Fri, 6 Sep 2024 15:20:54 +0200 Subject: [PATCH 11/18] Populate files in resources/fake_repository Signed-off-by: Carmen Bianca BAKKER --- tests/conftest.py | 19 ------------------- tests/resources/fake_repository/src/custom.py | 4 +++- .../fake_repository/src/exception.py | 4 +++- .../fake_repository/src/multiple_licenses.rs | 4 +++- 4 files changed, 9 insertions(+), 22 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index d7f2f6a2..7b95763c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -167,25 +167,6 @@ def fake_repository(tmpdir_factory) -> Path: # Get rid of those pesky pyc files. shutil.rmtree(directory / "src/__pycache__", ignore_errors=True) - # Adding this here to avoid conflict in main project. 
- (directory / "src/exception.py").write_text( - "SPDX-FileCopyrightText: 2017 Jane Doe\n" - "SPDX-License-Identifier: GPL-3.0-or-later WITH Autoconf-exception-3.0", - encoding="utf-8", - ) - (directory / "src/custom.py").write_text( - "SPDX-FileCopyrightText: 2017 Jane Doe\n" - "SPDX-License-Identifier: LicenseRef-custom", - encoding="utf-8", - ) - (directory / "src/multiple_licenses.rs").write_text( - "SPDX-FileCopyrightText: 2022 Jane Doe\n" - "SPDX-License-Identifier: GPL-3.0-or-later\n" - "SPDX-License-Identifier: Apache-2.0 OR CC0-1.0" - " WITH Autoconf-exception-3.0\n", - encoding="utf-8", - ) - os.chdir(directory) return directory diff --git a/tests/resources/fake_repository/src/custom.py b/tests/resources/fake_repository/src/custom.py index 9c81a2c2..0f084508 100644 --- a/tests/resources/fake_repository/src/custom.py +++ b/tests/resources/fake_repository/src/custom.py @@ -1 +1,3 @@ -# This file is overridden by the fake_repository fixture. +# SPDX-FileCopyrightText: 2017 Jane Doe +# +# SPDX-License-Identifier: LicenseRef-custom diff --git a/tests/resources/fake_repository/src/exception.py b/tests/resources/fake_repository/src/exception.py index 9c81a2c2..c45980dd 100644 --- a/tests/resources/fake_repository/src/exception.py +++ b/tests/resources/fake_repository/src/exception.py @@ -1 +1,3 @@ -# This file is overridden by the fake_repository fixture. +# SPDX-FileCopyrightText: 2017 Jane Doe +# +# SPDX-License-Identifier: GPL-3.0-or-later WITH Autoconf-exception-3.0 diff --git a/tests/resources/fake_repository/src/multiple_licenses.rs b/tests/resources/fake_repository/src/multiple_licenses.rs index 9768b946..226b3726 100644 --- a/tests/resources/fake_repository/src/multiple_licenses.rs +++ b/tests/resources/fake_repository/src/multiple_licenses.rs @@ -1 +1,3 @@ -// This file is overridden by the fake_repository fixture. 
+// SPDX-FileCopyrightText: 2022 Jane Doe +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-License-Identifier: Apache-2.0 OR CC0-1.0 WITH Autoconf-exception-3.0 From 46e42a167b1e467b172e4765972ad857d1e290ea Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Fri, 6 Sep 2024 11:02:05 +0200 Subject: [PATCH 12/18] Make lint-file work Signed-off-by: Carmen Bianca BAKKER --- .pylintrc | 3 +- src/reuse/{lint_file.py => _lint_file.py} | 38 ++-- src/reuse/_main.py | 6 +- src/reuse/lint.py | 67 ++++--- src/reuse/project.py | 111 ++++++----- src/reuse/report.py | 230 ++++++++++++++++------ tests/test_lint.py | 16 +- tests/test_main.py | 48 +++++ tests/test_project.py | 71 +++++++ tests/test_report.py | 69 ++++++- 10 files changed, 483 insertions(+), 176 deletions(-) rename src/reuse/{lint_file.py => _lint_file.py} (59%) diff --git a/.pylintrc b/.pylintrc index f71bd6ce..5d5ace11 100644 --- a/.pylintrc +++ b/.pylintrc @@ -11,7 +11,8 @@ jobs=0 disable=duplicate-code, logging-fstring-interpolation, - implicit-str-concat + implicit-str-concat, + inconsistent-quotes enable=useless-suppression [REPORTS] diff --git a/src/reuse/lint_file.py b/src/reuse/_lint_file.py similarity index 59% rename from src/reuse/lint_file.py rename to src/reuse/_lint_file.py index 00307697..ceeb0265 100644 --- a/src/reuse/lint_file.py +++ b/src/reuse/_lint_file.py @@ -9,11 +9,13 @@ import sys from argparse import ArgumentParser, Namespace from gettext import gettext as _ +from pathlib import Path from typing import IO -from .lint import format_json, format_lines, format_plain +from ._util import PathType +from .lint import format_lines_subset from .project import Project -from .report import ProjectReport +from .report import ProjectSubsetReport def add_arguments(parser: ArgumentParser) -> None: @@ -22,40 +24,34 @@ def add_arguments(parser: ArgumentParser) -> None: mutex_group.add_argument( "-q", "--quiet", action="store_true", help=_("prevents output") ) - mutex_group.add_argument( - "-j", 
"--json", action="store_true", help=_("formats output as JSON") - ) - mutex_group.add_argument( - "-p", - "--plain", - action="store_true", - help=_("formats output as plain text"), - ) mutex_group.add_argument( "-l", "--lines", action="store_true", - help=_("formats output as errors per line"), + help=_("formats output as errors per line (default)"), ) - parser.add_argument("files", nargs="*") + parser.add_argument("files", action="store", nargs="*", type=PathType("r")) def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: """List all non-compliant files from specified file list.""" - report = ProjectReport.generate( + subset_files = {Path(file_) for file_ in args.files} + for file_ in subset_files: + if not file_.resolve().is_relative_to(project.root.resolve()): + args.parser.error( + _("'{file}' is not inside of '{root}'").format( + file=file_, root=project.root + ) + ) + report = ProjectSubsetReport.generate( project, - do_checksum=False, - file_list=args.files, + subset_files, multiprocessing=not args.no_multiprocessing, ) if args.quiet: pass - elif args.json: - out.write(format_json(report)) - elif args.lines: - out.write(format_lines(report)) else: - out.write(format_plain(report)) + out.write(format_lines_subset(report)) return 0 if report.is_compliant else 1 diff --git a/src/reuse/_main.py b/src/reuse/_main.py index 6e52b474..37d13305 100644 --- a/src/reuse/_main.py +++ b/src/reuse/_main.py @@ -21,10 +21,10 @@ __REUSE_version__, __version__, _annotate, + _lint_file, convert_dep5, download, lint, - lint_file, spdx, supported_licenses, ) @@ -178,8 +178,8 @@ def parser() -> argparse.ArgumentParser: add_command( subparsers, "lint-file", - lint_file.add_arguments, - lint_file.run, + _lint_file.add_arguments, + _lint_file.run, help=_("list non-compliant files from specified list of files"), ) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 257d41b9..97277edf 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -20,7 +20,7 @@ 
from . import __REUSE_version__ from .project import Project -from .report import ProjectReport +from .report import ProjectReport, ProjectReportSubsetProtocol def add_arguments(parser: ArgumentParser) -> None: @@ -36,7 +36,7 @@ def add_arguments(parser: ArgumentParser) -> None: "-p", "--plain", action="store_true", - help=_("formats output as plain text"), + help=_("formats output as plain text (default)"), ) mutex_group.add_argument( "-l", @@ -264,13 +264,43 @@ def custom_serializer(obj: Any) -> Any: ) +def format_lines_subset(report: ProjectReportSubsetProtocol) -> str: + """Formats a subset of a report, namely missing licenses, read errors, files + without licenses, and files without copyright. + + Args: + report: A populated report. + """ + output = StringIO() + + # Missing licenses + for lic, files in sorted(report.missing_licenses.items()): + for path in sorted(files): + output.write( + _("{path}: missing license {lic}\n").format(path=path, lic=lic) + ) + + # Read errors + for path in sorted(report.read_errors): + output.write(_("{path}: read error\n").format(path=path)) + + # Without licenses + for path in report.files_without_licenses: + output.write(_("{path}: no license identifier\n").format(path=path)) + + # Without copyright + for path in report.files_without_copyright: + output.write(_("{path}: no copyright notice\n").format(path=path)) + + return output.getvalue() + + def format_lines(report: ProjectReport) -> str: - """Formats data dictionary as plaintext strings to be printed to sys.stdout - Sorting of output is not guaranteed. - Symbolic links can result in multiple entries per file. + """Formats report as plaintext strings to be printed to sys.stdout. Sorting + of output is not guaranteed. Args: - report: ProjectReport data + report: A populated report. 
Returns: String (in plaintext) that can be output to sys.stdout @@ -281,6 +311,7 @@ def license_path(lic: str) -> Optional[Path]: """Resolve a license identifier to a license path.""" return report.licenses.get(lic) + subset_output = "" if not report.is_compliant: # Bad licenses for lic, files in sorted(report.bad_licenses.items()): @@ -312,28 +343,10 @@ def license_path(lic: str) -> Optional[Path]: _("{lic_path}: unused license\n").format(lic_path=lic_path) ) - # Missing licenses - for lic, files in sorted(report.missing_licenses.items()): - for path in sorted(files): - output.write( - _("{path}: missing license {lic}\n").format( - path=path, lic=lic - ) - ) - - # Read errors - for path in sorted(report.read_errors): - output.write(_("{path}: read error\n").format(path=path)) - - # Without licenses - for path in report.files_without_licenses: - output.write(_("{path}: no license identifier\n").format(path=path)) - - # Without copyright - for path in report.files_without_copyright: - output.write(_("{path}: no copyright notice\n").format(path=path)) + # Everything else. 
+ subset_output = format_lines_subset(report) - return output.getvalue() + return output.getvalue() + subset_output def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: diff --git a/src/reuse/project.py b/src/reuse/project.py index a002320a..0585f85a 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -18,7 +18,18 @@ from collections import defaultdict from gettext import gettext as _ from pathlib import Path -from typing import DefaultDict, Dict, Iterator, List, NamedTuple, Optional, Type +from typing import ( + Collection, + DefaultDict, + Dict, + Iterator, + List, + NamedTuple, + Optional, + Set, + Type, + cast, +) from binaryornot.check import is_binary @@ -158,53 +169,19 @@ def from_directory( return project - def specific_files( - self, files: Optional[List], directory: Optional[StrPath] = None + def _iter_files( + self, + directory: Optional[StrPath] = None, + subset_files: Optional[Collection[StrPath]] = None, ) -> Iterator[Path]: - """Yield all files in the specified file list within a directory. - - The files that are not yielded are: - - - Files ignored by VCS (e.g., see .gitignore) - - - Files matching IGNORE_*_PATTERNS. - """ - if directory is None: - directory = self.root - directory = Path(directory) - - if files is not None: - # Filter files. - for file_ in files: - the_file = directory / file_ - if self._is_path_ignored(the_file): - _LOGGER.debug("ignoring '%s'", the_file) - continue - if the_file.is_symlink(): - _LOGGER.debug("skipping symlink '%s'", the_file) - continue - # Suppressing this error because I simply don't want to deal - # with that here. - with contextlib.suppress(OSError): - if the_file.stat().st_size == 0: - _LOGGER.debug("skipping 0-sized file '%s'", the_file) - continue - - _LOGGER.debug("yielding '%s'", the_file) - yield the_file - - def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]: - """Yield all files in *directory* and its subdirectories. 
- - The files that are not yielded are: - - - Files ignored by VCS (e.g., see .gitignore) - - - Files/directories matching IGNORE_*_PATTERNS. - """ + # pylint: disable=too-many-branches if directory is None: directory = self.root directory = Path(directory) + if subset_files is not None: + subset_files = cast( + Set[Path], {Path(file_).resolve() for file_ in subset_files} + ) for root_str, dirs, files in os.walk(directory): root = Path(root_str) @@ -213,6 +190,11 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]: # Don't walk ignored directories for dir_ in list(dirs): the_dir = root / dir_ + if subset_files is not None and not any( + file_.is_relative_to(the_dir.resolve()) + for file_ in subset_files + ): + continue if self._is_path_ignored(the_dir): _LOGGER.debug("ignoring '%s'", the_dir) dirs.remove(dir_) @@ -231,6 +213,11 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]: # Filter files. for file_ in files: the_file = root / file_ + if ( + subset_files is not None + and the_file.resolve() not in subset_files + ): + continue if self._is_path_ignored(the_file): _LOGGER.debug("ignoring '%s'", the_file) continue @@ -247,6 +234,42 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]: _LOGGER.debug("yielding '%s'", the_file) yield the_file + def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]: + """Yield all files in *directory* and its subdirectories. + + The files that are not yielded are those explicitly ignored by the REUSE + Specification. That means: + + - LICENSE/COPYING files. + - VCS directories. + - .license files. + - .spdx files. + - Files ignored by VCS. + - Symlinks. + - Submodules (depending on the value of :attr:`include_submodules`). + - Meson subprojects (depending on the value of + :attr:`include_meson_subprojects`). + - 0-sized files. + + Args: + directory: The directory in which to search. 
+ """ + return self._iter_files(directory=directory) + + def subset_files( + self, files: Collection[StrPath], directory: Optional[StrPath] = None + ) -> Iterator[Path]: + """Like :meth:`all_files`, but all files that are not in *files* are + filtered out. + + Args: + files: A collection of paths relative to the current working + directory. Any files that are not in this collection are not + yielded. + directory: The directory in which to search. + """ + return self._iter_files(directory=directory, subset_files=files) + def reuse_info_of(self, path: StrPath) -> List[ReuseInfo]: """Return REUSE info of *path*. diff --git a/src/reuse/report.py b/src/reuse/report.py index 508eb51c..e9267b28 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -23,13 +23,14 @@ from pathlib import Path, PurePath from typing import ( Any, + Collection, Dict, Iterable, List, NamedTuple, Optional, + Protocol, Set, - Union, cast, ) from uuid import uuid4 @@ -112,12 +113,81 @@ class _MultiprocessingResult(NamedTuple): error: Optional[Exception] +def _generate_file_reports( + project: Project, + do_checksum: bool = True, + subset_files: Optional[Collection[StrPath]] = None, + multiprocessing: bool = cpu_count() > 1, # type: ignore + add_license_concluded: bool = False, +) -> Iterable[_MultiprocessingResult]: + """Create a :class:`FileReport` for every file in the project, filtered + by *subset_files*. + """ + container = _MultiprocessingContainer( + project, do_checksum, add_license_concluded + ) + + files = ( + project.subset_files(subset_files) + if subset_files is not None + else project.all_files() + ) + if multiprocessing: + with mp.Pool() as pool: + results: Iterable[_MultiprocessingResult] = pool.map( + container, files + ) + pool.join() + else: + results = map(container, files) + return results + + +def _process_error(error: Exception, path: StrPath) -> None: + # Facilitate better debugging by being able to quit the program. 
+ if isinstance(error, bdb.BdbQuit): + raise bdb.BdbQuit() from error + if isinstance(error, (OSError, UnicodeError)): + _LOGGER.error( + _("Could not read '{path}'").format(path=path), + exc_info=error, + ) + else: + _LOGGER.error( + _("Unexpected error occurred while parsing '{path}'").format( + path=path + ), + exc_info=error, + ) + + +class ProjectReportSubsetProtocol(Protocol): + """A :class:`Protocol` that defines a subset of functionality of + :class:`ProjectReport`, implemented by :class:`ProjectSubsetReport`. + """ + + path: StrPath + missing_licenses: Dict[str, Set[Path]] + read_errors: Set[Path] + file_reports: Set["FileReport"] + + @property + def files_without_licenses(self) -> Set[Path]: + """Set of paths that have no licensing information.""" + + @property + def files_without_copyright(self) -> Set[Path]: + """Set of paths that have no copyright information.""" + + @property + def is_compliant(self) -> bool: + """Whether the report subset is compliant with the REUSE Spec.""" + + class ProjectReport: # pylint: disable=too-many-instance-attributes """Object that holds linting report about the project.""" - def __init__( - self, do_checksum: bool = True, file_list: Optional[List[str]] = None - ): + def __init__(self, do_checksum: bool = True): self.path: StrPath = "" self.licenses: Dict[str, Path] = {} self.missing_licenses: Dict[str, Set[Path]] = {} @@ -128,7 +198,6 @@ def __init__( self.licenses_without_extension: Dict[str, Path] = {} self.do_checksum = do_checksum - self.file_list = file_list self._unused_licenses: Optional[Set[str]] = None self._used_licenses: Optional[Set[str]] = None @@ -286,48 +355,24 @@ def bill_of_materials( return out.getvalue() - @classmethod - def get_lint_results( - cls, - project: Project, - do_checksum: bool = True, - file_list: Optional[List[str]] = None, - multiprocessing: bool = cpu_count() > 1, # type: ignore - add_license_concluded: bool = False, - ) -> Union[list, Iterable[_MultiprocessingResult]]: - """Get lint 
results based on multiprocessing and file_list.""" - container = _MultiprocessingContainer( - project, do_checksum, add_license_concluded - ) - - # Iterate over specific file list if files are provided with - # `reuse lint-file`. Otherwise, lint all files. - iter_files = ( - project.specific_files(file_list) - if file_list - else project.all_files() - ) - if multiprocessing: - with mp.Pool() as pool: - results: Iterable[_MultiprocessingResult] = pool.map( - container, iter_files - ) - pool.join() - else: - results = map(container, iter_files) - - return results - @classmethod def generate( cls, project: Project, do_checksum: bool = True, - file_list: Optional[List[str]] = None, multiprocessing: bool = cpu_count() > 1, # type: ignore add_license_concluded: bool = False, ) -> "ProjectReport": - """Generate a ProjectReport from a Project.""" + """Generate a :class:`ProjectReport` from a :class:`Project`. + + Args: + project: The :class:`Project` to lint. + do_checksum: Generate a checksum of every file. If this is + :const:`False`, generate a random checksum for every file. + multiprocessing: Whether to use multiprocessing. + add_license_concluded: Whether to aggregate all found SPDX + expressions into a concluded license. + """ project_report = cls(do_checksum=do_checksum) project_report.path = project.root project_report.licenses = project.licenses @@ -335,32 +380,15 @@ def generate( project.licenses_without_extension ) - results = cls.get_lint_results( + results = _generate_file_reports( project, - do_checksum, - file_list, - multiprocessing, # type: ignore - add_license_concluded, + do_checksum=do_checksum, + multiprocessing=multiprocessing, + add_license_concluded=add_license_concluded, ) - for result in results: if result.error: - # Facilitate better debugging by being able to quit the program. 
- if isinstance(result.error, bdb.BdbQuit): - raise bdb.BdbQuit() from result.error - if isinstance(result.error, (OSError, UnicodeError)): - _LOGGER.error( - _("Could not read '{path}'").format(path=result.path), - exc_info=result.error, - ) - project_report.read_errors.add(Path(result.path)) - continue - _LOGGER.error( - _( - "Unexpected error occurred while parsing '{path}'" - ).format(path=result.path), - exc_info=result.error, - ) + _process_error(result.error, result.path) project_report.read_errors.add(Path(result.path)) continue @@ -554,6 +582,86 @@ def recommendations(self) -> List[str]: return recommendations +class ProjectSubsetReport: + """Like a :class:`ProjectReport`, but for a subset of the files using a + subset of features. + """ + + def __init__(self) -> None: + self.path: StrPath = "" + self.missing_licenses: Dict[str, Set[Path]] = {} + self.read_errors: Set[Path] = set() + self.file_reports: Set[FileReport] = set() + + @classmethod + def generate( + cls, + project: Project, + subset_files: Collection[StrPath], + multiprocessing: bool = cpu_count() > 1, # type: ignore + ) -> "ProjectSubsetReport": + """Generate a :class:`ProjectSubsetReport` from a :class:`Project`. + + Args: + project: The :class:`Project` to lint. + subset_files: Only lint the files in this list. + multiprocessing: Whether to use multiprocessing. 
+ """ + subset_report = cls() + subset_report.path = project.root + results = _generate_file_reports( + project, + do_checksum=False, + subset_files=subset_files, + multiprocessing=multiprocessing, + add_license_concluded=False, + ) + for result in results: + if result.error: + _process_error(result.error, result.path) + subset_report.read_errors.add(Path(result.path)) + continue + + file_report = cast(FileReport, result.report) + subset_report.file_reports.add(file_report) + + for missing_license in file_report.missing_licenses: + subset_report.missing_licenses.setdefault( + missing_license, set() + ).add(file_report.path) + return subset_report + + @property + def files_without_licenses(self) -> Set[Path]: + """Set of paths that have no licensing information.""" + return { + file_report.path + for file_report in self.file_reports + if not file_report.licenses_in_file + } + + @property + def files_without_copyright(self) -> Set[Path]: + """Set of paths that have no copyright information.""" + return { + file_report.path + for file_report in self.file_reports + if not file_report.copyright + } + + @property + def is_compliant(self) -> bool: + """Whether the report subset is compliant with the REUSE Spec.""" + return not any( + ( + self.missing_licenses, + self.files_without_copyright, + self.files_without_licenses, + self.read_errors, + ) + ) + + class FileReport: # pylint: disable=too-many-instance-attributes """Object that holds a linting report about a single file.""" diff --git a/tests/test_lint.py b/tests/test_lint.py index 1b89084f..e680b82d 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -4,7 +4,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""All tests for reuse.lint and reuse.lint_files""" +"""All tests for reuse.lint.""" import re import shutil @@ -271,18 +271,4 @@ def test_lint_lines_read_errors(fake_repository): assert "read error" in result -def test_lint_specific_files(fake_repository): - """Check lint-file subcommand.""" - 
(fake_repository / "foo.py").write_text("foo") - (fake_repository / "bar.py").write_text("bar") - - project = Project.from_directory(fake_repository) - report = ProjectReport.generate(project, file_list=["foo.py"]) - result = format_plain(report) - - assert ":-(" in result - assert "# UNUSED LICENSES" in result - assert "bar.py" not in result - - # REUSE-IgnoreEnd diff --git a/tests/test_main.py b/tests/test_main.py index 461a15ca..d5d97733 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -367,6 +367,54 @@ def test_lint_no_multiprocessing(fake_repository, stringio, multiprocessing): assert ":-)" in stringio.getvalue() +class TestLintFile: + """Tests for lint-file.""" + + def test_simple(self, fake_repository, stringio): + """A simple test to make sure it works.""" + result = main(["lint-file", "src/custom.py"], out=stringio) + assert result == 0 + assert not stringio.getvalue() + + def test_no_copyright_licensing(self, fake_repository, stringio): + """A file is correctly spotted when it has no copyright or licensing + info. + """ + (fake_repository / "foo.py").write_text("foo") + result = main(["lint-file", "foo.py"], out=stringio) + assert result == 1 + output = stringio.getvalue() + assert "foo.py" in output + assert "no license identifier" in output + assert "no copyright notice" in output + + def test_path_outside_project(self, empty_directory, capsys): + """A file can't be outside the project.""" + with pytest.raises(SystemExit): + main(["lint-file", "/"]) + assert "'/' is not in" in capsys.readouterr().err + + def test_file_not_exists(self, empty_directory, capsys): + """A file must exist.""" + with pytest.raises(SystemExit): + main(["lint-file", "foo.py"]) + assert "can't open 'foo.py'" in capsys.readouterr().err + + def test_ignored_file(self, fake_repository, stringio): + """A corner case where a specified file is ignored. It isn't checked at + all. 
+ """ + (fake_repository / "COPYING").write_text("foo") + result = main(["lint-file", "COPYING"], out=stringio) + assert result == 0 + + def test_file_covered_by_toml(self, fake_repository_reuse_toml, stringio): + """If a file is covered by REUSE.toml, use its infos.""" + (fake_repository_reuse_toml / "doc/foo.md").write_text("foo") + result = main(["lint-file", "doc/foo.md"], out=stringio) + assert result == 0 + + @freeze_time("2024-04-08T17:34:00Z") def test_spdx(fake_repository, stringio): """Compile to an SPDX document.""" diff --git a/tests/test_project.py b/tests/test_project.py index fceba03e..70a5af7e 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -309,6 +309,77 @@ def test_all_files_pijul_ignored_contains_newline(pijul_repository): assert Path("hello\nworld.pyc").absolute() not in project.all_files() +class TestSubsetFiles: + """Tests for subset_files.""" + + def test_single(self, fake_repository): + """Only yield the single specified file.""" + project = Project.from_directory(fake_repository) + result = list(project.subset_files({fake_repository / "src/custom.py"})) + assert result == [fake_repository / "src/custom.py"] + + def test_two(self, fake_repository): + """Yield multiple specified files.""" + project = Project.from_directory(fake_repository) + result = list( + project.subset_files( + { + fake_repository / "src/custom.py", + fake_repository / "src/exception.py", + } + ) + ) + assert result == [ + fake_repository / "src/custom.py", + fake_repository / "src/exception.py", + ] + + def test_non_existent(self, fake_repository): + """If a file does not exist, don't yield it.""" + project = Project.from_directory(fake_repository) + result = list( + project.subset_files( + { + fake_repository / "src/custom.py", + fake_repository / "not_exist.py", + fake_repository / "also/does/not/exist.py", + } + ) + ) + assert result == [fake_repository / "src/custom.py"] + + def test_outside_cwd(self, fake_repository): + """If a file is outside of 
the project, don't yield it.""" + project = Project.from_directory(fake_repository) + result = list( + project.subset_files( + { + fake_repository / "src/custom.py", + (fake_repository / "../outside.py").resolve(), + } + ) + ) + assert result == [fake_repository / "src/custom.py"] + + def test_empty(self, fake_repository): + """If no files are provided, yield nothing.""" + project = Project.from_directory(fake_repository) + result = list(project.subset_files(set())) + assert not result + + def test_list_arg(self, fake_repository): + """Also accepts a list argument.""" + project = Project.from_directory(fake_repository) + result = list(project.subset_files([fake_repository / "src/custom.py"])) + assert result == [fake_repository / "src/custom.py"] + + def test_relative_path(self, fake_repository): + """Also handles relative paths.""" + project = Project.from_directory(fake_repository) + result = list(project.subset_files({"src/custom.py"})) + assert result == [fake_repository / "src/custom.py"] + + def test_reuse_info_of_file_does_not_exist(fake_repository): """Raise FileNotFoundError when asking for the REUSE info of a file that does not exist. 
diff --git a/tests/test_report.py b/tests/test_report.py index b1c01ed3..3afc9d95 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -17,7 +17,7 @@ from reuse import SourceType from reuse.project import Project -from reuse.report import FileReport, ProjectReport +from reuse.report import FileReport, ProjectReport, ProjectSubsetReport # REUSE-IgnoreStart @@ -278,9 +278,7 @@ def test_simple(self, fake_repository, multiprocessing): assert not result.read_errors assert result.file_reports - def test__licenses_without_extension( - self, fake_repository, multiprocessing - ): + def test_licenses_without_extension(self, fake_repository, multiprocessing): """Licenses without extension are detected.""" (fake_repository / "LICENSES/CC0-1.0.txt").rename( fake_repository / "LICENSES/CC0-1.0" @@ -478,6 +476,69 @@ def test_partial_info_in_toml(self, empty_directory, multiprocessing): assert file_report.licenses_in_file == ["0BSD"] +class TestProjectSubsetReport: + """Tests for ProjectSubsetReport.""" + + def test_simple(self, fake_repository, multiprocessing): + """Simple generate test.""" + project = Project.from_directory(fake_repository) + result = ProjectSubsetReport.generate( + project, + {fake_repository / "src/custom.py"}, + multiprocessing=multiprocessing, + ) + + assert not result.missing_licenses + assert not result.read_errors + assert not result.files_without_licenses + assert not result.files_without_copyright + assert len(result.file_reports) == 1 + + @cpython + @posix + def test_read_error(self, fake_repository, multiprocessing): + """Files that cannot be read are added to the read error list.""" + (fake_repository / "bad").write_text("foo") + (fake_repository / "bad").chmod(0o000) + + project = Project.from_directory(fake_repository) + result = ProjectSubsetReport.generate( + project, {fake_repository / "bad"}, multiprocessing=multiprocessing + ) + + # pylint: disable=superfluous-parens + assert (fake_repository / "bad") in result.read_errors + + def 
test_missing_license(self, fake_repository, multiprocessing): + """Missing licenses are detected.""" + (fake_repository / "LICENSES/GPL-3.0-or-later.txt").unlink() + + project = Project.from_directory(fake_repository) + result = ProjectSubsetReport.generate( + project, + {fake_repository / "src/exception.py"}, + multiprocessing=multiprocessing, + ) + + assert result.missing_licenses == { + "GPL-3.0-or-later": {fake_repository / "src/exception.py"} + } + + def test_missing_copyright_license(self, empty_directory, multiprocessing): + """Missing copyright and license is detected.""" + (empty_directory / "foo.py").write_text("foo") + project = Project.from_directory(empty_directory) + result = ProjectSubsetReport.generate( + project, + {empty_directory / "foo.py"}, + multiprocessing=multiprocessing, + ) + + # pylint: disable=superfluous-parens + assert (empty_directory / "foo.py") in result.files_without_copyright + assert (empty_directory / "foo.py") in result.files_without_licenses + + def test_bill_of_materials(fake_repository, multiprocessing): """Generate a bill of materials.""" project = Project.from_directory(fake_repository) From 34a8bb23328c4b0b03bdb8c19d5660902e206360 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Fri, 6 Sep 2024 17:03:04 +0200 Subject: [PATCH 13/18] Make compatible with Python 3.8 Signed-off-by: Carmen Bianca BAKKER --- src/reuse/_lint_file.py | 4 ++-- src/reuse/_util.py | 13 +++++++++++-- src/reuse/global_licensing.py | 7 ++----- src/reuse/project.py | 3 ++- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/reuse/_lint_file.py b/src/reuse/_lint_file.py index ceeb0265..5caa6661 100644 --- a/src/reuse/_lint_file.py +++ b/src/reuse/_lint_file.py @@ -12,7 +12,7 @@ from pathlib import Path from typing import IO -from ._util import PathType +from ._util import PathType, is_relative_to from .lint import format_lines_subset from .project import Project from .report import ProjectSubsetReport @@ -37,7 +37,7 @@ def 
run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: """List all non-compliant files from specified file list.""" subset_files = {Path(file_) for file_ in args.files} for file_ in subset_files: - if not file_.resolve().is_relative_to(project.root.resolve()): + if not is_relative_to(file_.resolve(), project.root.resolve()): args.parser.error( _("'{file}' is not inside of '{root}'").format( file=file_, root=project.root diff --git a/src/reuse/_util.py b/src/reuse/_util.py index d5cf1666..58de0d1f 100644 --- a/src/reuse/_util.py +++ b/src/reuse/_util.py @@ -14,7 +14,7 @@ """Misc. utilities for reuse.""" - +import contextlib import logging import os import re @@ -29,7 +29,7 @@ from inspect import cleandoc from itertools import chain from os import PathLike -from pathlib import Path +from pathlib import Path, PurePath from typing import ( IO, Any, @@ -665,4 +665,13 @@ def cleandoc_nl(text: str) -> str: return cleandoc(text) + "\n" +def is_relative_to(path: PurePath, target: PurePath) -> bool: + """Like Path.is_relative_to, but working for Python <3.9.""" + # TODO: When Python 3.8 is dropped, remove this function. + with contextlib.suppress(ValueError): + path.relative_to(target) + return True + return False + + # REUSE-IgnoreEnd diff --git a/src/reuse/global_licensing.py b/src/reuse/global_licensing.py index b66adc45..6a659c9f 100644 --- a/src/reuse/global_licensing.py +++ b/src/reuse/global_licensing.py @@ -6,7 +6,6 @@ # mypy: disable-error-code=attr-defined -import contextlib import logging import re from abc import ABC, abstractmethod @@ -40,7 +39,7 @@ from license_expression import ExpressionError from . 
import ReuseInfo, SourceType -from ._util import _LICENSING, StrPath +from ._util import _LICENSING, StrPath, is_relative_to _LOGGER = logging.getLogger(__name__) @@ -555,9 +554,7 @@ def _find_relevant_tomls(self, path: StrPath) -> List[ReuseTOML]: found = [] for toml in self.reuse_tomls: # TODO: When Python 3.8 is dropped, use is_relative_to instead. - with contextlib.suppress(ValueError): - PurePath(path).relative_to(toml.directory) - # No error. + if is_relative_to(PurePath(path), toml.directory): found.append(toml) # Sort from topmost to deepest directory. found.sort(key=lambda toml: toml.directory.parts) diff --git a/src/reuse/project.py b/src/reuse/project.py index 0585f85a..8ab6d37c 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -45,6 +45,7 @@ _LICENSEREF_PATTERN, StrPath, _determine_license_path, + is_relative_to, relative_from_root, reuse_info_of_file, ) @@ -191,7 +192,7 @@ def _iter_files( for dir_ in list(dirs): the_dir = root / dir_ if subset_files is not None and not any( - file_.is_relative_to(the_dir.resolve()) + is_relative_to(file_, the_dir.resolve()) for file_ in subset_files ): continue From a15f4a7c1f5145c9ef0bd86f2fe1c33c45a0b938 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Fri, 6 Sep 2024 17:04:57 +0200 Subject: [PATCH 14/18] Use set instead of list in test This worked on my machine, but not on the CI. I'm convinced that my filesystem (btrfs) iterates over files very differently compared to ext4. 
Signed-off-by: Carmen Bianca BAKKER --- tests/test_project.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 70a5af7e..ea3ad7ae 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -321,7 +321,7 @@ def test_single(self, fake_repository): def test_two(self, fake_repository): """Yield multiple specified files.""" project = Project.from_directory(fake_repository) - result = list( + result = set( project.subset_files( { fake_repository / "src/custom.py", @@ -329,10 +329,10 @@ def test_two(self, fake_repository): } ) ) - assert result == [ + assert result == { fake_repository / "src/custom.py", fake_repository / "src/exception.py", - ] + } def test_non_existent(self, fake_repository): """If a file does not exist, don't yield it.""" From 7f04e3c998fdcb93d8fdfcb1919c7a89a333ee65 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Fri, 6 Sep 2024 17:11:33 +0200 Subject: [PATCH 15/18] Make test compatible with Windows Signed-off-by: Carmen Bianca BAKKER --- tests/test_main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index d5d97733..94c078f9 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -391,8 +391,8 @@ def test_no_copyright_licensing(self, fake_repository, stringio): def test_path_outside_project(self, empty_directory, capsys): """A file can't be outside the project.""" with pytest.raises(SystemExit): - main(["lint-file", "/"]) - assert "'/' is not in" in capsys.readouterr().err + main(["lint-file", ".."]) + assert "'..' 
is not in" in capsys.readouterr().err def test_file_not_exists(self, empty_directory, capsys): """A file must exist.""" From 97074ed29d83f0efcdf9c5931d188330e3bb2571 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Tue, 10 Sep 2024 11:55:15 +0200 Subject: [PATCH 16/18] Improve documentation of lint-file Signed-off-by: Carmen Bianca BAKKER --- docs/conf.py | 13 ++++++ docs/man/reuse-lint-file.rst | 87 +++++++----------------------------- docs/man/reuse-lint.rst | 2 +- src/reuse/_lint_file.py | 8 +++- 4 files changed, 37 insertions(+), 73 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index f9674da7..9a53bb03 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -115,6 +115,14 @@ "Free Software Foundation Europe", 1, ), + ( + "man/reuse-lint-file", + "reuse-lint-file", + "Verify whether the specified files are compliant with the REUSE" + " Specification", + "Free Software Foundation Europe", + 1, + ), ( "man/reuse-spdx", "reuse-spdx", @@ -130,6 +138,11 @@ 1, ), ] +manpages_url = ( + "https://reuse.readthedocs.io/en/v{version}/man/{page}.html".format( + version=version, page="{page}" + ) +) # -- Custom ------------------------------------------------------------------ diff --git a/docs/man/reuse-lint-file.rst b/docs/man/reuse-lint-file.rst index f6030c64..314cc46d 100644 --- a/docs/man/reuse-lint-file.rst +++ b/docs/man/reuse-lint-file.rst @@ -5,100 +5,45 @@ SPDX-License-Identifier: CC-BY-SA-4.0 reuse-lint-file -================ +=============== Synopsis -------- -**reuse lint-file** [*options*] +**reuse lint-file** [*options*] [*file* ...] Description ----------- -:program:`reuse-lint-file` verifies whether a file in a project is compliant with the REUSE -Specification located at `<https://reuse.software/spec>`_. +:program:`reuse-lint-file` verifies whether the specified files are compliant +with the REUSE Specification located at `<https://reuse.software/spec>`_. It +runs the linter from :manpage:`reuse-lint(1)` against a subset of files, using a +subset of criteria.
+ +Files that are ignored by :program:`reuse-lint` are also ignored by +:program:`reuse-lint-file`, even if specified. Criteria -------- -These are the criteria that the linter checks against. - -Bad licenses -~~~~~~~~~~~~ - -Licenses that are found in ``LICENSES/`` that are not found in the SPDX License -List or do not start with ``LicenseRef-`` are bad licenses. - -Deprecated licenses -~~~~~~~~~~~~~~~~~~~ - -Licenses whose SPDX License Identifier has been deprecated by SPDX. - -Licenses without file extension -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -These are licenses whose file names are a valid SPDX License Identifier, but -which do not have a file extension. - -Missing licenses -~~~~~~~~~~~~~~~~ - -A license which is referred to in a comment header, but which is not found in -the ``LICENSES/`` directory. - -Unused licenses -~~~~~~~~~~~~~~~ - -A license found in the ``LICENSES/`` directory, but which is not referred to in -any comment header. - -Read errors -~~~~~~~~~~~ +The criteria are the same as used in :manpage:`reuse-lint(1)`, but using only a +subset: -Not technically a criterion, but files that cannot be read by the operating -system are read errors, and need to be fixed. - -Files without copyright and license information -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Every file needs to have copyright and licensing information associated with it. -The REUSE Specification details several ways of doing it. By and large, these -are the methods: - -- Placing tags in the header of the file. -- Placing tags in a ``.license`` file adjacent to the file. -- Putting the information in the ``REUSE.toml`` file. -- Putting the information in the ``.reuse/dep5`` file. (Deprecated) - -If a file is found that does not have copyright and/or license information -associated with it, then the project is not compliant. +- Missing licenses. +- Read errors. +- Files without copyright and license information. Options ------- -.. option:: - - File(s) that are linted. 
For example, ``reuse lint-file src/reuse/lint_file.py src/reuse/download.py``. - .. option:: -q, --quiet Do not print anything to STDOUT. -.. - TODO: specify the JSON output. - -.. option:: -j, --json - - Output the results of the lint as JSON. - -.. option:: -p, --plain - - Output the results of the lint as descriptive text. The text is valid - Markdown. - .. option:: -l, --lines - Output one line per error, prefixed by the file path. + Output one line per error, prefixed by the file path. This option is the + default. .. option:: -h, --help diff --git a/docs/man/reuse-lint.rst b/docs/man/reuse-lint.rst index 75bfcf94..e00d8470 100644 --- a/docs/man/reuse-lint.rst +++ b/docs/man/reuse-lint.rst @@ -90,7 +90,7 @@ Options .. option:: -p, --plain Output the results of the lint as descriptive text. The text is valid - Markdown. + Markdown. This option is the default. .. option:: -l, --lines diff --git a/src/reuse/_lint_file.py b/src/reuse/_lint_file.py index 5caa6661..39e5cd46 100644 --- a/src/reuse/_lint_file.py +++ b/src/reuse/_lint_file.py @@ -30,7 +30,13 @@ def add_arguments(parser: ArgumentParser) -> None: action="store_true", help=_("formats output as errors per line (default)"), ) - parser.add_argument("files", action="store", nargs="*", type=PathType("r")) + parser.add_argument( + "files", + action="store", + nargs="*", + type=PathType("r"), + help=_("files to lint"), + ) def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: From 4f420afe28b27daaf94baadd134dc5b7da708ac8 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Tue, 10 Sep 2024 12:15:43 +0200 Subject: [PATCH 17/18] Fix pre-commit hook Signed-off-by: Carmen Bianca BAKKER --- .pre-commit-hooks.yaml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index d1878041..9533c265 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -3,10 +3,18 @@ # SPDX-License-Identifier: 
GPL-3.0-or-later - id: reuse - name: reuse + name: reuse lint entry: reuse - args: ["lint", "lint-file"] + args: ["lint"] language: python pass_filenames: false description: - "Lint the project directory for compliance with the REUSE Specification" + "Lint the project directory for compliance with the REUSE Specification." + +- id: reuse-lint-file + name: reuse lint-file + entry: reuse + args: ["lint-file"] + language: python + description: + "Lint the changed files for compliance with the REUSE Specification." From 15861d23937fe80623cdc13ed6b494fa2782aac8 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Tue, 10 Sep 2024 12:22:15 +0200 Subject: [PATCH 18/18] Document in README Signed-off-by: Carmen Bianca BAKKER --- README.md | 13 ++++++++++++- docs/conf.py | 2 +- pyproject.toml | 6 ++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1908df36..33705fdc 100644 --- a/README.md +++ b/README.md @@ -247,7 +247,7 @@ Git. This uses [pre-commit](https://pre-commit.com/). Once you ```yaml repos: - repo: https://github.com/fsfe/reuse-tool - rev: v3.0.2 + rev: v4.0.3 hooks: - id: reuse ``` @@ -256,6 +256,17 @@ Then run `pre-commit install`. Now, every time you commit, `reuse lint` is run in the background, and will prevent your commit from going through if there was an error. +If you instead want to only lint files that were changed in your commit, you can +use the following configuration: + +```yaml +repos: + - repo: https://github.com/fsfe/reuse-tool + rev: v4.0.3 + hooks: + - id: reuse-lint-file +``` + ## Maintainers - Carmen Bianca Bakker diff --git a/docs/conf.py b/docs/conf.py index 9a53bb03..18bcd02c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -31,7 +31,7 @@ # The full version, including alpha/beta/rc tags. release = get_version("reuse") except PackageNotFoundError: - release = "3.0.2" + release = "4.0.3" # The short X.Y.Z version. 
version = ".".join(release.split(".")[:3]) diff --git a/pyproject.toml b/pyproject.toml index 02c3df14..1f8e51d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,6 +122,12 @@ push = false "src/reuse/__init__.py" = [ '__version__ = "{pep440_version}"$', ] +"docs/conf.py" = [ + 'release = "{pep440_version}"$', +] +"README.md" = [ + 'rev: {version}$', +] [tool.protokolo] changelog = "CHANGELOG.md"