Skip to content

Commit

Permalink
Reduce calls to iter_files
Browse files Browse the repository at this point in the history
Previously this function would be called three times on a lint:

- once by NestedReuseTOML.find_reuse_tomls in Project.find_global_licensing
- once by NestedReuseTOML.find_reuse_tomls in  NestedReuseTOML.from_file
- once to lint all the files

I now no longer use NestedReuseTOML.from_file. It's still not ideal to
go over all files twice, but it's better than thrice.

Signed-off-by: Carmen Bianca BAKKER <[email protected]>
  • Loading branch information
carmenbianca committed Sep 18, 2024
1 parent 8280798 commit a8be9db
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 31 deletions.
60 changes: 35 additions & 25 deletions src/reuse/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

"""Module that contains the central Project class."""

import contextlib
import errno
import glob
import logging
Expand Down Expand Up @@ -51,6 +50,7 @@
NestedReuseTOML,
PrecedenceType,
ReuseDep5,
ReuseTOML,
)
from .vcs import VCSStrategy, VCSStrategyNone, all_vcs_strategies

Expand Down Expand Up @@ -151,11 +151,8 @@ def from_directory(
vcs_strategy=vcs_strategy,
)
if found:
global_licensing = found.cls.from_file(
found.path,
include_submodules=include_submodules,
include_meson_subprojects=include_meson_subprojects,
vcs_strategy=vcs_strategy,
global_licensing = cls._global_licensing_from_found(
found, str(root)
)

project = cls(
Expand Down Expand Up @@ -334,15 +331,15 @@ def find_global_licensing(
include_submodules: bool = False,
include_meson_subprojects: bool = False,
vcs_strategy: Optional[VCSStrategy] = None,
) -> Optional[GlobalLicensingFound]:
) -> List[GlobalLicensingFound]:
"""Find the path and corresponding class of a project directory's
:class:`GlobalLicensing`.
Raises:
GlobalLicensingConflict: if more than one global licensing config
file is present.
"""
candidate: Optional[GlobalLicensingFound] = None
candidates: List[GlobalLicensingFound] = []
dep5_path = root / ".reuse/dep5"
if (dep5_path).exists():
# Sneaky workaround to not print this warning.
Expand All @@ -355,31 +352,44 @@ def find_global_licensing(
),
PendingDeprecationWarning,
)
candidate = GlobalLicensingFound(dep5_path, ReuseDep5)

# TODO: the performance of this isn't great.
toml_path = None
with contextlib.suppress(StopIteration):
toml_path = next(
NestedReuseTOML.find_reuse_tomls(
root,
include_submodules=include_submodules,
include_meson_subprojects=include_meson_subprojects,
vcs_strategy=vcs_strategy,
)
candidates = [GlobalLicensingFound(dep5_path, ReuseDep5)]

reuse_toml_candidates = [
GlobalLicensingFound(path, ReuseTOML)
for path in NestedReuseTOML.find_reuse_tomls(
root,
include_submodules=include_submodules,
include_meson_subprojects=include_meson_subprojects,
vcs_strategy=vcs_strategy,
)
if toml_path is not None:
if candidate is not None:
]
if reuse_toml_candidates:
if candidates:
raise GlobalLicensingConflict(
_(
"Found both '{new_path}' and '{old_path}'. You"
" cannot keep both files simultaneously; they are"
" not intercompatible."
).format(new_path=toml_path, old_path=dep5_path)
).format(
new_path=reuse_toml_candidates[0].path,
old_path=dep5_path,
)
)
candidate = GlobalLicensingFound(root, NestedReuseTOML)
candidates = reuse_toml_candidates

return candidates

return candidate
@classmethod
def _global_licensing_from_found(
cls, found: List[GlobalLicensingFound], root: StrPath
) -> GlobalLicensing:
if len(found) == 1 and found[0].cls == ReuseDep5:
return ReuseDep5.from_file(found[0].path)
# This is an impossible scenario at time of writing.
if not all(item.cls == ReuseTOML for item in found):
raise NotImplementedError()
tomls = [ReuseTOML.from_file(item.path) for item in found]
return NestedReuseTOML(reuse_tomls=tomls, source=str(root))

def _identifier_of_license(self, path: Path) -> str:
"""Figure out the SPDX License identifier of a license given its path.
Expand Down
25 changes: 19 additions & 6 deletions tests/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
from reuse.covered_files import iter_files
from reuse.global_licensing import (
GlobalLicensingParseError,
NestedReuseTOML,
ReuseDep5,
ReuseTOML,
)
from reuse.project import GlobalLicensingConflict, Project

Expand Down Expand Up @@ -567,8 +567,10 @@ def test_find_global_licensing_dep5(fake_repository_dep5):
"""Find the dep5 file. Also output a PendingDeprecationWarning."""
with warnings.catch_warnings(record=True) as caught_warnings:
result = Project.find_global_licensing(fake_repository_dep5)
assert result.path == fake_repository_dep5 / ".reuse/dep5"
assert result.cls == ReuseDep5
assert len(result) == 1
dep5 = result[0]
assert dep5.path == fake_repository_dep5 / ".reuse/dep5"
assert dep5.cls == ReuseDep5

assert len(caught_warnings) == 1
assert issubclass(
Expand All @@ -579,14 +581,25 @@ def test_find_global_licensing_dep5(fake_repository_dep5):
def test_find_global_licensing_reuse_toml(fake_repository_reuse_toml):
"""Find the REUSE.toml file."""
result = Project.find_global_licensing(fake_repository_reuse_toml)
assert result.path == fake_repository_reuse_toml / "."
assert result.cls == NestedReuseTOML
assert len(result) == 1
toml = result[0]
assert toml.path == fake_repository_reuse_toml / "REUSE.toml"
assert toml.cls == ReuseTOML


def test_find_global_licensing_reuse_toml_multiple(fake_repository_reuse_toml):
"""Find multiple REUSE.tomls."""
(fake_repository_reuse_toml / "src/REUSE.toml").write_text("version = 1")
result = Project.find_global_licensing(fake_repository_reuse_toml)
assert len(result) == 2
assert result[0].path == fake_repository_reuse_toml / "REUSE.toml"
assert result[1].path == fake_repository_reuse_toml / "src/REUSE.toml"


def test_find_global_licensing_none(empty_directory):
"""Find no file."""
result = Project.find_global_licensing(empty_directory)
assert result is None
assert not result


def test_find_global_licensing_conflict(fake_repository_dep5):
Expand Down

0 comments on commit a8be9db

Please sign in to comment.