Skip to content

Commit

Permalink
Make lint-file work
Browse files Browse the repository at this point in the history
Signed-off-by: Carmen Bianca BAKKER <[email protected]>
  • Loading branch information
carmenbianca committed Sep 6, 2024
1 parent f7f6586 commit 46e42a1
Show file tree
Hide file tree
Showing 10 changed files with 483 additions and 176 deletions.
3 changes: 2 additions & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ jobs=0

disable=duplicate-code,
logging-fstring-interpolation,
implicit-str-concat
implicit-str-concat,
inconsistent-quotes
enable=useless-suppression

[REPORTS]
Expand Down
38 changes: 17 additions & 21 deletions src/reuse/lint_file.py → src/reuse/_lint_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@
import sys
from argparse import ArgumentParser, Namespace
from gettext import gettext as _
from pathlib import Path
from typing import IO

from .lint import format_json, format_lines, format_plain
from ._util import PathType
from .lint import format_lines_subset
from .project import Project
from .report import ProjectReport
from .report import ProjectSubsetReport


def add_arguments(parser: ArgumentParser) -> None:
Expand All @@ -22,40 +24,34 @@ def add_arguments(parser: ArgumentParser) -> None:
mutex_group.add_argument(
"-q", "--quiet", action="store_true", help=_("prevents output")
)
mutex_group.add_argument(
"-j", "--json", action="store_true", help=_("formats output as JSON")
)
mutex_group.add_argument(
"-p",
"--plain",
action="store_true",
help=_("formats output as plain text"),
)
mutex_group.add_argument(
"-l",
"--lines",
action="store_true",
help=_("formats output as errors per line"),
help=_("formats output as errors per line (default)"),
)
parser.add_argument("files", nargs="*")
parser.add_argument("files", action="store", nargs="*", type=PathType("r"))


def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int:
"""List all non-compliant files from specified file list."""
report = ProjectReport.generate(
subset_files = {Path(file_) for file_ in args.files}
for file_ in subset_files:
if not file_.resolve().is_relative_to(project.root.resolve()):
args.parser.error(
_("'{file}' is not inside of '{root}'").format(
file=file_, root=project.root
)
)
report = ProjectSubsetReport.generate(
project,
do_checksum=False,
file_list=args.files,
subset_files,
multiprocessing=not args.no_multiprocessing,
)

if args.quiet:
pass
elif args.json:
out.write(format_json(report))
elif args.lines:
out.write(format_lines(report))
else:
out.write(format_plain(report))
out.write(format_lines_subset(report))

return 0 if report.is_compliant else 1
6 changes: 3 additions & 3 deletions src/reuse/_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
__REUSE_version__,
__version__,
_annotate,
_lint_file,
convert_dep5,
download,
lint,
lint_file,
spdx,
supported_licenses,
)
Expand Down Expand Up @@ -178,8 +178,8 @@ def parser() -> argparse.ArgumentParser:
add_command(
subparsers,
"lint-file",
lint_file.add_arguments,
lint_file.run,
_lint_file.add_arguments,
_lint_file.run,
help=_("list non-compliant files from specified list of files"),
)

Expand Down
67 changes: 40 additions & 27 deletions src/reuse/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from . import __REUSE_version__
from .project import Project
from .report import ProjectReport
from .report import ProjectReport, ProjectReportSubsetProtocol


def add_arguments(parser: ArgumentParser) -> None:
Expand All @@ -36,7 +36,7 @@ def add_arguments(parser: ArgumentParser) -> None:
"-p",
"--plain",
action="store_true",
help=_("formats output as plain text"),
help=_("formats output as plain text (default)"),
)
mutex_group.add_argument(
"-l",
Expand Down Expand Up @@ -264,13 +264,43 @@ def custom_serializer(obj: Any) -> Any:
)


def format_lines_subset(report: ProjectReportSubsetProtocol) -> str:
    """Format the per-file part of a report as one error per line.

    Covers missing licenses, read errors, files without licenses, and files
    without copyright. Every category is written in sorted order so that the
    output is deterministic.

    Args:
        report: A populated report.

    Returns:
        The formatted error lines joined into one string. The string is empty
        when none of the four categories has any entries.
    """
    output = StringIO()

    # Missing licenses
    for lic, files in sorted(report.missing_licenses.items()):
        for path in sorted(files):
            output.write(
                _("{path}: missing license {lic}\n").format(path=path, lic=lic)
            )

    # Read errors
    for path in sorted(report.read_errors):
        output.write(_("{path}: read error\n").format(path=path))

    # Without licenses. Sorted like the categories above so that the order
    # does not depend on the iteration order of the underlying container.
    for path in sorted(report.files_without_licenses):
        output.write(_("{path}: no license identifier\n").format(path=path))

    # Without copyright. Sorted for the same reason.
    for path in sorted(report.files_without_copyright):
        output.write(_("{path}: no copyright notice\n").format(path=path))

    return output.getvalue()


def format_lines(report: ProjectReport) -> str:
"""Formats data dictionary as plaintext strings to be printed to sys.stdout
Sorting of output is not guaranteed.
Symbolic links can result in multiple entries per file.
"""Formats report as plaintext strings to be printed to sys.stdout. Sorting
of output is not guaranteed.
Args:
report: ProjectReport data
report: A populated report.
Returns:
String (in plaintext) that can be output to sys.stdout
Expand All @@ -281,6 +311,7 @@ def license_path(lic: str) -> Optional[Path]:
"""Resolve a license identifier to a license path."""
return report.licenses.get(lic)

subset_output = ""
if not report.is_compliant:
# Bad licenses
for lic, files in sorted(report.bad_licenses.items()):
Expand Down Expand Up @@ -312,28 +343,10 @@ def license_path(lic: str) -> Optional[Path]:
_("{lic_path}: unused license\n").format(lic_path=lic_path)
)

# Missing licenses
for lic, files in sorted(report.missing_licenses.items()):
for path in sorted(files):
output.write(
_("{path}: missing license {lic}\n").format(
path=path, lic=lic
)
)

# Read errors
for path in sorted(report.read_errors):
output.write(_("{path}: read error\n").format(path=path))

# Without licenses
for path in report.files_without_licenses:
output.write(_("{path}: no license identifier\n").format(path=path))

# Without copyright
for path in report.files_without_copyright:
output.write(_("{path}: no copyright notice\n").format(path=path))
# Everything else.
subset_output = format_lines_subset(report)

return output.getvalue()
return output.getvalue() + subset_output


def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int:
Expand Down
111 changes: 67 additions & 44 deletions src/reuse/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,18 @@
from collections import defaultdict
from gettext import gettext as _
from pathlib import Path
from typing import DefaultDict, Dict, Iterator, List, NamedTuple, Optional, Type
from typing import (
Collection,
DefaultDict,
Dict,
Iterator,
List,
NamedTuple,
Optional,
Set,
Type,
cast,
)

from binaryornot.check import is_binary

Expand Down Expand Up @@ -158,53 +169,19 @@ def from_directory(

return project

def specific_files(
self, files: Optional[List], directory: Optional[StrPath] = None
def _iter_files(
self,
directory: Optional[StrPath] = None,
subset_files: Optional[Collection[StrPath]] = None,
) -> Iterator[Path]:
"""Yield all files in the specified file list within a directory.
The files that are not yielded are:
- Files ignored by VCS (e.g., see .gitignore)
- Files matching IGNORE_*_PATTERNS.
"""
if directory is None:
directory = self.root
directory = Path(directory)

if files is not None:
# Filter files.
for file_ in files:
the_file = directory / file_
if self._is_path_ignored(the_file):
_LOGGER.debug("ignoring '%s'", the_file)
continue
if the_file.is_symlink():
_LOGGER.debug("skipping symlink '%s'", the_file)
continue
# Suppressing this error because I simply don't want to deal
# with that here.
with contextlib.suppress(OSError):
if the_file.stat().st_size == 0:
_LOGGER.debug("skipping 0-sized file '%s'", the_file)
continue

_LOGGER.debug("yielding '%s'", the_file)
yield the_file

def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]:
"""Yield all files in *directory* and its subdirectories.
The files that are not yielded are:
- Files ignored by VCS (e.g., see .gitignore)
- Files/directories matching IGNORE_*_PATTERNS.
"""
# pylint: disable=too-many-branches
if directory is None:
directory = self.root
directory = Path(directory)
if subset_files is not None:
subset_files = cast(
Set[Path], {Path(file_).resolve() for file_ in subset_files}
)

for root_str, dirs, files in os.walk(directory):
root = Path(root_str)
Expand All @@ -213,6 +190,11 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]:
# Don't walk ignored directories
for dir_ in list(dirs):
the_dir = root / dir_
if subset_files is not None and not any(
file_.is_relative_to(the_dir.resolve())
for file_ in subset_files
):
continue
if self._is_path_ignored(the_dir):
_LOGGER.debug("ignoring '%s'", the_dir)
dirs.remove(dir_)
Expand All @@ -231,6 +213,11 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]:
# Filter files.
for file_ in files:
the_file = root / file_
if (
subset_files is not None
and the_file.resolve() not in subset_files
):
continue
if self._is_path_ignored(the_file):
_LOGGER.debug("ignoring '%s'", the_file)
continue
Expand All @@ -247,6 +234,42 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]:
_LOGGER.debug("yielding '%s'", the_file)
yield the_file

def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]:
    """Yield all files in *directory* and its subdirectories.

    The files that are not yielded are those explicitly ignored by the REUSE
    Specification. That means:

    - LICENSE/COPYING files.
    - VCS directories.
    - .license files.
    - .spdx files.
    - Files ignored by VCS.
    - Symlinks.
    - Submodules (depending on the value of :attr:`include_submodules`).
    - Meson subprojects (depending on the value of
      :attr:`include_meson_subprojects`).
    - 0-sized files.

    Args:
        directory: The directory in which to search.

    Returns:
        An iterator over the paths that were not filtered out.
    """
    # Delegate to the shared walker; *subset_files* keeps its default of
    # None, so no subset restriction is applied.
    return self._iter_files(directory=directory)

def subset_files(
    self, files: Collection[StrPath], directory: Optional[StrPath] = None
) -> Iterator[Path]:
    """Like :meth:`all_files`, but all files that are not in *files* are
    filtered out.

    Args:
        files: A collection of paths relative to the current working
            directory. Any files that are not in this collection are not
            yielded.
        directory: The directory in which to search.

    Returns:
        An iterator over the paths in *files* that were not filtered out.
    """
    # Delegate to the shared walker, passing *files* as the subset filter.
    return self._iter_files(directory=directory, subset_files=files)

def reuse_info_of(self, path: StrPath) -> List[ReuseInfo]:
"""Return REUSE info of *path*.
Expand Down
Loading

0 comments on commit 46e42a1

Please sign in to comment.