Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for GitHub annotations in GitHub actions #1052

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,4 @@ Contributors
- rajivsunar07 <[email protected]>
- Сергій <[email protected]>
- Mersho <[email protected]>
- Michal Čihař <[email protected]>
2 changes: 2 additions & 0 deletions changelog.d/added/format-env.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- Added `REUSE_OUTPUT_FORMAT` environment variable to configure output for
`lint`.
1 change: 1 addition & 0 deletions changelog.d/added/github-format.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
- Added `--github` output option for `lint`.
13 changes: 13 additions & 0 deletions docs/man/reuse-lint.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,19 @@ Options

Output one line per error, prefixed by the file path.

.. option:: -g, --github

Output one line per error in GitHub workflow command syntax.

.. option:: -h, --help

Display help and exit.

Environment
-----------

.. envvar:: REUSE_OUTPUT_FORMAT

Specifies the output format; one of ``plain``, ``lines``, ``github``, or ``json``.

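Each value behaves the same as the corresponding command-line option. If the variable is set to one of the values above, it takes precedence over the command-line format options (but not over ``--quiet``); any other value is ignored.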
Comment on lines +107 to +114
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There should be a documented order of precedence here. If both REUSE_OUTPUT_FORMAT and a CLI flag are used, which takes precedence?

As written in the code, I believe the env var takes precedence. I do not know if this is desirable behaviour.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Additionally, should quiet be a valid value here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Additionally, what should happen if the value of REUSE_OUTPUT_FORMAT is invalid?

161 changes: 116 additions & 45 deletions src/reuse/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,40 @@
# SPDX-FileCopyrightText: 2022 Florian Snow <[email protected]>
# SPDX-FileCopyrightText: 2023 DB Systel GmbH
# SPDX-FileCopyrightText: 2024 Nico Rikken <[email protected]>
# SPDX-FileCopyrightText: 2024 Michal Čihař <[email protected]>
#
# SPDX-License-Identifier: GPL-3.0-or-later

"""All linting happens here. The linting here is nothing more than reading
the reports and printing some conclusions.
"""

from __future__ import annotations

import json
import logging
import os
import sys
from argparse import ArgumentParser, Namespace
from gettext import gettext as _
from io import StringIO
from pathlib import Path
from textwrap import TextWrapper
from typing import IO, Any, Optional
from typing import IO, Any, Generator, NamedTuple

from . import __REUSE_version__
from .project import Project
from .report import ProjectReport

_LOGGER = logging.getLogger(__name__)


class PathError(NamedTuple):
    """A simple container with a path and an error message.

    One instance describes a single problem; the line-oriented formatters
    unpack it as a ``(path, error)`` pair.
    """

    # Path of the file the error is reported against.
    path: Path
    # Human-readable description of what is wrong with that path.
    error: str


def add_arguments(parser: ArgumentParser) -> None:
"""Add arguments to parser."""
Expand All @@ -44,14 +58,20 @@ def add_arguments(parser: ArgumentParser) -> None:
action="store_true",
help=_("formats output as errors per line"),
)
mutex_group.add_argument(
"-g",
"--github",
action="store_true",
help=_("formats output as GitHub workflow commands per line"),
)


# pylint: disable=too-many-branches,too-many-statements,too-many-locals
def format_plain(report: ProjectReport) -> str:
"""Formats data dictionary as plaintext string to be printed to sys.stdout
"""Formats report as plaintext string to be printed to sys.stdout.

Args:
report: ProjectReport data
report: :class:`ProjectReport` data

Returns:
String (in plaintext) that can be output to sys.stdout
Expand Down Expand Up @@ -233,13 +253,13 @@ def format_plain(report: ProjectReport) -> str:


def format_json(report: ProjectReport) -> str:
"""Formats data dictionary as JSON string ready to be printed to sys.stdout
"""Formats report as JSON string ready to be printed to sys.stdout.

Args:
report: Dictionary containing formatted ProjectReport data
report: :class:`ProjectReport` data

Returns:
String (representing JSON) that can be output to sys.stdout
String (representing JSON) that can be output to sys.stdout.
"""

def custom_serializer(obj: Any) -> Any:
Expand All @@ -264,76 +284,114 @@ def custom_serializer(obj: Any) -> Any:
)


def format_lines(report: ProjectReport) -> str:
"""Formats data dictionary as plaintext strings to be printed to sys.stdout
Sorting of output is not guaranteed.
Symbolic links can result in multiple entries per file.
def get_errors(
report: ProjectReport,
) -> Generator[PathError, None, None]:
"""Returns a generator of paths and errors from a report. Sorting of output
is not guaranteed. Symbolic links can result in multiple entries per file.

Args:
report: ProjectReport data
report: :class:`ProjectReport` data

Returns:
String (in plaintext) that can be output to sys.stdout
Generator of :class:`PathError`s.
"""
output = StringIO()

def license_path(lic: str) -> Optional[Path]:
def license_path(lic: str) -> Path:
"""Resolve a license identifier to a license path."""
return report.licenses.get(lic)
result = report.licenses.get(lic)
# TODO: This should never happen. It basically only happens if the
# report or the project is malformed. There should be a better way to do
# this.
if result is None:
_LOGGER.error(
_(
"license {lic} has no known path; this should not happen"
).format(lic=lic)
)
result = Path(report.path)
return result

if not report.is_compliant:
# Bad licenses
for lic, files in sorted(report.bad_licenses.items()):
for path in sorted(files):
output.write(
_("{path}: bad license {lic}\n").format(path=path, lic=lic)
)
yield PathError(path, _("bad license {lic}").format(lic=lic))

# Deprecated licenses
for lic in sorted(report.deprecated_licenses):
lic_path = license_path(lic)
output.write(
_("{lic_path}: deprecated license\n").format(lic_path=lic_path)
)
yield PathError(lic_path, _("deprecated license"))

# Licenses without extension
for lic in sorted(report.licenses_without_extension):
lic_path = license_path(lic)
output.write(
_("{lic_path}: license without file extension\n").format(
lic_path=lic_path
)
)
yield PathError(lic_path, _("license without file extension"))

# Unused licenses
for lic in sorted(report.unused_licenses):
lic_path = license_path(lic)
output.write(
_("{lic_path}: unused license\n").format(lic_path=lic_path)
)
yield PathError(lic_path, _("unused license"))

# Missing licenses
for lic, files in sorted(report.missing_licenses.items()):
for path in sorted(files):
output.write(
_("{path}: missing license {lic}\n").format(
path=path, lic=lic
)
yield PathError(
path, _("missing license {lic}").format(lic=lic)
)

# Read errors
for path in sorted(report.read_errors):
output.write(_("{path}: read error\n").format(path=path))
yield PathError(path, _("read error"))

# Without licenses
for path in report.files_without_licenses:
output.write(_("{path}: no license identifier\n").format(path=path))
yield PathError(path, _("no license identifier"))

# Without copyright
for path in report.files_without_copyright:
output.write(_("{path}: no copyright notice\n").format(path=path))
yield PathError(path, _("no copyright notice"))

return output.getvalue()

def format_lines(report: ProjectReport) -> str:
    """Render the report as one ``<path>: <error>`` line per problem.

    The order of the lines is not guaranteed, and a file reachable through
    symbolic links may appear more than once.

    Args:
        report: :class:`ProjectReport` data

    Returns:
        Plaintext suitable for writing to sys.stdout; the empty string when
        the project is compliant.
    """
    # Nothing to report for a compliant project.
    if report.is_compliant:
        return ""

    rendered = [f"{where}: {what}\n" for where, what in get_errors(report)]
    return "".join(rendered)


def _escape_github_data(value: str) -> str:
    """Escape *value* for use as the message part of a workflow command."""
    # ``%`` has to be handled first, otherwise the escapes introduced for the
    # line-break characters would themselves be escaped a second time.
    return value.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A")


def _escape_github_property(value: str) -> str:
    """Escape *value* for use as a property (such as ``file=``) of a workflow
    command.
    """
    # Properties are separated by ``,`` and terminated by ``::``, so both
    # characters must be escaped on top of the message-level escapes.
    return _escape_github_data(value).replace(":", "%3A").replace(",", "%2C")


def format_github(report: ProjectReport) -> str:
    """Formats report as GitHub workflow commands to be printed to sys.stdout.
    The format is documented at
    <https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions>.
    Sorting of output is not guaranteed. Symbolic links can result in multiple
    entries per file.

    Paths and messages are escaped according to the workflow command rules, so
    that characters such as ``,``, ``:``, ``%`` or line breaks cannot corrupt
    the emitted command.

    Args:
        report: :class:`ProjectReport` data

    Returns:
        String (in plaintext) that can be output to sys.stdout. Empty when the
        project is compliant.
    """
    if report.is_compliant:
        return ""

    return "".join(
        f"::error file={_escape_github_property(str(path))}"
        f"::{_escape_github_data(error)}\n"
        for path, error in get_errors(report)
    )


def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int:
Expand All @@ -342,13 +400,26 @@ def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int:
project, do_checksum=False, multiprocessing=not args.no_multiprocessing
)

if args.quiet:
pass
elif args.json:
out.write(format_json(report))
elif args.lines:
out.write(format_lines(report))
else:
out.write(format_plain(report))
formatters = {
"json": format_json,
"lines": format_lines,
"github": format_github,
"plain": format_plain,
}

if not args.quiet:
output_format = os.environ.get("REUSE_OUTPUT_FORMAT")

if output_format is not None and output_format in formatters:
formatter = formatters[output_format]
out.write(formatter(report))
elif args.json:
out.write(format_json(report))
elif args.lines:
out.write(format_lines(report))
elif args.github:
out.write(format_github(report))
else:
out.write(format_plain(report))

return 0 if report.is_compliant else 1
44 changes: 43 additions & 1 deletion tests/test_lint.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2019 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2022 Florian Snow <[email protected]>
# SPDX-FileCopyrightText: 2024 Nico Rikken <[email protected]>
# SPDX-FileCopyrightText: 2024 Michal Čihař <[email protected]>
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand All @@ -11,7 +12,7 @@

from conftest import cpython, posix

from reuse.lint import format_lines, format_plain
from reuse.lint import format_github, format_lines, format_plain
from reuse.project import Project
from reuse.report import ProjectReport

Expand Down Expand Up @@ -271,4 +272,45 @@ def test_lint_lines_read_errors(fake_repository):
assert "read error" in result


def test_lint_github_output(fake_repository):
    """End-to-end check of the GitHub workflow command output of lint."""
    # The files below are meant to provoke every kind of finding at once:
    # missing_licenses, unused_licenses, bad_licenses, deprecated_licenses,
    # licenses_without_extension, files_without_copyright,
    # files_without_licenses, read_errors
    licenses_dir = fake_repository / "LICENSES"
    broken_files = {
        fake_repository / "invalid-license.py": (
            "SPDX-License-Identifier: invalid"
        ),
        fake_repository / "no-license.py": "SPDX-FileCopyrightText: Jane Doe",
        licenses_dir / "invalid-license-text": "An invalid license text",
        licenses_dir / "Nokia-Qt-exception-1.1.txt": "Deprecated",
        licenses_dir / "MIT": "foo",
        fake_repository / "file with spaces.py": "foo",
    }
    for target, content in broken_files.items():
        target.write_text(content)

    report = ProjectReport.generate(Project.from_directory(fake_repository))
    github_output = format_github(report)
    commands = github_output.splitlines()

    assert len(commands) == 12

    # Every emitted line has to look like an ``::error`` workflow command.
    for command in commands:
        assert re.match("::error file=.+::[^:]+", command)

    # How often each name is expected to show up in the whole output.
    expected_occurrences = {
        "invalid-license.py": 3,
        "no-license.py": 1,
        "LICENSES": 6,
        "invalid-license-text": 3,
        "Nokia-Qt-exception-1.1.txt": 2,
        "MIT": 2,
        "file with spaces.py": 2,
    }
    for needle, expected in expected_occurrences.items():
        assert github_output.count(needle) == expected


# REUSE-IgnoreEnd
Loading