Skip to content

Commit

Permalink
refactor: better error handling
Browse files Browse the repository at this point in the history
  • Loading branch information
msto committed Mar 15, 2024
1 parent 2ac66c4 commit 6b094cc
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 5 deletions.
19 changes: 15 additions & 4 deletions fgpyo/util/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@
from typing import Generic
from typing import Iterator
from typing import List
from typing import Optional
from typing import TypeVar

import attr
Expand Down Expand Up @@ -169,10 +170,15 @@ def read(
ignore_extra_fields: True to ignore any extra columns, False to raise an exception.
header_comment_char: Any lines beginning with this character will be ignored before
parsing the header.
Raises:
ValueError: if the file does not contain a header.
"""
parsers = cls._parsers()
with io.to_reader(path) as reader:
header: List[str] = Metric.read_header(reader, comment_char=header_comment_char)
header = Metric._read_header(reader, comment_char=header_comment_char)
if header is None:
raise ValueError(f"No header found in file: {path}")

# check the header
class_fields = set(cls.header())
Expand Down Expand Up @@ -334,27 +340,32 @@ def fast_concat(*inputs: Path, output: Path) -> None:
)

@staticmethod
def _read_header(
    reader: io.Reader,
    comment_char: str = DEFAULT_HEADER_COMMENT_CHAR,
) -> Optional[List[str]]:
    """
    Read the header from an open file.

    Comment and empty lines will be ignored.

    NB: This function returns `Optional` instead of raising an error because the name of the
    source file is not in scope, making it difficult to provide a helpful error message. It is
    the responsibility of the caller to raise an error if the file is empty.

    Args:
        reader: An open, readable file
        comment_char: The character which indicates the start of a comment line.

    Returns:
        A list of field names found in the header line.
        None if the file was empty or contained only comments or empty lines.
    """
    for line in reader:
        # Skip comment lines and lines that contain only whitespace.
        if line.startswith(comment_char) or line.strip() == "":
            continue
        # The first remaining line is the header: drop the line terminator (but keep any
        # other whitespace, which may be part of a field name) and split on tabs.
        return line.rstrip("\r\n").split("\t")

    # The reader was exhausted without encountering a header line.
    return None
31 changes: 30 additions & 1 deletion fgpyo/util/tests/test_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,35 @@ def test_read_header_can_read_picard(tmp_path: Path) -> None:
metrics_file.write("SAMPLE\tFOO\tBAR\n")

with metrics_path.open("r") as metrics_file:
header = Metric.read_header(metrics_file)
header = Metric._read_header(metrics_file)

assert header == ["SAMPLE", "FOO", "BAR"]


@pytest.mark.parametrize(
    "lines",
    [
        [],
        [""],
        ["# comment"],
        ["", "# comment"],
    ],
)
def test_read_validates_no_header(tmp_path: Path, lines: List[str]) -> None:
    """
    Test our handling of a file with no header.

    1. The helper `Metric._read_header` returns None
    2. `Metric.read` raises `ValueError`
    """
    metrics_path = tmp_path / "bad_metrics"

    with metrics_path.open("w") as metrics_file:
        # `writelines` does not add line separators, so terminate every entry explicitly.
        # Otherwise `[""]` would produce the same (empty) file as `[]`, and the blank-line
        # cases would never actually be exercised.
        metrics_file.writelines(f"{line}\n" for line in lines)

    with metrics_path.open("r") as metrics_file:
        assert Metric._read_header(metrics_file) is None

    # `read` is consumed with `list` so that the error is raised even if it is lazy.
    with pytest.raises(ValueError, match="No header found"):
        list(DummyMetric.read(metrics_path))

0 comments on commit 6b094cc

Please sign in to comment.