Skip to content

Commit

Permalink
feat: offer GZIP support for Metric IO
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval committed Aug 21, 2023
1 parent f9c3774 commit 031486c
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 21 deletions.
5 changes: 2 additions & 3 deletions fgpyo/fasta/tests/test_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from tempfile import NamedTemporaryFile as NamedTemp

import pytest
from py._path.local import LocalPath as TmpDir
from pytest import raises

from fgpyo.fasta.builder import FastaBuilder
Expand Down Expand Up @@ -64,14 +63,14 @@ def test_bases_string_from_ContigBuilder_add(
bases: str,
times: int,
expected: str,
tmpdir: TmpDir,
tmp_path: Path,
) -> None:
"""
Reads bases back from fasta and checks that extra spaces are removed and bases are uppercase
"""
builder = FastaBuilder()
builder.add(name).add(bases, times)
with NamedTemp(suffix=".fa", dir=tmpdir, mode="w", delete=True) as fp:
with NamedTemp(suffix=".fa", dir=tmp_path, mode="w", delete=True) as fp:
builder.to_file(Path(fp.name))
with open(fp.name, "r") as read_fp:
for line in read_fp.readlines():
Expand Down
9 changes: 4 additions & 5 deletions fgpyo/sam/tests/test_sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

import pysam
import pytest
from py._path.local import LocalPath as TmpDir
from pysam import AlignmentHeader

import fgpyo.sam as sam
Expand Down Expand Up @@ -141,10 +140,10 @@ def test_sam_file_open_writing(
file_type: SamFileType,
expected_records: List[pysam.AlignedSegment],
header_dict: AlignmentHeader,
tmpdir: TmpDir,
tmp_path: Path,
) -> None:
# use header as a keyword argument
with NamedTemp(suffix=file_type.extension, dir=tmpdir, mode="w", delete=False) as fp:
with NamedTemp(suffix=file_type.extension, dir=tmp_path, mode="w", delete=False) as fp:
kwargs = {"header": header_dict}
with sam._pysam_open(
path=fp.file, open_for_reading=False, file_type=file_type, **kwargs # type: ignore
Expand All @@ -155,11 +154,11 @@ def test_sam_file_open_writing(


def test_sam_file_open_writing_header_keyword(
expected_records: List[pysam.AlignedSegment], header_dict: AlignmentHeader, tmpdir: TmpDir
expected_records: List[pysam.AlignedSegment], header_dict: AlignmentHeader, tmp_path: Path
) -> None:
# Use SamWriter
# use header as a keyword argument
with NamedTemp(suffix=".sam", dir=tmpdir, mode="w", delete=False) as fp:
with NamedTemp(suffix=".sam", dir=tmp_path, mode="w", delete=False) as fp:
with sam.writer(path=fp.name, header=header_dict, file_type=SamFileType.SAM) as sam_writer:
for r in expected_records:
sam_writer.write(r)
Expand Down
5 changes: 3 additions & 2 deletions fgpyo/util/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@

import attr

from fgpyo import io
from fgpyo.util import inspect

MetricType = TypeVar("MetricType")
Expand Down Expand Up @@ -161,7 +162,7 @@ def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator[Any]:
ignore_extra_fields: True to ignore any extra columns, False to raise an exception.
"""
parsers = cls._parsers()
with path.open("r") as reader:
with io.to_reader(path) as reader:
header: List[str] = reader.readline().rstrip("\r\n").split("\t")
# check the header
class_fields = set(cls.header())
Expand Down Expand Up @@ -234,7 +235,7 @@ def write(cls, path: Path, *values: MetricType) -> None:
path: path to the output file
values: zero or more metrics.
"""
with path.open("w") as writer:
with io.to_writer(path) as writer:
writer.write("\t".join(cls.header()))
writer.write("\n")
for value in values:
Expand Down
36 changes: 25 additions & 11 deletions fgpyo/util/tests/test_metric.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import enum
import gzip
from pathlib import Path
from typing import Any
from typing import Callable
Expand All @@ -10,7 +11,6 @@

import attr
import pytest
from py._path.local import LocalPath as TmpDir

from fgpyo.util.metric import Metric

Expand Down Expand Up @@ -138,8 +138,8 @@ class PersonDefault(Metric["PersonDefault"]):


@pytest.mark.parametrize("metric", DUMMY_METRICS)
def test_metric_roundtrip(tmpdir: TmpDir, metric: DummyMetric) -> None:
path: Path = Path(tmpdir) / "metrics.txt"
def test_metric_roundtrip(tmp_path: Path, metric: DummyMetric) -> None:
path: Path = Path(tmp_path) / "metrics.txt"

DummyMetric.write(path, metric)
metrics: List[DummyMetric] = list(DummyMetric.read(path=path))
Expand All @@ -148,8 +148,8 @@ def test_metric_roundtrip(tmpdir: TmpDir, metric: DummyMetric) -> None:
assert metrics[0] == metric


def test_metrics_roundtrip(tmpdir: TmpDir) -> None:
path: Path = Path(tmpdir) / "metrics.txt"
def test_metrics_roundtrip(tmp_path: Path) -> None:
path: Path = Path(tmp_path) / "metrics.txt"

DummyMetric.write(path, *DUMMY_METRICS)
metrics: List[DummyMetric] = list(DummyMetric.read(path=path))
Expand All @@ -158,9 +158,23 @@ def test_metrics_roundtrip(tmpdir: TmpDir) -> None:
assert metrics == DUMMY_METRICS


def test_metrics_read_extra_columns(tmpdir: TmpDir) -> None:
def test_metric_roundtrip_gzip(tmp_path: Path) -> None:
    """Write metrics to a ``.gz`` path, confirm the file is GZIP, then read them back.

    The metrics are written with ``DummyMetric.write`` and re-read with
    ``DummyMetric.read``; the values read back must equal ``DUMMY_METRICS``.
    """
    gz_path: Path = Path(tmp_path) / "metrics.txt.gz"
    DummyMetric.write(gz_path, *DUMMY_METRICS)

    # Pull a single byte through the gzip reader: this will raise an exception
    # if the file on disk is not actually a GZIP file.
    with gzip.open(gz_path, "r") as compressed:
        compressed.read(1)

    recovered: List[DummyMetric] = list(DummyMetric.read(path=gz_path))
    assert len(recovered) == len(DUMMY_METRICS)
    assert recovered == DUMMY_METRICS


def test_metrics_read_extra_columns(tmp_path: Path) -> None:
person = Person(name="Max", age=42)
path = Path(tmpdir) / "metrics.txt"
path = Path(tmp_path) / "metrics.txt"
with path.open("w") as writer:
header = Person.header()
header.append("foo")
Expand All @@ -173,9 +187,9 @@ def test_metrics_read_extra_columns(tmpdir: TmpDir) -> None:
list(Person.read(path=path, ignore_extra_fields=False))


def test_metrics_read_missing_optional_columns(tmpdir: TmpDir) -> None:
def test_metrics_read_missing_optional_columns(tmp_path: Path) -> None:
person = PersonMaybeAge(name="Max", age=None)
path = Path(tmpdir) / "metrics.txt"
path = Path(tmp_path) / "metrics.txt"

# The "age" column is optional, and not in the file, but that's ok
with path.open("w") as writer:
Expand All @@ -189,9 +203,9 @@ def test_metrics_read_missing_optional_columns(tmpdir: TmpDir) -> None:
list(PersonMaybeAge.read(path=path))


def test_metric_read_missing_column_with_default(tmpdir: TmpDir) -> None:
def test_metric_read_missing_column_with_default(tmp_path: Path) -> None:
person = PersonDefault(name="Max")
path = Path(tmpdir) / "metrics.txt"
path = Path(tmp_path) / "metrics.txt"

# The "age" column has a default, and is not in the file, but that's ok
with path.open("w") as writer:
Expand Down

0 comments on commit 031486c

Please sign in to comment.