Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support metric custom column ordering and sub-setting #178

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 60 additions & 10 deletions fgpyo/util/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,33 @@
>>> Person(name=Name(first='john', last='doe'), age=42, address=None).formatted_values()
["first last", "42"]
```

Re-ordering and sub-setting the columns when writing is supported by overriding the
`_fields_to_write()` method. In the example below, the `weight` field is not written; it is
declared `Optional` so that the metric can still be read back in from a file that lacks that
column, in which case it is set to `None`. Also, the `name` and `age` columns are written in
reverse order.

```python
>>> from fgpyo.util.metric import Metric
>>> import attr
>>> @attr.s(auto_attribs=True, frozen=True)
... class Person(Metric["Person"]):
...     name: str
...     age: int
...     weight: Optional[int]
...     @classmethod
...     def _fields_to_write(cls, field_types: Optional[List[FieldType]] = None) -> List[str]:
...         return ["age", "name"]
>>> person = Person(name="john", age=42, weight=180)
>>> person.header()
["age", "name"]
>>> list(person.values())
[42, "john"]
>>> Person.write(Path("/path/to/metrics.txt"), person)
>>> list(Person.read(Path("/path/to/metrics.txt")))
[Person(name="john", age=42, weight=None)]
```

"""

from abc import ABC
Expand All @@ -122,10 +149,13 @@
from typing import Generic
from typing import Iterator
from typing import List
from typing import Optional
from typing import TypeVar
from typing import final

from fgpyo import io
from fgpyo.util import inspect
from fgpyo.util.inspect import FieldType

MetricType = TypeVar("MetricType", bound="Metric")

Expand All @@ -141,19 +171,21 @@
[`format_value()`][fgpyo.util.metric.Metric.format_value].
"""

def values(self, _header: Optional[List[str]] = None) -> Iterator[Any]:
    """An iterator over attribute values in the same order as the header.

    Args:
        _header: The attribute names to yield values for, in order.  When `None`, the names
            returned by `self.header()` are used.  Callers that emit many rows may pass a
            pre-computed header so that it is not rebuilt for every row.

    Yields:
        The value of each named attribute, looked up with `getattr`.
    """
    # Drive the iteration from the header (not from the declared fields) so that the values
    # always line up with whatever columns the header contains.
    names = self.header() if _header is None else _header
    for name in names:
        yield getattr(self, name)

def formatted_values(self, _header: Optional[List[str]] = None) -> List[str]:
    """A list of formatted attribute values in the same order as the header.

    Args:
        _header: The attribute names to format values for, in order.  It is forwarded
            unchanged to `values()`, which falls back to `self.header()` when it is `None`.

    Returns:
        One string per value, each produced by `format_value()`.
    """
    return [self.format_value(value) for value in self.values(_header=_header)]

@classmethod
def _parsers(cls) -> Dict[type, Callable[[str], Any]]:
"""Mapping of type to a specific parser for that type. The parser must accept a string
as a single parameter and return a single value of the given type. Subclasses may
override this method to support custom types. The implementation here returns an empty
mapping."""
return {}

Expand Down Expand Up @@ -245,21 +277,39 @@
Args:
path: Path to the output file.
values: Zero or more metrics.

"""
header = cls.header()
with io.to_writer(path) as writer:
writer.write("\t".join(cls.header()))
writer.write("\n")
for value in values:
# Important, don't recurse on nested attr classes, instead let implementing class
# implement format_value.
writer.write("\t".join(cls.format_value(item) for item in value.values()))
writer.write(
"\t".join(cls.format_value(item) for item in value.values(_header=header))
)
writer.write("\n")

@classmethod
@final
def header(cls) -> List[str]:
    """The list of header values for the metric.

    The names come from `_fields_to_write()`, which is handed the fields found by
    `inspect.get_fields()` and may re-order or subset them.

    Returns:
        The field names, in the order returned by `_fields_to_write()`.

    Raises:
        ValueError: If `_fields_to_write()` returns a name that is not a field of the metric.
    """
    field_types = list(inspect.get_fields(cls))  # type: ignore[arg-type]
    field_names = {field.name for field in field_types}
    header = cls._fields_to_write(field_types=field_types)
    # This method is final, so any unknown name can only have come from `_fields_to_write()`;
    # name that method in the error so the message points at the code that needs fixing.
    extra_fields = [name for name in header if name not in field_names]
    if extra_fields:
        raise ValueError(
            "_fields_to_write() returned extra fields: " + ", ".join(extra_fields)
        )
    return header

@classmethod
def _fields_to_write(cls, field_types: List[FieldType]) -> List[str]:
    """Selects the field names used for the header and values.

    The implementation here keeps the name of every entry of `field_types`, in the order
    given.  Override it to re-order or subset the columns written to file with `write()` or
    returned by `values()`.
    """
    return [field_type.name for field_type in field_types]

@classmethod
def format_value(cls, value: Any) -> str: # noqa: C901
Expand Down Expand Up @@ -315,7 +365,7 @@

@classmethod
def to_list(cls, value: str) -> List[Any]:
    """Returns a list value split on comma delimiter.

    Args:
        value: The comma-delimited string to split.

    Returns:
        An empty list when `value` is the empty string, otherwise the comma-separated parts.
    """
    # `"".split(",")` would give `[""]`, so the empty string is handled explicitly.
    if value == "":
        return []
    return value.split(",")

@staticmethod
Expand Down
73 changes: 73 additions & 0 deletions tests/fgpyo/util/test_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import attr
import pytest

from fgpyo.util.inspect import FieldType
from fgpyo.util.inspect import is_attr_class
from fgpyo.util.inspect import is_dataclasses_class
from fgpyo.util.metric import Metric
Expand Down Expand Up @@ -121,6 +122,24 @@ class NameMetric(Metric["NameMetric"]):
first: str
last: str

@make_dataclass(use_attr=use_attr)
class LastFirstMetric(Metric["LastFirstMetric"]):
    """A two-field metric whose columns are written as `last` then `first`."""

    first: str
    last: str

    @classmethod
    def _fields_to_write(cls, field_types: Optional[List[FieldType]] = None) -> List[str]:
        # Deliberately the reverse of the order in which the fields are declared.
        return ["last", "first"]

@make_dataclass(use_attr=use_attr)
class SubsetMetric(Metric["SubsetMetric"]):
    """A two-field metric that only writes its `written` column."""

    written: str
    hidden: Optional[str]

    @classmethod
    def _fields_to_write(cls, field_types: Optional[List[FieldType]] = None) -> List[str]:
        # `hidden` is left out on purpose so that it never reaches the output.
        return ["written"]

@make_dataclass(use_attr=use_attr)
class NamedPerson(Metric["NamedPerson"]):
name: Name
Expand Down Expand Up @@ -156,6 +175,8 @@ class ListPerson(Metric["ListPerson"]):
self.Person = Person
self.Name = Name
self.NameMetric = NameMetric
self.LastFirstMetric = LastFirstMetric
self.SubsetMetric = SubsetMetric
self.NamedPerson = NamedPerson
self.PersonMaybeAge = PersonMaybeAge
self.PersonDefault = PersonDefault
Expand Down Expand Up @@ -234,6 +255,10 @@ def test_is_correct_dataclass_type(use_attr: bool) -> None:
assert is_dataclasses_class(data_and_classes.Name) is not use_attr
assert is_attr_class(data_and_classes.NameMetric) is use_attr
assert is_dataclasses_class(data_and_classes.NameMetric) is not use_attr
assert is_attr_class(data_and_classes.LastFirstMetric) is use_attr
assert is_dataclasses_class(data_and_classes.LastFirstMetric) is not use_attr
assert is_attr_class(data_and_classes.SubsetMetric) is use_attr
assert is_dataclasses_class(data_and_classes.SubsetMetric) is not use_attr
assert is_attr_class(data_and_classes.NamedPerson) is use_attr
assert is_dataclasses_class(data_and_classes.NamedPerson) is not use_attr
assert is_attr_class(data_and_classes.PersonMaybeAge) is use_attr
Expand Down Expand Up @@ -368,6 +393,54 @@ def test_metric_read_missing_column_with_default(
list(PersonDefault.read(path=path))


@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
def test_metric_read_different_column_order(tmp_path: Path, data_and_classes: DataBuilder) -> None:
    """Checks that a file whose columns are ordered `last`, `first` is read correctly."""
    NameMetric: TypeAlias = data_and_classes.NameMetric
    path = tmp_path / "metrics.txt"

    # The columns on disk are in the opposite order to the fields declared on the metric.
    path.write_text("last\tfirst\nDoe\tJane\n")

    expected = NameMetric(first="Jane", last="Doe")
    assert list(NameMetric.read(path=path)) == [expected]


@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
def test_metric_write_different_column_order(tmp_path: Path, data_and_classes: DataBuilder) -> None:
    """Checks that the header, the values and the written file all use the custom order."""
    LastFirstMetric: TypeAlias = data_and_classes.LastFirstMetric
    metric = LastFirstMetric(first="Jane", last="Doe")
    expected_header = ["last", "first"]

    assert LastFirstMetric.header() == expected_header
    assert list(metric.values()) == ["Doe", "Jane"]

    path = tmp_path / "metrics.txt"
    LastFirstMetric.write(path, metric)
    with path.open("r") as reader:
        header_line = reader.readline()
        value_line = reader.readline()

    assert header_line.strip().split("\t") == expected_header
    assert value_line.strip().split("\t") == [metric.last, metric.first]


@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
def test_metric_write_subset(tmp_path: Path, data_and_classes: DataBuilder) -> None:
    """Checks that the header, the values and the written file only contain `written`."""
    SubsetMetric: TypeAlias = data_and_classes.SubsetMetric
    subset = SubsetMetric(written="present", hidden="absent")
    expected_header = ["written"]

    assert SubsetMetric.header() == expected_header
    assert list(subset.values()) == ["present"]

    path = tmp_path / "metrics.txt"
    SubsetMetric.write(path, subset)
    with path.open("r") as reader:
        header_line = reader.readline()
        value_line = reader.readline()

    assert header_line.strip().split("\t") == expected_header
    assert value_line.strip().split("\t") == [subset.written]


@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
def test_metric_header(data_and_classes: DataBuilder) -> None:
assert data_and_classes.DummyMetric.header() == [
Expand Down
Loading