feat: support metric custom column ordering and sub-setting
Fixes #177
nh13 committed Aug 14, 2024
1 parent d574e5a commit 92aee19
Showing 3 changed files with 140 additions and 12 deletions.
2 changes: 1 addition & 1 deletion fgpyo/util/inspect.py
@@ -332,7 +332,7 @@ def get_parser() -> partial: # noqa: C901
nonlocal type_
nonlocal parsers
try:
return functools.partial(parsers[type_])
return functools.partial(parsers[type_]) # type: ignore[misc]
except KeyError as ex:
if (
type_ in [str, int, float]
77 changes: 66 additions & 11 deletions fgpyo/util/metric.py
@@ -111,6 +111,32 @@
>>> Person(name=Name(first='john', last='doe'), age=42, address=None).formatted_values()
["first last", "42"]
```

Re-ordering and sub-setting the columns when writing is supported by overriding the
`_header()` method. In the example below, the `weight` field is not written, but is typed as
`Optional` so the metric can still be read back in. The `name` and `age` columns are also
written in reverse order.
```python
>>> from pathlib import Path
>>> from typing import List, Optional
>>> import attr
>>> from fgpyo.util.inspect import FieldType
>>> from fgpyo.util.metric import Metric
>>> @attr.s(auto_attribs=True, frozen=True)
... class Person(Metric["Person"]):
... name: str
... age: int
... weight: Optional[int]
... @classmethod
... def _header(cls, field_types: Optional[List[FieldType]] = None) -> List[str]:
... return ["age", "name"]
>>> person = Person(name="john", age=42, weight=180)
>>> person.header()
["age", "name"]
>>> list(person.values())
[42, "john"]
>>> Person.write(Path("/path/to/metrics.txt"), person)
>>> list(Person.read(Path("/path/to/metrics.txt")))
[Person(name="john", age=42, weight=None)]
```
"""

from abc import ABC
@@ -122,10 +148,13 @@
from typing import Generic
from typing import Iterator
from typing import List
from typing import Optional
from typing import TypeVar
from typing import final

from fgpyo import io
from fgpyo.util import inspect
from fgpyo.util.inspect import FieldType

MetricType = TypeVar("MetricType", bound="Metric")

Expand All @@ -141,19 +170,21 @@ class Metric(ABC, Generic[MetricType]):
[`format_value()`][fgpyo.util.metric.Metric.format_value].
"""

def values(self) -> Iterator[Any]:
def values(self, _header: Optional[List[str]] = None) -> Iterator[Any]:
"""An iterator over attribute values in the same order as the header."""
for field in inspect.get_fields(self.__class__): # type: ignore[arg-type]
yield getattr(self, field.name)
if _header is None:
_header = self.header()
for name in _header:
yield getattr(self, name)

def formatted_values(self) -> List[str]:
def formatted_values(self, _header: Optional[List[str]] = None) -> List[str]:
"""An iterator over formatted attribute values in the same order as the header."""
return [self.format_value(value) for value in self.values()]
return [self.format_value(value) for value in self.values(_header=_header)]

@classmethod
def _parsers(cls) -> Dict[type, Callable[[str], Any]]:
"""Mapping of type to a specific parser for that type. The parser must accept a string
as a single parameter and return a single value of the given type. Sub-classes may
as a single parameter and return a single value of the given type. Subclasses may
override this method to support custom types."""
return {}
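To illustrate the contract described in the `_parsers()` docstring above, here is an editorial sketch (not part of this commit) of a subclass that registers a parser for a custom type. The `Sample` class, `SampleMetric`, and the `"name:lane"` encoding are hypothetical; only the `_parsers()`/`format_value()` override pattern comes from this module.

```python
# Editorial sketch (not part of this commit): registering a parser for a custom type.
from typing import Any, Callable, Dict

import attr

from fgpyo.util.metric import Metric


@attr.s(auto_attribs=True, frozen=True)
class Sample:
    name: str
    lane: int


@attr.s(auto_attribs=True, frozen=True)
class SampleMetric(Metric["SampleMetric"]):
    sample: Sample
    reads: int

    @classmethod
    def _parsers(cls) -> Dict[type, Callable[[str], Any]]:
        # Parse the hypothetical "name:lane" encoding back into a Sample when reading.
        def to_sample(value: str) -> Sample:
            name, lane = value.split(":")
            return Sample(name=name, lane=int(lane))

        return {Sample: to_sample}

    @classmethod
    def format_value(cls, value: Any) -> str:
        # Write a Sample as "name:lane"; defer to the base class for all other types.
        if isinstance(value, Sample):
            return f"{value.name}:{value.lane}"
        return super().format_value(value)
```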

@@ -245,21 +276,45 @@ def write(cls, path: Path, *values: MetricType) -> None:
Args:
path: Path to the output file.
values: Zero or more metrics.
"""
header = cls.header()
with io.to_writer(path) as writer:
writer.write("\t".join(cls.header()))
writer.write("\n")
for value in values:
# Important: don't recurse on nested attr classes; instead, let the implementing class
# override format_value.
writer.write("\t".join(cls.format_value(item) for item in value.values()))
writer.write(
"\t".join(cls.format_value(item) for item in value.values(_header=header))
)
writer.write("\n")

@classmethod
@final
def header(cls) -> List[str]:
"""The list of header values for the metric."""
return [a.name for a in inspect.get_fields(cls)] # type: ignore[arg-type]
"""The list of header values for the metric.
Since `header()` is `final`, override `_header()` instead to re-order or sub-set the columns
written to file with `write()` or returned by `values()`.
"""
field_types = list(inspect.get_fields(cls)) # type: ignore[arg-type]
field_names = {field.name for field in field_types}
header = cls._header(field_types=field_types)
extra_fields = [h for h in header if h not in field_names]
if len(extra_fields) > 0:
raise ValueError("header() returned extra fields: " + ", ".join(extra_fields))
return header

@classmethod
def _header(cls, field_types: Optional[List[FieldType]] = None) -> List[str]:
"""Returns a mapping of a field to the index it should be returned in `values()`
based on the field names returned by `header()`.
This is useful for re-ordering or sub-setting the output columns in `write()`.
"""
if field_types is None:
field_types = list(inspect.get_fields(cls)) # type: ignore[arg-type]
return [a.name for a in field_types]
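As an editorial aside (not part of this commit), the validation in `header()` above means a `_header()` override that names a column not defined on the class fails fast. A minimal sketch, assuming the attrs style used elsewhere in this module; `BadMetric` is hypothetical:

```python
# Editorial sketch (not part of this commit): _header() naming an unknown column is
# rejected by header()'s validation.
import attr

from fgpyo.util.metric import Metric


@attr.s(auto_attribs=True, frozen=True)
class BadMetric(Metric["BadMetric"]):
    name: str

    @classmethod
    def _header(cls, field_types=None):
        return ["name", "not_a_field"]


BadMetric.header()  # raises ValueError: header() returned extra fields: not_a_field
```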

@classmethod
def format_value(cls, value: Any) -> str: # noqa: C901
@@ -315,7 +370,7 @@ def format_value(cls, value: Any) -> str: # noqa: C901

@classmethod
def to_list(cls, value: str) -> List[Any]:
"""Returns a list value split on comma delimeter."""
"""Returns a list value split on comma delimiter."""
return [] if value == "" else value.split(",")

@staticmethod
73 changes: 73 additions & 0 deletions tests/fgpyo/util/test_metric.py
@@ -26,6 +26,7 @@
import attr
import pytest

from fgpyo.util.inspect import FieldType
from fgpyo.util.inspect import is_attr_class
from fgpyo.util.inspect import is_dataclasses_class
from fgpyo.util.metric import Metric
@@ -121,6 +122,24 @@ class NameMetric(Metric["NameMetric"]):
first: str
last: str

@make_dataclass(use_attr=use_attr)
class LastFirstMetric(Metric["LastFirstMetric"]):
first: str
last: str

@classmethod
def _header(cls, field_types: Optional[List[FieldType]] = None) -> List[str]:
return ["last", "first"]

@make_dataclass(use_attr=use_attr)
class SubsetMetric(Metric["SubsetMetric"]):
written: str
hidden: Optional[str]

@classmethod
def _header(cls, field_types: Optional[List[FieldType]] = None) -> List[str]:
return ["written"]

@make_dataclass(use_attr=use_attr)
class NamedPerson(Metric["NamedPerson"]):
name: Name
@@ -156,6 +175,8 @@ class ListPerson(Metric["ListPerson"]):
self.Person = Person
self.Name = Name
self.NameMetric = NameMetric
self.LastFirstMetric = LastFirstMetric
self.SubsetMetric = SubsetMetric
self.NamedPerson = NamedPerson
self.PersonMaybeAge = PersonMaybeAge
self.PersonDefault = PersonDefault
@@ -234,6 +255,10 @@ def test_is_correct_dataclass_type(use_attr: bool) -> None:
assert is_dataclasses_class(data_and_classes.Name) is not use_attr
assert is_attr_class(data_and_classes.NameMetric) is use_attr
assert is_dataclasses_class(data_and_classes.NameMetric) is not use_attr
assert is_attr_class(data_and_classes.LastFirstMetric) is use_attr
assert is_dataclasses_class(data_and_classes.LastFirstMetric) is not use_attr
assert is_attr_class(data_and_classes.SubsetMetric) is use_attr
assert is_dataclasses_class(data_and_classes.SubsetMetric) is not use_attr
assert is_attr_class(data_and_classes.NamedPerson) is use_attr
assert is_dataclasses_class(data_and_classes.NamedPerson) is not use_attr
assert is_attr_class(data_and_classes.PersonMaybeAge) is use_attr
@@ -368,6 +393,54 @@ def test_metric_read_missing_column_with_default(
list(PersonDefault.read(path=path))


@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
def test_metric_read_different_column_order(tmp_path: Path, data_and_classes: DataBuilder) -> None:
NameMetric: TypeAlias = data_and_classes.NameMetric
name = NameMetric(first="Jane", last="Doe")
path = tmp_path / "metrics.txt"

with path.open("w") as writer:
writer.write("last\tfirst\n")
writer.write("Doe\tJane\n")
assert list(NameMetric.read(path=path)) == [name]


@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
def test_metric_write_different_column_order(tmp_path: Path, data_and_classes: DataBuilder) -> None:
LastFirstMetric: TypeAlias = data_and_classes.LastFirstMetric
name = LastFirstMetric(first="Jane", last="Doe")

assert LastFirstMetric.header() == ["last", "first"]
assert list(name.values()) == ["Doe", "Jane"]

path = tmp_path / "metrics.txt"
LastFirstMetric.write(path, name)
with path.open("r") as read:
header = read.readline().strip().split("\t")
fields = read.readline().strip().split("\t")

assert header == ["last", "first"]
assert fields == [name.last, name.first]


@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
def test_metric_write_subset(tmp_path: Path, data_and_classes: DataBuilder) -> None:
SubsetMetric: TypeAlias = data_and_classes.SubsetMetric
metric = SubsetMetric(written="present", hidden="absent")

assert SubsetMetric.header() == ["written"]
assert list(metric.values()) == ["present"]

path = tmp_path / "metrics.txt"
SubsetMetric.write(path, metric)
with path.open("r") as read:
header = read.readline().strip().split("\t")
fields = read.readline().strip().split("\t")

assert header == ["written"]
assert fields == [metric.written]


@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
def test_metric_header(data_and_classes: DataBuilder) -> None:
assert data_and_classes.DummyMetric.header() == [
