diff --git a/fgpyo/util/metric.py b/fgpyo/util/metric.py
index 33156397..59036dd4 100644
--- a/fgpyo/util/metric.py
+++ b/fgpyo/util/metric.py
@@ -116,8 +116,11 @@
 
 """
 
+import dataclasses
+import sys
 from abc import ABC
 from enum import Enum
+from inspect import isclass
 from pathlib import Path
 from typing import Any
 from typing import Callable
@@ -127,8 +130,17 @@
 from typing import List
 from typing import TypeVar
 
+if sys.version_info >= (3, 10):
+    from typing import TypeGuard
+else:
+    from typing_extensions import TypeGuard
+
+import attr
+
 from fgpyo import io
 from fgpyo.util import inspect
+from fgpyo.util.inspect import AttrsInstance
+from fgpyo.util.inspect import DataclassInstance
 
 MetricType = TypeVar("MetricType", bound="Metric")
 
@@ -334,3 +346,65 @@ def fast_concat(*inputs: Path, output: Path) -> None:
             io.write_lines(
                 path=output, lines_to_write=list(io.read_lines(input_path))[1:], append=True
             )
+
+
+def _is_dataclass_instance(metric: Metric) -> TypeGuard[DataclassInstance]:
+    """
+    Test if the given metric is a dataclass instance.
+
+    NB: `dataclasses.is_dataclass` returns True for both dataclass instances and class objects, and
+    we need to override the built-in function's `TypeGuard`.
+
+    Args:
+        metric: An instance of a Metric.
+
+    Returns:
+        True if the given metric is an instance of a dataclass-decorated Metric.
+        False otherwise.
+    """
+    return not isclass(metric) and dataclasses.is_dataclass(metric)
+
+
+def _is_attrs_instance(metric: Metric) -> TypeGuard[AttrsInstance]:
+    """
+    Test if the given metric is an attr.s instance.
+
+    NB: `attr.has` provides a type guard, but only on the class object - we want to narrow the type
+    of the metric instance, so we implement a guard here.
+
+    Args:
+        metric: An instance of a Metric.
+
+    Returns:
+        True if the given metric is an instance of an attr.s-decorated Metric.
+        False otherwise.
+    """
+    return not isclass(metric) and attr.has(metric.__class__)
+
+
+def asdict(metric: Metric) -> Dict[str, Any]:
+    """
+    Convert a Metric instance to a dictionary.
+
+    No formatting is performed on the values, and they are returned as contained (and typed) in the
+    underlying class. Use `Metric.format_value` to format the values to string.
+
+    Args:
+        metric: An instance of a Metric.
+
+    Returns:
+        A dictionary representation of the given metric.
+
+    Raises:
+        TypeError: If the given metric is not an instance of a `dataclass` or `attr.s`-decorated
+        Metric.
+    """
+    if _is_dataclass_instance(metric):
+        return dataclasses.asdict(metric)
+    elif _is_attrs_instance(metric):
+        return attr.asdict(metric)
+    else:
+        raise TypeError(
+            "The provided metric is not an instance of a `dataclass` or `attr.s`-decorated Metric "
+            f"class: {metric.__class__}"
+        )
diff --git a/fgpyo/util/tests/test_metric.py b/fgpyo/util/tests/test_metric.py
index e84e04dd..c5fe02a0 100644
--- a/fgpyo/util/tests/test_metric.py
+++ b/fgpyo/util/tests/test_metric.py
@@ -29,6 +29,9 @@
 from fgpyo.util.inspect import is_attr_class
 from fgpyo.util.inspect import is_dataclasses_class
 from fgpyo.util.metric import Metric
+from fgpyo.util.metric import _is_attrs_instance
+from fgpyo.util.metric import _is_dataclass_instance
+from fgpyo.util.metric import asdict
 
 
 class EnumTest(enum.Enum):
@@ -519,3 +522,49 @@ def test_metric_columns_out_of_order(tmp_path: Path, data_and_classes: DataBuild
     names = list(NameMetric.read(path=path))
     assert len(names) == 1
     assert names[0] == name
+
+
+def test_is_dataclass_instance() -> None:
+    """Test that _is_dataclass_instance works as expected."""
+
+    # True for `dataclass`-decorated instances but not `attr.s`-decorated instances
+    assert _is_dataclass_instance(dataclasses_data_and_classes.Person(name="name", age=42))
+    assert not _is_dataclass_instance(attr_data_and_classes.Person(name="name", age=42))
+
+    # And False for both classes
+    assert not _is_dataclass_instance(dataclasses_data_and_classes.Person)
+    assert not _is_dataclass_instance(attr_data_and_classes.Person)
+
+
+def test_is_attrs_instance() -> None:
+    """Test that _is_attrs_instance works as expected."""
+
+    # True for `attr.s`-decorated instances but not `dataclass`-decorated instances
+    assert not _is_attrs_instance(dataclasses_data_and_classes.Person(name="name", age=42))
+    assert _is_attrs_instance(attr_data_and_classes.Person(name="name", age=42))
+
+    # And False for both classes
+    assert not _is_attrs_instance(dataclasses_data_and_classes.Person)
+    assert not _is_attrs_instance(attr_data_and_classes.Person)
+
+
+@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
+def test_asdict(data_and_classes: DataBuilder) -> None:
+    """Test that asdict works as expected on both dataclass and attr.s decorated metrics."""
+
+    assert asdict(data_and_classes.Person(name="name", age=42)) == {"name": "name", "age": 42}
+
+
+def test_asdict_raises() -> None:
+    """Test that we raise a TypeError when asdict is called on an undecorated Metric instance."""
+
+    class UndecoratedMetric(Metric["UndecoratedMetric"]):
+        foo: int
+        bar: str
+
+        def __init__(self, foo: int, bar: str):
+            self.foo = foo
+            self.bar = bar
+
+    with pytest.raises(TypeError, match="The provided metric is not an instance"):
+        asdict(UndecoratedMetric(foo=1, bar="a"))