Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add asdict() #109

Merged
merged 7 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions fgpyo/util/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,11 @@

"""

import dataclasses
import sys
from abc import ABC
from enum import Enum
from inspect import isclass
from pathlib import Path
from typing import Any
from typing import Callable
Expand All @@ -127,8 +130,17 @@
from typing import List
from typing import TypeVar

if sys.version_info >= (3, 10):
from typing import TypeGuard
else:
from typing_extensions import TypeGuard

import attr

from fgpyo import io
from fgpyo.util import inspect
from fgpyo.util.inspect import AttrsInstance
from fgpyo.util.inspect import DataclassInstance

MetricType = TypeVar("MetricType", bound="Metric")

Expand Down Expand Up @@ -334,3 +346,65 @@ def fast_concat(*inputs: Path, output: Path) -> None:
io.write_lines(
path=output, lines_to_write=list(io.read_lines(input_path))[1:], append=True
)


def _is_dataclass_instance(metric: Metric) -> TypeGuard[DataclassInstance]:
"""
Test if the given metric is a dataclass instance.

NB: `dataclasses.is_dataclass` returns True for both dataclass instances and class objects, and
we need to override the built-in function's `TypeGuard`.

Args:
metric: An instance of a Metric.

Returns:
True if the given metric is an instance of a dataclass-decorated Metric.
False otherwise.
"""
return not isclass(metric) and dataclasses.is_dataclass(metric)


def _is_attrs_instance(metric: Metric) -> TypeGuard[AttrsInstance]:
"""
Test if the given metric is an attr.s instance.

NB: `attr.has` provides a type guard, but only on the class object - we want to narrow the type
of the metric instance, so we implement a guard here.

Args:
metric: An instance of a Metric.

Returns:
True if the given metric is an instance of an attr.s-decorated Metric.
False otherwise.
"""
return not isclass(metric) and attr.has(metric.__class__)


def asdict(metric: Metric) -> Dict[str, Any]:
msto marked this conversation as resolved.
Show resolved Hide resolved
"""
Convert a Metric instance to a dictionary.
msto marked this conversation as resolved.
Show resolved Hide resolved

No formatting is performed on the values, and they are returned as contained (and typed) in the
underlying dataclass. Use `Metric.format_value` to format the values to string.

Args:
metric: An instance of a Metric.

Returns:
A dictionary representation of the given metric.

Raises:
TypeError: If the given metric is not an instance of a `dataclass` or `attr.s`-decorated
Metric.
"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This overlooks the format_value method on Metric that is used to format values when written to a file.

Copy link
Contributor Author

@msto msto May 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nh13 the omission was deliberate. I left a comment on the topic in the PR description, but given how much conversation this one has attracted it was easy to overlook 🙂

NB: it may be valuable to permit formatting/casting the values as part of this function (e.g. by adding a parameter format: bool = False), but to do so we'll probably want to extract Metric.format_value() to a standalone function instead of a classmethod

I do not think we should have an asdict() function that changes the value types by default. This function is primarily intended as a dispatcher, selecting the correct (dataclasses or attr.s) function depending on how the Metric in question was decorated.

Ideally, this function will be deprecated once we drop support for attrs. As I mentioned in my comments above to Clint, when that happens it would be preferable to be able to transparently replace this with dataclasses.asdict .

At that time we could simply import dataclasses.asdict into this module to avoid breakage? Or replace from fgpyo.util.metric import asdict with from dataclasses import asdict

I am open to adding an argument to optionally support formatting (e.g. format_values: bool = False), with the stipulation that it should be False by default and the caveat that I expect it to add debt and increase friction when we deprecate attr.s.

However, I'd prefer to leave as is, and then call format_value on the resulting dict when necessary, e.g.

metric_dict: dict[str, str] = {key: format_value(val) for key, val in asdict(metric)}

Thoughts?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let’s omit it but document it clearly.

I do like that the format_value is a class method so that any new Metric type can perform custom formatting but overriding the class method. I think passing in a parsing function, like defopt, in rare situations causes a conflict when we want to format the type differently.

Copy link
Contributor Author

@msto msto Jun 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the idea of overriding the classmethod, but wouldn't consider doing so in practice. Given that it specifies formatting behavior for a wide variety of primitive and compound types, it doesn't seem to lend itself to easy extension or modification. (Since in order to override formatting for one type, you would have to override them all.)

if _is_dataclass_instance(metric):
return dataclasses.asdict(metric)
elif _is_attrs_instance(metric):
return attr.asdict(metric)
else:
raise TypeError(
"The provided metric is not an instance of a `dataclass` or `attr.s`-decorated Metric "
f"class: {metric.__class__}"
)
49 changes: 49 additions & 0 deletions fgpyo/util/tests/test_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
from fgpyo.util.inspect import is_attr_class
from fgpyo.util.inspect import is_dataclasses_class
from fgpyo.util.metric import Metric
from fgpyo.util.metric import _is_attrs_instance
from fgpyo.util.metric import _is_dataclass_instance
from fgpyo.util.metric import asdict


class EnumTest(enum.Enum):
Expand Down Expand Up @@ -519,3 +522,49 @@ def test_metric_columns_out_of_order(tmp_path: Path, data_and_classes: DataBuild
names = list(NameMetric.read(path=path))
assert len(names) == 1
assert names[0] == name


def test_is_dataclass_instance() -> None:
"""Test that _is_dataclass_instance works as expected."""

# True for `dataclass`-decorated instances but not `attr.s`-decorated instances
assert _is_dataclass_instance(dataclasses_data_and_classes.Person(name="name", age=42))
assert not _is_dataclass_instance(attr_data_and_classes.Person(name="name", age=42))

# And False for both classes
assert not _is_dataclass_instance(dataclasses_data_and_classes.Person)
assert not _is_dataclass_instance(attr_data_and_classes.Person)


def test_is_attrs_instance() -> None:
"""Test that _is_attrs_instance works as expected."""

# True for `attr.s`-decorated instances but not `dataclass`-decorated instances
assert not _is_attrs_instance(dataclasses_data_and_classes.Person(name="name", age=42))
assert _is_attrs_instance(attr_data_and_classes.Person(name="name", age=42))

# And False for both classes
assert not _is_attrs_instance(dataclasses_data_and_classes.Person)
assert not _is_attrs_instance(attr_data_and_classes.Person)


@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
def test_asdict(data_and_classes: DataBuilder) -> None:
"""Test that asdict works as expected on both dataclass and attr.s decoreated metrics."""

assert asdict(data_and_classes.Person(name="name", age=42)) == {"name": "name", "age": 42}


def test_asdict_raises() -> None:
"""Test that we raise a TypeError when asdict is called on a non-metric class."""

class UndecoratedMetric(Metric["UndecoratedMetric"]):
foo: int
bar: str

def __init__(self, foo: int, bar: str):
self.foo = foo
self.bar = bar

with pytest.raises(TypeError, match="The provided metric is not an instance"):
asdict(UndecoratedMetric(foo=1, bar="a"))
Loading