diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c8c1c82..5dd1f8d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ files: | )/.*\.py$ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-added-large-files files: ".*" @@ -26,7 +26,7 @@ repos: - id: debug-statements files: '^src/.*\.py$' - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.23.3 + rev: 0.27.1 hooks: - id: check-github-workflows files: '^github/workflows/.*\.ya?ml$' @@ -34,7 +34,7 @@ repos: - id: check-dependabot files: '^\.github/dependabot\.ya?ml$' - repo: https://github.com/ambv/black - rev: 23.7.0 + rev: 23.11.0 hooks: - id: black name: black formatter mfpbench @@ -43,7 +43,7 @@ repos: name: black formatter tests args: ["--config=pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.5.1 + rev: v1.7.1 hooks: - id: mypy name: mypy @@ -55,7 +55,7 @@ repos: - "--show-traceback" - "--allow-untyped-decorators" # Test decorators are not properly typed - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.278 + rev: v0.1.6 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix, --no-cache] diff --git a/docs/quickstart.md b/docs/quickstart.md index e809ff8..630ed17 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -139,7 +139,7 @@ print("contains", "X_1" in config) print("len", len(config)) -print("dict", dict(config)) +print("dict", config.as_dict()) ``` ??? tip "How is that done?" @@ -150,16 +150,16 @@ print("dict", dict(config)) and other pythonic things! -=== "`dict()`/`from_dict()`" +=== "`as_dict()`/`from_dict()`" - [`Config.dict()`][mfpbench.Config.dict] returns a dictionary of the config. This is useful for + [`Config.as_dict()`][mfpbench.Config.as_dict] returns a dictionary of the config. This is useful for working with the config in other libraries. ```python exec="true" source="material-block" result="python" session="quickstart" config = benchmark.sample() print(config) - config_dict = config.dict() + config_dict = config.as_dict() print(config_dict) new_config = benchmark.Config.from_dict(config_dict) @@ -246,7 +246,7 @@ print("cost", result.cost) print(result) ``` -These share the [`dict()`][mfpbench.Result.dict] and [`from_dict()`][mfpbench.Result.from_dict] +These share the [`as_dict()`][mfpbench.Result.as_dict] and [`from_dict()`][mfpbench.Result.from_dict] methods as [`Config`][mfpbench.Config] objects but do not behave like dictionaries. The most notable property of [`Result`][mfpbench.Result] objects is that also have the @@ -278,7 +278,7 @@ identify the config in the table. **This is what's used to retrieve results from If this is missing when doing a [`query()`][mfpbench.Benchmark.query], we'll do our best to match the config to the table and get the correct id, but this is not guaranteed. -When using [`dict()`][mfpbench.TabularConfig.dict], this `id` is **not** included in the dictionary. +When using [`as_dict()`][mfpbench.TabularConfig.as_dict], this `id` is **not** included in the dictionary. In general you should either store the `config` object itself or at least `config.id`, that you can include back in before calling [`query()`][mfpbench.Benchmark.query]. 
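The quickstart changes above come down to one rename: `Config.dict()` is now `Config.as_dict()`, and for tabular configs the table `id` is only included when asked for. Below is a minimal sketch of that round-trip. It assumes the LCBench tabular data is already available locally and that `"lcbench_tabular"` with `task_id="adult"` is a valid key for `mfpbench.get()`; the method names (`as_dict`, `with_id=`, `from_dict`, `query`) are the ones introduced in this diff.

```python
import mfpbench

# Assumed setup: LCBench tabular files downloaded, key/task_id valid.
benchmark = mfpbench.get("lcbench_tabular", task_id="adult")

config = benchmark.sample()

# `as_dict()` replaces the old `dict()`; the table `id` is left out
# unless explicitly requested.
plain = config.as_dict()
with_id = config.as_dict(with_id=True)
print(plain)

# Keeping the `id` (or the original config object) around makes the table
# lookup exact when querying, as the quickstart advises above.
restored = benchmark.Config.from_dict(with_id)
result = benchmark.query(restored, at=benchmark.end)
print(result.error, result.cost)
```

The same `get()` call also accepts the new `value_metric=` and `cost_metric=` arguments introduced further down in this diff, which select which of the result's metrics `result.error`, `result.score`, and `result.cost` report.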
diff --git a/pyproject.toml b/pyproject.toml index d1e6ead..97793e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,12 +10,12 @@ dependencies = [ "more_itertools", "pyarrow" ] -version = "1.7.3" +version = "1.7.4" description = "A wrapper for multi-fidelity benchmarks with priors" authors = [{name = "Eddie Bergman", email="eddiebergmanhs@gmail.com"}] readme = "README.md" license = { file = "LICENSE.txt" } -requires-python = ">=3.7" +requires-python = ">=3.8" classifiers = [ 'Intended Audience :: Science/Research', 'Intended Audience :: Developers', @@ -61,7 +61,7 @@ dev = [ [tool.pytest.ini_options] testpaths = ["tests"] # path to the test directory -minversion = "3.7" +minversion = "3.8" # addopts = "--cov=mfpbench" # Should be package name [tool.coverage.run] diff --git a/src/mfpbench/__init__.py b/src/mfpbench/__init__.py index 3c18fb7..0bceeae 100644 --- a/src/mfpbench/__init__.py +++ b/src/mfpbench/__init__.py @@ -1,7 +1,7 @@ from __future__ import annotations from mfpbench.benchmark import Benchmark -from mfpbench.config import Config, GenericTabularConfig, TabularConfig +from mfpbench.config import Config, TabularConfig from mfpbench.get import _mapping, get from mfpbench.jahs import JAHSBenchmark from mfpbench.lcbench_tabular import ( @@ -9,6 +9,7 @@ LCBenchTabularConfig, LCBenchTabularResult, ) +from mfpbench.metric import Metric from mfpbench.pd1 import ( PD1Benchmark, PD1cifar100_wideresnet_2048, @@ -17,7 +18,7 @@ PD1translatewmt_xformer_64, PD1uniref50_transformer_128, ) -from mfpbench.result import GenericTabularResult, Result +from mfpbench.result import Result from mfpbench.synthetic.hartmann import ( MFHartmann3Benchmark, MFHartmann3BenchmarkBad, @@ -31,7 +32,7 @@ MFHartmann6BenchmarkTerrible, MFHartmannBenchmark, ) -from mfpbench.tabular import GenericTabularBenchmark, TabularBenchmark +from mfpbench.tabular import TabularBenchmark from mfpbench.yahpo import ( IAMLglmnetBenchmark, IAMLrangerBenchmark, @@ -58,11 +59,8 @@ "YAHPOBenchmark", "PD1Benchmark", "TabularBenchmark", - "GenericTabularBenchmark", "Config", "TabularConfig", - "GenericTabularConfig", - "GenericTabularResult", "MFHartmannBenchmark", "MFHartmann3Benchmark", "MFHartmann6Benchmark", @@ -97,5 +95,6 @@ "PD1lm1b_transformer_2048", "PD1translatewmt_xformer_64", "PD1uniref50_transformer_128", + "Metric", "_mapping", ] diff --git a/src/mfpbench/benchmark.py b/src/mfpbench/benchmark.py index 1b7a733..0665095 100644 --- a/src/mfpbench/benchmark.py +++ b/src/mfpbench/benchmark.py @@ -3,7 +3,17 @@ import copy from abc import ABC, abstractmethod from pathlib import Path -from typing import TYPE_CHECKING, Any, Generic, Iterator, Mapping, TypeVar, overload +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Generic, + Iterable, + Iterator, + Mapping, + TypeVar, + overload, +) import numpy as np @@ -14,6 +24,8 @@ if TYPE_CHECKING: from ConfigSpace import ConfigurationSpace + from mfpbench.metric import Metric + HERE = Path(__file__).parent.parent PRIOR_DIR = HERE / "priors" @@ -30,50 +42,49 @@ class Benchmark(Generic[C, R, F], ABC): """Base class for a Benchmark.""" - fidelity_range: tuple[F, F, F] - """The fidelity range of this benchmark, (start, end, step)""" - - start: F - """The start of the fidelity range""" - - end: F - """The end of the fidelity range""" - - step: F - """The step of the fidelity range""" - - fidelity_name: str - """The name of the fidelity used in this benchmark""" - - space: ConfigurationSpace - """The configuration space used in this benchmark""" - - Config: type[C] - """The 
config type of this benchmark""" - - Result: type[R] - """The result type of this benchmark""" - - has_conditionals: bool = False - """Whether this benchmark has conditionals in it or not""" - - _default_prior_dir = PRIOR_DIR + _default_prior_dir: ClassVar[Path] = PRIOR_DIR """The default directory for priors""" - def __init__( + _result_renames: ClassVar[Mapping[str, str] | None] = None + """Any renaming to be done to raw result names before being passed + to the `Result` type. This can be useful if for example, the benchmark returns + a result named `valid-error-rate` but the `Result` type expects + `valid_error_rate`, as you can't have `-` in a python identifier. + """ + + _config_renames: ClassVar[Mapping[str, str] | None] = None + """Any renaming to be done to raw result names before being passed + to the `Config` type. This can be useful if for example, the benchmark returns + a result named `lambda` which is a reserved keyword in python but the `Config` + type expects `_lambda` as the key. + """ + + def __init__( # noqa: PLR0913 self, name: str, space: ConfigurationSpace, + config_type: type[C], + result_type: type[R], + fidelity_range: tuple[F, F, F], + fidelity_name: str, *, + has_conditionals: bool = False, seed: int | None = None, prior: str | Path | C | Mapping[str, Any] | None = None, perturb_prior: float | None = None, + value_metric: str | None = None, + cost_metric: str | None = None, ): """Initialize the benchmark. Args: name: The name of this benchmark space: The configuration space to use for the benchmark. + config_type: The type of config to use for the benchmark. + result_type: The type of result to use for the benchmark. + fidelity_name: The name of the fidelity to use for the benchmark. + fidelity_range: The range of fidelities to use for the benchmark. + has_conditionals: Whether this benchmark has conditionals in it or not. seed: The seed to use. prior: The prior to use for the benchmark. If None, no prior is used. If a str, will check the local location first for a prior @@ -84,13 +95,35 @@ def __init__( For numericals, this is interpreted as the standard deviation of a normal distribution while for categoricals, this is interpreted as the probability of swapping the value for a random one. + value_metric: The metric to use for this benchmark. Uses + the default metric from the Result if None. + cost_metric: The cost to use for this benchmark. Uses + the default cost from the Result if None. 
""" + if value_metric is None: + value_metric = result_type.default_value_metric + + if cost_metric is None: + cost_metric = result_type.default_cost_metric + self.name = name self.seed = seed self.space = space - self.start: F = self.fidelity_range[0] - self.end: F = self.fidelity_range[1] - self.step: F = self.fidelity_range[2] + self.value_metric = value_metric + self.cost_metric = cost_metric + self.fidelity_range: tuple[F, F, F] = fidelity_range + self.fidelity_name = fidelity_name + self.has_conditionals = has_conditionals + self.Config = config_type + self.Result = result_type + self.metric_optimums = { + metric_name: metric.optimum_value + for metric_name, metric in self.Result.metric_defs.items() + } + + if value_metric is None: + assert getattr(self.Result, "value_metric", None) is not None + value_metric = self.Result.value_metric self._prior_arg = prior @@ -108,7 +141,6 @@ def __init__( if prior is not None: self.prior = self._load_prior(prior, benchname=self.name) - self.prior.validate() else: self.prior = None @@ -123,18 +155,37 @@ def __init__( if self.prior is not None: self.prior.set_as_default_prior(space) - @classmethod + @property + def metrics(self) -> dict[str, Metric]: + """The metrics for this benchmark.""" + return dict(self.Result.metric_defs) + + @property + def start(self) -> F: + """The start of the fidelity range.""" + return self.fidelity_range[0] + + @property + def end(self) -> F: + """The end of the fidelity range.""" + return self.fidelity_range[1] + + @property + def step(self) -> F: + """The step of the fidelity range.""" + return self.fidelity_range[2] + def _load_prior( - cls, + self, prior: str | Path | Mapping[str, Any] | C, benchname: str | None = None, ) -> C: - Config: type[C] = cls.Config # Need to be a bit explicit here + Config: type[C] = self.Config # Need to be a bit explicit here if isinstance(prior, str): # It's a str, use as a key into available priors if benchname is not None: - assumed_path = cls._default_prior_dir / f"{benchname}-{prior}.yaml" + assumed_path = self._default_prior_dir / f"{benchname}-{prior}.yaml" if assumed_path.exists(): return Config.from_file(assumed_path) @@ -148,7 +199,7 @@ def _load_prior( return prior if isinstance(prior, Mapping): - return Config.from_dict(prior) + return Config.from_dict(prior, renames=self._config_renames) raise ValueError(f"Unknown prior type {type(prior)}") @@ -196,20 +247,23 @@ def load(self) -> None: def query( self, config: C | Mapping[str, Any], - at: F | None = None, *, - argmax: str | None = None, - argmin: str | None = None, + at: F | None = None, + value_metric: str | None = None, + cost_metric: str | None = None, ) -> R: """Submit a query and get a result. Args: config: The query to use at: The fidelity at which to query, defaults to None which means *maximum* - argmax: Whether to return the argmax up to the point `at`. Will be slower as - it has to get the entire trajectory. Uses the key from the Results. - argmin: Whether to return the argmin up to the point `at`. Will be slower as - it has to get the entire trajectory. Uses the key from the Results. + value_metric: The metric to use for this result. Uses + the value metric passed in to the constructor if not specified, + otherwise the default metric from the Result if None. + cost_metric: The metric to use for this result. Uses + the cost metric passed in to the constructor if not specified, + otherwise the default metric from the Result if None. 
+ Returns: The result of the query @@ -217,29 +271,27 @@ def query( at = at if at is not None else self.end assert self.start <= at <= self.end - if argmax is not None and argmin is not None: - raise ValueError("Can't have both argmax and argmin") - - if argmax is not None: - _argmax = argmax - return max( - self.trajectory(config, frm=self.start, to=at), - key=lambda r: getattr(r, _argmax), - ) - - if argmin is not None: - _argmin = argmin - return min( - self.trajectory(config, frm=self.start, to=at), - key=lambda r: getattr(r, _argmin), - ) - if not isinstance(config, self.Config): - _config = self.Config.from_dict(config) + _config = self.Config.from_dict(config, renames=self._config_renames) else: _config = config - return self._objective_function(_config, at=at) + __config = dict(_config) + if self._config_renames is not None: + _reverse_renames = {v: k for k, v in self._config_renames.items()} + __config = {k: __config.get(v, v) for k, v in _reverse_renames.items()} + + value_metric = value_metric if value_metric is not None else self.value_metric + cost_metric = cost_metric if cost_metric is not None else self.cost_metric + + return self.Result.from_dict( + config=config, + fidelity=at, + result=self._objective_function(__config, at=at), + value_metric=str(value_metric), + cost_metric=str(cost_metric), + renames=self._result_renames, + ) def trajectory( self, @@ -248,6 +300,8 @@ def trajectory( frm: F | None = None, to: F | None = None, step: F | None = None, + value_metric: str | None = None, + cost_metric: str | None = None, ) -> list[R]: """Get the full trajectory of a configuration. @@ -256,6 +310,12 @@ def trajectory( frm: Start of the curve, should default to the start to: End of the curve, should default to the total step: Step size, defaults to ``cls.default_step`` + value_metric: The metric to use for this result. Uses + the value metric passed in to the constructor if not specified, + otherwise the default metric from the Result if None. + cost_metric: The metric to use for this result. Uses + the cost metric passed in to the constructor if not specified, + otherwise the default metric from the Result if None. Returns: A list of the results for this config @@ -264,15 +324,38 @@ def trajectory( frm = frm if frm is not None else self.start step = step if step is not None else self.step - if not isinstance(config, self.Config): - _config = self.Config.from_dict(config) - else: - _config = config + __config = dict(config) + if self._config_renames is not None: + _reverse_renames = {v: k for k, v in self._config_renames.items()} + __config = {k: __config.get(v, v) for k, v in _reverse_renames.items()} - return self._trajectory(_config, frm=frm, to=to, step=step) + value_metric = value_metric if value_metric is not None else self.value_metric + cost_metric = cost_metric if cost_metric is not None else self.cost_metric + + return [ + self.Result.from_dict( + config=config, + fidelity=fidelity, + result=result, + value_metric=str(value_metric), + cost_metric=str(cost_metric), + renames=self._result_renames, + ) + for fidelity, result in self._trajectory( + __config, + frm=frm, + to=to, + step=step, + ) + ] @abstractmethod - def _objective_function(self, config: C, *, at: F) -> R: + def _objective_function( + self, + config: Mapping[str, Any], + *, + at: F, + ) -> Mapping[str, float]: """Get the value of the benchmark for a config at a fidelity. 
Args: @@ -280,11 +363,18 @@ def _objective_function(self, config: C, *, at: F) -> R: at: The fidelity to get the result at Returns: - The result of the config + The result of the config as key value pairs """ ... - def _trajectory(self, config: C, *, frm: F, to: F, step: F) -> list[R]: + def _trajectory( + self, + config: Mapping[str, Any], + *, + frm: F, + to: F, + step: F, + ) -> Iterable[tuple[F, Mapping[str, float]]]: """Get the trajectory of a config. By default this will just call the @@ -301,7 +391,7 @@ def _trajectory(self, config: C, *, frm: F, to: F, step: F) -> list[R]: A list of the results for this config """ return [ - self._objective_function(config, at=fidelity) + (fidelity, self._objective_function(config, at=fidelity)) for fidelity in self.iter_fidelities(frm=frm, to=to, step=step) ] @@ -347,23 +437,34 @@ def sample( """ space = copy.deepcopy(self.space) if isinstance(seed, np.random.RandomState): - rng = seed.randint(0, 2**32 - 1) + rng = seed.randint(0, 2**31 - 1) else: rng = ( seed if seed is not None - else np.random.default_rng().integers(0, 2**32 - 1) + else np.random.default_rng().integers(0, 2**31 - 1) ) space.seed(rng) if n is None: - return self.Config.from_dict(space.sample_configuration()) + return self.Config.from_dict( + space.sample_configuration(), + renames=self._config_renames, + ) # Just because of how configspace works if n == 1: - return [self.Config.from_dict(space.sample_configuration())] + return [ + self.Config.from_dict( + space.sample_configuration(), + renames=self._config_renames, + ), + ] - return [self.Config.from_dict(c) for c in space.sample_configuration(n)] + return [ + self.Config.from_dict(c, renames=self._config_renames) + for c in space.sample_configuration(n) + ] def frame(self) -> ResultFrame[C, F, R]: """Get an empty frame to record with.""" diff --git a/src/mfpbench/config.py b/src/mfpbench/config.py index 82caefe..175b723 100644 --- a/src/mfpbench/config.py +++ b/src/mfpbench/config.py @@ -1,10 +1,10 @@ from __future__ import annotations import json -from abc import ABC, abstractmethod +from abc import ABC from dataclasses import asdict, dataclass, field, fields, replace from pathlib import Path -from typing import TYPE_CHECKING, Any, Iterator, Mapping +from typing import Any, Iterator, Mapping from typing_extensions import Self, override import numpy as np @@ -18,9 +18,6 @@ from mfpbench.util import perturb -if TYPE_CHECKING: - import pandas as pd - @dataclass(frozen=True, eq=False, unsafe_hash=True) # type: ignore[misc] class Config(ABC, Mapping[str, Any]): @@ -35,20 +32,22 @@ class Config(ABC, Mapping[str, Any]): """ @classmethod - def from_dict(cls, d: Mapping[str, Any]) -> Self: + def from_dict( + cls, + d: Mapping[str, Any], + renames: Mapping[str, str] | None = None, + ) -> Self: """Create from a dict or mapping object.""" + if renames is not None: + d = {renames.get(k, k): v for k, v in d.items()} + field_names = {f.name for f in fields(cls)} if not field_names.issuperset(d.keys()): raise ValueError(f"Dict keys {d.keys()} must be a subset of {field_names}") return cls(**{f.name: d[f.name] for f in fields(cls) if f.name in d}) - @classmethod - def from_row(cls, row: pd.Series) -> Self: - """Create from a row of a dataframe.""" - return cls.from_dict(row.to_dict()) - - def dict(self) -> dict[str, Any]: + def as_dict(self) -> dict[str, Any]: """As a raw dictionary.""" return asdict(self) @@ -99,24 +98,15 @@ def perturb( return self.mutate(**new_values) - @abstractmethod - def validate(self) -> None: - """Validate the 
config, just useful early on while testing. - - Raises: - AssertionError: If the config is not valid - """ - ... - def __eq__(self, that: Any) -> bool: """Equality is defined in terms of their dictionary repr.""" - this = self.dict() + this = self.as_dict() if isinstance(that, dict): that = that.copy() elif isinstance(that, Configuration): that = dict(that) elif isinstance(that, self.__class__): - that = that.dict() + that = that.as_dict() else: return False @@ -129,13 +119,13 @@ def __eq__(self, that: Any) -> bool: return this == _that def __getitem__(self, key: str) -> Any: - return self.dict()[key] + return self.as_dict()[key] def __len__(self) -> int: - return len(self.dict()) + return len(self.as_dict()) def __iter__(self) -> Iterator[str]: - return self.dict().__iter__() + return self.as_dict().__iter__() def set_as_default_prior(self, configspace: ConfigurationSpace) -> None: """Apply this configuration as a prior on a configspace. @@ -144,7 +134,7 @@ def set_as_default_prior(self, configspace: ConfigurationSpace) -> None: configspace: The space to apply this config to """ # We convert to dict incase there's any special transformation that happen - d = self.dict() + d = self.as_dict() for k, v in d.items(): hp = configspace[k] # https://github.com/automl/ConfigSpace/issues/270 @@ -211,7 +201,7 @@ def save(self, path: str | Path, format: str | None = None) -> None: path: Where to save to. Will infer json or yaml based on filename format: The format to save as. Will use file suffix if not provided """ - d = self.dict() + d = self.as_dict() path = Path(path) if format is None: if path.suffix == "json": @@ -246,29 +236,31 @@ class TabularConfig(Config): an id key. """ - @classmethod - def from_row(cls, row: pd.Series) -> Self: - """Create from a row of a dataframe.""" - return cls.from_dict({"id": row.name, **row.to_dict()}) - @override - def dict(self, *, with_id: bool = False) -> Any: + def as_dict(self, *, with_id: bool = False) -> Any: """As a raw dictionary. Args: with_id: Whether to include the id key """ - d = {**super().dict()} + d = {**super().as_dict()} if not with_id: d.pop("id") return d @classmethod @override - def from_dict(cls, d: Mapping[str, Any]) -> Self: + def from_dict( + cls, + d: Mapping[str, Any], + renames: Mapping[str, str] | None = None, + ) -> Self: """Create from a dict or mapping object.""" - d = dict(d) + if renames is not None: + d = {renames.get(k, k): v for k, v in d.items()} + else: + d = dict(d) d.setdefault("id", None) return cls(**d) @@ -276,51 +268,3 @@ def from_dict(cls, d: Mapping[str, Any]) -> Self: def names(cls) -> list[str]: """The names of entries in this config.""" return [f.name for f in fields(cls) if f.name not in ("id",)] - - def validate(self) -> None: - """Validate the config, just useful early on while testing. - - !!! note "Not implemented" - - Does not do anything for Tabular Benchmarks - """ - - -@dataclass(frozen=True, eq=False) # type: ignore[misc] -class GenericTabularConfig(TabularConfig): - """A generic tabular config. - - This is useful for adhoc tabular benchmarks and is what they will return, i.e. - directly creating a benchmark from TabularBenchmark. - """ - - _values: dict[str, Any] - - def __hash__(self) -> int: - """Hash based on the dictionary repr.""" - return hash(self.id) ^ hash(tuple(self._values.items())) - - @override - def dict(self, *, with_id: bool = False) -> Any: - """As a raw dictionary. 
- - Args: - with_id: Whether to include the id key - """ - d = {**self._values} - if with_id: - d["id"] = self.id - return d - - # Make .property acces work - def __getattr__(self, __name: str) -> Any: - # To prevent recursion - return self._values[__name] - - @classmethod - @override - def from_dict(cls, d: Mapping[str, Any]) -> Self: - """Create from a dict or mapping object.""" - d = dict(d) - id = d.pop("id") - return cls(id=id, _values=d) diff --git a/src/mfpbench/get.py b/src/mfpbench/get.py index 1626448..878ba92 100644 --- a/src/mfpbench/get.py +++ b/src/mfpbench/get.py @@ -90,6 +90,8 @@ def get( name: str, *, + value_metric: str | None = None, + cost_metric: str | None = None, prior: str | Path | Config | None = None, preload: bool = False, **kwargs: Any, @@ -98,6 +100,10 @@ def get( Args: name: The name of the benchmark + value_metric: The value metric to use for the benchmark. If not specified, + the default value metric is used. + cost_metric: The cost metric to use for the benchmark. If not specified, + the default cost metric is used. prior: The prior to use for the benchmark. * str - If it ends in {.json} or {.yaml, .yml}, it will convert it to a path and @@ -190,7 +196,7 @@ def get( ): prior = Path(prior) - bench = b(prior=prior, **kwargs) + bench = b(prior=prior, cost_metric=cost_metric, value_metric=value_metric, **kwargs) if preload: bench.load() diff --git a/src/mfpbench/jahs/benchmark.py b/src/mfpbench/jahs/benchmark.py index ae60b1b..08b05d4 100644 --- a/src/mfpbench/jahs/benchmark.py +++ b/src/mfpbench/jahs/benchmark.py @@ -3,7 +3,7 @@ from abc import ABC from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, Any, Literal, Mapping +from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Literal, Mapping from typing_extensions import override import numpy as np @@ -16,9 +16,9 @@ from mfpbench.benchmark import Benchmark from mfpbench.config import Config +from mfpbench.metric import Metric from mfpbench.result import Result from mfpbench.setup_benchmark import JAHSBenchSource -from mfpbench.util import rename if TYPE_CHECKING: import jahs_bench @@ -51,81 +51,40 @@ class JAHSConfig(Config): LearningRate: float WeightDecay: float - def validate(self) -> None: - """Validate this config incase required.""" - # Just being explicit to catch bugs easily, we can remove later - assert self.N in [1, 3, 5] - assert self.W in [4, 8, 16] - assert self.Op1 in [0, 1, 2, 3, 4, 5] - assert self.Op2 in [0, 1, 2, 3, 4, 5] - assert self.Op3 in [0, 1, 2, 3, 4, 5] - assert self.Op4 in [0, 1, 2, 3, 4, 5] - assert self.Op5 in [0, 1, 2, 3, 4, 5] - assert self.Op6 in [0, 1, 2, 3, 4, 5] - assert self.Resolution in [0.25, 0.5, 1.0] - assert isinstance(self.TrivialAugment, bool) - assert self.Activation in ["ReLU", "Hardswish", "Mish"] - assert self.Optimizer in ["SGD"] - assert 1e-3 <= self.LearningRate <= 1e0 - assert 1e-5 <= self.WeightDecay <= 1e-2 - @dataclass(frozen=True) # type: ignore[misc] class JAHSResult(Result[JAHSConfig, int]): + default_value_metric: ClassVar[str] = "valid_acc" + default_cost_metric: ClassVar[str] = "runtime" + + metric_defs: ClassVar[Mapping[str, Metric]] = { + "runtime": Metric(minimize=True, bounds=(0, np.inf)), + "valid_acc": Metric(minimize=False, bounds=(0, 100)), + "test_acc": Metric(minimize=False, bounds=(0, 100)), + } + # Info # size: float # remove # flops: float # remove # latency: float # unit? remove - runtime: float # unit? + runtime: Metric.Value # unit? 
# Scores (0 - 100) - valid_acc: float - test_acc: float + valid_acc: Metric.Value + test_acc: Metric.Value # train_acc: float # remove - @property - def score(self) -> float: - """The score of interest.""" - return self.valid_acc - - @property - def error(self) -> float: - """The error of interest.""" - return 100 - self.valid_acc - - @property - def test_score(self) -> float: - """The score on the test set.""" - return self.test_acc - - @property - def test_error(self) -> float: - """The error on the test set.""" - return 100 - self.test_acc - - @property - def val_score(self) -> float: - """The score on the validation set.""" - return self.valid_acc - - @property - def val_error(self) -> float: - """The error on the validation set.""" - return 100 - self.valid_acc - - @property - def cost(self) -> float: - """The time taken (assumed to be seconds).""" - return self.runtime - class JAHSBenchmark(Benchmark[JAHSConfig, JAHSResult, int], ABC): - Config = JAHSConfig - Result = JAHSResult - fidelity_name = "epoch" - fidelity_range = (3, 200, 1) # TODO: min budget plays a huge role in SH/HB algos + JAHS_FIDELITY_NAME: ClassVar[str] = "epoch" + JAHS_FIDELITY_RANGE: ClassVar[tuple[int, int, int]] = (3, 200, 1) + JAHS_METRICS_TO_ACTIVATE: ClassVar[tuple[str, ...]] = ( + "valid-acc", + "test-acc", + "runtime", + ) - task_ids: tuple[str, ...] = ( + task_ids: ClassVar[tuple[str, str, str]] = ( "CIFAR10", "ColorectalHistology", "FashionMNIST", @@ -137,14 +96,13 @@ class JAHSBenchmark(Benchmark[JAHSConfig, JAHSResult, int], ABC): ``` """ - _result_renames: Mapping[str, str] = { + _result_renames: ClassVar[Mapping[str, str]] = { "size_MB": "size", "FLOPS": "flops", "valid-acc": "valid_acc", "test-acc": "test_acc", "train-acc": "train_acc", } - _result_metrics_active: tuple[str, ...] = ("valid-acc", "test-acc", "runtime") def __init__( self, @@ -154,6 +112,8 @@ def __init__( seed: int | None = None, prior: str | Path | JAHSConfig | Mapping[str, Any] | None = None, perturb_prior: float | None = None, + value_metric: str | None = None, + cost_metric: str | None = None, ): """Initialize the benchmark. @@ -171,6 +131,10 @@ def __init__( perturb_prior: If given, will perturb the prior by this amount. Only used if `prior=` is given as a config. + value_metric: The metric to use for this benchmark. Uses + the default metric from the Result if None. + cost_metric: The cost to use for this benchmark. Uses + the default cost from the Result if None. 
""" cls = self.__class__ if datadir is None: @@ -193,9 +157,15 @@ def __init__( super().__init__( seed=seed, name=name, + config_type=JAHSConfig, + result_type=JAHSResult, + fidelity_name=self.JAHS_FIDELITY_NAME, + fidelity_range=self.JAHS_FIDELITY_RANGE, space=cls._jahs_configspace(name=name, seed=seed), prior=prior, perturb_prior=perturb_prior, + value_metric=value_metric, + cost_metric=cost_metric, ) # explicit overwrite @@ -231,52 +201,38 @@ def bench(self) -> jahs_bench.Benchmark: task=self.task_id, save_dir=self.datadir, download=False, - metrics=self._result_metrics_active, + metrics=self.JAHS_METRICS_TO_ACTIVATE, ) return self._bench @override - def _objective_function(self, config: JAHSConfig, at: int) -> JAHSResult: - query = config.dict() - + def _objective_function( + self, + config: Mapping[str, Any], + at: int, + ) -> dict[str, float]: + query = dict(config) results = self.bench.__call__(query, nepochs=at) - result = results[at] - - return self.Result.from_dict( - config=config, - result=rename(result, keys=self._result_renames), - fidelity=at, - ) + return results[at] @override def _trajectory( self, - config: JAHSConfig, + config: Mapping[str, Any], *, frm: int, to: int, step: int, - ) -> list[JAHSResult]: - query = config.dict() + ) -> Iterable[tuple[int, Mapping[str, float]]]: + query = dict(config) try: - results = self.bench.__call__(query, nepochs=to, full_trajectory=True) + return self.bench.__call__(query, nepochs=to, full_trajectory=True).items() except TypeError: # See: https://github.com/automl/jahs_bench_201/issues/5 - results = { - f: self.bench.__call__(query, nepochs=f)[f] - for f in self.iter_fidelities(frm=frm, to=to, step=step) - } - - return [ - self.Result.from_dict( - config=config, - fidelity=i, - result=rename(results[i], keys=self._result_renames), - ) - for i in self.iter_fidelities(frm=frm, to=to, step=step) - ] + # Revert back to calling individually, default behaviour + return super()._trajectory(config, frm=frm, to=to, step=step) @classmethod def _jahs_configspace( diff --git a/src/mfpbench/lcbench_tabular/benchmark.py b/src/mfpbench/lcbench_tabular/benchmark.py index 4e18337..8a39f2b 100644 --- a/src/mfpbench/lcbench_tabular/benchmark.py +++ b/src/mfpbench/lcbench_tabular/benchmark.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Any, ClassVar, Mapping +import numpy as np import pandas as pd from ConfigSpace import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -13,6 +14,7 @@ ) from mfpbench.config import TabularConfig +from mfpbench.metric import Metric from mfpbench.result import Result from mfpbench.setup_benchmark import LCBenchTabularSource from mfpbench.tabular import TabularBenchmark @@ -136,55 +138,28 @@ class LCBenchTabularConfig(TabularConfig): @dataclass(frozen=True) # type: ignore[misc] class LCBenchTabularResult(Result[LCBenchTabularConfig, int]): - time: float - val_accuracy: float - val_cross_entropy: float - val_balanced_accuracy: float - test_accuracy: float - test_cross_entropy: float - test_balanced_accuracy: float - - @property - def score(self) -> float: - """The score of interest.""" - return self.val_score - - @property - def error(self) -> float: - """The error of interest.""" - return self.val_error - - @property - def val_score(self) -> float: - """The score on the validation set.""" - return self.val_accuracy / 100 - - @property - def val_error(self) -> float: - """The error on the validation set.""" - return (100 - self.val_accuracy) / 100 - - @property - def test_score(self) -> float: - 
"""The score on the test set.""" - return self.test_accuracy / 100 - - @property - def test_error(self) -> float: - """The error on the test set.""" - return (100 - self.test_accuracy) / 100 - - @property - def cost(self) -> float: - """The time to train the configuration (assumed to be seconds).""" - return self.time + metric_defs: ClassVar[Mapping[str, Metric]] = { + "val_accuracy": Metric(minimize=False, bounds=(0, 100)), + "val_balanced_accuracy": Metric(minimize=False, bounds=(0, 100)), + "val_cross_entropy": Metric(minimize=True, bounds=(0, np.inf)), + "test_accuracy": Metric(minimize=False, bounds=(0, 100)), + "test_balanced_accuracy": Metric(minimize=False, bounds=(0, 100)), + "test_cross_entropy": Metric(minimize=True, bounds=(0, np.inf)), + "time": Metric(minimize=True, bounds=(0, np.inf)), + } + default_value_metric: ClassVar[str] = "val_balanced_accuracy" + default_cost_metric: ClassVar[str] = "time" + + time: Metric.Value + val_accuracy: Metric.Value + test_accuracy: Metric.Value + val_balanced_accuracy: Metric.Value + test_balanced_accuracy: Metric.Value + val_cross_entropy: Metric.Value + test_cross_entropy: Metric.Value class LCBenchTabularBenchmark(TabularBenchmark): - Config = LCBenchTabularConfig - Result = LCBenchTabularResult - fidelity_name: str = "epoch" - task_ids: ClassVar[tuple[str, ...]] = ( "adult", "airlines", @@ -238,6 +213,8 @@ def __init__( seed: int | None = None, prior: str | Path | LCBenchTabularConfig | Mapping[str, Any] | None = None, perturb_prior: float | None = None, + value_metric: str | None = None, + cost_metric: str | None = None, ) -> None: """Initialize the benchmark. @@ -257,6 +234,10 @@ def __init__( For numericals, this is interpreted as the standard deviation of a normal distribution while for categoricals, this is interpreted as the probability of swapping the value for a random one. + value_metric: The metric to use for this benchmark. Uses + the default metric from the Result if None. + cost_metric: The cost to use for this benchmark. Uses + the default cost from the Result if None. """ cls = self.__class__ if task_id not in cls.task_ids: @@ -297,10 +278,11 @@ def __init__( table=table, # type: ignore name=benchmark_task_name, id_key="id", - fidelity_key=cls.fidelity_name, - result_keys=LCBenchTabularResult.names(), - config_keys=LCBenchTabularConfig.names(), - remove_constants=remove_constants, + fidelity_key="epoch", + result_type=LCBenchTabularResult, + config_type=LCBenchTabularConfig, + value_metric=value_metric, + cost_metric=cost_metric, space=space, seed=seed, prior=prior, diff --git a/src/mfpbench/metric.py b/src/mfpbench/metric.py new file mode 100644 index 0000000..40f2549 --- /dev/null +++ b/src/mfpbench/metric.py @@ -0,0 +1,134 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + +import numpy as np + + +class OutOfBoundsError(ValueError): + """Raised when a value is outside of the bounds of a metric.""" + + +@dataclass(frozen=True) +class Metric: + """A metric to be used in the benchmark. + + It's main use is to convert a raw value into a value that can always be + minimized. 
+ """ + + minimize: bool + """Whether or not to minimize the metric.""" + + bounds: tuple[float, float] = field(default_factory=lambda: (-np.inf, np.inf)) + """The bounds of the metric.""" + + def __post_init__(self) -> None: + if self.bounds[0] >= self.bounds[1]: + raise ValueError( + f"bounds[0] must be less than bounds[1], got {self.bounds}", + ) + + def as_value(self, value: float) -> Metric.Value: + """Convert a raw value into a metric value. + + Args: + value: The raw value to convert. + + Returns: + The metric value. + """ + return Metric.Value(value=value, definition=self) + + @property + def optimum_value(self) -> Metric.Value: + """Get the optimum value for this metric. + + Returns: + The optimum value. + """ + if self.minimize: + return self.as_value(self.bounds[0]) + + return self.as_value(self.bounds[1]) + + @dataclass(frozen=True) + class Value: + """A value of a metric.""" + + value: float + definition: Metric = field(hash=False) + + def __post_init__(self) -> None: + if not self.definition.bounds[0] <= self.value <= self.definition.bounds[1]: + raise OutOfBoundsError( + f"Value {self.value} is outside of bounds {self.definition.bounds}", + ) + + @property + def error(self) -> float: + """Calculate a minimization value for the metric based on its raw value. + + The calculation is as follows: + + | direction | lower | upper | | error | + |-----------|-------|-------|-----|------------------------------------| + | minimize | inf | inf | | value | + | minimize | A | inf | | value | + | minimize | inf | B | | value | + | minimize | A | B | | abs(A - value) / abs(B - A) # 0-1 | + | --- | --- | --- | --- | --- | + | maximize | inf | inf | | -value | + | maximize | A | inf | | -value | + | maximize | inf | B | | -value | + | maximize | A | B | | abs(B - value) / abs(B - a) # 0 -1 | + + Returns: + The cost of the metric. + """ + value = self.value + lower, upper = self.definition.bounds + if self.definition.minimize: + if np.isinf(lower) or np.isinf(upper): + return value + + return abs(lower - value) / abs(upper - lower) + + if np.isinf(upper) or np.isinf(lower): + return -value + + return abs(upper - value) / abs(upper - lower) + + @property + def score(self) -> float: + """Calculate a minimization value for the metric based on its raw value. + + The calculation is as follows: + + | direction | lower | upper | | score | + |-----------|-------|-------|-----|------------------------------------| + | minimize | inf | inf | | -value | + | minimize | A | inf | | -value | + | minimize | inf | B | | -value | + | minimize | A | B | | abs(B - value) / abs(B - A) # 0-1 | + | --- | --- | --- | --- | --- | + | maximize | inf | inf | | value | + | maximize | A | inf | | value | + | maximize | inf | B | | value | + | maximize | A | B | | abs(A - value) / abs(B - A) # 0 -1 | + + Returns: + The cost of the metric. 
+ """ + value = self.value + lower, upper = self.definition.bounds + if self.definition.minimize: + if np.isinf(lower) or np.isinf(upper): + return -value + + return abs(upper - value) / abs(upper - lower) + + if np.isinf(upper) or np.isinf(lower): + return value + + return abs(lower - value) / abs(upper - lower) diff --git a/src/mfpbench/pd1/benchmark.py b/src/mfpbench/pd1/benchmark.py index dc98b65..acc080a 100644 --- a/src/mfpbench/pd1/benchmark.py +++ b/src/mfpbench/pd1/benchmark.py @@ -1,6 +1,5 @@ from __future__ import annotations -import warnings from abc import abstractmethod from dataclasses import dataclass from pathlib import Path @@ -12,6 +11,7 @@ from mfpbench.benchmark import Benchmark from mfpbench.config import Config +from mfpbench.metric import Metric from mfpbench.result import Result from mfpbench.setup_benchmark import PD1Source @@ -19,6 +19,8 @@ from ConfigSpace import ConfigurationSpace from xgboost import XGBRegressor +PD1_FIDELITY_NAME = "epoch" + @dataclass(frozen=True, eq=False, unsafe_hash=True) # type: ignore[misc] class PD1Config(Config): @@ -30,116 +32,60 @@ class PD1Config(Config): opt_momentum: float -C = TypeVar("C", bound=PD1Config) - - -@dataclass(frozen=True) # type: ignore[misc] -class PD1Result(Result[PD1Config, int]): - valid_error_rate: float # (0, 1) - train_cost: float # - - @property - def score(self) -> float: - """The score of interest.""" - return 1 - self.valid_error_rate - - @property - def error(self) -> float: - """The error of interest.""" - return self.valid_error_rate - - @property - def val_score(self) -> float: - """The score on the validation set.""" - return 1 - self.valid_error_rate - - @property - def val_error(self) -> float: - """The error on the validation set.""" - return self.valid_error_rate - - @property - def cost(self) -> float: - """The train cost of the model (asssumed to be seconds). - - Please double check with YAHPO. 
- """ - return self.train_cost - - @dataclass(frozen=True) # type: ignore[misc] -class PD1ResultSimple(PD1Result): +class PD1ResultSimple(Result[PD1Config, int]): """Used for all PD1 benchmarks, except imagenet, lm1b, translate_wmt, uniref50.""" - test_error_rate: float = np.inf + metric_defs: ClassVar[Mapping[str, Metric]] = { + "valid_error_rate": Metric(minimize=True, bounds=(0, np.inf)), + "test_error_rate": Metric(minimize=True, bounds=(0, np.inf)), + "train_cost": Metric(minimize=True, bounds=(0, np.inf)), + } + default_value_metric: ClassVar[str] = "valid_error_rate" + default_cost_metric: ClassVar[str] = "train_cost" - @property - def test_score(self) -> float: - """The score on the test set.""" - return 1 - self.test_error_rate - - @property - def test_error(self) -> float: - """The error on the test set.""" - return self.test_error_rate + valid_error_rate: Metric.Value + test_error_rate: Metric.Value + train_cost: Metric.Value @dataclass(frozen=True) -class PD1ResultTransformer(PD1Result): +class PD1ResultTransformer(Result[PD1Config, int]): """Imagenet, lm1b, translate_wmt, uniref50, cifar100 contains no test error.""" - @property - def test_score(self) -> float: - """The score on the test set.""" - warnings.warn( - "Using valid error rate as there is no test error rate", - UserWarning, - stacklevel=2, - ) - return 1 - self.valid_error_rate - - @property - def test_error(self) -> float: - """The error on the test set.""" - warnings.warn( - "Using valid error rate as there is no test error rate", - UserWarning, - stacklevel=2, - ) - return self.valid_error_rate - + metric_defs: ClassVar[Mapping[str, Metric]] = { + "valid_error_rate": Metric(minimize=True, bounds=(0, np.inf)), + "train_cost": Metric(minimize=True, bounds=(0, np.inf)), + } + default_value_metric: ClassVar[str] = "valid_error_rate" + default_cost_metric: ClassVar[str] = "train_cost" -R = TypeVar("R", PD1ResultTransformer, PD1ResultSimple) + valid_error_rate: Metric.Value + train_cost: Metric.Value -class PD1Benchmark(Benchmark[C, R, int]): - pd1_dataset: ClassVar[str] - """The dataset that this benchmark uses.""" +R = TypeVar("R", bound=Result) - pd1_model: ClassVar[str] - """The model that this benchmark uses.""" - pd1_batchsize: ClassVar[int] - """The batchsize that this benchmark uses.""" +class PD1Benchmark(Benchmark[PD1Config, R, int]): + pd1_fidelity_range: ClassVar[tuple[int, int, int]] + """The fidelity range for this benchmark.""" - pd1_metrics: ClassVar[tuple[str, ...]] - """The metrics that are available for this benchmark.""" + pd1_name: ClassVar[str] + """The name to access surrogates from.""" - Config: type[C] - """The config type for this benchmark.""" - - Result: type[R] + pd1_result_type: type[R] """The result type for this benchmark.""" - has_conditionals = False - def __init__( self, *, datadir: str | Path | None = None, seed: int | None = None, - prior: str | Path | C | Mapping[str, Any] | None = None, + prior: str | Path | PD1Config | Mapping[str, Any] | None = None, perturb_prior: float | None = None, + value_metric: str | None = None, + cost_metric: str | None = None, ): """Create a PD1 Benchmark. @@ -151,10 +97,13 @@ def __init__( is interpreted as the std of a normal from which to perturb numerical hyperparameters of the prior, and the raw probability of swapping a categorical value. + value_metric: The metric to use for this benchmark. Uses + the default metric from the Result if None. + cost_metric: The cost to use for this benchmark. Uses + the default cost from the Result if None. 
""" cls = self.__class__ space = cls._create_space(seed=seed) - name = f"{cls.pd1_dataset}-{cls.pd1_model}-{cls.pd1_batchsize}" if datadir is None: datadir = PD1Source.default_location() @@ -169,10 +118,16 @@ def __init__( super().__init__( seed=seed, - name=name, + name=self.pd1_name, + config_type=PD1Config, + fidelity_name=PD1_FIDELITY_NAME, + fidelity_range=cls.pd1_fidelity_range, + result_type=cls.pd1_result_type, prior=prior, perturb_prior=perturb_prior, space=space, + value_metric=value_metric, + cost_metric=cost_metric, ) def load(self) -> None: @@ -209,38 +164,47 @@ def surrogate_paths(self) -> dict[str, Path]: """The paths to the surrogates.""" return { metric: self.surrogate_dir / f"{self.name}-{metric}.json" - for metric in self.pd1_metrics + for metric in self.Result.metric_defs } @override - def _objective_function(self, config: C, at: int) -> R: + def _objective_function( + self, + config: Mapping[str, Any], + at: int, + ) -> dict[str, float]: return self._results_for(config, fidelities=[at])[0] @override - def _trajectory(self, config: C, *, frm: int, to: int, step: int) -> list[R]: - return self._results_for(config, fidelities=self.iter_fidelities(frm, to, step)) - - def _results_for(self, config: C, fidelities: Iterable[int]) -> list[R]: + def _trajectory( + self, + config: Mapping[str, Any], + *, + frm: int, + to: int, + step: int, + ) -> Iterable[tuple[int, Mapping[str, float]]]: + fidelities = list(self.iter_fidelities(frm, to, step)) + return zip(fidelities, self._results_for(config, fidelities)) + + def _results_for( + self, + config: Mapping[str, Any], + fidelities: Iterable[int], + ) -> list[dict[str, float]]: # Add the fidelities into the query and make a dataframe - c = config.dict() + c = dict(config) queries = [{**c, self.fidelity_name: f} for f in fidelities] xs = pd.DataFrame(queries) # Predict the metric for everything in the dataframe features = xs.columns for metric, surrogate in self.surrogates.items(): - xs[metric] = surrogate.predict(xs[features]) + # We clip as sometimes the surrogate produces negative values + xs[metric] = surrogate.predict(xs[features]).clip(min=0) metrics = list(self.surrogates.keys()) - - return [ - self.Result.from_dict( - config=config, # Our original config - fidelity=r[self.fidelity_name], # fidelity # type: ignore - result=r[metrics], # Grab the metrics # type: ignore - ) - for _, r in xs.iterrows() - ] + return [dict(r[metrics]) for _, r in xs.iterrows()] @classmethod @abstractmethod diff --git a/src/mfpbench/pd1/benchmarks/cifar100.py b/src/mfpbench/pd1/benchmarks/cifar100.py index 2482936..977772f 100644 --- a/src/mfpbench/pd1/benchmarks/cifar100.py +++ b/src/mfpbench/pd1/benchmarks/cifar100.py @@ -1,34 +1,14 @@ from __future__ import annotations -from dataclasses import dataclass -from typing_extensions import override - from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter -from mfpbench.pd1.benchmark import PD1Benchmark, PD1Config, PD1ResultTransformer - - -@dataclass(frozen=True, eq=False, unsafe_hash=True) -class PD1Config_cifar100_wideresnet_2048(PD1Config): - @override - def validate(self) -> None: - assert 0.010093 <= self.lr_decay_factor <= 0.989012 - assert 0.000010 <= self.lr_initial <= 9.779176 - assert 0.100708 <= self.lr_power <= 1.999376 - assert 0.000059 <= self.opt_momentum <= 0.998993 +from mfpbench.pd1.benchmark import PD1Benchmark, PD1ResultTransformer class PD1cifar100_wideresnet_2048(PD1Benchmark): - fidelity_name = "epoch" - fidelity_range = (45, 199, 1) - - Config = 
PD1Config_cifar100_wideresnet_2048 - Result = PD1ResultTransformer - - pd1_dataset = "cifar100" - pd1_model = "wide_resnet" - pd1_batchsize = 2048 - pd1_metrics = ("valid_error_rate", "train_cost") + pd1_fidelity_range = (45, 199, 1) + pd1_result_type = PD1ResultTransformer + pd1_name = "cifar100-wideresnet-2048" @classmethod def _create_space(cls, seed: int | None = None) -> ConfigurationSpace: diff --git a/src/mfpbench/pd1/benchmarks/imagenet.py b/src/mfpbench/pd1/benchmarks/imagenet.py index 8714669..cf1e818 100644 --- a/src/mfpbench/pd1/benchmarks/imagenet.py +++ b/src/mfpbench/pd1/benchmarks/imagenet.py @@ -1,33 +1,14 @@ from __future__ import annotations -from dataclasses import dataclass -from typing_extensions import override - from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter -from mfpbench.pd1.benchmark import PD1Benchmark, PD1Config, PD1ResultTransformer - - -@dataclass(frozen=True, eq=False, unsafe_hash=True) -class PD1Config_imagenet_resnet_512(PD1Config): - @override - def validate(self) -> None: - assert 0.010294 <= self.lr_decay_factor <= 0.989753 - assert 0.000010 <= self.lr_initial <= 9.774312 - assert 0.100225 <= self.lr_power <= 1.999326 - assert 0.000059 <= self.opt_momentum <= 0.998993 +from mfpbench.pd1.benchmark import PD1Benchmark, PD1ResultTransformer class PD1imagenet_resnet_512(PD1Benchmark): - fidelity_name = "epoch" - fidelity_range = (3, 99, 1) - Config = PD1Config_imagenet_resnet_512 - Result = PD1ResultTransformer - - pd1_dataset = "imagenet" - pd1_model = "resnet" - pd1_batchsize = 512 - pd1_metrics = ("valid_error_rate", "train_cost") + pd1_result_type = PD1ResultTransformer + pd1_fidelity_range = (3, 99, 1) + pd1_name = "imagenet-resnet-512" @classmethod def _create_space(cls, seed: int | None = None) -> ConfigurationSpace: diff --git a/src/mfpbench/pd1/benchmarks/lm1b.py b/src/mfpbench/pd1/benchmarks/lm1b.py index 6ac2b3a..7a9840d 100644 --- a/src/mfpbench/pd1/benchmarks/lm1b.py +++ b/src/mfpbench/pd1/benchmarks/lm1b.py @@ -1,37 +1,16 @@ from __future__ import annotations -from dataclasses import dataclass -from typing_extensions import override - from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter -from mfpbench.pd1.benchmark import PD1Benchmark, PD1Config, PD1ResultTransformer - - -@dataclass(frozen=True, eq=False, unsafe_hash=True) -class PD1Config_lm1b_transformer_2048(PD1Config): - @override - def validate(self) -> None: - assert 0.010543 <= self.lr_decay_factor <= 9.885653e-01 - assert 0.000010 <= self.lr_initial <= 9.986256e00 - assert 0.100811 <= self.lr_power <= 1.999659e00 - assert 0.000059 <= self.opt_momentum <= 9.989986e-01 +from mfpbench.pd1.benchmark import PD1Benchmark, PD1ResultTransformer class PD1lm1b_transformer_2048(PD1Benchmark): - fidelity_name = "epoch" - fidelity_range = (1, 74, 1) - - Config = PD1Config_lm1b_transformer_2048 - Result = PD1ResultTransformer - - pd1_dataset = "lm1b" - pd1_model = "transformer" - pd1_batchsize = 2048 - pd1_metrics = ("valid_error_rate", "train_cost") + pd1_fidelity_range = (1, 74, 1) + pd1_result_type = PD1ResultTransformer + pd1_name = "lm1b-transformer-2048" @classmethod - @override def _create_space(cls, seed: int | None = None) -> ConfigurationSpace: cs = ConfigurationSpace(seed=seed) cs.add_hyperparameters( diff --git a/src/mfpbench/pd1/benchmarks/translate_wmt.py b/src/mfpbench/pd1/benchmarks/translate_wmt.py index bf7adff..dd14bf8 100644 --- a/src/mfpbench/pd1/benchmarks/translate_wmt.py +++ b/src/mfpbench/pd1/benchmarks/translate_wmt.py @@ -1,34 
+1,14 @@ from __future__ import annotations -from dataclasses import dataclass -from typing_extensions import override - from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter -from mfpbench.pd1.benchmark import PD1Benchmark, PD1Config, PD1ResultTransformer - - -@dataclass(frozen=True, eq=False, unsafe_hash=True) -class PD1Config_translatewmt_xformer_64(PD1Config): - @override - def validate(self) -> None: - assert 0.0100221257 <= self.lr_decay_factor <= 0.988565263 - assert 1.00276e-05 <= self.lr_initial <= 9.8422475735 - assert 0.1004250993 <= self.lr_power <= 1.9985927056 - assert 5.86114e-05 <= self.opt_momentum <= 0.9989999746 +from mfpbench.pd1.benchmark import PD1Benchmark, PD1ResultTransformer class PD1translatewmt_xformer_64(PD1Benchmark): - fidelity_name = "epoch" - fidelity_range = (1, 19, 1) - - Config = PD1Config_translatewmt_xformer_64 - Result = PD1ResultTransformer - - pd1_dataset = "translate_wmt" - pd1_model = "xformer_translate" - pd1_batchsize = 64 - pd1_metrics = ("valid_error_rate", "train_cost") + pd1_fidelity_range = (1, 19, 1) + pd1_result_type = PD1ResultTransformer + pd1_name = "translate-wmt-xformer-64" @classmethod def _create_space(cls, seed: int | None = None) -> ConfigurationSpace: diff --git a/src/mfpbench/pd1/benchmarks/uniref50.py b/src/mfpbench/pd1/benchmarks/uniref50.py index 2111fab..8ca1943 100644 --- a/src/mfpbench/pd1/benchmarks/uniref50.py +++ b/src/mfpbench/pd1/benchmarks/uniref50.py @@ -1,34 +1,14 @@ from __future__ import annotations -from dataclasses import dataclass -from typing_extensions import override - from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter -from mfpbench.pd1.benchmark import PD1Benchmark, PD1Config, PD1ResultTransformer - - -@dataclass(frozen=True, eq=False, unsafe_hash=True) -class PD1Config_uniref50_transformer_128(PD1Config): - @override - def validate(self) -> None: - assert 0.0111588123 <= self.lr_decay_factor <= 0.9898713967 - assert 1.00564e-05 <= self.lr_initial <= 0.4429248972 - assert 0.1001570089 <= self.lr_power <= 1.9989163336 - assert 5.86114e-05 <= self.opt_momentum <= 0.9989940217 +from mfpbench.pd1.benchmark import PD1Benchmark, PD1ResultTransformer class PD1uniref50_transformer_128(PD1Benchmark): - fidelity_name = "epoch" - fidelity_range = (1, 22, 1) - - Config = PD1Config_uniref50_transformer_128 - Result = PD1ResultTransformer - - pd1_dataset = "uniref50" - pd1_model = "transformer" - pd1_batchsize = 128 - pd1_metrics = ("valid_error_rate", "train_cost") + pd1_fidelity_range = (1, 22, 1) + pd1_result_type = PD1ResultTransformer + pd1_name = "uniref50-transformer-128" @classmethod def _create_space(cls, seed: int | None = None) -> ConfigurationSpace: diff --git a/src/mfpbench/pd1/surrogate/train_xgboost.py b/src/mfpbench/pd1/surrogate/train_xgboost.py index e6ce503..7c9100e 100644 --- a/src/mfpbench/pd1/surrogate/train_xgboost.py +++ b/src/mfpbench/pd1/surrogate/train_xgboost.py @@ -55,7 +55,7 @@ def train_xgboost( if __name__ == "__main__": import argparse - from xgboost import XGBRegressor # noqa: F811 + from xgboost import XGBRegressor parser = argparse.ArgumentParser() parser.add_argument("--data", required=True, type=str) diff --git a/src/mfpbench/result.py b/src/mfpbench/result.py index e2903f2..20c63af 100644 --- a/src/mfpbench/result.py +++ b/src/mfpbench/result.py @@ -1,12 +1,15 @@ from __future__ import annotations -from abc import ABC, abstractmethod -from dataclasses import asdict, dataclass, field, fields -from typing import Any, Generic, Mapping, TypeVar 
-from typing_extensions import Self, override +from abc import ABC +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, ClassVar, Generic, Mapping, TypeVar +from typing_extensions import Self from mfpbench.config import Config +if TYPE_CHECKING: + from mfpbench.metric import Metric + # The Config kind C = TypeVar("C", bound=Config) @@ -18,194 +21,97 @@ class Result(ABC, Generic[C, F]): """Collect all results in a class for clarity.""" + metric_defs: ClassVar[Mapping[str, Metric]] + """The metric definitions of this result.""" + + default_value_metric: ClassVar[str] + """The default metric to use for this result.""" + + default_cost_metric: ClassVar[str] + """The default cost to use for this result.""" + fidelity: F """The fidelity of this result.""" - config: C = field(repr=False) + config: C """The config used to generate this result.""" + value_metric: str + """The metric to use for this result.""" + + cost_metric: str + """The cost to use for this result.""" + @classmethod def from_dict( cls, config: C, - result: Mapping[str, Any], fidelity: F, + result: Mapping[str, float], + *, + value_metric: str | None = None, + cost_metric: str | None = None, + renames: Mapping[str, str] | None = None, ) -> Self: """Create from a dict or mapping object.""" - fieldnames = set(cls.names()) - if not fieldnames.issubset(result.keys()): - raise ValueError( - f"Result dict is missing fields: {fieldnames - result.keys()}", + values = { + k: ( + metric.as_value(v) + if (metric := cls.metric_defs.get(k)) is not None + else v ) - # To help with serialization, we need to convert floats to... ehh floats - # This is due to some things returning an np.float -_- - result = { - k: float(v) if isinstance(v, float) else v for k, v in result.items() - if k in fieldnames } - return cls(config=config, fidelity=fidelity, **result) - - @classmethod - def names(cls) -> tuple[str, ...]: - """The names of the fields in this result.""" - return tuple( - f.name for f in fields(cls) if f.name not in ("config", "fidelity") + if renames is not None: + values = {renames.get(k, k): v for k, v in values.items()} + if value_metric is None: + value_metric = cls.default_value_metric + if cost_metric is None: + cost_metric = cls.default_cost_metric + + return cls( + config=config, + fidelity=fidelity, + value_metric=value_metric, + cost_metric=cost_metric, + **values, # type: ignore ) - @classmethod - def from_row( - cls, - config: C, - row: Mapping[str, Any], - fidelity: F, - ) -> Self: - """Create from a row of a dataframe.""" - return cls.from_dict(config, dict(row), fidelity) - - @property - @abstractmethod - def score(self) -> float: - """The score of interest.""" - ... - - @property - @abstractmethod - def error(self) -> float: - """The error of interest.""" - ... - - @property - @abstractmethod - def test_score(self) -> float: - """The score on the test set.""" - ... - - @property - @abstractmethod - def test_error(self) -> float: - """The error on the test set.""" - ... - - @property - @abstractmethod - def val_score(self) -> float: - """The score on the validation set.""" - ... + def as_dict(self) -> dict[str, Any]: + """As a raw dictionary.""" + return self.values - @property - @abstractmethod - def val_error(self) -> float: - """The score on the validation set.""" - ... 
+ def __getitem__(self, key: str) -> Metric.Value: + if key not in self.metric_defs: + raise KeyError(f"Metric {key} not in {self.metric_defs.keys()}") + return getattr(self, key) @property - @abstractmethod def cost(self) -> float: """The time cost for evaluting this config.""" - ... - - def dict(self) -> dict[str, Any]: - """Create a dict from this result.""" - d = asdict(self) - del d["config"] - del d["fidelity"] - return d - - -@dataclass(frozen=True, eq=False) # type: ignore[misc] -class GenericTabularResult(Result[C, F], Generic[C, F]): - """A generic tabular result. - - This is useful for adhoc tabular benchmarks. - """ - - _values: dict[str, Any] - - def __hash__(self) -> int: - """Hash based on the dictionary repr.""" - return ( - hash(self.config) ^ hash(self.fidelity) ^ hash(tuple(self._values.items())) - ) - - def dict(self) -> Any: - """As a raw dictionary.""" - return dict(self._values) - - def __getitem__(self, key: str) -> Any: - return self._values[key] - - # Make .property acces work - def __getattr__(self, __name: str) -> Any: - return self._values[__name] - - @override - @classmethod - def from_dict(cls, config: C, result: Mapping[str, Any], fidelity: F) -> Self: - """Create from a dict or mapping object.""" - return cls(config=config, _values=dict(result), fidelity=fidelity) - - @property - def score(self) -> float: - """The score of interest.""" - if "score" in self._values: - return float(self._values["score"]) - - raise KeyError("GenericTabularResult does not have a score") + return self[self.cost_metric].error @property def error(self) -> float: """The error of interest.""" - if "error" in self._values: - return float(self._values["error"]) - - raise KeyError("GenericTabularResult does not have an error") + return self[self.value_metric].error @property - def test_score(self) -> float: - """The score on the test set.""" - if "test_score" in self._values: - return float(self._values["test_score"]) - - raise KeyError("GenericTabularResult does not have a test_score") + def score(self) -> float: + """The score of interest.""" + return self[self.value_metric].score @property - def test_error(self) -> float: - """The error on the test set.""" - if "test_error" in self._values: - return float(self._values["test_error"]) - - raise KeyError("GenericTabularResult does not have a test_error") + def values(self) -> dict[str, Any]: + """Create a dict from this result with the raw values.""" + return {k: getattr(self, k).value for k in self.metric_defs} @property - def val_score(self) -> float: - """The score on the validation set.""" - if "val_score" in self._values: - return float(self._values["val_score"]) - - raise KeyError("GenericTabularResult does not have a val_score") + def errors(self) -> dict[str, float]: + """Create a dict from this result with the error values.""" + return {k: getattr(self, k).error for k in self.metric_defs} @property - def val_error(self) -> float: - """The score on the validation set.""" - if "val_error" in self._values: - return float(self._values["val_error"]) - - raise KeyError("GenericTabularResult does not have a val_error") - - @property - def cost(self) -> float: - """The time cost for evaluting this config.""" - if "cost" in self._values: - return float(self._values["cost"]) - - raise KeyError("GenericTabularResult does not have a cost") - - @classmethod - def names(cls) -> tuple[str, ...]: - """The names of the fields in this result.""" - return tuple( - f.name - for f in fields(cls) - if f.name not in ("config", "fidelity", 
"__values") - ) + def scores(self) -> dict[str, float]: + """Create a dict from this result with the score values.""" + return {k: getattr(self, k).score for k in self.metric_defs} diff --git a/src/mfpbench/setup_benchmark.py b/src/mfpbench/setup_benchmark.py index 1105b5f..7521477 100644 --- a/src/mfpbench/setup_benchmark.py +++ b/src/mfpbench/setup_benchmark.py @@ -263,7 +263,7 @@ def download_status(source: str, datadir: Path | None = None) -> bool: _source = BenchmarkSetup.source(source) source_path = datadir / _source.name return source_path.exists() and bool( - next(source_path.iterdir(), False), # noqa: FBT003 + next(source_path.iterdir(), False), ) @@ -366,7 +366,6 @@ def setup( print(f"Finished downloading to {source_path}") else: print(f"Already found something at {source_path}") - pass if install is not False: if install is True: diff --git a/src/mfpbench/synthetic/__init__.py b/src/mfpbench/synthetic/__init__.py index 82184f0..ac3b59e 100644 --- a/src/mfpbench/synthetic/__init__.py +++ b/src/mfpbench/synthetic/__init__.py @@ -15,7 +15,6 @@ MFHartmann6Config, MFHartmannBenchmark, MFHartmannGenerator, - MFHartmannResult, ) __all__ = [ @@ -35,5 +34,4 @@ "MFHartmann3", "MFHartmann6", "MFHartmannGenerator", - "MFHartmannResult", ] diff --git a/src/mfpbench/synthetic/hartmann/__init__.py b/src/mfpbench/synthetic/hartmann/__init__.py index 6850fc1..db1077e 100644 --- a/src/mfpbench/synthetic/hartmann/__init__.py +++ b/src/mfpbench/synthetic/hartmann/__init__.py @@ -12,7 +12,6 @@ MFHartmann6BenchmarkTerrible, MFHartmann6Config, MFHartmannBenchmark, - MFHartmannResult, ) from mfpbench.synthetic.hartmann.generators import ( MFHartmann3, @@ -37,5 +36,4 @@ "MFHartmann3", "MFHartmann6", "MFHartmannGenerator", - "MFHartmannResult", ] diff --git a/src/mfpbench/synthetic/hartmann/benchmark.py b/src/mfpbench/synthetic/hartmann/benchmark.py index 55458ea..3362feb 100644 --- a/src/mfpbench/synthetic/hartmann/benchmark.py +++ b/src/mfpbench/synthetic/hartmann/benchmark.py @@ -15,10 +15,12 @@ from typing import Any, ClassVar, Generic, Mapping, TypeVar from typing_extensions import override +import numpy as np from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter from mfpbench.benchmark import Benchmark from mfpbench.config import Config +from mfpbench.metric import Metric from mfpbench.result import Result from mfpbench.synthetic.hartmann.generators import ( MFHartmann3, @@ -26,6 +28,8 @@ MFHartmannGenerator, ) +G = TypeVar("G", bound=MFHartmannGenerator) + @dataclass(frozen=True, eq=False, unsafe_hash=True) # type: ignore[misc] class MFHartmann3Config(Config): @@ -33,12 +37,6 @@ class MFHartmann3Config(Config): X_1: float X_2: float - def validate(self) -> None: - """Validate this config.""" - assert 0.0 <= self.X_0 <= 1.0 - assert 0.0 <= self.X_1 <= 1.0 - assert 0.0 <= self.X_2 <= 1.0 - @dataclass(frozen=True, eq=False, unsafe_hash=True) # type: ignore[misc] class MFHartmann6Config(Config): @@ -49,93 +47,58 @@ class MFHartmann6Config(Config): X_4: float X_5: float - def validate(self) -> None: - """Validate this config.""" - assert 0.0 <= self.X_0 <= 1.0 - assert 0.0 <= self.X_1 <= 1.0 - assert 0.0 <= self.X_2 <= 1.0 - assert 0.0 <= self.X_3 <= 1.0 - assert 0.0 <= self.X_4 <= 1.0 - assert 0.0 <= self.X_5 <= 1.0 - - -C = TypeVar("C", MFHartmann3Config, MFHartmann6Config) - @dataclass(frozen=True) # type: ignore[misc] -class MFHartmannResult(Result[C, int]): - value: float - fid_cost: float +class MFHartmann3Result(Result[MFHartmann3Config, int]): + metric_defs: 
ClassVar[Mapping[str, Metric]] = { + # TODO: There's probably some analytical upper bound... + "value": Metric(minimize=True, bounds=(-3.86278, np.inf)), + "fid_cost": Metric(minimize=True, bounds=(0.05, 1)), + } + default_value_metric: ClassVar[str] = "value" + default_cost_metric: ClassVar[str] = "fid_cost" - @property - def score(self) -> float: - """The score of interest.""" - # TODO: what should be an appropriate score since flipping signs may not be - # adequate or meaningful. When is the property score used? - # Hartmann functions have multiple minimas with the global valued at < 0 - # The function evaluates to a y-value that needs to be minimized - # https://www.sfu.ca/~ssurjano/hart3.html - raise NotImplementedError("There's no meaninfgul score for Hartmann functions") + value: Metric.Value + fid_cost: Metric.Value - @property - def error(self) -> float: - """The score of interest.""" - # TODO: verify - # Hartmann functions have multiple minimas with the global valued at < 0 - # The function evaluates to a y-value that needs to be minimized - # https://www.sfu.ca/~ssurjano/hart3.html - return self.value - @property - def test_score(self) -> float: - """Just returns the score.""" - raise NotImplementedError("There's no meaninfgul score for Hartmann functions") +@dataclass(frozen=True) # type: ignore[misc] +class MFHartmann6Result(Result[MFHartmann6Config, int]): + metric_defs: ClassVar[Mapping[str, Metric]] = { + # TODO: There's probably some analytical upper bound... + "value": Metric(minimize=True, bounds=(-3.32237, np.inf)), + "fid_cost": Metric(minimize=True, bounds=(0.05, 1)), + } + default_value_metric: ClassVar[str] = "value" + default_cost_metric: ClassVar[str] = "fid_cost" - @property - def test_error(self) -> float: - """Just returns the error.""" - return self.error + value: Metric.Value + fid_cost: Metric.Value - @property - def val_score(self) -> float: - """Just returns the score.""" - raise NotImplementedError("There's no meaninfgul score for Hartmann functions") - @property - def val_error(self) -> float: - """Just returns the error.""" - return self.error +C = TypeVar("C", bound=Config) +R = TypeVar("R", bound=Result) - @property - def cost(self) -> float: - """Just retuns the fidelity.""" - # return self.fidelity - return self.fid_cost +class MFHartmannBenchmark(Benchmark[C, R, int], Generic[G, C, R]): + mfh_generator_type: type[G] + """The underlying mfhartmann function generator.""" -G = TypeVar("G", bound=MFHartmannGenerator) + mfh_config_type: type[C] + """The config type for this benchmark.""" + mfh_result_type: type[R] + """The result type for this benchmark.""" -class MFHartmannBenchmark(Benchmark, Generic[G, C]): mfh_dims: ClassVar[int] """How many dimensions there are to the Hartmann function.""" mfh_suffix: ClassVar[str] """Suffix for the benchmark name""" - Config: type[C] - """The Config type for this mfhartmann benchmark.""" - - Generator: type[G] - """The underlying mfhartmann function generator.""" - mfh_bias_noise: ClassVar[tuple[float, float]] = (0.5, 0.1) """The default bias and noise for mfhartmann benchmarks.""" - fidelity_name = "z" - fidelity_range = (3, 100, 1) - Result = MFHartmannResult - def __init__( self, *, @@ -144,6 +107,8 @@ def __init__( noise: float | None = None, prior: str | Path | C | Mapping[str, Any] | None = None, perturb_prior: float | None = None, + value_metric: str | None = None, + cost_metric: str | None = None, ): """Initialize the benchmark. 
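# ---- Editor's sketch (not part of the patch): the new Metric-based Result API ----
# The hunks above replace the old abstract `score`/`error`/`cost` properties with
# class-level `metric_defs` plus selectable `value_metric`/`cost_metric`. Below is a
# minimal sketch of how a result is now built from a raw dict of floats, assuming the
# classes exactly as added in this diff; the precise error/score computation lives in
# `mfpbench.metric` and is not asserted here.
from mfpbench.synthetic.hartmann.benchmark import (
    MFHartmann3Config,
    MFHartmann3Result,
)

config = MFHartmann3Config.from_dict({"X_0": 0.1, "X_1": 0.5, "X_2": 0.9})

# `_objective_function` now returns a plain dict of floats ...
raw = {"value": -1.23, "fid_cost": 0.42}

# ... which `Result.from_dict` converts to `Metric.Value`s via `metric_defs`.
# Both metric selections are optional and fall back to the class defaults.
result = MFHartmann3Result.from_dict(
    config=config,
    fidelity=50,
    result=raw,
    value_metric="value",    # same as MFHartmann3Result.default_value_metric
    cost_metric="fid_cost",  # same as MFHartmann3Result.default_cost_metric
)

print(result.error)      # error of the selected value metric ("value")
print(result.cost)       # error of the selected cost metric ("fid_cost")
print(result.as_dict())  # the raw per-metric values, e.g. {"value": -1.23, ...}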
@@ -160,12 +125,19 @@ def __init__( perturb_prior: If not None, will perturb the prior by this amount. For numericals, while for categoricals, this is interpreted as the probability of swapping the value for a random one. + value_metric: The metric to use for this benchmark. Uses + the default metric from the Result if None. + cost_metric: The cost to use for this benchmark. Uses + the default cost from the Result if None. """ cls = self.__class__ self.bias = bias if bias is not None else cls.mfh_bias_noise[0] self.noise = noise if noise is not None else cls.mfh_bias_noise[1] - self.mfh = cls.Generator( - n_fidelities=cls.fidelity_range[1], + + _max_fidelity = 100 + + self.mfh = cls.mfh_generator_type( + n_fidelities=_max_fidelity, fidelity_noise=self.noise, fidelity_bias=self.bias, seed=seed, @@ -185,27 +157,31 @@ def __init__( ) super().__init__( name=name, + config_type=self.mfh_config_type, + result_type=self.mfh_result_type, + fidelity_name="z", + fidelity_range=(3, _max_fidelity, 1), space=space, seed=seed, prior=prior, perturb_prior=perturb_prior, + value_metric=value_metric, + cost_metric=cost_metric, ) @override - def _objective_function(self, config: C, *, at: int) -> MFHartmannResult[C]: - query = config.dict() + def _objective_function( + self, + config: Mapping[str, Any], + *, + at: int, + ) -> dict[str, float]: + query = dict(config) # It's important here that we still have X_0, X_1, ..., X_n # We strip out the numerical part and sort by that Xs = tuple(query[s] for s in sorted(query, key=lambda k: int(k.split("_")[-1]))) - value = self.mfh(z=at, Xs=Xs) - cost = self._fidelity_cost(at) - - return self.Result.from_dict( - config=config, - fidelity=at, - result={"value": value, "fid_cost": cost}, - ) + return {"value": self.mfh(z=at, Xs=Xs), "fid_cost": self._fidelity_cost(at)} def _fidelity_cost(self, at: int) -> float: # λ(z) on Pg 18 from https://arxiv.org/pdf/1703.06240.pdf @@ -214,16 +190,23 @@ def _fidelity_cost(self, at: int) -> float: @property def optimum(self) -> C: """The optimum of the benchmark.""" - optimum = {f"X_{i}": x for i, x in enumerate(self.Generator.optimum)} + optimum = {f"X_{i}": x for i, x in enumerate(self.mfh_generator_type.optimum)} return self.Config.from_dict(optimum) # ----------- # MFHartmann3 # ----------- -class MFHartmann3Benchmark(MFHartmannBenchmark): - Generator = MFHartmann3 - Config = MFHartmann3Config +class MFHartmann3Benchmark( + MFHartmannBenchmark[ + MFHartmann3, + MFHartmann3Config, + MFHartmann3Result, + ], +): + mfh_generator_type = MFHartmann3 + mfh_config_type = MFHartmann3Config + mfh_result_type = MFHartmann3Result mfh_dims = MFHartmann3.dims mfh_suffix = "" @@ -251,9 +234,16 @@ class MFHartmann3BenchmarkGood(MFHartmann3Benchmark): # ----------- # MFHartmann6 # ----------- -class MFHartmann6Benchmark(MFHartmannBenchmark): - Generator = MFHartmann6 - Config = MFHartmann6Config +class MFHartmann6Benchmark( + MFHartmannBenchmark[ + MFHartmann6, + MFHartmann6Config, + MFHartmann6Result, + ], +): + mfh_generator_type = MFHartmann6 + mfh_config_type = MFHartmann6Config + mfh_result_type = MFHartmann6Result mfh_dims = MFHartmann6.dims mfh_suffix = "" diff --git a/src/mfpbench/tabular.py b/src/mfpbench/tabular.py index d95f309..14333d5 100644 --- a/src/mfpbench/tabular.py +++ b/src/mfpbench/tabular.py @@ -1,8 +1,7 @@ from __future__ import annotations -from datetime import datetime from pathlib import Path -from typing import Any, Callable, Mapping, Sequence, TypeVar, overload +from typing import TYPE_CHECKING, Any, Iterable, 
Mapping, TypeVar, overload from typing_extensions import override import numpy as np @@ -11,8 +10,12 @@ from more_itertools import first_true from mfpbench.benchmark import Benchmark -from mfpbench.config import GenericTabularConfig, TabularConfig -from mfpbench.result import GenericTabularResult, Result +from mfpbench.config import TabularConfig +from mfpbench.result import Result + +if TYPE_CHECKING: + from mfpbench.metric import Metric + # The kind of Config to the **tabular** benchmark CTabular = TypeVar("CTabular", bound=TabularConfig) @@ -25,31 +28,6 @@ class TabularBenchmark(Benchmark[CTabular, R, F]): - id_key: str - """The column in the table that contains the config id. Will be set to the index""" - - fidelity_key: str - """The name of the fidelity used in this benchmark""" - - config_keys: Sequence[str] - """The keys in the table that contain the config""" - - result_keys: Sequence[str] - """The keys in the table that contain the results""" - - table: pd.DataFrame - """The table of results used for this benchmark""" - - configs: Mapping[str, CTabular] - """The configs used in this benchmark""" - - # The config and result type of this benchmark - Config: type[CTabular] - Result: type[R] - - # Whether this benchmark has conditonals in it or not - has_conditionals: bool = False - def __init__( # noqa: PLR0913 self, name: str, @@ -57,9 +35,10 @@ def __init__( # noqa: PLR0913 *, id_key: str, fidelity_key: str, - result_keys: Sequence[str], - config_keys: Sequence[str], - remove_constants: bool = False, + result_type: type[R], + config_type: type[CTabular], + value_metric: str | None = None, + cost_metric: str | None = None, space: ConfigurationSpace | None = None, seed: int | None = None, prior: str | Path | CTabular | Mapping[str, Any] | None = None, @@ -72,9 +51,12 @@ def __init__( # noqa: PLR0913 table: The table to use for the benchmark. id_key: The column in the table that contains the config id fidelity_key: The column in the table that contains the fidelity - result_keys: The columns in the table that contain the results - config_keys: The columns in the table that contain the config values - remove_constants: Remove constant config columns from the data or not. + result_type: The result type for this benchmark. + config_type: The config type for this benchmark. + value_metric: The metric to use for this benchmark. Uses + the default metric from the Result if None. + cost_metric: The cost to use for this benchmark. Uses + the default cost from the Result if None. space: The configuration space to use for the benchmark. If None, will just be an empty space. prior: The prior to use for the benchmark. If None, no prior is used. @@ -87,8 +69,6 @@ def __init__( # noqa: PLR0913 probability of swapping the value for a random one. seed: The seed to use for the benchmark. """ - cls = self.__class__ - # Make sure we work with a clean slate, no issue with index. 
table = table.reset_index() @@ -99,9 +79,13 @@ def __init__( # noqa: PLR0913 if fidelity_key not in table.columns: raise ValueError(f"'{fidelity_key=}' not in columns {table.columns}") + result_keys: list[str] = list(result_type.metric_defs.keys()) if not all(key in table.columns for key in result_keys): - raise ValueError(f"{result_keys=} not in columns {table.columns}") + raise ValueError( + f"Not all {result_keys=} not in columns {table.columns}", + ) + config_keys: list[str] = config_type.names() if not all(key in table.columns for key in config_keys): raise ValueError(f"{config_keys=} not in columns {table.columns}") @@ -112,19 +96,6 @@ def __init__( # noqa: PLR0913 " Please drop it or rename it.", ) - # Remove constants from the table - if remove_constants: - - def is_constant(_s: pd.Series) -> bool: - _arr = _s.to_numpy() - return bool((_arr == _arr[0]).all()) - - constant_cols = [ - col for col in table.columns if is_constant(table[col]) # type: ignore - ] - table = table.drop(columns=constant_cols) # type: ignore - config_keys = [k for k in config_keys if k not in constant_cols] - # Remap their id column to `id` table = table.rename(columns={id_key: "id"}) @@ -169,7 +140,7 @@ def is_constant(_s: pd.Series) -> bool: # ... id_table = table.groupby(level="id").agg("first") configs = { - str(config_id): cls.Config.from_dict( + str(config_id): config_type.from_dict( { **row[config_keys].to_dict(), # type: ignore "id": str(config_id), @@ -184,27 +155,48 @@ def is_constant(_s: pd.Series) -> bool: self.table = table self.configs = configs - self.fidelity_key = fidelity_key self.id_key = id_key + self.fidelity_key = fidelity_key self.config_keys = sorted(config_keys) self.result_keys = sorted(result_keys) - self.fidelity_range = (start, end, step) # type: ignore super().__init__( name=name, seed=seed, + config_type=config_type, + result_type=result_type, + fidelity_name=fidelity_key, + fidelity_range=(start, end, step), space=space, prior=prior, perturb_prior=perturb_prior, + value_metric=value_metric, + cost_metric=cost_metric, ) + _raw_optimums = { + (k, metric): ( + float(table[k].min()) if metric.minimize else float(table[k].max()) + ) + for k, metric in self.Result.metric_defs.items() + } + self.table_optimums: dict[str, Metric.Value] = { + k: metric.as_value(v) for (k, metric), v in _raw_optimums.items() + } + + if self.value_metric not in self.result_keys: + raise ValueError(f"{self.value_metric=} not in {self.result_keys}") + + if self.cost_metric not in self.result_keys: + raise ValueError(f"{self.cost_metric=} not in {self.result_keys}") + def query( self, config: CTabular | Mapping[str, Any] | str, - at: F | None = None, *, - argmax: str | None = None, - argmin: str | None = None, + at: F | None = None, + value_metric: str | None = None, + cost_metric: str | None = None, ) -> R: """Submit a query and get a result. @@ -228,20 +220,36 @@ def query( Args: config: The query to use at: The fidelity at which to query, defaults to None which means *maximum* - argmax: Whether to return the argmax up to the point `at`. Will be slower as - it has to get the entire trajectory. Uses the key from the Results. - argmin: Whether to return the argmin up to the point `at`. Will be slower as - it has to get the entire trajectory. Uses the key from the Results. + value_metric: The metric to use for this result. Uses + the value metric passed in to the constructor if not specified, + otherwise the default metric from the Result if None. + cost_metric: The metric to use for this result. 
Uses + the cost metric passed in to the constructor if not specified, + otherwise the default metric from the Result if None. Returns: The result of the query """ _config = self._find_config(config) - return super().query( - _config, - at=at, # type: ignore - argmax=argmax, - argmin=argmin, + + at = at if at is not None else self.end + assert self.start <= at <= self.end + + __config = _config.as_dict(with_id=True) + if self._config_renames is not None: + _reverse_renames = {v: k for k, v in self._config_renames.items()} + __config = {k: __config.get(v, v) for k, v in _reverse_renames.items()} + + value_metric = value_metric if value_metric is not None else self.value_metric + cost_metric = cost_metric if cost_metric is not None else self.cost_metric + + return self.Result.from_dict( + config=config, + fidelity=at, + result=self._objective_function(__config, at=at), + value_metric=str(value_metric), + cost_metric=str(cost_metric), + renames=self._result_renames, ) @override @@ -252,6 +260,8 @@ def trajectory( frm: F | None = None, to: F | None = None, step: F | None = None, + value_metric: str | None = None, + cost_metric: str | None = None, ) -> list[R]: """Submit a query and get a result. @@ -277,12 +287,46 @@ def trajectory( frm: Start of the curve, should default to the start to: End of the curve, should default to the total step: Step size, defaults to ``cls.default_step`` + value_metric: The metric to use for this result. Uses + the value metric passed in to the constructor if not specified, + otherwise the default metric from the Result if None. + cost_metric: The metric to use for this result. Uses + the cost metric passed in to the constructor if not specified, + otherwise the default metric from the Result if None. Returns: The result of the query """ _config = self._find_config(config) - return super().trajectory(_config, frm=frm, to=to, step=step) # type: ignore + + to = to if to is not None else self.end + frm = frm if frm is not None else self.start + step = step if step is not None else self.step + + __config = _config.as_dict(with_id=True) + if self._config_renames is not None: + _reverse_renames = {v: k for k, v in self._config_renames.items()} + __config = {k: __config.get(v, v) for k, v in _reverse_renames.items()} + + value_metric = value_metric if value_metric is not None else self.value_metric + cost_metric = cost_metric if cost_metric is not None else self.cost_metric + + return [ + self.Result.from_dict( + config=config, + fidelity=fidelity, + result=result, + value_metric=str(value_metric), + cost_metric=str(cost_metric), + renames=self._result_renames, + ) + for fidelity, result in self._trajectory( + __config, + frm=frm, + to=to, + step=step, + ) + ] def _find_config( self, @@ -299,6 +343,10 @@ def _find_config( # If's a Config, that's fine if isinstance(config, self.Config): + if config.id not in self.configs: + raise ValueError( + f"Config {config.id} not in {self.configs.keys()}", + ) return config # At this point, we assume we're basically dealing with a dictionary @@ -319,7 +367,7 @@ def _find_config( # id that way match = first_true( self.configs.values(), - pred=lambda c: c == config, # type: ignore + pred=lambda c: c.as_dict(with_id=False) == config, # type: ignore default=None, ) if match is None: @@ -330,7 +378,12 @@ def _find_config( return match @override - def _objective_function(self, config: CTabular, at: F) -> R: + def _objective_function( + self, + config: Mapping[str, Any], + *, + at: F, + ) -> Mapping[str, float]: """Submit a query and get a 
result. Args: @@ -340,12 +393,46 @@ def _objective_function(self, config: CTabular, at: F) -> R: Returns: The result of the query """ - row = self.table.loc[(config.id, at)] + config = dict(config) + _id = config.pop("id") + row = self.table.loc[(_id, at)] + + row.name = _id + _config = dict(row[self.config_keys]) + if config != _config: + raise ValueError( + f"Config queried with is not equal to the one in the table with {_id=}." + f"\nconfig provided {config=}" + f"\nconfig in table {_config=}", + ) - row.name = config.id - config = self.Config.from_row(row[self.config_keys]) - results = row[self.result_keys] - return self.Result.from_row(config=config, row=results, fidelity=at) + return dict(row[self.result_keys]) + + @override + def _trajectory( + self, + config: Mapping[str, Any], + *, + frm: F, + to: F, + step: F, + ) -> Iterable[tuple[F, Mapping[str, float]]]: + config = dict(config) + _id = config.pop("id") + rows = self.table.loc[(_id, frm):(_id, to):step] # type: ignore + first_config = dict(rows.iloc[0][self.config_keys]) + + if config != first_config: + raise ValueError( + f"Config queried with is not equal to the one in the table with {_id=}." + f"\nconfig provided {config=}" + f"\nconfig in table {first_config=}", + ) + + return [ + (fidelity, dict(row[self.result_keys])) + for (_, fidelity), row in rows.iterrows() + ] # No number specified, just return one config @overload @@ -390,7 +477,7 @@ def sample( """ _seed: int | None if isinstance(seed, np.random.RandomState): - _seed = seed.random_integers(0, 2**32 - 1) + _seed = seed.random_integers(0, 2**31 - 1) else: _seed = seed @@ -413,133 +500,19 @@ def sample( return [config_items[i] for i in indices] -class GenericTabularBenchmark( - TabularBenchmark[ - GenericTabularConfig, - GenericTabularResult[GenericTabularConfig, F], - F, - ], -): - Result = GenericTabularResult - Config = GenericTabularConfig - - def __init__( # noqa: PLR0913 - self, - table: pd.DataFrame, - *, - name: str | None = None, - id_key: str, - fidelity_key: str, - result_keys: Sequence[str], - config_keys: Sequence[str], - result_mapping: (dict[str, str | Callable[[pd.DataFrame], Any]] | None) = None, - remove_constants: bool = False, - space: ConfigurationSpace | None = None, - seed: int | None = None, - prior: str | Path | GenericTabularConfig | Mapping[str, Any] | None = None, - perturb_prior: float | None = None, - ): - """Initialize the benchmark. - - Args: - table: The table to use for the benchmark - name: The name of the benchmark. If None, will be set to - `unknown-{datetime.now().isoformat()}` - id_key: The column in the table that contains the config id - fidelity_key: The column in the table that contains the fidelity - result_keys: The columns in the table that contain the results - config_keys: The columns in the table that contain the config values - result_mapping: A mapping from the result keys to the table keys. - If a string, will be used as the key in the table. If a callable, - will be called with the table and the result will be used as the value. - remove_constants: Remove constant config columns from the data or not. - space: The configuration space to use for the benchmark. If None, will - just be an empty space. - seed: The seed to use. - prior: The prior to use for the benchmark. If None, no prior is used. - If a str, will check the local location first for a prior - specific for this benchmark, otherwise assumes it to be a Path. - If a Path, will load the prior from the path. - If a Mapping, will be used directly. 
- perturb_prior: If not None, will perturb the prior by this amount. - For numericals, this is interpreted as the standard deviation of a - normal distribution while for categoricals, this is interpreted - as the probability of swapping the value for a random one. - """ - if name is None: - name = f"unknown-{datetime.now().isoformat()}" - - _result_mapping: dict = result_mapping if result_mapping is not None else {} - - # Remap the result keys so it works with the generic result types - if _result_mapping is not None: - for k, v in _result_mapping.items(): - if isinstance(v, str): - if v not in table.columns: - raise ValueError(f"{v} not in columns\n{table.columns}") - - table[k] = table[v] - elif callable(v): - table[k] = v(table) - else: - raise ValueError(f"Unknown result mapping {v} for {k}") - - super().__init__( - name=name, - table=table, - id_key=id_key, - fidelity_key=fidelity_key, - result_keys=[*result_keys, *_result_mapping.keys()], - config_keys=config_keys, - remove_constants=remove_constants, - space=space, - seed=seed, - prior=prior, - perturb_prior=perturb_prior, - ) - - if __name__ == "__main__": HERE = Path(__file__).parent path = HERE.parent.parent / "data" / "lcbench-tabular" / "adult.parquet" table = pd.read_parquet(path) - benchmark = GenericTabularBenchmark( - table=table, + from mfpbench.lcbench_tabular import LCBenchTabularConfig, LCBenchTabularResult + + benchmark = TabularBenchmark( + "toy", + table, id_key="id", fidelity_key="epoch", - result_keys=[ - "time", - "val_accuracy", - "val_cross_entropy", - "val_balanced_accuracy", - "test_accuracy", - "test_cross_entropy", - "test_balanced_accuracy", - ], - result_mapping={ - "error": lambda df: 1 - df["val_accuracy"], - "score": lambda df: df["val_accuracy"], - }, - config_keys=[ - "batch_size", - "loss", - "imputation_strategy", - "learning_rate_scheduler", - "network", - "max_dropout", - "normalization_strategy", - "optimizer", - "cosine_annealing_T_max", - "cosine_annealing_eta_min", - "activation", - "max_units", - "mlp_shape", - "num_layers", - "learning_rate", - "momentum", - "weight_decay", - ], - remove_constants=True, + result_type=LCBenchTabularResult, + config_type=LCBenchTabularConfig, ) # benchmark = LCBenchTabular(task="adult") all_configs = benchmark.configs # type: ignore @@ -550,7 +523,7 @@ def __init__( # noqa: PLR0913 config_id = config.id result = benchmark.query(config, at=1) - argmin_score = benchmark.query(config, at=42, argmin="error") + argmin_score = benchmark.query(config, at=42) trajectory = benchmark.trajectory(config, frm=1, to=10) diff --git a/src/mfpbench/util.py b/src/mfpbench/util.py index 3896e99..1919446 100644 --- a/src/mfpbench/util.py +++ b/src/mfpbench/util.py @@ -72,7 +72,7 @@ def remove_hyperparameter(name: str, space: ConfigurationSpace) -> Configuration hps = [copy(hp) for hp in space.get_hyperparameters() if hp.name != name] if isinstance(space.random, np.random.RandomState): - new_seed = space.random.randint(2**32 - 1) + new_seed = space.random.randint(2**31 - 1) else: new_seed = copy(space.random) diff --git a/src/mfpbench/yahpo/benchmark.py b/src/mfpbench/yahpo/benchmark.py index 5ce61f9..0e77b89 100644 --- a/src/mfpbench/yahpo/benchmark.py +++ b/src/mfpbench/yahpo/benchmark.py @@ -4,14 +4,12 @@ import tempfile import uuid from pathlib import Path -from typing import TYPE_CHECKING, Any, ClassVar, Mapping, Sequence, TypeVar +from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Mapping, TypeVar from typing_extensions import override -from mfpbench.benchmark import 
Benchmark +from mfpbench.benchmark import Benchmark, Config, Result from mfpbench.setup_benchmark import YAHPOSource from mfpbench.util import remove_hyperparameter -from mfpbench.yahpo.config import YAHPOConfig -from mfpbench.yahpo.result import YAHPOResult if TYPE_CHECKING: import onnxruntime @@ -120,8 +118,8 @@ def _ensure_yahpo_config_set(datapath: Path) -> None: # A Yahpo Benchmark is parametrized by a YAHPOConfig, YAHPOResult and fidelity -C = TypeVar("C", bound=YAHPOConfig) -R = TypeVar("R", bound=YAHPOResult) +C = TypeVar("C", bound=Config) +R = TypeVar("R", bound=Result) F = TypeVar("F", int, float) @@ -129,26 +127,31 @@ class YAHPOBenchmark(Benchmark[C, R, F]): yahpo_base_benchmark_name: ClassVar[str] """Base name of the yahpo benchmark.""" - yahpo_instances: tuple[str, ...] | None + yahpo_config_type: type[C] + """The config type for this benchmark.""" + + yahpo_result_type: type[R] + """The result type for this benchmark.""" + + yahpo_fidelity_name: ClassVar[str] + """The name of the fidelity for this benchmark.""" + + yahpo_fidelity_range: tuple[F, F, F] + """The fidelity range for this benchmark.""" + + yahpo_has_conditionals: ClassVar[bool] = False + """Whether this benchmark has conditionals.""" + + yahpo_instances: ClassVar[tuple[str, ...] | None] = None """The instances available for this benchmark, if Any.""" - yahpo_task_id_name: ClassVar[str | None] + yahpo_task_id_name: ClassVar[str | None] = None """Name of hp used to indicate task.""" - yahpo_forced_remove_hps: Mapping[str, int | float | str] | None + yahpo_forced_remove_hps: ClassVar[Mapping[str, int | float | str] | None] = None """Any hyperparameters that should be forcefully deleted from the space but have default values filled in""" - yahpo_replacements_hps: Sequence[tuple[str, str]] | None - """Any replacements that need to be done in hyperparameters - [(dataclass_version, dict_version)]""" - - datadir: Path - """The path to where the data is stored.""" - - task_id: str - """The task id for this benchmark.""" - def __init__( # noqa: C901, PLR0912 self, task_id: str, @@ -158,6 +161,8 @@ def __init__( # noqa: C901, PLR0912 prior: str | Path | C | Mapping[str, Any] | None = None, perturb_prior: float | None = None, session: onnxruntime.InferenceSession | None = None, + value_metric: str | None = None, + cost_metric: str | None = None, ): """Initialize a Yahpo Benchmark. @@ -180,18 +185,22 @@ def __init__( # noqa: C901, PLR0912 This is only a backdoor for onnx compatibility issues with YahpoGym. You are advised not to use this unless you know what you are doing. + value_metric: The metric to use for this benchmark. Uses + the default metric from the Result if None. + cost_metric: The cost to use for this benchmark. Uses + the default cost from the Result if None. 
""" # Validation cls = self.__class__ # These errors are maintainers errors, not user errors - if cls.yahpo_forced_remove_hps is not None and cls.has_conditionals: + if cls.yahpo_forced_remove_hps is not None and cls.yahpo_has_conditionals: raise NotImplementedError( "Error setting up a YAHPO Benchmark with conditionals", " and forced hps", ) - if cls.yahpo_task_id_name is not None and cls.has_conditionals: + if cls.yahpo_task_id_name is not None and cls.yahpo_has_conditionals: raise NotImplementedError( f"{self.name} has conditionals, can't remove task_id from space", ) @@ -264,9 +273,16 @@ def __init__( # noqa: C901, PLR0912 super().__init__( name=name, seed=seed, + config_type=cls.yahpo_config_type, + result_type=cls.yahpo_result_type, + fidelity_name=cls.yahpo_fidelity_name, + fidelity_range=cls.yahpo_fidelity_range, # type: ignore + has_conditionals=cls.yahpo_has_conditionals, space=space, prior=prior, perturb_prior=perturb_prior, + value_metric=value_metric, + cost_metric=cost_metric, ) @property @@ -288,8 +304,15 @@ def load(self) -> None: _ = self.bench @override - def _objective_function(self, config: C, at: F) -> R: - query = config.dict() + def _trajectory( + self, + config: Mapping[str, Any], + *, + frm: F, + to: F, + step: F, + ) -> Iterable[tuple[F, Mapping[str, float]]]: + query = dict(config) if self.yahpo_forced_remove_hps is not None: query.update(self.yahpo_forced_remove_hps) @@ -297,24 +320,22 @@ def _objective_function(self, config: C, at: F) -> R: if self.task_id is not None and self.yahpo_task_id_name is not None: query[self.yahpo_task_id_name] = self.task_id - query[self.fidelity_name] = at + # Copy same config and insert fidelities for each + queries: list[dict] = [ + {**query, self.fidelity_name: f} + for f in self.iter_fidelities(frm=frm, to=to, step=step) + ] # NOTE: seed is allowed to be int | None results: list[dict] = self.bench.objective_function( - query, + queries, seed=self.seed, # type: ignore ) - result = results[0] - - return self.Result.from_dict( - config=config, - result=result, - fidelity=at, - ) + return zip(self.iter_fidelities(frm=frm, to=to, step=step), results) @override - def _trajectory(self, config: C, *, frm: F, to: F, step: F) -> list[R]: - query = config.dict() + def _objective_function(self, config: Mapping[str, Any], at: F) -> dict[str, float]: + query = dict(config) if self.yahpo_forced_remove_hps is not None: query.update(self.yahpo_forced_remove_hps) @@ -322,24 +343,11 @@ def _trajectory(self, config: C, *, frm: F, to: F, step: F) -> list[R]: if self.task_id is not None and self.yahpo_task_id_name is not None: query[self.yahpo_task_id_name] = self.task_id - # Copy same config and insert fidelities for each - queries: list[dict] = [ - {**query, self.fidelity_name: f} - for f in self.iter_fidelities(frm=frm, to=to, step=step) - ] + query[self.fidelity_name] = at # NOTE: seed is allowed to be int | None results: list[dict] = self.bench.objective_function( - queries, + query, seed=self.seed, # type: ignore ) - - return [ - self.Result.from_dict( - config=config, - result=result, - fidelity=query[self.fidelity_name], - ) - # We need to loop over q's for fidelity - for result, query in zip(results, queries) - ] + return results[0] diff --git a/src/mfpbench/yahpo/benchmarks/iaml/__init__.py b/src/mfpbench/yahpo/benchmarks/iaml/__init__.py index a61c505..fddc446 100644 --- a/src/mfpbench/yahpo/benchmarks/iaml/__init__.py +++ b/src/mfpbench/yahpo/benchmarks/iaml/__init__.py @@ -2,27 +2,22 @@ from mfpbench.yahpo.benchmarks.iaml.iaml_glmnet 
import ( IAMLglmnetBenchmark, IAMLglmnetConfig, - IAMLglmnetResult, ) from mfpbench.yahpo.benchmarks.iaml.iaml_ranger import ( IAMLrangerBenchmark, IAMLrangerConfig, - IAMLrangerResult, ) from mfpbench.yahpo.benchmarks.iaml.iaml_rpart import ( IAMLrpartBenchmark, IAMLrpartConfig, - IAMLrpartResult, ) from mfpbench.yahpo.benchmarks.iaml.iaml_super import ( IAMLSuperBenchmark, IAMLSuperConfig, - IAMLSuperResult, ) from mfpbench.yahpo.benchmarks.iaml.iaml_xgboost import ( IAMLxgboostBenchmark, IAMLxgboostConfig, - IAMLxgboostResult, ) __all__ = [ @@ -30,18 +25,13 @@ "IAMLConfig", "IAMLResult", "IAMLSuperBenchmark", - "IAMLSuperResult", "IAMLSuperConfig", "IAMLglmnetBenchmark", - "IAMLglmnetResult", "IAMLglmnetConfig", "IAMLrangerBenchmark", - "IAMLrangerResult", "IAMLrangerConfig", "IAMLrpartBenchmark", - "IAMLrpartResult", "IAMLrpartConfig", "IAMLxgboostBenchmark", - "IAMLxgboostResult", "IAMLxgboostConfig", ] diff --git a/src/mfpbench/yahpo/benchmarks/iaml/iaml.py b/src/mfpbench/yahpo/benchmarks/iaml/iaml.py index 898eaff..41b7836 100644 --- a/src/mfpbench/yahpo/benchmarks/iaml/iaml.py +++ b/src/mfpbench/yahpo/benchmarks/iaml/iaml.py @@ -1,91 +1,77 @@ from __future__ import annotations from dataclasses import asdict, dataclass -from typing import Any, Mapping, Sequence, TypeVar +from typing import Any, ClassVar, Mapping, TypeVar +from typing_extensions import Self +import numpy as np + +from mfpbench.benchmark import Config, Result +from mfpbench.metric import Metric from mfpbench.yahpo.benchmark import YAHPOBenchmark -from mfpbench.yahpo.config import YAHPOConfig -from mfpbench.yahpo.result import YAHPOResult C = TypeVar("C", bound="IAMLConfig") R = TypeVar("R", bound="IAMLResult") @dataclass(frozen=True, eq=False, unsafe_hash=True) # type: ignore[misc] -class IAMLConfig(YAHPOConfig): +class IAMLConfig(Config): @classmethod - def from_dict(cls: type[C], d: Mapping[str, Any]) -> C: + def from_dict( + cls, + d: Mapping[str, Any], + renames: Mapping[str, str] | None = None, + ) -> Self: """Create from a dict or mapping object.""" # We may have keys that are conditional and hence we need to flatten them config = {k.replace(".", "__"): v for k, v in d.items()} - return cls(**config) + return super().from_dict(config, renames) - def dict(self) -> dict[str, Any]: + def as_dict(self) -> dict[str, Any]: """Converts the config to a raw dictionary.""" d = asdict(self) return {k.replace("__", "."): v for k, v in d.items() if v is not None} @dataclass(frozen=True) # type: ignore[misc] -class IAMLResult(YAHPOResult[C, float]): - fidelity: float - - mmce: float - f1: float - auc: float - logloss: float - - timetrain: float - timepredict: float - - ramtrain: float - rammodel: float - rampredict: float - - mec: float - ias: float - nf: float - - @property - def score(self) -> float: - """The score of interest.""" - return self.f1 - - @property - def error(self) -> float: - """The error of interest.""" - return 1 - self.f1 - - @property - def test_score(self) -> float: - """The score on the test set.""" - return self.f1 - - @property - def test_error(self) -> float: - """The error on the test set.""" - return 1 - self.f1 - - @property - def val_score(self) -> float: - """The score on the validation set.""" - return self.score - - @property - def val_error(self) -> float: - """The error on the validation set.""" - return self.error - - @property - def cost(self) -> float: - """The time taken in seconds to train the config.""" - return self.timetrain - - -class IAMLBenchmark(YAHPOBenchmark): +class 
IAMLResult(Result[C, float]): + default_value_metric: ClassVar[str] = "f1" + default_cost_metric: ClassVar[str] = "timetrain" + metric_defs: ClassVar[Mapping[str, Metric]] = { + "mmce": Metric(minimize=True, bounds=(0, np.inf)), + "f1": Metric(minimize=False, bounds=(0, 1)), + "auc": Metric(minimize=False, bounds=(0, 1)), + "logloss": Metric(minimize=True, bounds=(0, np.inf)), + "timetrain": Metric(minimize=True, bounds=(0, np.inf)), + "timepredict": Metric(minimize=True, bounds=(0, np.inf)), + "ramtrain": Metric(minimize=True, bounds=(0, np.inf)), + "rammodel": Metric(minimize=True, bounds=(0, np.inf)), + "rampredict": Metric(minimize=True, bounds=(0, np.inf)), + } + + mmce: Metric.Value + f1: Metric.Value + auc: Metric.Value + logloss: Metric.Value + + timetrain: Metric.Value + timepredict: Metric.Value + + ramtrain: Metric.Value + rammodel: Metric.Value + rampredict: Metric.Value + + # Definitions taken from YAHPO-gym paper appendix + # Whether to minimize is not really fully relevant + # so these are not given a real Metric definition. + mec: float # main effect complexity of features + ias: float # Iteration stregth of features + nf: float # Number of features used + + +class IAMLBenchmark(YAHPOBenchmark[C, IAMLResult, float]): + yahpo_result_type = IAMLResult # IAML class of benchmarks share train size as fidelity - fidelity_range = (0.03, 1.0, 0.05) - fidelity_name = "trainsize" + yahpo_fidelity_range = (0.03, 1.0, 0.05) + yahpo_fidelity_name = "trainsize" yahpo_task_id_name = "task_id" - yahpo_replacements_hps: Sequence[tuple[str, str]] | None = None - yahpo_forced_remove_hps = None diff --git a/src/mfpbench/yahpo/benchmarks/iaml/iaml_glmnet.py b/src/mfpbench/yahpo/benchmarks/iaml/iaml_glmnet.py index 49537fe..360fdda 100644 --- a/src/mfpbench/yahpo/benchmarks/iaml/iaml_glmnet.py +++ b/src/mfpbench/yahpo/benchmarks/iaml/iaml_glmnet.py @@ -1,9 +1,8 @@ from __future__ import annotations from dataclasses import dataclass -from typing import no_type_check -from mfpbench.yahpo.benchmarks.iaml.iaml import IAMLBenchmark, IAMLConfig, IAMLResult +from mfpbench.yahpo.benchmarks.iaml.iaml import IAMLBenchmark, IAMLConfig @dataclass(frozen=True, eq=False, unsafe_hash=True) @@ -11,22 +10,9 @@ class IAMLglmnetConfig(IAMLConfig): alpha: float s: float # log - @no_type_check - def validate(self) -> None: - """Validate this config.""" - assert 0.0 <= self.alpha <= 1.0 - assert 0.00010000000000000009 <= self.s <= 999.9999999999998 - - -@dataclass(frozen=True) -class IAMLglmnetResult(IAMLResult): - config: IAMLglmnetConfig - - -class IAMLglmnetBenchmark(IAMLBenchmark): - Result = IAMLglmnetResult - Config = IAMLglmnetConfig - has_conditionals = False +class IAMLglmnetBenchmark(IAMLBenchmark[IAMLglmnetConfig]): + yahpo_config_type = IAMLglmnetConfig + yahpo_has_conditionals = False yahpo_base_benchmark_name = "iaml_glmnet" yahpo_instances = ("40981", "41146", "1489", "1067") diff --git a/src/mfpbench/yahpo/benchmarks/iaml/iaml_ranger.py b/src/mfpbench/yahpo/benchmarks/iaml/iaml_ranger.py index 332c69d..1ca27c7 100644 --- a/src/mfpbench/yahpo/benchmarks/iaml/iaml_ranger.py +++ b/src/mfpbench/yahpo/benchmarks/iaml/iaml_ranger.py @@ -1,10 +1,9 @@ from __future__ import annotations from dataclasses import dataclass -from typing import no_type_check from typing_extensions import Literal -from mfpbench.yahpo.benchmarks.iaml.iaml import IAMLBenchmark, IAMLConfig, IAMLResult +from mfpbench.yahpo.benchmarks.iaml.iaml import IAMLBenchmark, IAMLConfig @dataclass(frozen=True, eq=False, unsafe_hash=True) @@ -18,34 
+17,9 @@ class IAMLrangerConfig(IAMLConfig): num__random__splits: int | None = None - @no_type_check - def validate(self) -> None: - """Validate this config.""" - assert 1 <= self.min__node__size <= 100 - assert 0 <= self.mtry__power <= 1 - assert 1 <= self.num__trees <= 2000 - assert self.respect__unordered__factors in [ - "ignore", - "order", - "partition", - ] - assert 0.1 <= self.sample__fraction <= 1.0 - assert self.splitrule in ["gini", "extratrees"] - - if self.num__random__splits is not None: - assert self.splitrule == "extratrees" - assert 1 <= self.num__random__splits <= 100 - - -@dataclass(frozen=True) -class IAMLrangerResult(IAMLResult): - config: IAMLrangerConfig - - -class IAMLrangerBenchmark(IAMLBenchmark): - Result = IAMLrangerResult - Config = IAMLrangerConfig - has_conditionals = True +class IAMLrangerBenchmark(IAMLBenchmark[IAMLrangerConfig]): + yahpo_config_type = IAMLrangerConfig + yahpo_has_conditionals = True yahpo_base_benchmark_name = "iaml_ranger" yahpo_instances = ("40981", "41146", "1489", "1067") diff --git a/src/mfpbench/yahpo/benchmarks/iaml/iaml_rpart.py b/src/mfpbench/yahpo/benchmarks/iaml/iaml_rpart.py index 86d0051..c62a15f 100644 --- a/src/mfpbench/yahpo/benchmarks/iaml/iaml_rpart.py +++ b/src/mfpbench/yahpo/benchmarks/iaml/iaml_rpart.py @@ -1,9 +1,8 @@ from __future__ import annotations from dataclasses import dataclass -from typing import no_type_check -from mfpbench.yahpo.benchmarks.iaml.iaml import IAMLBenchmark, IAMLConfig, IAMLResult +from mfpbench.yahpo.benchmarks.iaml.iaml import IAMLBenchmark, IAMLConfig @dataclass(frozen=True, eq=False, unsafe_hash=True) @@ -13,24 +12,9 @@ class IAMLrpartConfig(IAMLConfig): minbucket: int minsplit: int - @no_type_check - def validate(self) -> None: - """Validate this config.""" - assert 0.00010000000000000009 <= self.cp <= 1.0 - assert 1 <= self.maxdepth <= 30 - assert 1 <= self.minbucket <= 100 - assert 1 <= self.minsplit <= 100 - - -@dataclass(frozen=True) -class IAMLrpartResult(IAMLResult): - config: IAMLrpartConfig - - -class IAMLrpartBenchmark(IAMLBenchmark): - Result = IAMLrpartResult - Config = IAMLrpartConfig - has_conditionals = False +class IAMLrpartBenchmark(IAMLBenchmark[IAMLrpartConfig]): + yahpo_config_type = IAMLrpartConfig + yahpo_has_conditionals = False yahpo_base_benchmark_name = "iaml_rpart" yahpo_instances = ("40981", "41146", "1489", "1067") diff --git a/src/mfpbench/yahpo/benchmarks/iaml/iaml_super.py b/src/mfpbench/yahpo/benchmarks/iaml/iaml_super.py index 4e14749..dca3aee 100644 --- a/src/mfpbench/yahpo/benchmarks/iaml/iaml_super.py +++ b/src/mfpbench/yahpo/benchmarks/iaml/iaml_super.py @@ -1,10 +1,9 @@ from __future__ import annotations from dataclasses import dataclass -from typing import no_type_check from typing_extensions import Literal -from mfpbench.yahpo.benchmarks.iaml.iaml import IAMLBenchmark, IAMLConfig, IAMLResult +from mfpbench.yahpo.benchmarks.iaml.iaml import IAMLBenchmark, IAMLConfig @dataclass(frozen=True, eq=False, unsafe_hash=True) @@ -48,128 +47,9 @@ class IAMLSuperConfig(IAMLConfig): xgboost__skip_drop: float | None = None xgboost__subsample: float | None = None - @no_type_check - def validate(self) -> None: # noqa: C901, PLR0915, PLR0912 - """Validate this config.""" - assert self.learner_id in ["glmnet", "ranger", "rpart", "xgboost"] - - # We do some conditional checking here - learner = self.learner_id - - # We filter out all attributes except for those that must always be contained - # or are the selected learner, ... 
- attrs = [ - attr - for attr in dir(self) - if not attr.startswith("__") - or not attr.startswith(learner) - or attr in ["learner_id"] - ] - - # ... the remaining must always have None set then - for attr in attrs: - assert attr is None - - if learner == "glmnet": - assert self.glmnet__alpha is not None - assert self.glmnet__s is not None - assert 0.0 <= self.glmnet__alpha <= 1.0 - assert 0.00010000000000000009 <= self.glmnet__s <= 999.9999999999998 - - elif learner == "rpart": - assert self.rpart__cp is not None - assert self.rpart__maxdepth is not None - assert self.rpart__minbucket is not None - assert self.rpart__minsplit is not None - assert 0.00010000000000000009 <= self.rpart__cp <= 1.0 - assert 1 <= self.rpart__maxdepth <= 30 - assert 1 <= self.rpart__minbucket <= 100 - assert 1 <= self.rpart__minsplit <= 100 - - elif learner == "ranger": - assert self.ranger__min__node__size is not None - assert self.ranger__mtry__power is not None - assert self.ranger__num__trees is not None - assert self.ranger__respect__unordered__factors is not None - assert self.ranger__sample__fraction is not None - assert 1 <= self.ranger__min__node__size <= 100 - assert 0 <= self.ranger__mtry__power <= 1 - assert 1 <= self.ranger__num__trees <= 2000 - assert self.ranger__respect__unordered__factors in [ - "ignore", - "order", - "partition", - ] - assert 0.1 <= self.ranger__sample__fraction <= 1.0 - assert self.ranger__splitrule in ["gini", "extratrees"] - - if self.ranger__num__random__splits is not None: - assert self.ranger__splitrule == "extratrees" - assert 1 <= self.ranger__num__random__splits <= 100 - - elif learner == "xgboost": - assert self.xgboost__alpha is not None - assert self.xgboost__lambda is not None - assert self.xgboost__nrounds is not None - assert self.xgboost__subsample is not None - assert self.xgboost__booster in ["gblinear", "gbtree", "dart"] - assert 0.00010000000000000009 <= self.xgboost__alpha <= 999.9999999999998 - assert 0.00010000000000000009 <= self.xgboost__lambda <= 999.9999999999998 - assert 7 <= self.xgboost__nrounds <= 2981 - assert 0.1 <= self.xgboost__subsample <= 1.0 - - if self.xgboost__colsample_bylevel is not None: - assert self.xgboost__booster in ["dart", "gbtree"] - assert 0.01 <= self.xgboost__colsample_bylevel <= 1.0 - - if self.xgboost__colsample_bytree is not None: - assert self.xgboost__booster in ["dart", "gbtree"] - assert 0.01 <= self.xgboost__colsample_bytree <= 1.0 - - if self.xgboost__eta is not None: - assert self.xgboost__booster in ["dart", "gbtree"] - assert 0.00010000000000000009 <= self.xgboost__eta <= 1.0 - - if self.xgboost__gamma is not None: - assert self.xgboost__booster in ["dart", "gbtree"] - assert ( - 0.00010000000000000009 <= self.xgboost__gamma <= 6.999999999999999 - ) - - if self.xgboost__max_depth is not None: - assert self.xgboost__booster in ["dart", "gbtree"] - assert 1 <= self.xgboost__max_depth <= 15 - - if self.xgboost__min_child_weight is not None: - assert self.xgboost__booster in ["dart", "gbtree"] - assert ( - 2.718281828459045 - <= self.xgboost__min_child_weight - <= 149.99999999999997 - ) - - if self.xgboost__rate_drop is not None: - assert self.xgboost__booster in ["dart"] - assert 0.0 <= self.xgboost__rate_drop <= 1.0 - - if self.xgboost__skip_drop is not None: - assert self.xgboost__booster in ["dart"] - assert 0.0 <= self.xgboost__skip_drop <= 1.0 - - else: - raise NotImplementedError() - - -@dataclass(frozen=True) -class IAMLSuperResult(IAMLResult): - config: IAMLSuperConfig - - -class 
IAMLSuperBenchmark(IAMLBenchmark): - Result = IAMLSuperResult - Config = IAMLSuperConfig - - has_conditionals = True +class IAMLSuperBenchmark(IAMLBenchmark[IAMLSuperConfig]): + yahpo_config_type = IAMLSuperConfig + yahpo_has_conditionals = True yahpo_base_benchmark_name = "iaml_super" yahpo_instances = ("40981", "41146", "1489", "1067") diff --git a/src/mfpbench/yahpo/benchmarks/iaml/iaml_xgboost.py b/src/mfpbench/yahpo/benchmarks/iaml/iaml_xgboost.py index 066ad47..47cfeec 100644 --- a/src/mfpbench/yahpo/benchmarks/iaml/iaml_xgboost.py +++ b/src/mfpbench/yahpo/benchmarks/iaml/iaml_xgboost.py @@ -1,10 +1,10 @@ from __future__ import annotations from dataclasses import dataclass -from typing import no_type_check +from typing import Mapping from typing_extensions import Literal -from mfpbench.yahpo.benchmarks.iaml.iaml import IAMLBenchmark, IAMLConfig, IAMLResult +from mfpbench.yahpo.benchmarks.iaml.iaml import IAMLBenchmark, IAMLConfig @dataclass(frozen=True, eq=False, unsafe_hash=True) @@ -24,58 +24,10 @@ class IAMLxgboostConfig(IAMLConfig): rate_drop: float | None = None skip_drop: float | None = None - @no_type_check - def validate(self) -> None: - """Validate this config.""" - assert self.booster in ["gblinear", "gbtree", "dart"] - assert 0.00010000000000000009 <= self.alpha <= 999.9999999999998 - assert 0.00010000000000000009 <= self._lambda <= 999.9999999999998 - assert 7 <= self.nrounds <= 2981 - assert 0.1 <= self.subsample <= 1.0 - - if self.colsample_bylevel is not None: - assert self.booster in ["dart", "gbtree"] - assert 0.01 <= self.colsample_bylevel <= 1.0 - - if self.colsample_bytree is not None: - assert self.booster in ["dart", "gbtree"] - assert 0.01 <= self.colsample_bytree <= 1.0 - - if self.eta is not None: - assert self.booster in ["dart", "gbtree"] - assert 0.00010000000000000009 <= self.eta <= 1.0 - - if self.gamma is not None: - assert self.booster in ["dart", "gbtree"] - assert 0.00010000000000000009 <= self.gamma <= 6.999999999999999 - - if self.max_depth is not None: - assert self.booster in ["dart", "gbtree"] - assert 1 <= self.max_depth <= 15 - - if self.min_child_weight is not None: - assert self.booster in ["dart", "gbtree"] - assert 2.718281828459045 <= self.min_child_weight <= 149.99999999999997 - - if self.rate_drop is not None: - assert self.booster in ["dart"] - assert 0.0 <= self.rate_drop <= 1.0 - - if self.skip_drop is not None: - assert self.booster in ["dart"] - assert 0.0 <= self.skip_drop <= 1.0 - - -@dataclass(frozen=True) -class IAMLxgboostResult(IAMLResult): - config: IAMLxgboostConfig - - -class IAMLxgboostBenchmark(IAMLBenchmark): - Result = IAMLxgboostResult - Config = IAMLxgboostConfig - has_conditionals = True +class IAMLxgboostBenchmark(IAMLBenchmark[IAMLxgboostConfig]): + _config_replacements: Mapping[str, str] = {"lambda": "_lambda"} + yahpo_config_type = IAMLxgboostConfig + yahpo_has_conditionals = True yahpo_base_benchmark_name = "iaml_xgboost" - yahpo_replacements_hps = (("_lambda", "lambda"),) yahpo_instances = ("40981", "41146", "1489", "1067") diff --git a/src/mfpbench/yahpo/benchmarks/lcbench.py b/src/mfpbench/yahpo/benchmarks/lcbench.py index 06e6954..13bad03 100644 --- a/src/mfpbench/yahpo/benchmarks/lcbench.py +++ b/src/mfpbench/yahpo/benchmarks/lcbench.py @@ -1,14 +1,16 @@ from __future__ import annotations from dataclasses import dataclass +from typing import ClassVar, Mapping -from mfpbench.yahpo.benchmark import YAHPOBenchmark -from mfpbench.yahpo.config import YAHPOConfig -from mfpbench.yahpo.result import YAHPOResult 
+import numpy as np + +from mfpbench.metric import Metric +from mfpbench.yahpo.benchmark import Config, Result, YAHPOBenchmark @dataclass(frozen=True, eq=False, unsafe_hash=True) -class LCBenchConfig(YAHPOConfig): +class LCBenchConfig(Config): """A LCBench Config. Note: @@ -25,74 +27,38 @@ class LCBenchConfig(YAHPOConfig): max_units: int # [64, 1024] int log max_dropout: float # [0.0, 1.0] float - def validate(self) -> None: - """Validate this is a correct config.""" - assert 16 <= self.batch_size <= 512 - assert 1e-04 <= self.learning_rate <= 0.1 - assert 0.1 <= self.momentum <= 0.99 - assert 1e-05 <= self.weight_decay <= 0.1 - assert 1 <= self.num_layers <= 5 - assert 64 <= self.max_units <= 1024 - assert 0.0 <= self.max_dropout <= 1.0 - @dataclass(frozen=True) # type: ignore[misc] -class LCBenchResult(YAHPOResult[LCBenchConfig, int]): - time: float # unit? - - val_accuracy: float - val_cross_entropy: float - val_balanced_accuracy: float - - test_cross_entropy: float - test_balanced_accuracy: float - - @property - def score(self) -> float: - """The score of interest.""" - return self.val_balanced_accuracy +class LCBenchResult(Result[LCBenchConfig, int]): + default_value_metric: ClassVar[str] = "val_balanced_accuracy" + default_cost_metric: ClassVar[str] = "time" + metric_defs: ClassVar[Mapping[str, Metric]] = { + "val_accuracy": Metric(minimize=False, bounds=(0, 100)), + "val_balanced_accuracy": Metric(minimize=False, bounds=(0, 100)), + "val_cross_entropy": Metric(minimize=True, bounds=(0, np.inf)), + "test_balanced_accuracy": Metric(minimize=False, bounds=(0, 100)), + "test_cross_entropy": Metric(minimize=True, bounds=(0, np.inf)), + "time": Metric(minimize=True, bounds=(0, np.inf)), + } - @property - def error(self) -> float: - """The error of interest.""" - return 1 - self.val_balanced_accuracy + val_accuracy: Metric.Value + val_cross_entropy: Metric.Value + val_balanced_accuracy: Metric.Value - @property - def test_score(self) -> float: - """The score on the test set.""" - return self.test_balanced_accuracy + test_cross_entropy: Metric.Value + test_balanced_accuracy: Metric.Value - @property - def test_error(self) -> float: - """The score on the test set.""" - return 1 - self.test_balanced_accuracy - - @property - def val_score(self) -> float: - """The score on the validation set.""" - return self.val_balanced_accuracy - - @property - def val_error(self) -> float: - """The score on the validation set.""" - return 1 - self.val_balanced_accuracy - - @property - def cost(self) -> float: - """Time taken in seconds to train the config (assumed to be seconds).""" - return self.time + time: Metric.Value # unit? 
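# ---- Editor's sketch (not part of the patch): switching metrics on LCBenchResult ----
# With `metric_defs` declared on `LCBenchResult`, what `.error`/`.score`/`.cost`
# report is no longer hard-coded; it follows the selected value/cost metric. A minimal
# sketch assuming the metric names and defaults shown above; the raw numbers are
# invented purely for illustration.
from mfpbench.yahpo.benchmarks.lcbench import LCBenchConfig, LCBenchResult

config = LCBenchConfig.from_dict(
    {
        "batch_size": 128,
        "learning_rate": 1e-3,
        "momentum": 0.9,
        "weight_decay": 1e-4,
        "num_layers": 3,
        "max_units": 256,
        "max_dropout": 0.2,
    },
)
raw = {
    "val_accuracy": 91.2,
    "val_balanced_accuracy": 90.1,
    "val_cross_entropy": 0.31,
    "test_balanced_accuracy": 88.7,
    "test_cross_entropy": 0.35,
    "time": 123.4,
}

# Defaults: value metric "val_balanced_accuracy", cost metric "time".
by_default = LCBenchResult.from_dict(config=config, fidelity=52, result=raw)

# Same raw row, but judged by validation cross-entropy instead.
by_ce = LCBenchResult.from_dict(
    config=config,
    fidelity=52,
    result=raw,
    value_metric="val_cross_entropy",
)
print(by_default.error, by_ce.error)  # derived from different metrics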
class LCBenchBenchmark(YAHPOBenchmark): - fidelity_name = "epoch" - fidelity_range = (1, 52, 1) - Config = LCBenchConfig - Result = LCBenchResult - + yahpo_fidelity_range = (1, 52, 1) + yahpo_fidelity_name = "epoch" + yahpo_config_type = LCBenchConfig + yahpo_result_type = LCBenchResult yahpo_base_benchmark_name = "lcbench" yahpo_task_id_name = "OpenML_task_id" - yahpo_replacements_hps = None - yahpo_forced_remove_hps = None + yahpo_has_conditionals = False yahpo_instances = ( "3945", "7593", diff --git a/src/mfpbench/yahpo/benchmarks/nb301.py b/src/mfpbench/yahpo/benchmarks/nb301.py index 53b37e6..138b97b 100644 --- a/src/mfpbench/yahpo/benchmarks/nb301.py +++ b/src/mfpbench/yahpo/benchmarks/nb301.py @@ -1,15 +1,14 @@ from __future__ import annotations from dataclasses import asdict, dataclass -from itertools import product -from typing import Any, Mapping, TypeVar, no_type_check -from typing_extensions import Literal +from typing import Any, ClassVar, Mapping +from typing_extensions import Literal, Self -from mfpbench.yahpo.benchmark import YAHPOBenchmark -from mfpbench.yahpo.config import YAHPOConfig -from mfpbench.yahpo.result import YAHPOResult +import numpy as np -Self = TypeVar("Self", bound="NB301Config") +from mfpbench.benchmark import Config, Result +from mfpbench.metric import Metric +from mfpbench.yahpo.benchmark import YAHPOBenchmark ChoicesT = Literal[ "max_pool_3x3", @@ -35,7 +34,7 @@ @dataclass(frozen=True, eq=False, unsafe_hash=True) -class NB301Config(YAHPOConfig): +class NB301Config(Config): edge_normal_0: ChoicesT edge_normal_1: ChoicesT @@ -98,52 +97,18 @@ class NB301Config(YAHPOConfig): edge_reduce_12: ChoicesT | None = None edge_reduce_13: ChoicesT | None = None - @no_type_check - def validate(self) -> None: - """Validate this is a correct config. - - Note: - ---- - We don't check conditionals validity - """ - nodes = list(range(13 + 1)) - cells = ["normal", "reduce"] - for i, cell in product(nodes, cells): - attr_name = f"edge_{cell}_{i}" - attr = getattr(self, attr_name) - assert attr is None or attr in Choices, attr_name - - choices_3 = ["0_1", "0_2", "1_2"] - choices_4 = ["0_1", "0_2", "0_3", "1_2", "1_3", "2_3"] - choices_5 = [ - "0_1", - "0_2", - "0_3", - "0_4", - "1_2", - "1_3", - "1_4", - "2_3", - "2_4", - "3_4", - ] - - nodes = list(range(3, 5 + 1)) - for i, choices in [(3, choices_3), (4, choices_4), (5, choices_5)]: - normal_node = f"inputs_node_normal_{i}" - assert getattr(self, normal_node) in choices - - reduce_node = f"inputs_node_reduce_{i}" - assert getattr(self, reduce_node) in choices - @classmethod - def from_dict(cls: type[Self], d: Mapping[str, Any]) -> Self: + def from_dict( + cls, + d: Mapping[str, Any], + renames: Mapping[str, str] | None = None, + ) -> Self: """Create from a dict or mapping object.""" - # We just flatten things because it's way too big of a name + # We may have keys that are conditional and hence we need to flatten them config = {k.replace(_hp_name_extension, ""): v for k, v in d.items()} - return cls(**config) + return super().from_dict(config, renames) - def dict(self) -> dict[str, Any]: + def as_dict(self) -> dict[str, Any]: """Converts the config to a raw dictionary.""" return { _hp_name_extension + k: v for k, v in asdict(self).items() if v is not None @@ -151,55 +116,23 @@ def dict(self) -> dict[str, Any]: @dataclass(frozen=True) # type: ignore[misc] -class NB301Result(YAHPOResult[NB301Config, int]): - runtime: float # unit? 
- val_accuracy: float - - @property - def score(self) -> float: - """The score of interest.""" - return self.val_accuracy - - @property - def error(self) -> float: - """The error of interest.""" - return 1 - self.val_accuracy - - @property - def test_score(self) -> float: - """The score on the test set.""" - return self.val_accuracy - - @property - def test_error(self) -> float: - """The score on the test set.""" - return 1 - self.val_accuracy - - @property - def val_score(self) -> float: - """The score on the validation set.""" - return self.val_accuracy - - @property - def val_error(self) -> float: - """The score on the validation set.""" - return 1 - self.val_accuracy - - @property - def cost(self) -> float: - """Time taken in seconds to train the config.""" - return self.runtime +class NB301Result(Result[NB301Config, int]): + default_value_metric: ClassVar[str] = "val_accuracy" + default_cost_metric: ClassVar[str] = "runtime" + metric_defs: ClassVar[Mapping[str, Metric]] = { + "runtime": Metric(minimize=True, bounds=(0, np.inf)), + "val_accuracy": Metric(minimize=False, bounds=(0, 1)), + } + runtime: Metric.Value # unit? + val_accuracy: Metric.Value -class NB301Benchmark(YAHPOBenchmark): - fidelity_name = "epoch" - fidelity_range = (1, 98, 1) - Config = NB301Config - Result = NB301Result - has_conditionals = True +class NB301Benchmark(YAHPOBenchmark): + yahpo_fidelity_name = "epoch" + yahpo_fidelity_range = (1, 98, 1) + yahpo_config_type = NB301Config + yahpo_result_type = NB301Result + yahpo_has_conditionals = True yahpo_base_benchmark_name = "nb301" - yahpo_task_id_name = None yahpo_instances = ("CIFAR10",) - yahpo_replacements_hps = None - yahpo_forced_remove_hps = None diff --git a/src/mfpbench/yahpo/benchmarks/rbv2/__init__.py b/src/mfpbench/yahpo/benchmarks/rbv2/__init__.py index 723afaa..5e8b435 100644 --- a/src/mfpbench/yahpo/benchmarks/rbv2/__init__.py +++ b/src/mfpbench/yahpo/benchmarks/rbv2/__init__.py @@ -2,37 +2,30 @@ from mfpbench.yahpo.benchmarks.rbv2.rbv2_aknn import ( RBV2aknnBenchmark, RBV2aknnConfig, - RBV2aknnResult, ) from mfpbench.yahpo.benchmarks.rbv2.rbv2_glmnet import ( RBV2glmnetBenchmark, RBV2glmnetConfig, - RBV2glmnetResult, ) from mfpbench.yahpo.benchmarks.rbv2.rbv2_ranger import ( RBV2rangerBenchmark, RBV2rangerConfig, - RBV2rangerResult, ) from mfpbench.yahpo.benchmarks.rbv2.rbv2_rpart import ( RBV2rpartBenchmark, RBV2rpartConfig, - RBV2rpartResult, ) from mfpbench.yahpo.benchmarks.rbv2.rbv2_super import ( RBV2SuperBenchmark, RBV2SuperConfig, - RBV2SuperResult, ) from mfpbench.yahpo.benchmarks.rbv2.rbv2_svm import ( RBV2svmBenchmark, RBV2svmConfig, - RBV2svmResult, ) from mfpbench.yahpo.benchmarks.rbv2.rbv2_xgboost import ( RBV2xgboostBenchmark, RBV2xgboostConfig, - RBV2xgboostResult, ) __all__ = [ @@ -40,24 +33,17 @@ "RBV2Config", "RBV2Result", "RBV2SuperBenchmark", - "RBV2SuperResult", "RBV2SuperConfig", "RBV2glmnetBenchmark", - "RBV2glmnetResult", "RBV2glmnetConfig", "RBV2rangerBenchmark", - "RBV2rangerResult", "RBV2rangerConfig", "RBV2rpartBenchmark", - "RBV2rpartResult", "RBV2rpartConfig", "RBV2svmBenchmark", - "RBV2svmResult", "RBV2svmConfig", "RBV2xgboostBenchmark", - "RBV2xgboostResult", "RBV2xgboostConfig", "RBV2aknnBenchmark", - "RBV2aknnResult", "RBV2aknnConfig", ] diff --git a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2.py b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2.py index 59f1a80..22a83b4 100644 --- a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2.py +++ b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2.py @@ -1,90 +1,74 @@ from __future__ import annotations 
from dataclasses import asdict, dataclass -from typing import Any, Mapping, Sequence, TypeVar +from typing import Any, ClassVar, Mapping, TypeVar +from typing_extensions import Self +import numpy as np + +from mfpbench.benchmark import Config, Result +from mfpbench.metric import Metric from mfpbench.yahpo.benchmark import YAHPOBenchmark -from mfpbench.yahpo.config import YAHPOConfig -from mfpbench.yahpo.result import YAHPOResult C = TypeVar("C", bound="RBV2Config") R = TypeVar("R", bound="RBV2Result") @dataclass(frozen=True, eq=False, unsafe_hash=True) # type: ignore[misc] -class RBV2Config(YAHPOConfig): +class RBV2Config(Config): @classmethod - def from_dict(cls: type[C], d: Mapping[str, Any]) -> C: + def from_dict( + cls, + d: Mapping[str, Any], + renames: Mapping[str, str] | None = None, + ) -> Self: """Create from a dict or mapping object.""" # We may have keys that are conditional and hence we need to flatten them config = {k.replace(".", "__"): v for k, v in d.items()} - return cls(**config) + return super().from_dict(config, renames) - def dict(self) -> dict[str, Any]: + def as_dict(self) -> dict[str, Any]: """Converts the config to a raw dictionary.""" d = asdict(self) return {k.replace("__", "."): v for k, v in d.items() if v is not None} @dataclass(frozen=True) # type: ignore[misc] -class RBV2Result(YAHPOResult[C, float]): - # Fidelity - fidelity: float - - acc: float - bac: float - auc: float - brier: float - f1: float - logloss: float - - timetrain: float - timepredict: float - - memory: float - - @property - def score(self) -> float: - """The score of interest.""" - return self.bac - - @property - def error(self) -> float: - """The error of interest.""" - return 1 - self.bac - - @property - def test_score(self) -> float: - """The score on the test set.""" - return self.score - - @property - def test_error(self) -> float: - """The error on the test set.""" - return self.error - - @property - def val_score(self) -> float: - """The score on the validation set.""" - return self.score - - @property - def val_error(self) -> float: - """The error on the validation set.""" - return self.error - - @property - def cost(self) -> float: - """The time taken in seconds to train the config.""" - return self.timetrain - - -class RBV2Benchmark(YAHPOBenchmark): +class RBV2Result(Result[C, float]): + default_value_metric: ClassVar[str] = "bac" + default_cost_metric: ClassVar[str] = "timetrain" + metric_defs: ClassVar[Mapping[str, Metric]] = { + "acc": Metric(minimize=False, bounds=(0, 1)), + "bac": Metric(minimize=False, bounds=(0, 1)), + "auc": Metric(minimize=False, bounds=(0, 1)), + "brier": Metric(minimize=True, bounds=(0, 1)), + "f1": Metric(minimize=False, bounds=(0, 1)), + "logloss": Metric(minimize=True, bounds=(0, np.inf)), + "timetrain": Metric(minimize=True, bounds=(0, np.inf)), + "timepredict": Metric(minimize=True, bounds=(0, np.inf)), + "memory": Metric(minimize=True, bounds=(0, np.inf)), + } + + acc: Metric.Value + bac: Metric.Value + auc: Metric.Value + brier: Metric.Value + f1: Metric.Value + logloss: Metric.Value + + timetrain: Metric.Value + timepredict: Metric.Value + + memory: Metric.Value + + +class RBV2Benchmark(YAHPOBenchmark[C, RBV2Result, float]): # RVB2 class of benchmarks share train size as fidelity - fidelity_range = (0.03, 1.0, 0.05) - fidelity_name = "trainsize" + yahpo_config_type: type[C] + yahpo_result_type = RBV2Result + yahpo_fidelity_range = (0.03, 1.0, 0.05) + yahpo_fidelity_name = "trainsize" yahpo_task_id_name = "task_id" # We have to specify a repl 
number, not sure what it is but YAHPO gym fix it to 10 - yahpo_forced_remove_hps: Mapping[str, int] = {"repl": 10} - yahpo_replacements_hps: Sequence[tuple[str, str]] | None = None + yahpo_forced_remove_hps: ClassVar[Mapping[str, int]] = {"repl": 10} diff --git a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_aknn.py b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_aknn.py index ffb6043..dfce2f3 100644 --- a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_aknn.py +++ b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_aknn.py @@ -1,10 +1,9 @@ from __future__ import annotations from dataclasses import dataclass -from typing import no_type_check from typing_extensions import Literal -from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config, RBV2Result +from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config @dataclass(frozen=True, eq=False, unsafe_hash=True) @@ -16,31 +15,10 @@ class RBV2aknnConfig(RBV2Config): ef_construction: int # (7, 1097), log k: int # (1, 50) - @no_type_check - def validate(self) -> None: - """Validate this config.""" - assert self.num__impute__selected__cpo in [ - "impute.mean", - "impute.median", - "impute.hist", - ] - assert 18 <= self.M <= 50 - assert self.distance in ["l2", "cosine", "ip"] - assert 7 <= self.ef <= 403 - assert 7 <= self.ef_construction <= 1097 - assert 1 <= self.k <= 50 - - -@dataclass(frozen=True) -class RBV2aknnResult(RBV2Result): - config: RBV2aknnConfig - - -class RBV2aknnBenchmark(RBV2Benchmark): - Result = RBV2aknnResult - Config = RBV2aknnConfig - has_conditionals = False +class RBV2aknnBenchmark(RBV2Benchmark[RBV2aknnConfig]): + yahpo_config_type = RBV2aknnConfig + yahpo_has_conditionals = False yahpo_base_benchmark_name = "rbv2_aknn" yahpo_instances = ( "41138", diff --git a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_glmnet.py b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_glmnet.py index c43d19b..6c57549 100644 --- a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_glmnet.py +++ b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_glmnet.py @@ -1,10 +1,9 @@ from __future__ import annotations from dataclasses import dataclass -from typing import no_type_check from typing_extensions import Literal -from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config, RBV2Result +from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config @dataclass(frozen=True, eq=False, unsafe_hash=True) @@ -14,28 +13,10 @@ class RBV2glmnetConfig(RBV2Config): alpha: float # (0.0, 1.0) s: float # (0.0009118819655545162, 1096.6331584284585), log - @no_type_check - def validate(self) -> None: - """Validate this config.""" - assert self.num__impute__selected__cpo in [ - "impute.mean", - "impute.median", - "impute.hist", - ] - assert 0.0 <= self.alpha <= 1.0 - assert 0.0009118819655545162 <= self.s <= 1096.6331584284585 - - -@dataclass(frozen=True) -class RBV2glmnetResult(RBV2Result): - config: RBV2glmnetConfig - - -class RBV2glmnetBenchmark(RBV2Benchmark): - Result = RBV2glmnetResult - Config = RBV2glmnetConfig - has_conditionals = False +class RBV2glmnetBenchmark(RBV2Benchmark[RBV2glmnetConfig]): + yahpo_config_type = RBV2glmnetConfig + yahpo_has_conditionals = False yahpo_base_benchmark_name = "rbv2_glmnet" yahpo_instances = ( "41138", diff --git a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_ranger.py b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_ranger.py index 4ffe055..c99251a 100644 --- a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_ranger.py +++ b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_ranger.py @@ -1,10 +1,9 @@ from __future__ import annotations from dataclasses import 
dataclass -from typing import no_type_check from typing_extensions import Literal -from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config, RBV2Result +from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config @dataclass(frozen=True, eq=False, unsafe_hash=True) @@ -20,41 +19,11 @@ class RBV2rangerConfig(RBV2Config): num__random__splits: int | None = None # (1, 100) - @no_type_check - def validate(self) -> None: - """Validate this config.""" - assert self.num__impute__selected__cpo in [ - "impute.mean", - "impute.median", - "impute.hist", - ] - assert 1 <= self.min__node__size <= 100 - assert 0 <= self.mtry__power <= 1 - assert 1 <= self.num__trees <= 2000 - assert self.respect__unordered__factors in [ - "ignore", - "order", - "partition", - ] - assert 0.1 <= self.sample__fraction <= 1.0 - assert self.splitrule in ["gini", "extratrees"] - if self.num__random__splits is not None: - assert self.splitrule == "extratrees" - assert 1 <= self.num__random__splits <= 100 - - -@dataclass(frozen=True) -class RBV2rangerResult(RBV2Result): - config: RBV2rangerConfig - - -class RBV2rangerBenchmark(RBV2Benchmark): +class RBV2rangerBenchmark(RBV2Benchmark[RBV2rangerConfig]): + yahpo_config_type = RBV2rangerConfig yahpo_base_benchmark_name = "rbv2_ranger" - Result = RBV2rangerResult - Config = RBV2rangerConfig - has_conditionals = True - + yahpo_has_conditionals = True yahpo_instances = ( "4135", "40981", diff --git a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_rpart.py b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_rpart.py index 4347b98..dc05da4 100644 --- a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_rpart.py +++ b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_rpart.py @@ -1,10 +1,9 @@ from __future__ import annotations from dataclasses import dataclass -from typing import no_type_check from typing_extensions import Literal -from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config, RBV2Result +from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config @dataclass(frozen=True, eq=False, unsafe_hash=True) @@ -16,30 +15,10 @@ class RBV2rpartConfig(RBV2Config): minbucket: int # (1, 100) minsplit: int # (1, 100) - @no_type_check - def validate(self) -> None: - """Validate this config.""" - assert self.num__impute__selected__cpo in [ - "impute.mean", - "impute.median", - "impute.hist", - ] - assert 0.0009118819655545162 <= self.cp <= 1.0 - assert 1 <= self.maxdepth <= 30 - assert 1 <= self.minbucket <= 100 - assert 1 <= self.minsplit <= 100 - - -@dataclass(frozen=True) -class RBV2rpartResult(RBV2Result): - config: RBV2rpartConfig - - -class RBV2rpartBenchmark(RBV2Benchmark): - Result = RBV2rpartResult - Config = RBV2rpartConfig - has_conditionals = False +class RBV2rpartBenchmark(RBV2Benchmark[RBV2rpartConfig]): + yahpo_config_type = RBV2rpartConfig + yahpo_has_conditionals = False yahpo_base_benchmark_name = "rbv2_rpart" yahpo_instances = ( "41138", diff --git a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_super.py b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_super.py index e3f3bac..fcd752e 100644 --- a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_super.py +++ b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_super.py @@ -1,10 +1,9 @@ from __future__ import annotations from dataclasses import dataclass -from typing import no_type_check from typing_extensions import Literal -from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config, RBV2Result +from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config @dataclass(frozen=True, eq=False, unsafe_hash=True) 
@@ -65,172 +64,10 @@ class RBV2SuperConfig(RBV2Config): xgboost__skip_drop: float | None = None # (0.0, 1.0) xgboost__subsample: float | None = None # (0.1, 1.0) - @no_type_check - def validate(self) -> None: # noqa: C901, PLR0915, PLR0912 - """Validate this config.""" - assert self.learner_id in [ - "aknn", - "glmnet", - "ranger", - "rpart", - "svm", - "xgboost", - ] - - assert self.num__impute__selected__cpo in [ - "impute.mean", - "impute.median", - "impute.hist", - ] - - # We do some conditional checking here - learner = self.learner_id - - # We filter out all attributes except for those that must always be contained - # or are the selected learner, ... - attrs = [ - attr - for attr in dir(self) - if not attr.startswith("__") - or not attr.startswith(learner) - or attr in ["learner_id", "num__impute__selected__cpo"] - ] - - # ... the remaining must always have None set then - for attr in attrs: - assert attr is None - - if learner == "aknn": - assert self.aknn__M is not None - assert self.aknn__ef is not None - assert self.aknn__ef_construction is not None - assert self.aknn__k is not None - assert 18 <= self.aknn__M <= 50 - assert self.aknn__distance in ["l2", "cosine", "ip"] - assert 7 <= self.aknn__ef <= 403 - assert 7 <= self.aknn__ef_construction <= 1097 - assert 1 <= self.aknn__k <= 50 - - elif learner == "glmnet": - assert self.glmnet__alpha is not None - assert self.glmnet__s is not None - assert 0.0 <= self.glmnet__alpha <= 1.0 - assert 0.0009118819655545162 <= self.glmnet__s <= 1096.6331584284585 - - elif learner == "rpart": - assert self.rpart__cp is not None - assert self.rpart__maxdepth is not None - assert self.rpart__minbucket is not None - assert self.rpart__minsplit is not None - assert 0.0009118819655545162 <= self.rpart__cp <= 1.0 - assert 1 <= self.rpart__maxdepth <= 30 - assert 1 <= self.rpart__minbucket <= 100 - assert 1 <= self.rpart__minsplit <= 100 - - elif learner == "ranger": - assert self.ranger__min__node__size is not None - assert self.ranger__mtry__power is not None - assert self.ranger__num__trees is not None - assert self.ranger__respect__unordered__factors is not None - assert self.ranger__sample__fraction is not None - assert 1 <= self.ranger__min__node__size <= 100 - assert 0 <= self.ranger__mtry__power <= 1 - assert 1 <= self.ranger__num__trees <= 2000 - assert self.ranger__respect__unordered__factors in [ - "ignore", - "order", - "partition", - ] - assert 0.1 <= self.ranger__sample__fraction <= 1.0 - assert self.ranger__splitrule in ["gini", "extratrees"] - - if self.ranger__num__random__splits is not None: - assert self.ranger__splitrule == "extratrees" - assert 1 <= self.ranger__num__random__splits <= 100 - - elif learner == "svm": - assert self.svm__cost is not None - assert self.svm__gamma is not None - assert self.svm__kernel is not None - assert self.svm__tolerance is not None - - assert 4.5399929762484854e-05 <= self.svm__cost <= 22026.465794806718 - assert 4.5399929762484854e-05 <= self.svm__gamma <= 22026.465794806718 - assert self.svm__kernel in ["linear", "polynomial", "radial"] - assert 4.5399929762484854e-05 <= self.svm__tolerance <= 2.0 - - if self.svm__degree is not None: - assert 2 <= self.svm__degree <= 5 - assert self.svm__kernel == "polynomial" - - if self.svm__gamma is not None: - assert 4.5399929762484854e-05 <= self.svm__gamma <= 22026.465794806718 - assert self.svm__kernel == "radial" - - elif learner == "xgboost": - assert self.xgboost__alpha is not None - assert self.xgboost__booster is not None - assert 
self.xgboost__lambda is not None
-            assert self.xgboost__nrounds is not None
-            assert self.xgboost__subsample is not None
-
-            assert self.xgboost__booster in ["gblinear", "gbtree", "dart"]
-            assert 0.0009118819655545162 <= self.xgboost__alpha <= 1096.6331584284585
-            assert 0.0009118819655545162 <= self.xgboost__lambda <= 1096.6331584284585
-            assert 7 <= self.xgboost__nrounds <= 2981
-            assert 0.1 <= self.xgboost__subsample <= 1.0
-
-            if self.xgboost__colsample_bylevel is not None:
-                assert self.xgboost__booster in ["dart", "gbtree"]
-                assert 0.01 <= self.xgboost__colsample_bylevel <= 1.0
-
-            if self.xgboost__colsample_bytree is not None:
-                assert self.xgboost__booster in ["dart", "gbtree"]
-                assert 0.01 <= self.xgboost__colsample_bytree <= 1.0
-
-            if self.xgboost__eta is not None:
-                assert self.xgboost__booster in ["dart", "gbtree"]
-                assert 0.0009118819655545162 <= self.xgboost__eta <= 1.0
-
-            if self.xgboost__gamma is not None:
-                assert self.xgboost__booster in ["dart", "gbtree"]
-                assert 4.5399929762484854e-05 <= self.xgboost__gamma <= 7.38905609893065
-
-            if self.xgboost__max_depth is not None:
-                assert self.xgboost__booster in ["dart", "gbtree"]
-                assert 1 <= self.xgboost__max_depth <= 15
-
-            if self.xgboost__min_child_weight is not None:
-                assert self.xgboost__booster in ["dart", "gbtree"]
-                assert (
-                    2.718281828459045
-                    <= self.xgboost__min_child_weight
-                    <= 148.4131591025766
-                )
-
-            if self.xgboost__rate_drop is not None:
-                assert self.xgboost__booster in ["dart"]
-                assert 0.0 <= self.xgboost__rate_drop <= 1.0
-
-            if self.xgboost__skip_drop is not None:
-                assert self.xgboost__booster in ["dart"]
-                assert 0.0 <= self.xgboost__skip_drop <= 1.0
-
-        else:
-            raise NotImplementedError()
-
-
-@dataclass(frozen=True)
-class RBV2SuperResult(RBV2Result):
-    config: RBV2SuperConfig
-
-
-class RBV2SuperBenchmark(RBV2Benchmark):
-    Result = RBV2SuperResult
-    Config = RBV2SuperConfig
-
-    has_conditionals = True
+class RBV2SuperBenchmark(RBV2Benchmark[RBV2SuperConfig]):
+    yahpo_config_type = RBV2SuperConfig
+    yahpo_has_conditionals = True
     yahpo_base_benchmark_name = "rbv2_super"
     yahpo_instances = (
         "41138",
diff --git a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_svm.py b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_svm.py
index 7c5801d..65865dd 100644
--- a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_svm.py
+++ b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_svm.py
@@ -1,10 +1,9 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import no_type_check
 from typing_extensions import Literal
 
-from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config, RBV2Result
+from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config
 
 
 @dataclass(frozen=True, eq=False, unsafe_hash=True)
@@ -17,39 +16,10 @@ class RBV2svmConfig(RBV2Config):
     tolerance: float  # (4.5399929762484854e-05, 2.0) log
     kernel: Literal["linear", "polynomial", "radial"] | None = None
 
-    @no_type_check
-    def validate(self) -> None:
-        """Validate this config."""
-        assert self.num__impute__selected__cpo in [
-            "impute.mean",
-            "impute.median",
-            "impute.hist",
-        ]
-
-        assert 4.5399929762484854e-05 <= self.cost <= 22026.465794806718
-        assert 4.5399929762484854e-05 <= self.gamma <= 22026.465794806718
-        assert self.kernel in ["linear", "polynomial", "radial"]
-        assert 4.5399929762484854e-05 <= self.tolerance <= 2.0
-
-        if self.degree is not None:
-            assert 2 <= self.degree <= 5
-            assert self.kernel == "polynomial"
-
-        if self.gamma is not None:
-            assert 4.5399929762484854e-05 <= self.gamma <= 22026.465794806718
- assert self.kernel == "radial" - - -@dataclass(frozen=True) -class RBV2svmResult(RBV2Result): - config: RBV2svmConfig - - -class RBV2svmBenchmark(RBV2Benchmark): - Result = RBV2svmResult - Config = RBV2svmConfig - has_conditionals = True +class RBV2svmBenchmark(RBV2Benchmark[RBV2svmConfig]): + yahpo_config_type = RBV2svmConfig + yahpo_has_conditionals = True yahpo_base_benchmark_name = "rbv2_svm" yahpo_instances = ( "41138", diff --git a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_xgboost.py b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_xgboost.py index dae6f3e..361e8c3 100644 --- a/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_xgboost.py +++ b/src/mfpbench/yahpo/benchmarks/rbv2/rbv2_xgboost.py @@ -1,10 +1,10 @@ from __future__ import annotations from dataclasses import dataclass -from typing import no_type_check +from typing import ClassVar, Mapping from typing_extensions import Literal -from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config, RBV2Result +from mfpbench.yahpo.benchmarks.rbv2.rbv2 import RBV2Benchmark, RBV2Config @dataclass(frozen=True, eq=False, unsafe_hash=True) @@ -27,66 +27,12 @@ class RBV2xgboostConfig(RBV2Config): rate_drop: float | None = None # (0.0, 1.0) skip_drop: float | None = None # (0.0, 1.0) - @no_type_check - def validate(self) -> None: - """Validate this config.""" - assert self.booster in ["gblinear", "gbtree", "dart"] - assert 0.0009118819655545162 <= self.alpha <= 1096.6331584284585 - assert 0.0009118819655545162 <= self._lambda <= 1096.6331584284585 - assert 7 <= self.nrounds <= 2981 - assert 0.1 <= self.subsample <= 1.0 - - if self.colsample_bylevel is not None: - assert self.booster in ["dart", "gbtree"] - assert 0.01 <= self.colsample_bylevel <= 1.0 - - if self.colsample_bytree is not None: - assert self.booster in ["dart", "gbtree"] - assert 0.01 <= self.colsample_bytree <= 1.0 - - if self.eta is not None: - assert self.booster in ["dart", "gbtree"] - assert 0.0009118819655545162 <= self.eta <= 1.0 - - if self.gamma is not None: - assert self.booster in ["dart", "gbtree"] - assert 4.5399929762484854e-05 <= self.gamma <= 7.38905609893065 - - if self.max_depth is not None: - assert self.booster in ["dart", "gbtree"] - assert 1 <= self.max_depth <= 15 - - if self.min_child_weight is not None: - assert self.booster in ["dart", "gbtree"] - assert 2.718281828459045 <= self.min_child_weight <= 148.4131591025766 - - if self.rate_drop is not None: - assert self.booster in ["dart"] - assert 0.0 <= self.rate_drop <= 1.0 - - if self.skip_drop is not None: - assert self.booster in ["dart"] - assert 0.0 <= self.skip_drop <= 1.0 - - assert self.num__impute__selected__cpo in [ - "impute.mean", - "impute.median", - "impute.hist", - ] - - -@dataclass(frozen=True) -class RBV2xgboostResult(RBV2Result): - config: RBV2xgboostConfig - - -class RBV2xgboostBenchmark(RBV2Benchmark): - Result = RBV2xgboostResult - Config = RBV2xgboostConfig - has_conditionals = True +class RBV2xgboostBenchmark(RBV2Benchmark[RBV2xgboostConfig]): + _config_renames: ClassVar[Mapping[str, str]] = {"lambda": "_lambda"} + yahpo_config_type = RBV2xgboostConfig + yahpo_has_conditionals = True yahpo_base_benchmark_name = "rbv2_xgboost" - yahpo_replacements_hps = (("_lambda", "lambda"),) yahpo_instances = ( "16", "40923", diff --git a/src/mfpbench/yahpo/config.py b/src/mfpbench/yahpo/config.py deleted file mode 100644 index 540e012..0000000 --- a/src/mfpbench/yahpo/config.py +++ /dev/null @@ -1,13 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import 
TypeVar - -from mfpbench.config import Config - -Self = TypeVar("Self", bound="YAHPOConfig") - - -@dataclass(frozen=True, eq=False, unsafe_hash=True) # type: ignore[misc] -class YAHPOConfig(Config): - ... diff --git a/src/mfpbench/yahpo/result.py b/src/mfpbench/yahpo/result.py deleted file mode 100644 index 96caea3..0000000 --- a/src/mfpbench/yahpo/result.py +++ /dev/null @@ -1,15 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import TypeVar - -from mfpbench.result import Result -from mfpbench.yahpo.config import YAHPOConfig - -C = TypeVar("C", bound=YAHPOConfig) -F = TypeVar("F", int, float) - - -@dataclass(frozen=True) # type: ignore[misc] -class YAHPOResult(Result[C, F]): - ... diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py index ebbb1c8..2a47ab5 100644 --- a/tests/test_benchmarks.py +++ b/tests/test_benchmarks.py @@ -3,8 +3,9 @@ from dataclasses import dataclass from itertools import product from pathlib import Path -from typing import Any +from typing import Any, ClassVar, Mapping +import numpy as np import pandas as pd import pytest from pytest_cases import ( @@ -18,9 +19,10 @@ import mfpbench from mfpbench import ( Benchmark, - GenericTabularBenchmark, - MFHartmannBenchmark, + Metric, + Result, TabularBenchmark, + TabularConfig, YAHPOBenchmark, ) from mfpbench.setup_benchmark import download_status @@ -85,7 +87,7 @@ def case_pd1() -> BenchmarkTest: download_status("lcbench-tabular") is False, reason="lcbench-tabular is not downloaded", ) -@case +@case(tags="tabular") def case_lcbench_tabular() -> BenchmarkTest: return BenchmarkTest("lcbench_tabular", kwargs={"task_id": "adult"}) @@ -95,7 +97,7 @@ def case_mfh() -> BenchmarkTest: return BenchmarkTest("mfh3_good", prior="good") -@case(tags="generic_tabular") +@case(tags="tabular") def case_generic_tabular() -> BenchmarkTest: ids = "abcdefghijklmnopqrstuvwxyz" colors = ["red", "green", "blue"] @@ -108,12 +110,12 @@ def case_generic_tabular() -> BenchmarkTest: pd.DataFrame( [ { - "config": k, + "id": k, "color": c, "shape": s, "animal": a, "number": n, - "float": f, + "ffloat": f, "balanced_accuracy": v, "fidelity": fid, } @@ -123,23 +125,35 @@ def case_generic_tabular() -> BenchmarkTest: for k, (c, s, a, n, f) in zip(ids, config_values) ] df = pd.concat(values, ignore_index=True) - benchmark = GenericTabularBenchmark( - df, + + @dataclass(frozen=True) + class MyResult(Result): + default_value_metric: ClassVar[str] = "balanced_accuracy" + default_cost_metric: ClassVar[str] = "ffloat" + metric_defs: ClassVar[Mapping[str, Metric]] = { + "balanced_accuracy": Metric(minimize=False, bounds=(0, 1)), + "ffloat": Metric(minimize=True, bounds=(0, np.inf)), + } + + balanced_accuracy: Metric.Value + ffloat: Metric.Value + + @dataclass(frozen=True, eq=False, unsafe_hash=True) + class MyConfig(TabularConfig): + id: str | None + color: str + shape: str + animal: str + number: int + + benchmark = TabularBenchmark( name="testdata", - id_key="config", + table=df, + id_key="id", fidelity_key="fidelity", - config_keys=["color", "shape"], - result_keys=["balanced_accuracy"], - result_mapping={ - "error": lambda df: 1 - df["balanced_accuracy"], - "val_error": lambda df: 1 - df["balanced_accuracy"], - "test_error": lambda df: 1 - df["balanced_accuracy"], - "score": lambda df: df["balanced_accuracy"], - "val_score": lambda df: df["balanced_accuracy"], - "test_score": lambda df: df["balanced_accuracy"], - "cost": lambda df: df["float"], - }, - remove_constants=True, + config_type=MyConfig, + 
result_type=MyResult, + seed=1, ) return BenchmarkTest(benchmark.name, benchmark=benchmark) @@ -159,19 +173,18 @@ def benchmark(item: BenchmarkTest) -> Benchmark: @parametrize("n_samples", [1, 2, 3]) -def test_benchmark_sampling(benchmark: Benchmark, n_samples: int) -> None: +def test_benchmark_sampling( + benchmark: Benchmark, + n_samples: int, +) -> None: config = benchmark.sample() assert isinstance(config, benchmark.Config) - config.validate() configs = benchmark.sample(n_samples) assert len(configs) == n_samples for config in configs: assert isinstance(config, benchmark.Config) - for config in configs: - config.validate() - def test_query_api_validity(benchmark: Benchmark) -> None: sample = benchmark.sample() @@ -179,7 +192,7 @@ def test_query_api_validity(benchmark: Benchmark) -> None: assert result.config == sample - sample_dict = sample.dict() + sample_dict = sample.as_dict() result = benchmark.query(sample_dict) assert result.config == sample_dict @@ -188,20 +201,14 @@ def test_result_api_validity(benchmark: Benchmark) -> None: sample = benchmark.sample() result = benchmark.query(sample) - # MFHartmanns don't have scores - if not isinstance(benchmark, MFHartmannBenchmark): - assert result.score is not None - assert result.test_score is not None - assert result.val_score is not None - assert result.error is not None - assert result.test_error is not None - assert result.val_error is not None assert result.fidelity is not None assert result.cost is not None -def test_query_through_entire_fidelity_range(benchmark: Benchmark) -> None: +def test_query_through_entire_fidelity_range( + benchmark: Benchmark, +) -> None: config = benchmark.sample() results = [benchmark.query(config, at=x) for x in benchmark.iter_fidelities()] @@ -221,6 +228,50 @@ def test_repeated_query(benchmark: Benchmark) -> None: assert r1 == r2, f"{r1}\n{r2}" +def test_metric_optimums(benchmark: Benchmark) -> None: + configs = benchmark.sample(20) + + for config in configs: + result = benchmark.query(config, at=benchmark.end) + for k in benchmark.Result.metric_defs: + assert result[k].score <= benchmark.metric_optimums[k].score + assert result[k].error >= benchmark.metric_optimums[k].error + + +@parametrize_with_cases("item", cases=case_generic_tabular) +def test_table_optimums(item: BenchmarkTest) -> None: + bench: TabularBenchmark = item.benchmark # type: ignore + assert bench is not None + table = bench.table + for k, metric in bench.metrics.items(): + values = [metric.as_value(v) for v in table[k]] + scores = np.array([v.score for v in values]) + errors = np.array([v.error for v in values]) + optimum_score = bench.metric_optimums[k].score + optimum_error = bench.metric_optimums[k].error + assert np.all(scores <= optimum_score) + assert np.all(errors >= optimum_error) + + +def test_with_different_value_metric( + benchmark: Benchmark, +) -> None: + result_type = benchmark.Result + + value_choices = list(result_type.metric_defs.keys()) + cost_choices = list(result_type.metric_defs.keys()) + + for value_metric, cost_metric in product(value_choices, cost_choices): + config = benchmark.sample() + result = benchmark.query( + config, + value_metric=value_metric, + cost_metric=cost_metric, + ) + assert result.value_metric == value_metric + assert result.cost_metric == cost_metric + + def test_repeated_trajectory(benchmark: Benchmark) -> None: configs = benchmark.sample(10) @@ -231,7 +282,9 @@ def test_repeated_trajectory(benchmark: Benchmark) -> None: assert r1 == r2, f"{r1}\n{r2}" -def 
test_query_default_is_max_fidelity(benchmark: Benchmark) -> None: +def test_query_default_is_max_fidelity( + benchmark: Benchmark, +) -> None: config = benchmark.sample() r1 = benchmark.query(config, at=benchmark.end) r2 = benchmark.query(config) @@ -239,7 +292,9 @@ def test_query_default_is_max_fidelity(benchmark: Benchmark) -> None: assert r1 == r2 -def test_query_same_as_trajectory(benchmark: Benchmark) -> None: +def test_query_same_as_trajectory( + benchmark: Benchmark, +) -> None: config = benchmark.sample() if isinstance(benchmark, YAHPOBenchmark): pytest.skip( @@ -254,7 +309,9 @@ def test_query_same_as_trajectory(benchmark: Benchmark) -> None: assert qr == tr, f"{qr}\n{tr}" -def test_trajectory_is_over_full_range_by_default(benchmark: Benchmark) -> None: +def test_trajectory_is_over_full_range_by_default( + benchmark: Benchmark, +) -> None: config = benchmark.sample() results = benchmark.trajectory(config) @@ -262,14 +319,18 @@ def test_trajectory_is_over_full_range_by_default(benchmark: Benchmark) -> None: assert r.fidelity == fidelity -def test_configs_hashable_and_unique(benchmark: Benchmark) -> None: +def test_configs_hashable_and_unique( + benchmark: Benchmark, +) -> None: configs = benchmark.sample(10) s = set(configs) assert len(s) == len(configs) -def test_results_hashable_and_unique(benchmark: Benchmark) -> None: +def test_results_hashable_and_unique( + benchmark: Benchmark, +) -> None: configs = benchmark.sample(10) results = [benchmark.query(c) for c in configs] @@ -277,27 +338,16 @@ def test_results_hashable_and_unique(benchmark: Benchmark) -> None: assert len(s) == len(results) -def test_argmin_query(benchmark: Benchmark) -> None: - # Get a random configuration - random_config = benchmark.sample() - - # Get the argmax - argmin_config = benchmark.query(random_config, argmin="error") - - # Get the trajectory - trajectory = benchmark.trajectory(random_config) - best_in_trajectory = min(trajectory, key=lambda x: x.error) - - assert argmin_config == best_in_trajectory - - -def test_config_with_same_content_hashes_correctly(benchmark: Benchmark) -> None: +def test_config_with_same_content_hashes_correctly( + benchmark: Benchmark, +) -> None: config = benchmark.sample() if isinstance(benchmark, TabularBenchmark): - config_dict = config.dict(with_id=True) + assert isinstance(config, TabularConfig) + config_dict = config.as_dict(with_id=True) else: - config_dict = config.dict() + config_dict = config.as_dict() # Turn it into a dict and back again new_config = benchmark.Config.from_dict(config_dict) @@ -305,7 +355,9 @@ def test_config_with_same_content_hashes_correctly(benchmark: Benchmark) -> None assert hash(config) == hash(new_config) -def test_result_with_same_content_hashes_correctly(benchmark: Benchmark) -> None: +def test_result_with_same_content_hashes_correctly( + benchmark: Benchmark, +) -> None: config = benchmark.sample() result = benchmark.query(config) @@ -313,7 +365,7 @@ def test_result_with_same_content_hashes_correctly(benchmark: Benchmark) -> None new_result = benchmark.Result.from_dict( config=config, fidelity=result.fidelity, - result=result.dict(), + result=result.as_dict(), ) assert hash(result) == hash(new_result) @@ -324,18 +376,21 @@ def test_result_same_value_but_different_fidelity_has_different_hash( ) -> None: config = benchmark.sample() result = benchmark.query(config) + result_dict = result.as_dict() # Turn it into a dict and back again new_result = benchmark.Result.from_dict( config=config, fidelity=result.fidelity - 1, - result=result.dict(), + 
result=result_dict, + value_metric=result.value_metric, + cost_metric=result.cost_metric, ) assert hash(result) != hash(new_result) -@parametrize_with_cases("item", cases=".", has_tag=~ft.has_tag("generic_tabular")) +@parametrize_with_cases("item", cases=".", has_tag=~ft.has_tag("tabular")) def test_prior_from_yaml_file(item: BenchmarkTest, tmp_path: Path) -> None: params = item.unpack() bench = mfpbench.get(**params) @@ -355,7 +410,7 @@ def test_prior_from_yaml_file(item: BenchmarkTest, tmp_path: Path) -> None: assert default == random_config, f"{random_config}, {default}" -@parametrize_with_cases("item", cases=".", has_tag=~ft.has_tag("generic_tabular")) +@parametrize_with_cases("item", cases=".", has_tag=~ft.has_tag("tabular")) def test_prior_from_json_file(item: BenchmarkTest, tmp_path: Path) -> None: params = item.unpack() bench = mfpbench.get(**params) @@ -375,7 +430,7 @@ def test_prior_from_json_file(item: BenchmarkTest, tmp_path: Path) -> None: assert default == random_config, f"{random_config}, {default}" -@parametrize_with_cases("item", cases=".", has_tag=~ft.has_tag("generic_tabular")) +@parametrize_with_cases("item", cases=".", has_tag=~ft.has_tag("tabular")) def test_prior_from_config(item: BenchmarkTest) -> None: params = item.unpack() bench = mfpbench.get(**params) @@ -392,7 +447,7 @@ def test_prior_from_config(item: BenchmarkTest) -> None: assert default == random_config, f"{random_config}, {default}" -@parametrize_with_cases("item", cases=".", has_tag=~ft.has_tag("generic_tabular")) +@parametrize_with_cases("item", cases=".", has_tag=~ft.has_tag("tabular")) def test_prior_from_dict(item: BenchmarkTest) -> None: params = item.unpack() bench = mfpbench.get(**params) @@ -400,7 +455,7 @@ def test_prior_from_dict(item: BenchmarkTest) -> None: # Get a random config random_config = bench.sample() # Use the path of the saved config as the prior config - prior_config = random_config.dict() + prior_config = random_config.as_dict() params["prior"] = prior_config @@ -412,3 +467,37 @@ def test_prior_from_dict(item: BenchmarkTest) -> None: # The default configuration for the benchmark should be the same as the prior default = bench.space.get_default_configuration() assert default == random_config, f"{random_config}, {default}" + + +@pytest.mark.skipif( + download_status("lcbench-tabular") is False, + reason="lcbench-tabular is not downloaded", +) +def explicit_test_with_different_value_metric() -> None: + lcbench_tabular_1 = mfpbench.get( + "lcbench_tabular", + task_id="adult", + cost_metric="time", + value_metric="val_accuracy", + ) + lcbench_tabular_2 = mfpbench.get( + "lcbench_tabular", + task_id="adult", + cost_metric="time", + value_metric="val_balanced_accuracy", + ) + + config_1 = lcbench_tabular_1.sample() + config_2 = lcbench_tabular_2.sample() + + result_1 = lcbench_tabular_1.query(config_1) + result_2 = lcbench_tabular_2.query(config_2) + + assert result_1.value_metric == "val_accuracy" + assert result_2.value_metric == "val_balanced_accuracy" + + assert result_1.error != result_2.error + assert result_1.score != result_2.score + + # Same cost metric, only has one + assert result_1.cost == result_2.cost diff --git a/tests/test_hartmann.py b/tests/test_hartmann.py index 2ab16ee..f20de93 100644 --- a/tests/test_hartmann.py +++ b/tests/test_hartmann.py @@ -56,7 +56,7 @@ def test_hartmann_priors_with_and_without_noise_added( assert isinstance(bench_no_noise._prior_arg, str) # All values different - for v1, v2 in zip(clean_prior.dict().values(), noisy_prior.dict().values()): + for 
v1, v2 in zip(clean_prior.as_dict().values(), noisy_prior.as_dict().values()): assert v1 != v2 # configspace seeded with these priors @@ -79,8 +79,7 @@ def test_hartmann_priors_noise_in_bounds( config = bench.prior assert config is not None - config.validate() - for x in config.dict().values(): + for x in config.as_dict().values(): assert 0 <= x <= 1 diff --git a/tests/test_metric.py b/tests/test_metric.py new file mode 100644 index 0000000..cdcd43f --- /dev/null +++ b/tests/test_metric.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +import numpy as np +import pytest +from pytest_cases import case, parametrize_with_cases + +from mfpbench.metric import Metric, OutOfBoundsError + +# NOTE: Each case returns the Metric, the value to use and a tuple of (score, error) + + +# MINIMIZE +@case +def case_metric_minimize_unbounded() -> tuple[Metric, float, tuple[float, float]]: + metric = Metric(minimize=True) + return metric, 0.5, (-0.5, 0.5) + + +@case +def case_metric_minimize_lower_bounded() -> tuple[Metric, float, tuple[float, float]]: + metric = Metric(minimize=True, bounds=(-1, np.inf)) + return metric, 0.5, (-0.5, 0.5) + + +@case +def case_metric_minimize_upper_bounded() -> tuple[Metric, float, tuple[float, float]]: + metric = Metric(minimize=True, bounds=(-np.inf, 1)) + return metric, 0.5, (-0.5, 0.5) + + +@case +def case_metric_minimize_bounded() -> tuple[Metric, float, tuple[float, float]]: + metric = Metric(minimize=True, bounds=(-1, 1)) + return metric, 0.5, (0.25, 0.75) + + +# MAXIMIZE +@case +def case_metric_maximize_unbounded() -> tuple[Metric, float, tuple[float, float]]: + metric = Metric(minimize=False) + return metric, 0.5, (0.5, -0.5) + + +@case +def case_metric_maximize_lower_bounded() -> tuple[Metric, float, tuple[float, float]]: + metric = Metric(minimize=False, bounds=(-1, np.inf)) + return metric, 0.5, (0.5, -0.5) + + +@case +def case_metric_maximize_upper_bounded() -> tuple[Metric, float, tuple[float, float]]: + metric = Metric(minimize=False, bounds=(-np.inf, 1)) + return (metric, 0.25, (0.25, -0.25)) + + +@case +def case_metric_maximize_bounded() -> tuple[Metric, float, tuple[float, float]]: + metric = Metric(minimize=False, bounds=(-1, 1)) + return (metric, 0.5, (0.75, 0.25)) + + +@parametrize_with_cases("metric, value, expected", cases=".") +def test_metric_error( + metric: Metric, + value: float, + expected: tuple[float, float], +) -> None: + _, error = expected + assert metric.as_value(value).error == error + + +@parametrize_with_cases("metric, value, expected", cases=".") +def test_metric_score( + metric: Metric, + value: float, + expected: tuple[float, float], +) -> None: + score, _ = expected + assert metric.as_value(value).score == score + + +@parametrize_with_cases("metric, value, expected", cases=".") +def test_metric_value( + metric: Metric, + value: float, + expected: tuple[float, float], # noqa: ARG001 +) -> None: + assert metric.as_value(value).value == value + + +def test_metric_complains_if_out_of_bounds() -> None: + metric = Metric(minimize=True, bounds=(-1, 1)) + with pytest.raises(OutOfBoundsError): + metric.as_value(-2) + with pytest.raises(OutOfBoundsError): + metric.as_value(2)
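
The expected `(score, error)` pairs in `tests/test_metric.py` pin down a simple convention: fully bounded metrics are normalised into `[0, 1]` against the better bound, while half-bounded or unbounded metrics fall back to the raw value, with the sign flipped for whichever of score or error points the "wrong" way. A small standalone sketch of that convention, consistent with the cases above but not the actual `mfpbench.metric` implementation:

```python
import numpy as np


def score_and_error(
    value: float,
    *,
    minimize: bool,
    bounds: tuple[float, float] = (-np.inf, np.inf),
) -> tuple[float, float]:
    """Reproduce the (score, error) pairs expected in tests/test_metric.py."""
    lower, upper = bounds
    if np.isfinite(lower) and np.isfinite(upper):
        # Fully bounded: normalise into [0, 1]; error is the distance from the best bound.
        normed = (value - lower) / (upper - lower)
        error = normed if minimize else 1.0 - normed
        score = 1.0 - error
    else:
        # Half-bounded or unbounded: use the raw value, negated for the opposite view.
        error = value if minimize else -value
        score = -error
    return score, error


# Matches case_metric_minimize_bounded and case_metric_maximize_unbounded above.
assert score_and_error(0.5, minimize=True, bounds=(-1, 1)) == (0.25, 0.75)
assert score_and_error(0.5, minimize=False) == (0.5, -0.5)
```
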