# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

from collections.abc import Iterable, Mapping
from dataclasses import dataclass
from math import sqrt
from typing import Any

import torch
from ax.benchmark.benchmark_test_function import BenchmarkTestFunction
from ax.core.base_trial import BaseTrial, TrialStatus
from ax.core.batch_trial import BatchTrial
from ax.core.runner import Runner
from ax.core.trial import Trial
from ax.core.types import TParamValue
from ax.exceptions.core import UnsupportedError
from ax.utils.common.serialization import TClassDecoderRegistry, TDecoderRegistry
from ax.utils.common.typeutils import checked_cast
from torch import Tensor


@dataclass(kw_only=True)
class BenchmarkRunner(Runner):
"""
A Runner that produces both observed and ground-truth values.
Observed values equal ground-truth values plus noise, with the noise added
according to the standard deviations returned by `get_noise_stds()`.
This runner does require that every benchmark has a ground truth, which
won't necessarily be true for real-world problems. Such problems fall into
two categories:
- If they are deterministic, they can be used with this runner by
viewing them as noiseless problems where the observed values are the
ground truth. The observed values will be used for tracking the
progress of optimization.
- If they are not deterministc, they are not supported. It is not
conceptually clear how to benchmark such problems, so we decided to
not over-engineer for that before such a use case arrives.
Args:
test_function: A ``BenchmarkTestFunction`` from which to generate
deterministic data before adding noise.
noise_std: The standard deviation of the noise added to the data. Can be
a list or dict to be per-metric.
search_space: Used to extract target fidelity and task.
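
    Example:
        An illustrative sketch; ``MyTestFunction`` stands in for a concrete
        ``BenchmarkTestFunction`` subclass (it is not part of this module)
        whose outcomes are "objective" and "constraint"::

            runner = BenchmarkRunner(
                test_function=MyTestFunction(),
                noise_std={"objective": 0.1, "constraint": 0.0},
            )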
"""
test_function: BenchmarkTestFunction
noise_std: float | list[float] | dict[str, float] = 0.0
@property
def outcome_names(self) -> list[str]:
"""The names of the outcomes."""
return self.test_function.outcome_names
    def get_Y_true(self, params: Mapping[str, TParamValue]) -> Tensor:
        """Evaluates the test problem.

        Returns:
            An `m`-dim tensor of ground truth (noiseless) evaluations.
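
        Example:
            An illustrative sketch; the parameter names and returned values
            are hypothetical and depend on the attached ``test_function``::

                y = runner.get_Y_true({"x1": 0.5, "x2": 1.0})
                # e.g. tensor([0.1234, 2.3456]) for a two-outcome problem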
"""
return torch.atleast_1d(self.test_function.evaluate_true(params=params))
    def get_noise_stds(self) -> dict[str, float]:
        """Expand ``noise_std`` into a dict mapping each outcome name to its
        noise standard deviation.
        """
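        # Illustrative expansion (hypothetical outcome names): with
        # outcome_names == ["f", "c"], noise_std == 0.1 yields
        # {"f": 0.1, "c": 0.1} and noise_std == [0.1, 0.0] yields
        # {"f": 0.1, "c": 0.0}; a dict is validated and returned as-is.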
        noise_std = self.noise_std
        if isinstance(noise_std, float):
            return {name: noise_std for name in self.outcome_names}
        elif isinstance(noise_std, dict):
            if set(noise_std.keys()) != set(self.outcome_names):
                raise ValueError(
                    "Noise std must have keys equal to outcome names if given as "
                    "a dict."
                )
            return noise_std
        # list of floats
        return dict(zip(self.outcome_names, noise_std, strict=True))
    def run(self, trial: BaseTrial) -> dict[str, Any]:
        """Run the trial by evaluating its parameterization(s).

        Args:
            trial: The trial to evaluate.

        Returns:
            A dictionary with the following keys:

            - "Ys": A dict mapping arm names to lists of corresponding
              outcomes, where the order of the outcomes is the same as in
              `outcome_names`.
            - "Ystds": A dict mapping arm names to lists of corresponding
              outcome noise standard deviations (possibly NaN if the noise
              level is unobserved), where the order of the outcomes is the
              same as in `outcome_names`.
            - "outcome_names": A list of metric names.
"""
Ys, Ystds = {}, {}
noise_stds = self.get_noise_stds()
noiseless = all(v == 0 for v in noise_stds.values())
if not noiseless:
# extract arm weights to adjust noise levels accordingly
if isinstance(trial, BatchTrial):
# normalize arm weights (we assume that the noise level is defined)
# w.r.t. to a single arm allocated all of the sample budget
nlzd_arm_weights = {
arm: weight / sum(trial.arm_weights.values())
for arm, weight in trial.arm_weights.items()
}
else:
nlzd_arm_weights = {checked_cast(Trial, trial).arm: 1.0}
# generate a tensor of noise levels that we'll reuse below
noise_stds_tsr = torch.tensor(
[noise_stds[metric_name] for metric_name in self.outcome_names],
dtype=torch.double,
)
        for arm in trial.arms:
            # Case where we do have a ground truth
            Y_true = self.get_Y_true(arm.parameters)
            if noiseless:
                # No noise, so just return the true outcome.
                Ystds[arm.name] = [0.0] * len(Y_true)
                Ys[arm.name] = Y_true.tolist()
            else:
                # We can scale the noise std by the inverse of the relative sample
                # budget allocation to each arm. This works b/c (i) we assume that
                # observations per unit sample budget are i.i.d. and (ii) the
                # normalized weights sum to one.
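                # Worked example (illustrative): in a BatchTrial with two arms
                # of weights 3 and 1, the normalized weights are 0.75 and 0.25,
                # so their noise stds are scaled by 1 / sqrt(0.75) ~= 1.15 and
                # 1 / sqrt(0.25) = 2.0, respectively.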
                # pyre-fixme[61]: `nlzd_arm_weights` is undefined, or not always
                # defined.
                std = noise_stds_tsr.to(Y_true) / sqrt(nlzd_arm_weights[arm])
                Ystds[arm.name] = std.tolist()
                Ys[arm.name] = (Y_true + std * torch.randn_like(Y_true)).tolist()

        run_metadata = {
            "Ys": Ys,
            "Ystds": Ystds,
            "outcome_names": self.outcome_names,
        }
        return run_metadata

    # This will need to be updated once asynchronous benchmarks are supported.
    def poll_trial_status(
        self, trials: Iterable[BaseTrial]
    ) -> dict[TrialStatus, set[int]]:
        return {TrialStatus.COMPLETED: {t.index for t in trials}}
    @classmethod
    # pyre-fixme [2]: Parameter `obj` must have a type other than `Any`.
    def serialize_init_args(cls, obj: Any) -> dict[str, Any]:
        """
        It is tricky to use SerializationMixin with instances that have Ax
        objects as attributes, as BenchmarkRunners do. Therefore, serialization
        is not supported.
        """
        raise UnsupportedError(
            "serialize_init_args is not a supported method for BenchmarkRunners."
        )
    @classmethod
    def deserialize_init_args(
        cls,
        args: dict[str, Any],
        decoder_registry: TDecoderRegistry | None = None,
        class_decoder_registry: TClassDecoderRegistry | None = None,
    ) -> dict[str, Any]:
        """
        It is tricky to use SerializationMixin with instances that have Ax
        objects as attributes, as BenchmarkRunners do. Therefore, serialization
        is not supported.
        """
        raise UnsupportedError(
            "deserialize_init_args is not a supported method for BenchmarkRunners."
        )