Source code for ax.benchmark.problems.surrogate

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple

import pandas as pd
import torch
from ax.benchmark.benchmark_problem import BenchmarkProblemBase
from ax.core.base_trial import BaseTrial, TrialStatus
from ax.core.data import Data
from ax.core.metric import Metric, MetricFetchE, MetricFetchResult
from ax.core.observation import ObservationFeatures
from ax.core.optimization_config import (
    MultiObjectiveOptimizationConfig,
    OptimizationConfig,
)
from ax.core.parameter import RangeParameter
from ax.core.runner import Runner
from ax.core.search_space import SearchSpace
from ax.core.types import TParameterization
from ax.modelbridge.transforms.int_to_float import IntToFloat
from ax.modelbridge.transforms.log import Log
from ax.models.torch.botorch_modular.surrogate import Surrogate

from ax.utils.common.base import Base
from ax.utils.common.equality import equality_typechecker
from ax.utils.common.result import Err, Ok
from ax.utils.common.serialization import TClassDecoderRegistry, TDecoderRegistry
from ax.utils.common.typeutils import not_none
from botorch.utils.datasets import SupervisedDataset


class SurrogateBenchmarkProblemBase(Base, BenchmarkProblemBase):
    """
    Base class for SOOSurrogateBenchmarkProblem and MOOSurrogateBenchmarkProblem.

    Allows for lazy creation of objects needed to construct a `runner`,
    including a surrogate and datasets.
    """

    def __init__(
        self,
        *,
        name: str,
        search_space: SearchSpace,
        optimization_config: OptimizationConfig,
        num_trials: int,
        infer_noise: bool,
        metric_names: List[str],
        get_surrogate_and_datasets: Optional[
            Callable[[], Tuple[Surrogate, List[SupervisedDataset]]]
        ] = None,
        tracking_metrics: Optional[List[Metric]] = None,
        _runner: Optional[Runner] = None,
    ) -> None:
        """Store the problem definition; the runner may be built later.

        Args:
            name: Name of the problem. Also used as the name of the
                `SurrogateRunner` built by `set_runner` and as the sole
                criterion for equality between problems.
            search_space: Search space of the problem; forwarded to the
                runner built by `set_runner`.
            optimization_config: Optimization config of the problem.
            num_trials: Stored as-is (presumably the number of trials a
                benchmark should run -- confirm against the caller).
            infer_noise: Stored as-is (presumably whether the noise level
                is to be inferred rather than treated as known -- confirm
                against the caller).
            metric_names: Names of the metrics; forwarded to the runner
                built by `set_runner`.
            get_surrogate_and_datasets: Zero-argument callable returning a
                `(surrogate, datasets)` tuple. It is only invoked by
                `set_runner`, so its cost is deferred until then.
            tracking_metrics: Optional tracking metrics; `None` is stored
                as an empty list.
            _runner: Optional pre-built runner. When given, the `runner`
                property returns it without calling
                `get_surrogate_and_datasets`.

        Raises:
            ValueError: If both `get_surrogate_and_datasets` and `_runner`
                are `None`, since no runner could ever be produced.
        """
        if get_surrogate_and_datasets is None and _runner is None:
            raise ValueError(
                "Either `get_surrogate_and_datasets` or `_runner` required."
            )
        self.name = name
        self.search_space = search_space
        self.optimization_config = optimization_config
        self.num_trials = num_trials
        self.infer_noise = infer_noise
        self.metric_names = metric_names
        self.get_surrogate_and_datasets = get_surrogate_and_datasets
        self.tracking_metrics: List[Metric] = (
            [] if tracking_metrics is None else tracking_metrics
        )
        self._runner = _runner

    @equality_typechecker
    def __eq__(self, other: Base) -> bool:
        """Return True iff `other` has exactly this type and the same name."""
        if type(other) is not type(self):
            return False

        # Checking the whole datasets' equality here would be too expensive to be
        # worth it; just check names instead
        return self.name == other.name

    def set_runner(self) -> None:
        """Build a `SurrogateRunner` and store it on `self._runner`.

        Calls `get_surrogate_and_datasets`, so this is where the deferred
        construction of the surrogate and datasets actually happens. Any
        previously stored runner is replaced. `not_none` guards against
        `get_surrogate_and_datasets` being `None`.
        """
        surrogate, datasets = not_none(self.get_surrogate_and_datasets)()
        self._runner = SurrogateRunner(
            name=self.name,
            surrogate=surrogate,
            datasets=datasets,
            search_space=self.search_space,
            metric_names=self.metric_names,
        )

    @property
    def runner(self) -> Runner:
        """The problem's runner, constructed on first access if necessary."""
        if self._runner is None:
            self.set_runner()
        return not_none(self._runner)

    def __repr__(self) -> str:
        """
        Return a string representation that includes only the attributes that
        print nicely and contain information likely to be useful.
        """
        return (
            f"{self.__class__.__name__}("
            f"name={self.name}, "
            f"optimization_config={self.optimization_config}, "
            f"num_trials={self.num_trials}, "
            f"infer_noise={self.infer_noise}, "
            f"tracking_metrics={self.tracking_metrics})"
        )


class SOOSurrogateBenchmarkProblem(SurrogateBenchmarkProblemBase):
    """
    Has the same attributes/properties as a `SingleObjectiveBenchmarkProblem`,
    but allows for constructing from a surrogate.
    """

    def __init__(
        self,
        *,
        name: str,
        search_space: SearchSpace,
        optimization_config: OptimizationConfig,
        num_trials: int,
        infer_noise: bool,
        optimal_value: float,
        metric_names: List[str],
        get_surrogate_and_datasets: Optional[
            Callable[[], Tuple[Surrogate, List[SupervisedDataset]]]
        ] = None,
        tracking_metrics: Optional[List[Metric]] = None,
        _runner: Optional[Runner] = None,
    ) -> None:
        """Initialize the shared base state and record `optimal_value`.

        Args:
            optimal_value: Stored on the instance as `optimal_value`
                (presumably the best attainable objective value -- confirm
                against the benchmark code that consumes it).

        All other arguments are forwarded unchanged to
        `SurrogateBenchmarkProblemBase.__init__`, which also validates that
        at least one of `get_surrogate_and_datasets` and `_runner` is given.
        """
        # The base initializer already stores `optimization_config`, so it is
        # not assigned a second time here.
        super().__init__(
            name=name,
            search_space=search_space,
            optimization_config=optimization_config,
            num_trials=num_trials,
            infer_noise=infer_noise,
            metric_names=metric_names,
            get_surrogate_and_datasets=get_surrogate_and_datasets,
            tracking_metrics=tracking_metrics,
            _runner=_runner,
        )
        self.optimal_value = optimal_value


class MOOSurrogateBenchmarkProblem(SurrogateBenchmarkProblemBase):
    """
    Has the same attributes/properties as a `MultiObjectiveBenchmarkProblem`,
    but its runner is not constructed until needed, to allow for deferring
    constructing the surrogate.

    Simple aspects of the problem such as its search space are defined
    immediately, while the surrogate is only defined when the `runner`
    property is first accessed (unless a `_runner` was passed in), in order
    to avoid expensive operations like downloading files and fitting a model.
    """

    # Narrows the type of the attribute assigned by the base initializer.
    optimization_config: MultiObjectiveOptimizationConfig

    def __init__(
        self,
        *,
        name: str,
        search_space: SearchSpace,
        optimization_config: MultiObjectiveOptimizationConfig,
        num_trials: int,
        infer_noise: bool,
        maximum_hypervolume: float,
        reference_point: List[float],
        metric_names: List[str],
        get_surrogate_and_datasets: Optional[
            Callable[[], Tuple[Surrogate, List[SupervisedDataset]]]
        ] = None,
        tracking_metrics: Optional[List[Metric]] = None,
        _runner: Optional[Runner] = None,
    ) -> None:
        """Initialize the shared base state plus the multi-objective extras.

        Args:
            maximum_hypervolume: Stored on the instance and exposed through
                the `optimal_value` property.
            reference_point: Stored on the instance as `reference_point`
                (presumably one entry per objective -- confirm against the
                caller).

        All other arguments are forwarded unchanged to
        `SurrogateBenchmarkProblemBase.__init__`.
        """
        super().__init__(
            name=name,
            search_space=search_space,
            optimization_config=optimization_config,
            num_trials=num_trials,
            infer_noise=infer_noise,
            metric_names=metric_names,
            get_surrogate_and_datasets=get_surrogate_and_datasets,
            tracking_metrics=tracking_metrics,
            _runner=_runner,
        )
        self.reference_point = reference_point
        self.maximum_hypervolume = maximum_hypervolume

    @property
    def optimal_value(self) -> float:
        """Alias for `maximum_hypervolume`."""
        return self.maximum_hypervolume


class SurrogateMetric(Metric):
    """Metric whose values are read from a trial's `run_metadata`.

    `fetch_trial_data` expects `trial.run_metadata[<metric name>]` to map
    each arm name to a value; `SurrogateRunner.run` in this module returns
    a dictionary of that shape.
    """

    def __init__(
        self, name: str, lower_is_better: bool, infer_noise: bool = True
    ) -> None:
        """
        Args:
            name: Name of the metric; also the key looked up in the trial's
                `run_metadata`.
            lower_is_better: Forwarded to `Metric.__init__`.
            infer_noise: If True, the "sem" column of fetched data is
                `None`; otherwise it is 0.
        """
        super().__init__(name=name, lower_is_better=lower_is_better)
        self.infer_noise = infer_noise

    def fetch_trial_data(self, trial: BaseTrial, **kwargs: Any) -> MetricFetchResult:
        """Build a `Data` object from the values stored in `trial.run_metadata`.

        Args:
            trial: Trial whose arms and run metadata are read.
            **kwargs: Accepted but not used here.

        Returns:
            `Ok` wrapping a `Data` with one row per arm (columns "arm_name",
            "metric_name", "mean", "sem", "trial_index"), or `Err` wrapping
            a `MetricFetchE` if anything raised while building it.
        """
        try:
            # Read the arm names once so the "arm_name" and "mean" columns are
            # guaranteed to be aligned.
            arm_names = list(trial.arms_by_name)
            df = pd.DataFrame(
                {
                    "arm_name": arm_names,
                    "metric_name": self.name,
                    "mean": [
                        trial.run_metadata[self.name][arm_name]
                        for arm_name in arm_names
                    ],
                    "sem": None if self.infer_noise else 0,
                    "trial_index": trial.index,
                }
            )

            return Ok(value=Data(df=df))

        # Deliberately broad: any failure is reported as an `Err` result
        # rather than propagated to the caller.
        except Exception as e:
            return Err(
                MetricFetchE(
                    message=f"Failed to predict for trial {trial}", exception=e
                )
            )


class SurrogateRunner(Runner):
    """Runner that "evaluates" each arm by querying a surrogate model."""

    def __init__(
        self,
        name: str,
        surrogate: Surrogate,
        datasets: List[SupervisedDataset],
        search_space: SearchSpace,
        metric_names: List[str],
    ) -> None:
        """
        Args:
            name: Name of the runner.
            surrogate: Model whose `predict` method is called in `run`.
            datasets: Stored on the instance; not otherwise used by this
                class.
            search_space: Search space of the problem. Used to decide
                whether parameters must be transformed before prediction.
            metric_names: Names of the metrics. `run` pairs the i-th name
                with the i-th entry of each prediction.
        """
        self.name = name
        self.surrogate = surrogate
        self.metric_names = metric_names
        self.datasets = datasets
        self.search_space = search_space

        self.results: Dict[int, float] = {}
        self.statuses: Dict[int, TrialStatus] = {}

        # If there are log scale parameters, these need to be transformed.
        has_log_scale_parameter = any(
            isinstance(p, RangeParameter) and p.log_scale
            for p in search_space.parameters.values()
        )
        if has_log_scale_parameter:
            int_to_float_tf = IntToFloat(search_space=search_space)
            # `Log` is constructed from the search space as it looks after
            # `IntToFloat` has been applied; a clone is transformed so that
            # `search_space` itself is not passed to the transform.
            log_tf = Log(
                search_space=int_to_float_tf.transform_search_space(
                    search_space.clone()
                )
            )
            self.transforms: Optional[Tuple[IntToFloat, Log]] = (
                int_to_float_tf,
                log_tf,
            )
        else:
            self.transforms = None

    def _get_transformed_parameters(
        self, parameters: TParameterization
    ) -> TParameterization:
        """Apply the stored transforms, in order, to one parameterization.

        Returns `parameters` unchanged when no transforms were configured.
        """
        if self.transforms is None:
            return parameters

        obs_ft = ObservationFeatures(parameters=parameters)
        for transform in not_none(self.transforms):
            obs_ft = transform.transform_observation_features([obs_ft])[0]
        return obs_ft.parameters

    def run(self, trial: BaseTrial) -> Dict[str, Any]:
        """Predict every arm of `trial` with the surrogate.

        Marks the trial as COMPLETED in `self.statuses` before predicting.

        Returns:
            A dictionary of the form `{metric_name: {arm_name: value}}` with
            one outer entry per name in `self.metric_names`.
        """
        self.statuses[trial.index] = TrialStatus.COMPLETED

        # Cache predictions for each arm
        preds = {}
        for arm in trial.arms:
            transformed = self._get_transformed_parameters(arm.parameters)
            # One row per arm, one column per parameter, in the iteration
            # order of the parameter dictionary.
            X = torch.tensor(list(transformed.values())).reshape(
                [1, len(arm.parameters)]
            )
            # Only the first element of the `predict` result is used
            # (presumably the predictive mean -- confirm against
            # `Surrogate.predict`); `squeeze(0)` drops the single-row axis.
            preds[arm.name] = self.surrogate.predict(X=X)[0].squeeze(0)

        # NOTE(review): assumes entry `i` of each prediction belongs to
        # `self.metric_names[i]` -- confirm against how the surrogate was fit.
        return {
            metric_name: {arm_name: float(pred[i]) for arm_name, pred in preds.items()}
            for i, metric_name in enumerate(self.metric_names)
        }

    def poll_trial_status(
        self, trials: Iterable[BaseTrial]
    ) -> Dict[TrialStatus, Set[int]]:
        """Report every given trial as COMPLETED, whatever `self.statuses` holds."""
        return {TrialStatus.COMPLETED: {t.index for t in trials}}

    @classmethod
    # pyre-fixme[2]: Parameter annotation cannot be `Any`.
    def serialize_init_args(cls, obj: Any) -> Dict[str, Any]:
        """Serialize the properties needed to initialize the runner.
        Used for storage.

        WARNING: Because of issues with consistently saving and loading BoTorch and
        GPyTorch modules the SurrogateRunner cannot be serialized at this time. At load
        time the runner will be replaced with a SyntheticRunner.
        """
        return {}

    @classmethod
    def deserialize_init_args(
        cls,
        args: Dict[str, Any],
        decoder_registry: Optional[TDecoderRegistry] = None,
        class_decoder_registry: Optional[TClassDecoderRegistry] = None,
    ) -> Dict[str, Any]:
        """Return an empty dict, mirroring `serialize_init_args`.

        All arguments are ignored because nothing is serialized.
        """
        return {}
Ax

stable

Source code for ax.benchmark.problems.surrogate

Ax

Navigation

Related Topics