Source code for ax.models.torch.botorch_modular.model

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

import dataclasses
from collections import OrderedDict
from collections.abc import Mapping, Sequence
from copy import deepcopy
from dataclasses import dataclass, field
from functools import wraps
from itertools import chain
from typing import Any, Callable, Optional, TypeVar

import numpy as np
import torch
from ax.core.search_space import SearchSpaceDigest
from ax.core.types import TCandidateMetadata, TGenMetadata
from ax.exceptions.core import UserInputError
from ax.models.torch.botorch import (
    get_feature_importances_from_botorch_model,
    get_rounding_func,
)
from ax.models.torch.botorch_modular.acquisition import Acquisition
from ax.models.torch.botorch_modular.surrogate import Surrogate
from ax.models.torch.botorch_modular.utils import (
    check_outcome_dataset_match,
    choose_botorch_acqf_class,
    construct_acquisition_and_optimizer_options,
    get_subset_datasets,
)
from ax.models.torch.utils import _to_inequality_constraints
from ax.models.torch_base import TorchGenResults, TorchModel, TorchOptConfig
from ax.utils.common.base import Base
from ax.utils.common.constants import Keys
from ax.utils.common.docutils import copy_doc
from ax.utils.common.typeutils import checked_cast
from botorch.acquisition.acquisition import AcquisitionFunction
from botorch.models.deterministic import FixedSingleSampleModel
from botorch.models.model import Model
from botorch.models.transforms.input import InputTransform
from botorch.models.transforms.outcome import OutcomeTransform
from botorch.utils.datasets import SupervisedDataset
from gpytorch.kernels.kernel import Kernel
from gpytorch.likelihoods import Likelihood
from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
from gpytorch.mlls.marginal_log_likelihood import MarginalLogLikelihood
from torch import Tensor

T = TypeVar("T")


[docs]def single_surrogate_only(f: Callable[..., T]) -> Callable[..., T]:
    """
    For use as a decorator on functions only implemented for BotorchModels with a
    single Surrogate.
    """

    @wraps(f)
    def impl(self: "BoTorchModel", *args: list[Any], **kwargs: dict[str, Any]) -> T:
        if len(self._surrogates) != 1:
            raise NotImplementedError(
                f"{f.__name__} not implemented for multi-surrogate case. Found "
                f"{self.surrogates=}."
            )
        return f(self, *args, **kwargs)

    return impl


[docs]@dataclass(frozen=True)
class SurrogateSpec:
    """
    Fields in the SurrogateSpec dataclass correspond to arguments in
    ``Surrogate.__init__``, except for ``outcomes`` which is used to specify which
    outcomes the Surrogate is responsible for modeling.
    When ``BotorchModel.fit`` is called, these fields will be used to construct the
    requisite Surrogate objects.
    If ``outcomes`` is left empty then no outcomes will be fit to the Surrogate.
    """

    botorch_model_class: Optional[type[Model]] = None
    botorch_model_kwargs: dict[str, Any] = field(default_factory=dict)

    mll_class: type[MarginalLogLikelihood] = ExactMarginalLogLikelihood
    mll_kwargs: dict[str, Any] = field(default_factory=dict)

    covar_module_class: Optional[type[Kernel]] = None
    covar_module_kwargs: Optional[dict[str, Any]] = None

    likelihood_class: Optional[type[Likelihood]] = None
    likelihood_kwargs: Optional[dict[str, Any]] = None

    input_transform_classes: Optional[list[type[InputTransform]]] = None
    input_transform_options: Optional[dict[str, dict[str, Any]]] = None

    outcome_transform_classes: Optional[list[type[OutcomeTransform]]] = None
    outcome_transform_options: Optional[dict[str, dict[str, Any]]] = None

    allow_batched_models: bool = True

    outcomes: list[str] = field(default_factory=list)


[docs]class BoTorchModel(TorchModel, Base):
    """**All classes in 'botorch_modular' directory are under
    construction, incomplete, and should be treated as alpha
    versions only.**

    Modular `Model` class for combining BoTorch subcomponents
    in Ax. Specified via `Surrogate` and `Acquisition`, which wrap
    BoTorch `Model` and `AcquisitionFunction`, respectively, for
    convenient use in Ax.

    Args:
        acquisition_class: Type of `Acquisition` to be used in
            this model, auto-selected based on experiment and data
            if not specified.
        acquisition_options: Optional dict of kwargs, passed to
            the constructor of BoTorch `AcquisitionFunction`.
        botorch_acqf_class: Type of `AcquisitionFunction` to be
            used in this model, auto-selected based on experiment
            and data if not specified.
        surrogate_specs: Optional Mapping of names onto SurrogateSpecs, which specify
            how to initialize specific Surrogates to model specific outcomes. If None
            is provided a single Surrogate will be created and set up automatically
            based on the data provided.
        surrogate: In liu of SurrogateSpecs, an instance of `Surrogate` may be
            provided to be used as the sole Surrogate for all outcomes
        refit_on_cv: Whether to reoptimize model parameters during call to
            `BoTorchmodel.cross_validate`.
        warm_start_refit: Whether to load parameters from either the provided
            state dict or the state dict of the current BoTorch `Model` during
            refitting. If False, model parameters will be reoptimized from
            scratch on refit. NOTE: This setting is ignored during
            `cross_validate` if the corresponding `refit_on_...` is False.
    """

    acquisition_class: type[Acquisition]
    acquisition_options: dict[str, Any]

    surrogate_specs: dict[str, SurrogateSpec]
    _surrogates: dict[str, Surrogate]
    _output_order: Optional[list[int]] = None

    _botorch_acqf_class: Optional[type[AcquisitionFunction]]
    _search_space_digest: Optional[SearchSpaceDigest] = None
    _supports_robust_optimization: bool = True

    def __init__(
        self,
        surrogate_specs: Optional[Mapping[str, SurrogateSpec]] = None,
        surrogate: Optional[Surrogate] = None,
        acquisition_class: Optional[type[Acquisition]] = None,
        acquisition_options: Optional[dict[str, Any]] = None,
        botorch_acqf_class: Optional[type[AcquisitionFunction]] = None,
        # TODO: [T168715924] Revisit these "refit" arguments.
        refit_on_cv: bool = False,
        warm_start_refit: bool = True,
    ) -> None:
        # Ensure only surrogate_specs or surrogate is provided
        if surrogate_specs and surrogate:
            raise UserInputError(
                "Only one of `surrogate_specs` and `surrogate` arguments is expected."
            )

        # Ensure each outcome is only modeled by one Surrogate in the SurrogateSpecs
        if surrogate_specs is not None:
            outcomes_by_surrogate_label = {
                label: spec.outcomes for label, spec in surrogate_specs.items()
            }
            all_outcomes = list(
                chain.from_iterable(outcomes_by_surrogate_label.values())
            )
            if len(all_outcomes) != len(set(all_outcomes)):
                raise UserInputError(
                    "Each outcome may be modeled by only one Surrogate, found "
                    f"{outcomes_by_surrogate_label}"
                )

        # Ensure user does not use reserved Surrogate labels
        if (
            surrogate_specs is not None
            and len(
                {Keys.ONLY_SURROGATE, Keys.AUTOSET_SURROGATE} - surrogate_specs.keys()
            )
            < 2
        ):
            raise UserInputError(
                f"SurrogateSpecs may not be labeled {Keys.ONLY_SURROGATE} or "
                f"{Keys.AUTOSET_SURROGATE}, these are reserved."
            )

        self.surrogate_specs = dict((surrogate_specs or {}).items())
        if surrogate is not None:
            self._surrogates = {Keys.ONLY_SURROGATE: surrogate}
        else:
            self._surrogates = {}

        self.acquisition_class = acquisition_class or Acquisition
        self.acquisition_options = acquisition_options or {}
        self._botorch_acqf_class = botorch_acqf_class

        self.refit_on_cv = refit_on_cv
        self.warm_start_refit = warm_start_refit

    @property
    def surrogates(self) -> dict[str, Surrogate]:
        """Surrogates by label"""
        return self._surrogates

    @property
    @single_surrogate_only
    def surrogate(self) -> Surrogate:
        """Surrogate, if there is only one."""
        return next(iter(self.surrogates.values()))

    @property
    @single_surrogate_only
    def Xs(self) -> list[Tensor]:
        """A list of tensors, each of shape ``batch_shape x n_i x d``,
        where `n_i` is the number of training inputs for the i-th model.

        NOTE: This is an accessor for ``self.surrogate.Xs``
        and returns it unchanged.
        """
        return self.surrogate.Xs

    @property
    def botorch_acqf_class(self) -> type[AcquisitionFunction]:
        """BoTorch ``AcquisitionFunction`` class, associated with this model.
        Raises an error if one is not yet set.
        """
        if not self._botorch_acqf_class:
            raise ValueError("BoTorch `AcquisitionFunction` has not yet been set.")
        return self._botorch_acqf_class

[docs]    def fit(
        self,
        datasets: Sequence[SupervisedDataset],
        search_space_digest: SearchSpaceDigest,
        candidate_metadata: Optional[list[list[TCandidateMetadata]]] = None,
        # state dict by surrogate label
        state_dicts: Optional[Mapping[str, OrderedDict[str, Tensor]]] = None,
        refit: bool = True,
        **additional_model_inputs: Any,
    ) -> None:
        """Fit model to m outcomes.

        Args:
            datasets: A list of ``SupervisedDataset`` containers, each
                corresponding to the data of one or more outcomes.
            search_space_digest: A ``SearchSpaceDigest`` object containing
                metadata on the features in the datasets.
            candidate_metadata: Model-produced metadata for candidates, in
                the order corresponding to the Xs.
            state_dicts: Optional state dict to load by model label as passed in via
                surrogate_specs. If using a single, pre-instantiated model use
                `Keys.ONLY_SURROGATE.
            refit: Whether to re-optimize model parameters.
            additional_model_inputs: Additional kwargs to pass to the
                model input constructor in ``Surrogate.fit``.
        """
        outcome_names = sum((ds.outcome_names for ds in datasets), [])
        check_outcome_dataset_match(
            outcome_names=outcome_names, datasets=datasets, exact_match=True
        )  # Checks for duplicate outcome names

        # Store search space info for later use (e.g. during generation)
        self._search_space_digest = search_space_digest

        # Step 0. If the user passed in a preconstructed surrogate we won't have a
        # SurrogateSpec and must assume we're fitting all metrics
        if Keys.ONLY_SURROGATE in self._surrogates.keys():
            surrogate = self._surrogates[Keys.ONLY_SURROGATE]
            surrogate.model_options.update(additional_model_inputs)
            surrogate.fit(
                datasets=datasets,
                search_space_digest=search_space_digest,
                candidate_metadata=candidate_metadata,
                state_dict=(
                    state_dicts.get(Keys.ONLY_SURROGATE) if state_dicts else None
                ),
                refit=refit,
            )
            self._output_order = list(range(len(outcome_names)))
            return

        # Step 1. Initialize a Surrogate for every SurrogateSpec
        self._surrogates = {
            label: Surrogate(
                # if None, Surrogate will autoset class per outcome at construct time
                botorch_model_class=spec.botorch_model_class,
                model_options=spec.botorch_model_kwargs,
                mll_class=spec.mll_class,
                mll_options=spec.mll_kwargs,
                covar_module_class=spec.covar_module_class,
                covar_module_options=spec.covar_module_kwargs,
                likelihood_class=spec.likelihood_class,
                likelihood_options=spec.likelihood_kwargs,
                input_transform_classes=spec.input_transform_classes,
                input_transform_options=spec.input_transform_options,
                outcome_transform_classes=spec.outcome_transform_classes,
                outcome_transform_options=spec.outcome_transform_options,
                allow_batched_models=spec.allow_batched_models,
            )
            for label, spec in self.surrogate_specs.items()
        }

        # Step 1.5. If any outcomes are not explicitly assigned to a Surrogate, create
        # a new Surrogate for all these outcomes (which will autoset its botorch model
        # class per outcome) UNLESS there is only one SurrogateSpec with no outcomes
        # assigned to it, in which case that will be used for all outcomes.
        assigned_outcome_names = {
            item
            for sublist in [spec.outcomes for spec in self.surrogate_specs.values()]
            for item in sublist
        }
        unassigned_outcome_names = [
            name for name in outcome_names if name not in assigned_outcome_names
        ]
        if len(unassigned_outcome_names) > 0 and len(self.surrogates) != 1:
            self._surrogates[Keys.AUTOSET_SURROGATE] = Surrogate()

        # Step 2. Fit each Surrogate iteratively using its assigned outcomes
        for label, surrogate in self.surrogates.items():
            if label == Keys.AUTOSET_SURROGATE or len(self.surrogates) == 1:
                subset_outcome_names = unassigned_outcome_names
            else:
                subset_outcome_names = self.surrogate_specs[label].outcomes
            subset_datasets = get_subset_datasets(
                datasets=datasets, subset_outcome_names=subset_outcome_names
            )
            surrogate.model_options.update(additional_model_inputs)
            surrogate.fit(
                datasets=subset_datasets,
                search_space_digest=search_space_digest,
                candidate_metadata=candidate_metadata,
                state_dict=(state_dicts or {}).get(label),
                refit=refit,
            )

        # Step 3. Output order of outcomes must match input order, but now outcomes are
        # grouped according to surrogate. Compute the permutation from surrogate order
        # to input ordering.
        surrogate_order = []
        for surrogate in self.surrogates.values():
            surrogate_order.extend(surrogate.outcomes)
        self._output_order = list(
            np.argsort([outcome_names.index(name) for name in surrogate_order])
        )

[docs]    def predict(self, X: Tensor) -> tuple[Tensor, Tensor]:
        """Predicts, potentially from multiple surrogates.

        If predictions are from multiple surrogates, will stitch outputs together
        in same order as input datasets, using self.output_order.

        Args:
            X: (n x d) Tensor of input locations.

        Returns: Tuple of tensors: (n x m) mean, (n x m x m) covariance.
        """
        if len(self.surrogates) == 1:
            return self.surrogate.predict(X=X)
        fs, covs = [], []
        for surrogate in self.surrogates.values():
            f, cov = surrogate.predict(X=X)
            fs.append(f)
            covs.append(cov)
        f = torch.cat(fs, dim=-1)
        cov = torch.zeros(
            f.shape[0], f.shape[1], f.shape[1], dtype=X.dtype, device=X.device
        )
        i = 0
        for cov_i in covs:
            d = cov_i.shape[-1]
            cov[:, i : (i + d), i : (i + d)] = cov_i
            i += d
        # Permute from surrogate order to input ordering
        f = f[:, self.output_order]
        cov = cov[:, :, self.output_order][:, self.output_order, :]
        return f, cov

[docs]    def predict_from_surrogate(
        self,
        surrogate_label: str,
        X: Tensor,
        use_posterior_predictive: bool = False,
    ) -> tuple[Tensor, Tensor]:
        """Predict from the Surrogate with the given label."""
        return self.surrogates[surrogate_label].predict(
            X=X, use_posterior_predictive=use_posterior_predictive
        )

[docs]    @copy_doc(TorchModel.gen)
    def gen(
        self,
        n: int,
        search_space_digest: SearchSpaceDigest,
        torch_opt_config: TorchOptConfig,
    ) -> TorchGenResults:
        acq_options, opt_options = construct_acquisition_and_optimizer_options(
            acqf_options=self.acquisition_options,
            model_gen_options=torch_opt_config.model_gen_options,
        )
        # update bounds / target values
        search_space_digest = dataclasses.replace(
            self.search_space_digest,
            bounds=search_space_digest.bounds,
            target_values=search_space_digest.target_values or {},
        )

        acqf = self._instantiate_acquisition(
            search_space_digest=search_space_digest,
            torch_opt_config=torch_opt_config,
            acq_options=acq_options,
        )
        botorch_rounding_func = get_rounding_func(torch_opt_config.rounding_func)
        candidates, expected_acquisition_value, weights = acqf.optimize(
            n=n,
            search_space_digest=search_space_digest,
            inequality_constraints=_to_inequality_constraints(
                linear_constraints=torch_opt_config.linear_constraints
            ),
            fixed_features=torch_opt_config.fixed_features,
            rounding_func=botorch_rounding_func,
            optimizer_options=checked_cast(dict, opt_options),
        )
        gen_metadata = self._get_gen_metadata_from_acqf(
            acqf=acqf,
            torch_opt_config=torch_opt_config,
            expected_acquisition_value=expected_acquisition_value,
        )
        return TorchGenResults(
            points=candidates.detach().cpu(),
            weights=weights,
            gen_metadata=gen_metadata,
        )

    def _get_gen_metadata_from_acqf(
        self,
        acqf: Acquisition,
        torch_opt_config: TorchOptConfig,
        expected_acquisition_value: Tensor,
    ) -> TGenMetadata:
        gen_metadata: TGenMetadata = {
            Keys.EXPECTED_ACQF_VAL: expected_acquisition_value.tolist()
        }
        if torch_opt_config.objective_weights.nonzero().numel() > 1:
            gen_metadata["objective_thresholds"] = acqf.objective_thresholds
            gen_metadata["objective_weights"] = acqf.objective_weights

        if hasattr(acqf.acqf, "outcome_model"):
            outcome_model = acqf.acqf.outcome_model
            if isinstance(
                outcome_model,
                FixedSingleSampleModel,
            ):
                gen_metadata["outcome_model_fixed_draw_weights"] = outcome_model.w
        return gen_metadata

[docs]    @copy_doc(TorchModel.best_point)
    @single_surrogate_only
    def best_point(
        self,
        search_space_digest: SearchSpaceDigest,
        torch_opt_config: TorchOptConfig,
    ) -> Optional[Tensor]:
        try:
            return self.surrogate.best_in_sample_point(
                search_space_digest=search_space_digest,
                torch_opt_config=torch_opt_config,
            )[0]
        except ValueError:
            return None

[docs]    @copy_doc(TorchModel.evaluate_acquisition_function)
    def evaluate_acquisition_function(
        self,
        X: Tensor,
        search_space_digest: SearchSpaceDigest,
        torch_opt_config: TorchOptConfig,
        acq_options: Optional[dict[str, Any]] = None,
    ) -> Tensor:
        acqf = self._instantiate_acquisition(
            search_space_digest=search_space_digest,
            torch_opt_config=torch_opt_config,
            acq_options=acq_options,
        )
        return acqf.evaluate(X=X)

[docs]    @copy_doc(TorchModel.cross_validate)
    def cross_validate(
        self,
        datasets: Sequence[SupervisedDataset],
        X_test: Tensor,
        search_space_digest: SearchSpaceDigest,
        use_posterior_predictive: bool = False,
        **additional_model_inputs: Any,
    ) -> tuple[Tensor, Tensor]:
        # Will fail if metric_names exist across multiple models
        metric_names = sum((ds.outcome_names for ds in datasets), [])
        surrogate_labels = (
            [
                label
                for label, surrogate in self.surrogates.items()
                if any(metric in surrogate.outcomes for metric in metric_names)
            ]
            if len(self.surrogates) > 1
            else [*self.surrogates.keys()]
        )
        if len(surrogate_labels) != 1:
            raise UserInputError(
                "May not cross validate multiple Surrogates at once. Please input "
                f"datasets that exist on one Surrogate. {metric_names} spans "
                f"{surrogate_labels}"
            )
        surrogate_label = surrogate_labels[0]

        current_surrogates = self.surrogates
        # If we should be refitting but not warm-starting the refit, set
        # `state_dicts` to None to avoid loading it.
        state_dicts = (
            None
            if self.refit_on_cv and not self.warm_start_refit
            else {
                label: deepcopy(checked_cast(OrderedDict, surrogate.model.state_dict()))
                for label, surrogate in current_surrogates.items()
            }
        )

        # Temporarily set `_surrogates` to cloned surrogates to set
        # the training data on cloned surrogates to train set and
        # use it to predict the test point.
        surrogate_clones = {
            label: surrogate.clone_reset()
            for label, surrogate in self.surrogates.items()
        }
        self._surrogates = surrogate_clones
        # Remove the robust_digest since we do not want to use perturbations here.
        search_space_digest = dataclasses.replace(
            search_space_digest,
            robust_digest=None,
        )

        try:
            self.fit(
                datasets=datasets,
                search_space_digest=search_space_digest,
                state_dicts=state_dicts,
                refit=self.refit_on_cv,
                **additional_model_inputs,
            )
            X_test_prediction = self.predict_from_surrogate(
                surrogate_label=surrogate_label,
                X=X_test,
                use_posterior_predictive=use_posterior_predictive,
            )
        finally:
            # Reset the surrogates back to this model's surrogate, make
            # sure the cloned surrogate doesn't stay around if fit or
            # predict fail.
            self._surrogates = current_surrogates
        return X_test_prediction

    @property
    def dtype(self) -> torch.dtype:
        """Torch data type of the tensors in the training data used in the model,
        of which this ``Acquisition`` is a subcomponent.
        """
        dtypes = {
            label: surrogate.dtype for label, surrogate in self.surrogates.items()
        }

        dtypes_list = list(dtypes.values())
        if dtypes_list.count(dtypes_list[0]) != len(dtypes_list):
            raise NotImplementedError(
                f"Expected all Surrogates to have same dtype, found {dtypes}"
            )

        return dtypes_list[0]

    @property
    def device(self) -> torch.device:
        """Torch device type of the tensors in the training data used in the model,
        of which this ``Acquisition`` is a subcomponent.
        """

        devices = {
            label: surrogate.device for label, surrogate in self.surrogates.items()
        }

        devices_list = list(devices.values())
        if devices_list.count(devices_list[0]) != len(devices_list):
            raise NotImplementedError(
                f"Expected all Surrogates to have same device, found {devices}"
            )

        return devices_list[0]

    def _instantiate_acquisition(
        self,
        search_space_digest: SearchSpaceDigest,
        torch_opt_config: TorchOptConfig,
        acq_options: Optional[dict[str, Any]] = None,
    ) -> Acquisition:
        """Set a BoTorch acquisition function class for this model if needed and
        instantiate it.

        Returns:
            A BoTorch ``AcquisitionFunction`` instance.
        """
        if not self._botorch_acqf_class:
            if torch_opt_config.risk_measure is not None:
                # TODO[T131759261]: Implement selection of acqf for robust opt.
                # This will depend on the properties of the robust search space and
                # the risk measure being used.
                raise NotImplementedError
            self._botorch_acqf_class = choose_botorch_acqf_class(
                pending_observations=torch_opt_config.pending_observations,
                outcome_constraints=torch_opt_config.outcome_constraints,
                linear_constraints=torch_opt_config.linear_constraints,
                fixed_features=torch_opt_config.fixed_features,
                objective_thresholds=torch_opt_config.objective_thresholds,
                objective_weights=torch_opt_config.objective_weights,
            )

        return self.acquisition_class(
            surrogates=self.surrogates,
            botorch_acqf_class=self.botorch_acqf_class,
            search_space_digest=search_space_digest,
            torch_opt_config=torch_opt_config,
            options=acq_options,
        )

[docs]    @single_surrogate_only
    def feature_importances(self) -> np.ndarray:
        """Compute feature importances from the model.

        Caveat: This assumes the following:
            1. There is a single surrogate model (potentially a `ModelList`).
            2. We can get model lengthscales from `covar_module.base_kernel.lengthscale`

        Returns:
            The feature importances as a numpy array of size len(metrics) x 1 x dim
            where each row sums to 1.
        """
        return get_feature_importances_from_botorch_model(model=self.surrogate.model)

    @property
    def search_space_digest(self) -> SearchSpaceDigest:
        if self._search_space_digest is None:
            raise RuntimeError(
                "`search_space_digest` is not initialized. Must `fit` the model first."
            )
        return self._search_space_digest

    @search_space_digest.setter
    def search_space_digest(self, value: SearchSpaceDigest) -> None:
        raise RuntimeError("Setting search_space_digest manually is disallowed.")

    @property
    def outcomes_by_surrogate_label(self) -> dict[str, list[str]]:
        """Returns a dictionary mapping from surrogate label to a list of outcomes."""
        outcomes_by_surrogate_label = {}
        for k, v in self.surrogates.items():
            outcomes_by_surrogate_label[k] = v.outcomes
        return outcomes_by_surrogate_label

    @property
    def output_order(self) -> list[int]:
        if self._output_order is None:
            raise RuntimeError(
                "`output_order` is not initialized. Must `fit` the model first."
            )
        return self._output_order
Ax

stable

Source code for ax.models.torch.botorch_modular.model

Ax

Navigation

Related Topics