Source code for ax.models.torch.botorch

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

from __future__ import annotations

import warnings
from copy import deepcopy
from logging import Logger
from typing import Any, Callable, Optional, Union

import numpy as np
import torch
from ax.core.search_space import SearchSpaceDigest
from ax.core.types import TCandidateMetadata
from ax.exceptions.core import DataRequiredError
from ax.models.torch.botorch_defaults import (
    get_and_fit_model,
    get_qLogNEI,
    recommend_best_observed_point,
    scipy_optimizer,
    TAcqfConstructor,
)
from ax.models.torch.utils import (
    _datasets_to_legacy_inputs,
    _get_X_pending_and_observed,
    _to_inequality_constraints,
    normalize_indices,
    predict_from_model,
    subset_model,
)
from ax.models.torch_base import TorchGenResults, TorchModel, TorchOptConfig
from ax.models.types import TConfig
from ax.utils.common.constants import Keys
from ax.utils.common.docutils import copy_doc
from ax.utils.common.logger import get_logger
from ax.utils.common.typeutils import checked_cast
from botorch.acquisition.acquisition import AcquisitionFunction
from botorch.models import ModelList
from botorch.models.model import Model
from botorch.utils.datasets import SupervisedDataset
from botorch.utils.transforms import is_ensemble
from torch import Tensor
from torch.nn import ModuleList  # @manual

# Module-level logger, named after this module.
logger: Logger = get_logger(__name__)


# Signature of the `model_constructor` callable accepted by `BotorchModel`.
# `BotorchModel` invokes it with keyword arguments; the positional slots below
# follow the call signature spelled out in the `BotorchModel` docstring.
# pyre-fixme[33]: Aliased annotation cannot contain `Any`.
TModelConstructor = Callable[
    [
        list[Tensor],  # Xs
        list[Tensor],  # Ys
        list[Tensor],  # Yvars
        list[int],  # task_features
        list[int],  # fidelity_features
        list[str],  # metric_names
        Optional[dict[str, Tensor]],  # state_dict
        Any,  # additional keyword arguments passed through
    ],
    Model,
]
# Signature of the `model_predictor` callable: takes a model and a tensor of
# points and returns a pair of tensors (posterior mean and covariance per the
# `BotorchModel` docstring). The `bool` slot presumably corresponds to the
# `use_posterior_predictive` keyword that `cross_validate` passes -- TODO confirm.
TModelPredictor = Callable[[Model, Tensor, bool], tuple[Tensor, Tensor]]


# Signature of the `acqf_optimizer` callable; slot names follow the call
# signature given in the `BotorchModel` docstring. Returns a pair of tensors,
# which `BotorchModel.gen` unpacks as (candidates, expected acquisition value).
# pyre-fixme[33]: Aliased annotation cannot contain `Any`.
TOptimizer = Callable[
    [
        AcquisitionFunction,  # acq_function
        Tensor,  # bounds
        int,  # n
        Optional[list[tuple[Tensor, Tensor, float]]],  # inequality_constraints
        Optional[list[tuple[Tensor, Tensor, float]]],  # equality_constraints
        Optional[dict[int, float]],  # fixed_features
        Optional[Callable[[Tensor], Tensor]],  # rounding_func
        Any,  # additional keyword arguments passed through
    ],
    tuple[Tensor, Tensor],
]
# Signature of the `best_point_recommender` callable; slot names follow the
# keyword arguments used by `BotorchModel.best_point`. May return `None`.
TBestPointRecommender = Callable[
    [
        TorchModel,  # model
        list[tuple[float, float]],  # bounds
        Tensor,  # objective_weights
        Optional[tuple[Tensor, Tensor]],  # outcome_constraints
        Optional[tuple[Tensor, Tensor]],  # linear_constraints
        Optional[dict[int, float]],  # fixed_features
        Optional[TConfig],  # model_gen_options
        Optional[dict[int, float]],  # target_fidelities
    ],
    Optional[Tensor],
]


class BotorchModel(TorchModel):
    r"""
    Customizable botorch model.

    By default, this uses a noisy Log Expected Improvement (qLogNEI) acquisition
    function on top of a model made up of separate GPs, one for each outcome. This
    behavior can be modified by providing custom implementations of the following
    components:

    - a `model_constructor` that instantiates and fits a model on data
    - a `model_predictor` that predicts outcomes using the fitted model
    - a `acqf_constructor` that creates an acquisition function from a fitted model
    - a `acqf_optimizer` that optimizes the acquisition function
    - a `best_point_recommender` that recommends a current "best" point (i.e.,
        what the model recommends if the learning process ended now)

    Args:
        model_constructor: A callable that instantiates and fits a model on data,
            with signature as described below.
        model_predictor: A callable that predicts using the fitted model, with
            signature as described below.
        acqf_constructor: A callable that creates an acquisition function from a
            fitted model, with signature as described below.
        acqf_optimizer: A callable that optimizes the acquisition function, with
            signature as described below.
        best_point_recommender: A callable that recommends the best point, with
            signature as described below.
        refit_on_cv: If True, refit the model for each fold when performing
            cross-validation.
        warm_start_refitting: If True, start model refitting from previous
            model parameters in order to speed up the fitting process.
        use_input_warping: Flag forwarded as-is to `model_constructor` in both
            `fit` and `cross_validate`.
        use_loocv_pseudo_likelihood: Flag forwarded as-is to `model_constructor`
            in both `fit` and `cross_validate`.
        prior: An optional dictionary that contains the specification of GP model
            prior. When not None, it is forwarded to `model_constructor` as the
            `prior` keyword argument. Currently, the keys include:
            - covar_module_prior: prior on covariance matrix e.g.
                {"lengthscale_prior": GammaPrior(3.0, 6.0)}.
            - type: type of prior on task covariance matrix e.g.`LKJCovariancePrior`.
            - sd_prior: A scalar prior over nonnegative numbers, which is used for the
                default LKJCovariancePrior task_covar_prior.
            - eta: The eta parameter on the default LKJ task_covar_prior.
        kwargs: Additional keyword arguments, stored and passed through to
            `model_constructor`.


    Call signatures:

    ::

        model_constructor(
            Xs,
            Ys,
            Yvars,
            task_features,
            fidelity_features,
            metric_names,
            state_dict,
            **kwargs,
        ) -> model

    Here `Xs`, `Ys`, `Yvars` are lists of tensors (one element per outcome),
    `task_features` identifies columns of Xs that should be modeled as a task,
    `fidelity_features` is a list of ints that specify the positions of fidelity
    parameters in 'Xs', `metric_names` provides the names of each `Y` in `Ys`,
    `state_dict` is a pytorch module state dict, and `model` is a BoTorch `Model`.
    Optional kwargs are being passed through from the `BotorchModel` constructor.
    This callable is assumed to return a fitted BoTorch model that has the same
    dtype and lives on the same device as the input tensors.

    ::

        model_predictor(model, X) -> [mean, cov]

    Here `model` is a fitted botorch model, `X` is a tensor of candidate points,
    and `mean` and `cov` are the posterior mean and covariance, respectively.
    Note that `cross_validate` additionally passes a `use_posterior_predictive`
    keyword argument to this callable.

    ::

        acqf_constructor(
            model,
            objective_weights,
            outcome_constraints,
            X_observed,
            X_pending,
            **kwargs,
        ) -> acq_function


    Here `model` is a botorch `Model`, `objective_weights` is a tensor of weights
    for the model outputs, `outcome_constraints` is a tuple of tensors describing
    the (linear) outcome constraints, `X_observed` are previously observed points,
    and `X_pending` are points whose evaluation is pending. `acq_function` is a
    BoTorch acquisition function crafted from these inputs. For additional
    details on the arguments, see `get_qLogNEI`.

    ::

        acqf_optimizer(
            acq_function,
            bounds,
            n,
            inequality_constraints,
            equality_constraints,
            fixed_features,
            rounding_func,
            **kwargs,
        ) -> candidates

    Here `acq_function` is a BoTorch `AcquisitionFunction`, `bounds` is a tensor
    containing bounds on the parameters, `n` is the number of candidates to be
    generated, `inequality_constraints` are inequality constraints on parameter
    values, `fixed_features` specifies features that should be fixed during
    generation, and `rounding_func` is a callback that rounds an optimization
    result appropriately. `candidates` is a tensor of generated candidates.
    For additional details on the arguments, see `scipy_optimizer`.

    ::

        best_point_recommender(
            model,
            bounds,
            objective_weights,
            outcome_constraints,
            linear_constraints,
            fixed_features,
            model_gen_options,
            target_fidelities,
        ) -> candidates

    Here `model` is a TorchModel, `bounds` is a list of tuples containing bounds
    on the parameters, `objective_weights` is a tensor of weights for the model
    outputs, `outcome_constraints` is a tuple of tensors describing the (linear)
    outcome constraints, `linear_constraints` is a tuple of tensors describing
    constraints on the design, `fixed_features` specifies features that should be
    fixed during generation, `model_gen_options` is a config dictionary that can
    contain model-specific options, and `target_fidelities` is a map from fidelity
    feature column indices to their respective target fidelities, used for
    multi-fidelity optimization problems. % TODO: refer to an example.
    """

    dtype: Optional[torch.dtype]
    device: Optional[torch.device]
    Xs: list[Tensor]
    Ys: list[Tensor]
    Yvars: list[Tensor]
    _model: Optional[Model]
    _search_space_digest: Optional[SearchSpaceDigest] = None

    def __init__(
        self,
        model_constructor: TModelConstructor = get_and_fit_model,
        model_predictor: TModelPredictor = predict_from_model,
        acqf_constructor: TAcqfConstructor = get_qLogNEI,
        # pyre-fixme[9]: acqf_optimizer declared/used type mismatch
        acqf_optimizer: TOptimizer = scipy_optimizer,
        best_point_recommender: TBestPointRecommender = recommend_best_observed_point,
        refit_on_cv: bool = False,
        warm_start_refitting: bool = True,
        use_input_warping: bool = False,
        use_loocv_pseudo_likelihood: bool = False,
        prior: Optional[dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
        # `stacklevel=2` attributes the warning to the code instantiating the
        # model rather than to this constructor.
        warnings.warn(
            "The legacy `BotorchModel` and its subclasses, including the current "
            f"class `{self.__class__.__name__}`, are slated for deprecation. "
            "These models will not be supported going forward and may be "
            "fully removed in a future release. Please consider using the "
            "Modular BoTorch Model (MBM) setup (ax/models/torch/botorch_modular) "
            "instead. If you run into a use case that is not supported by MBM, "
            "please raise this with an issue at https://github.com/facebook/Ax",
            DeprecationWarning,
            stacklevel=2,
        )
        self.model_constructor = model_constructor
        self.model_predictor = model_predictor
        self.acqf_constructor = acqf_constructor
        self.acqf_optimizer = acqf_optimizer
        self.best_point_recommender = best_point_recommender
        # pyre-fixme[4]: Attribute must be annotated.
        self._kwargs = kwargs
        self.refit_on_cv = refit_on_cv
        self.warm_start_refitting = warm_start_refitting
        self.use_input_warping = use_input_warping
        self.use_loocv_pseudo_likelihood = use_loocv_pseudo_likelihood
        self.prior = prior
        # State below is populated by `fit`.
        self._model: Optional[Model] = None
        self.Xs = []
        self.Ys = []
        self.Yvars = []
        self.dtype = None
        self.device = None
        self.task_features: list[int] = []
        self.fidelity_features: list[int] = []
        self.metric_names: list[str] = []

    # NOTE: methods decorated with `copy_doc` take their docstring from
    # `TorchModel`, so they are documented with comments only.
    @copy_doc(TorchModel.fit)
    def fit(
        self,
        datasets: list[SupervisedDataset],
        search_space_digest: SearchSpaceDigest,
        candidate_metadata: Optional[list[list[TCandidateMetadata]]] = None,
    ) -> None:
        if not datasets:
            raise DataRequiredError("BotorchModel.fit requires non-empty data sets.")
        self.Xs, self.Ys, self.Yvars = _datasets_to_legacy_inputs(datasets=datasets)
        # Flatten the per-dataset outcome names into a single list.
        self.metric_names = [name for ds in datasets for name in ds.outcome_names]
        # Store search space info for later use (e.g. during generation)
        self._search_space_digest = search_space_digest
        # dtype / device are taken from the first input tensor.
        self.dtype = self.Xs[0].dtype
        self.device = self.Xs[0].device
        self.task_features = normalize_indices(
            search_space_digest.task_features, d=self.Xs[0].size(-1)
        )
        self.fidelity_features = normalize_indices(
            search_space_digest.fidelity_features, d=self.Xs[0].size(-1)
        )
        # Only forward `prior` when one was given, so that constructors that
        # do not know about it keep working.
        extra_kwargs = {} if self.prior is None else {"prior": self.prior}
        self._model = self.model_constructor(  # pyre-ignore [28]
            Xs=self.Xs,
            Ys=self.Ys,
            Yvars=self.Yvars,
            task_features=self.task_features,
            fidelity_features=self.fidelity_features,
            metric_names=self.metric_names,
            use_input_warping=self.use_input_warping,
            use_loocv_pseudo_likelihood=self.use_loocv_pseudo_likelihood,
            **extra_kwargs,
            **self._kwargs,
        )

    @copy_doc(TorchModel.predict)
    def predict(self, X: Tensor) -> tuple[Tensor, Tensor]:
        # `self.model` raises a RuntimeError if the model has not been fitted.
        return self.model_predictor(model=self.model, X=X)  # pyre-ignore [28]

    @copy_doc(TorchModel.gen)
    def gen(
        self,
        n: int,
        search_space_digest: SearchSpaceDigest,
        torch_opt_config: TorchOptConfig,
    ) -> TorchGenResults:
        options = torch_opt_config.model_gen_options or {}
        acf_options = options.get(Keys.ACQF_KWARGS, {})
        optimizer_options = options.get(Keys.OPTIMIZER_KWARGS, {})

        if search_space_digest.fidelity_features:
            raise NotImplementedError(
                "Base BotorchModel does not support fidelity_features."
            )
        X_pending, X_observed = _get_X_pending_and_observed(
            Xs=self.Xs,
            objective_weights=torch_opt_config.objective_weights,
            bounds=search_space_digest.bounds,
            pending_observations=torch_opt_config.pending_observations,
            outcome_constraints=torch_opt_config.outcome_constraints,
            linear_constraints=torch_opt_config.linear_constraints,
            fixed_features=torch_opt_config.fixed_features,
            fit_out_of_design=torch_opt_config.fit_out_of_design,
        )

        model = self.model

        # subset model only to the outcomes we need for the optimization
        if options.get(Keys.SUBSET_MODEL, True):
            subset_model_results = subset_model(
                model=model,
                objective_weights=torch_opt_config.objective_weights,
                outcome_constraints=torch_opt_config.outcome_constraints,
            )
            model = subset_model_results.model
            objective_weights = subset_model_results.objective_weights
            outcome_constraints = subset_model_results.outcome_constraints
        else:
            objective_weights = torch_opt_config.objective_weights
            outcome_constraints = torch_opt_config.outcome_constraints

        # The digest stores one (lower, upper) pair per parameter; transpose
        # the resulting tensor before handing it to the optimizer.
        bounds_ = torch.tensor(
            search_space_digest.bounds, dtype=self.dtype, device=self.device
        )
        bounds_ = bounds_.transpose(0, 1)

        botorch_rounding_func = get_rounding_func(torch_opt_config.rounding_func)

        from botorch.exceptions.errors import UnsupportedError

        # pyre-fixme[53]: Captured variable `X_observed` is not annotated.
        # pyre-fixme[53]: Captured variable `X_pending` is not annotated.
        # pyre-fixme[53]: Captured variable `acf_options` is not annotated.
        # pyre-fixme[53]: Captured variable `botorch_rounding_func` is not annotated.
        # pyre-fixme[53]: Captured variable `bounds_` is not annotated.
        # pyre-fixme[53]: Captured variable `model` is not annotated.
        # pyre-fixme[53]: Captured variable `objective_weights` is not annotated.
        # pyre-fixme[53]: Captured variable `optimizer_options` is not annotated.
        # pyre-fixme[53]: Captured variable `outcome_constraints` is not annotated.
        def make_and_optimize_acqf(override_qmc: bool = False) -> tuple[Tensor, Tensor]:
            # Build the acquisition function and optimize it; with
            # `override_qmc=True`, `qmc=False` is passed to the constructor.
            add_kwargs = {"qmc": False} if override_qmc else {}
            acquisition_function = checked_cast(
                AcquisitionFunction,
                self.acqf_constructor(
                    model=model,
                    objective_weights=objective_weights,
                    outcome_constraints=outcome_constraints,
                    X_observed=X_observed,
                    X_pending=X_pending,
                    **acf_options,
                    **add_kwargs,
                ),
            )
            # pyre-ignore: [28]
            candidates, expected_acquisition_value = self.acqf_optimizer(
                acq_function=acquisition_function,
                bounds=bounds_,
                n=n,
                inequality_constraints=_to_inequality_constraints(
                    linear_constraints=torch_opt_config.linear_constraints
                ),
                fixed_features=torch_opt_config.fixed_features,
                rounding_func=botorch_rounding_func,
                **optimizer_options,
            )
            return candidates, expected_acquisition_value

        try:
            candidates, expected_acquisition_value = make_and_optimize_acqf()
        except UnsupportedError as e:  # untested
            if "SobolQMCSampler only supports dimensions" in str(e):
                # dimension too large for Sobol, let's use IID
                candidates, expected_acquisition_value = make_and_optimize_acqf(
                    override_qmc=True
                )
            else:
                raise

        gen_metadata = {}
        if expected_acquisition_value.numel() > 0:
            gen_metadata["expected_acquisition_value"] = (
                expected_acquisition_value.tolist()
            )

        return TorchGenResults(
            points=candidates.detach().cpu(),
            weights=torch.ones(n, dtype=self.dtype),
            gen_metadata=gen_metadata,
        )

    @copy_doc(TorchModel.best_point)
    def best_point(
        self,
        search_space_digest: SearchSpaceDigest,
        torch_opt_config: TorchOptConfig,
    ) -> Optional[Tensor]:
        if torch_opt_config.is_moo:
            raise NotImplementedError(
                "Best observed point is incompatible with MOO problems."
            )
        # Keep only the target values that belong to fidelity features.
        target_fidelities = {
            k: v
            for k, v in search_space_digest.target_values.items()
            if k in search_space_digest.fidelity_features
        }
        return self.best_point_recommender(  # pyre-ignore [28]
            model=self,
            bounds=search_space_digest.bounds,
            objective_weights=torch_opt_config.objective_weights,
            outcome_constraints=torch_opt_config.outcome_constraints,
            linear_constraints=torch_opt_config.linear_constraints,
            fixed_features=torch_opt_config.fixed_features,
            model_gen_options=torch_opt_config.model_gen_options,
            target_fidelities=target_fidelities,
        )

    @copy_doc(TorchModel.cross_validate)
    def cross_validate(  # pyre-ignore [14]: `search_space_digest` arg not needed here
        self,
        datasets: list[SupervisedDataset],
        X_test: Tensor,
        use_posterior_predictive: bool = False,
        **kwargs: Any,
    ) -> tuple[Tensor, Tensor]:
        if self._model is None:
            raise RuntimeError("Cannot cross-validate model that has not been fitted.")
        if self.refit_on_cv:
            state_dict = None
        else:
            # Copy the state so that the fitted model's parameters are not
            # shared with the cross-validation model.
            state_dict = deepcopy(self.model.state_dict())
        Xs, Ys, Yvars = _datasets_to_legacy_inputs(datasets=datasets)
        # Forward `prior` exactly as `fit` does, so that the cross-validation
        # model is constructed with the same specification as the fitted one.
        extra_kwargs = {} if self.prior is None else {"prior": self.prior}
        model = self.model_constructor(  # pyre-ignore: [28]
            Xs=Xs,
            Ys=Ys,
            Yvars=Yvars,
            task_features=self.task_features,
            state_dict=state_dict,
            fidelity_features=self.fidelity_features,
            metric_names=self.metric_names,
            refit_model=self.refit_on_cv,
            use_input_warping=self.use_input_warping,
            use_loocv_pseudo_likelihood=self.use_loocv_pseudo_likelihood,
            **extra_kwargs,
            **self._kwargs,
        )
        # pyre-ignore: [28]
        return self.model_predictor(
            model=model, X=X_test, use_posterior_predictive=use_posterior_predictive
        )

    def feature_importances(self) -> np.ndarray:
        """Return feature importances computed from the underlying model.

        Delegates to `get_feature_importances_from_botorch_model`, which raises
        a RuntimeError if no model has been set yet.
        """
        return get_feature_importances_from_botorch_model(model=self._model)

    @property
    def search_space_digest(self) -> SearchSpaceDigest:
        """The search space digest stored by `fit`.

        Raises:
            RuntimeError: If no digest has been stored yet.
        """
        if self._search_space_digest is None:
            raise RuntimeError(
                "`search_space_digest` is not initialized. Please fit the model first."
            )
        return self._search_space_digest

    @search_space_digest.setter
    def search_space_digest(self, value: SearchSpaceDigest) -> None:
        raise RuntimeError("Setting search_space_digest manually is disallowed.")

    @property
    def model(self) -> Model:
        """The underlying model.

        Raises:
            RuntimeError: If no model has been set yet.
        """
        if self._model is None:
            raise RuntimeError(
                "`model` is not initialized. Please fit the model first."
            )
        return self._model

    @model.setter
    def model(self, model: Model) -> None:
        self._model = model  # there are a few places that set model directly
def get_rounding_func(
    rounding_func: Optional[Callable[[Tensor], Tensor]],
) -> Optional[Callable[[Tensor], Tensor]]:
    """Wrap a per-point rounding function so it can be applied to batched input.

    Args:
        rounding_func: A callable applied to one slice along the last dimension
            of the input at a time, or None.

    Returns:
        None if `rounding_func` is None. Otherwise a callable that flattens all
        leading dimensions of its input, applies `rounding_func` to every
        resulting row, and restores the input's shape.
    """
    if rounding_func is None:
        return None

    # make sure rounding_func is properly applied to q- and t-batches
    def botorch_rounding_func(X: Tensor) -> Tensor:
        batch_shape, d = X.shape[:-1], X.shape[-1]
        # `reshape` (rather than `view`) also handles non-contiguous inputs,
        # e.g. tensors produced by `transpose` / `permute`.
        X_flat = X.reshape(-1, d)
        if X_flat.shape[0] == 0:
            # Nothing to round; `torch.stack` would raise on an empty list.
            return X.clone()
        X_round = torch.stack(
            [rounding_func(x) for x in X_flat]  # pyre-ignore: [16]
        )
        return X_round.reshape(*batch_shape, d)

    return botorch_rounding_func
def get_feature_importances_from_botorch_model(
    model: Union[Model, ModuleList, None],
) -> np.ndarray:
    """Get feature importances from a BoTorch model or a collection of models.

    Importances are computed as the inverse of each model's kernel
    lengthscales, normalized so that they sum to one over the last dimension.

    Args:
        model: BoTorch model to get feature importances from. A `ModelList`
            or a `ModuleList` is treated as a collection of models, each
            contributing its own lengthscales.

    Returns:
        The feature importances as a numpy array where each row sums to 1.

    Raises:
        RuntimeError: If `model` is None.
        NotImplementedError: If lengthscales cannot be read from a model's
            `covar_module` (or its `base_kernel`), or if their last dimension
            does not match that of the model's first training input.
    """
    if model is None:
        raise RuntimeError(
            "Cannot calculate feature_importances without a fitted model. "
            "Call `fit` first."
        )
    if isinstance(model, ModelList):
        models = model.models
    elif isinstance(model, ModuleList):
        # The signature admits a bare `ModuleList`; iterate over its members
        # instead of treating the container itself as a model.
        models = list(model)
    else:
        models = [model]
    lengthscales = []
    for m in models:
        try:
            # this can be a ModelList of a SAAS and STGP, so this is a necessary way
            # to get the lengthscale
            if hasattr(m.covar_module, "base_kernel"):
                ls = m.covar_module.base_kernel.lengthscale
            else:
                ls = m.covar_module.lengthscale
        except AttributeError:
            ls = None
        if ls is None or ls.shape[-1] != m.train_inputs[0].shape[-1]:
            # TODO: We could potentially set the feature importances to NaN in this
            # case, but this require knowing the batch dimension of this model.
            # Consider supporting in the future.
            raise NotImplementedError(
                "Failed to extract lengthscales from `m.covar_module` "
                "and `m.covar_module.base_kernel`"
            )
        if ls.ndim == 2:
            # Add a leading dimension so every entry can be concatenated
            # along dim 0 below.
            ls = ls.unsqueeze(0)
        if is_ensemble(m):
            # Take the median over the model batch dimension
            ls = torch.quantile(ls, q=0.5, dim=0, keepdim=True)
        lengthscales.append(ls)
    lengthscales = torch.cat(lengthscales, dim=0)
    feature_importances = (1 / lengthscales).detach().cpu()  # pyre-ignore
    # Make sure the sum of feature importances is 1.0 for each metric
    feature_importances /= feature_importances.sum(dim=-1, keepdim=True)
    return feature_importances.numpy()