Source code for ax.modelbridge.best_model_selector

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

from __future__ import annotations

from abc import ABC, abstractmethod
from collections.abc import Callable
from enum import Enum
from functools import partial
from typing import Any, Union

import numpy as np
import numpy.typing as npt
from ax.exceptions.core import UserInputError
from ax.modelbridge.model_spec import ModelSpec
from ax.utils.common.base import Base
from pyre_extensions import none_throws

# pyre-fixme[24]: Generic type `np.ndarray` expects 2 type parameters.
ARRAYLIKE = Union[np.ndarray, list[float], list[np.ndarray]]


class BestModelSelector(ABC, Base):
    """Abstract interface for objects that pick one ``ModelSpec`` out of many."""

    @abstractmethod
    def best_model(self, model_specs: list[ModelSpec]) -> ModelSpec:
        """Select the best ``ModelSpec`` among the candidates by some criterion.

        NOTE: Implementations are not required to hand back one of the objects
        contained in ``model_specs``. It may be possible to clone one of the
        given ``ModelSpec``-s and modify the copy to obtain one that performs
        better, in which case the returned object is a new one.

        Args:
            model_specs: The candidate ``ModelSpec``-s to choose from.

        Returns:
            The ``ModelSpec`` considered best by the implementation.
        """
def _as_enum_member(func: Callable[..., Any]) -> Any:
    """Mark ``func`` as an ``Enum`` member value on every Python version.

    ``functools.partial`` objects are method descriptors starting with
    Python 3.14, so a bare ``partial`` assigned in an ``Enum`` body is treated
    as a method there instead of being registered as a member (Python 3.13
    already emits a ``FutureWarning`` about this). Wrapping the callable in
    ``enum.member`` (available since Python 3.11) keeps it a member. On older
    Python versions ``enum.member`` does not exist and a bare ``partial`` is
    registered as a member as is, so ``func`` is returned unchanged.

    Args:
        func: The callable that should become the value of an enum member.

    Returns:
        ``enum.member(func)`` where available, otherwise ``func`` itself.
    """
    try:
        from enum import member
    except ImportError:  # Python < 3.11
        return func
    return member(func)


class ReductionCriterion(Enum):
    """An enum for callables that are used for aggregating diagnostics over metrics
    and selecting the best diagnostic in ``SingleDiagnosticBestModelSelector``.

    Each member is callable: calling it applies the underlying NumPy reduction
    (``np.mean``, ``np.min`` or ``np.max``) to the given values.

    NOTE: This is used to ensure serializability of the callables.
    """

    # NOTE: Plain functions assigned in an `Enum` body become methods, so the
    # callables are wrapped in `partial`; `_as_enum_member` additionally keeps
    # them registered as members on Python versions where `partial` itself is
    # a method descriptor.
    # pyre-fixme[35]: Target cannot be annotated.
    MEAN: Callable[[ARRAYLIKE], npt.NDArray] = _as_enum_member(partial(np.mean))
    # pyre-fixme[35]: Target cannot be annotated.
    MIN: Callable[[ARRAYLIKE], npt.NDArray] = _as_enum_member(partial(np.min))
    # pyre-fixme[35]: Target cannot be annotated.
    MAX: Callable[[ARRAYLIKE], npt.NDArray] = _as_enum_member(partial(np.max))

    def __call__(self, array_like: ARRAYLIKE) -> npt.NDArray:
        """Apply this member's reduction to ``array_like`` and return the result."""
        return self.value(array_like)
class SingleDiagnosticBestModelSelector(BestModelSelector):
    """Choose the best model using a single cross-validation diagnostic.

    The input is a list of ``ModelSpec``, each corresponding to one model.
    The specified diagnostic is extracted from each of the models,
    its values (each of which corresponds to a separate metric) are
    aggregated with the aggregation function, the best one is determined
    with the criterion, and the ``ModelSpec`` that produced the best
    diagnostic result is returned.

    Example:
     ::

        s = SingleDiagnosticBestModelSelector(
            diagnostic='Fisher exact test p',
            metric_aggregation=ReductionCriterion.MEAN,
            criterion=ReductionCriterion.MIN,
            model_cv_kwargs={"untransform": False},
        )
        best_model = s.best_model(model_specs=model_specs)

    Args:
        diagnostic: The name of the diagnostic to use, which should be
            a key in ``CVDiagnostic``.
        metric_aggregation: ``ReductionCriterion`` applied to the values of the
            diagnostic for a single model to produce a single number.
        criterion: ``ReductionCriterion`` used to determine which of the
            (aggregated) diagnostics is the best. Must be ``MIN`` or ``MAX``.
        model_cv_kwargs: Optional dictionary of kwargs to pass in while computing
            the cross validation diagnostics.

    Raises:
        UserInputError: If ``metric_aggregation`` or ``criterion`` is not a
            ``ReductionCriterion``, or if ``criterion`` is ``MEAN``.
    """

    def __init__(
        self,
        diagnostic: str,
        metric_aggregation: ReductionCriterion,
        criterion: ReductionCriterion,
        model_cv_kwargs: dict[str, Any] | None = None,
    ) -> None:
        self.diagnostic = diagnostic
        if not isinstance(metric_aggregation, ReductionCriterion) or not isinstance(
            criterion, ReductionCriterion
        ):
            raise UserInputError(
                "Both `metric_aggregation` and `criterion` must be "
                f"`ReductionCriterion`. Got {metric_aggregation=}, {criterion=}."
            )
        # A mean of the per-model values does not identify any single model,
        # so only MIN and MAX are accepted for picking the winner.
        if criterion == ReductionCriterion.MEAN:
            raise UserInputError(
                f"{criterion=} is not supported. Please use MIN or MAX."
            )
        self.metric_aggregation = metric_aggregation
        self.criterion = criterion
        self.model_cv_kwargs = model_cv_kwargs

    def best_model(self, model_specs: list[ModelSpec]) -> ModelSpec:
        """Return the best ``ModelSpec`` based on the specified diagnostic.

        ``cross_validate`` is called on every ``ModelSpec`` (with
        ``model_cv_kwargs``) before its ``diagnostics`` are read. Models whose
        aggregated diagnostic is NaN are never selected. If several models tie
        for the best value, the first of them in ``model_specs`` is returned.

        Args:
            model_specs: List of ``ModelSpec`` to choose from.

        Returns:
            The best ``ModelSpec`` based on the specified diagnostic.

        Raises:
            ValueError: If ``model_specs`` is empty, or if the aggregated
                diagnostic is NaN for every model.
        """
        if not model_specs:
            raise ValueError(
                "`model_specs` must contain at least one `ModelSpec` to choose from."
            )
        for model_spec in model_specs:
            model_spec.cross_validate(model_cv_kwargs=self.model_cv_kwargs)
        aggregated_diagnostic_values = [
            self.metric_aggregation(
                list(none_throws(model_spec.diagnostics)[self.diagnostic].values())
            )
            for model_spec in model_specs
        ]
        # NumPy reductions propagate NaN, and NaN never compares equal to
        # anything, so a single NaN entry would make the `index` lookup below
        # fail even when other models have valid diagnostics. Only non-NaN
        # values take part in the comparison.
        comparable_values = [
            value for value in aggregated_diagnostic_values if not np.isnan(value)
        ]
        if not comparable_values:
            raise ValueError(
                f"The aggregated `{self.diagnostic}` diagnostic is NaN for all "
                "models, so the best model cannot be determined."
            )
        best_diagnostic = self.criterion(comparable_values).item()
        # `index` returns the first match, so ties go to the earliest model.
        best_index = aggregated_diagnostic_values.index(best_diagnostic)
        return model_specs[best_index]