Source code for ax.analysis.healthcheck.search_space_analysis

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

import json
from typing import Union

import numpy as np
import pandas as pd

from ax.analysis.analysis import AnalysisCardLevel
from ax.analysis.healthcheck.healthcheck_analysis import (
    HealthcheckAnalysis,
    HealthcheckAnalysisCard,
    HealthcheckStatus,
)
from ax.core.experiment import Experiment
from ax.core.generation_strategy_interface import GenerationStrategyInterface

from ax.core.parameter import ChoiceParameter, Parameter, RangeParameter
from ax.core.parameter_constraint import ParameterConstraint
from ax.core.search_space import SearchSpace
from ax.core.types import TParameterization
from ax.exceptions.core import UserInputError
from ax.utils.common.typeutils import checked_cast


class SearchSpaceAnalysis(HealthcheckAnalysis):
    r"""
    Analysis for checking whether the search space of the experiment should be
    expanded.

    It checks whether the suggested parameters land at the boundary of the
    search space and recommends expanding the search space if the proportion
    of the suggested parameters that land at the boundary is above the
    threshold.
    """

    def __init__(
        self, trial_index: int, boundary_proportion_threshold: float = 0.5
    ) -> None:
        r"""
        Args:
            trial_index: The index of the trial to analyze.
            boundary_proportion_threshold: The threshold on the proportion of
                suggested candidates that land on the boundary of the search
                space for us to recommend expanding the search space.
        """
        self.trial_index = trial_index
        self.boundary_proportion_threshold = boundary_proportion_threshold

    def compute(
        self,
        experiment: Experiment | None = None,
        generation_strategy: GenerationStrategyInterface | None = None,
    ) -> HealthcheckAnalysisCard:
        r"""
        Args:
            experiment: Ax experiment. Required; its search space and the arms
                of the trial at ``self.trial_index`` are analyzed.
            generation_strategy: Ax generation strategy. Not used by this
                analysis.

        Returns:
            A HealthcheckAnalysisCard object with the information on the
            parameters and parameter constraints whose boundaries are
            recommended to be expanded. When no boundary proportion is
            strictly above the threshold, the card reports a passing status
            and its dataframe contains only a ``status`` column.

        Raises:
            UserInputError: If ``experiment`` is None.
        """
        if experiment is None:
            raise UserInputError("SearchSpaceAnalysis requires an Experiment.")

        # Defaults describe the passing case; they are overwritten below if any
        # boundary is hit more often than the threshold allows.
        status = HealthcheckStatus.PASS
        subtitle = "Search space does not need to be updated."
        title_status = "Success"
        level = AnalysisCardLevel.LOW
        df = pd.DataFrame({"status": [status]})

        trial = experiment.trials[self.trial_index]
        parametrizations = [arm.parameters for arm in trial.arms]
        boundary_proportions_df = search_space_boundary_proportions(
            search_space=experiment.search_space,
            parametrizations=parametrizations,
        )

        if np.any(
            boundary_proportions_df["proportion"] > self.boundary_proportion_threshold
        ):
            subtitle = boundary_proportions_message(
                boundary_proportions_df=boundary_proportions_df,
                boundary_proportion_threshold=self.boundary_proportion_threshold,
            )
            status = HealthcheckStatus.WARNING
            title_status = "Warning"
            # Take an explicit copy of the column subset: adding a column to
            # the bare selection triggers pandas' SettingWithCopyWarning.
            df = boundary_proportions_df[["boundary", "proportion", "bound"]].copy()
            df["status"] = status

        return HealthcheckAnalysisCard(
            name="SearchSpaceAnalysis",
            title=f"Ax Search Space Analysis {title_status}",
            blob=json.dumps({"status": status}),
            subtitle=subtitle,
            df=df,
            level=level,
            attributes={"trial_index": self.trial_index},
        )
def search_space_boundary_proportions(
    search_space: SearchSpace,
    parametrizations: list[TParameterization],
    tol: float = 1e-6,
) -> pd.DataFrame:
    r"""
    Compute the fractions of parametrizations that landed at the parameter and
    parameter constraint boundaries of the search space.

    Args:
        search_space: Search space.
        parametrizations: A list of suggested parametrizations (parameter
            values). If the list is empty, every proportion is reported as 0.
        tol: Tolerance for deciding that a value sits on a boundary. For range
            parameters it is relative to the width of the range
            (``tol * (upper - lower)``); for parameter constraints it is
            compared directly against ``|weighted_sum - bound|``. Ordered
            choice parameters are compared for exact equality and ignore
            ``tol``.

    Returns:
        A dataframe with the columns ``parameter_or_constraint``, ``boundary``,
        ``proportion`` and ``bound``. It contains two rows (lower and upper
        limit) for each range parameter and each ordered choice parameter, and
        one row (labelled ``"upper"``) for each parameter constraint. Other
        parameters are skipped.
    """
    parameters_and_constraints = []
    boundaries = []
    proportions = []
    bounds = []
    num_parametrizations = len(parametrizations)

    def _proportion(count: int) -> float:
        # With no parametrizations nothing can sit on a boundary; report 0
        # rather than dividing by zero.
        if num_parametrizations == 0:
            return 0.0
        return count / num_parametrizations

    for parameter_name, parameter in search_space.parameters.items():
        if isinstance(parameter, RangeParameter):
            is_choice = False
            lower = parameter.lower
            upper = parameter.upper
            # Loop-invariant absolute tolerance derived from the range width.
            abs_tol = tol * (float(upper) - float(lower))
        elif isinstance(parameter, ChoiceParameter) and parameter.is_ordered:
            is_choice = True
            values = [checked_cast(Union[int, float], v) for v in parameter.values]
            lower = min(values)
            upper = max(values)
            abs_tol = 0.0  # unused: choice values are compared exactly
        else:
            continue

        num_lb = 0  # number of values at the parameter's lower bound
        num_ub = 0  # number of values at the parameter's upper bound
        for parametrization in parametrizations:
            value = parametrization[parameter_name]
            if value is None:
                continue
            value = float(value)
            if is_choice:
                # for choice parameters, we check if the value is equal to the
                # lower or upper bound
                num_lb += int(value == lower)
                num_ub += int(value == upper)
            # for range parameters, we check if the value is within the
            # tolerance; the lower bound takes precedence over the upper one
            elif abs(value - float(lower)) < abs_tol:
                num_lb += 1
            elif abs(value - float(upper)) < abs_tol:
                num_ub += 1

        parameters_and_constraints.extend([parameter] * 2)
        boundaries.extend(
            [f"{parameter_name} = {lower}", f"{parameter_name} = {upper}"]
        )
        proportions.extend([_proportion(num_lb), _proportion(num_ub)])
        bounds.extend(["lower", "upper"])

    for pc in search_space.parameter_constraints:
        weighted_sums = [
            sum(
                float(checked_cast(Union[int, float], parametrization[param]))
                * weight
                for param, weight in pc.constraint_dict.items()
            )
            for parametrization in parametrizations
        ]
        num_binding = sum(
            abs(weighted_sum - pc.bound) < tol for weighted_sum in weighted_sums
        )
        boundaries.append(
            " + ".join(f"{v}*{k}" for k, v in sorted(pc.constraint_dict.items()))
            + f" = {pc.bound}"
        )
        proportions.append(_proportion(num_binding))
        parameters_and_constraints.append(pc)
        bounds.append("upper")

    return pd.DataFrame(
        {
            "parameter_or_constraint": parameters_and_constraints,
            "boundary": boundaries,
            "proportion": proportions,
            "bound": bounds,
        }
    )
def boundary_proportions_message(
    boundary_proportions_df: pd.DataFrame,
    boundary_proportion_threshold: float = 0.5,
) -> str:
    r"""
    Construct a message explaining what parameter or parameter constraints
    bounds to change based on the proportions of the parametrizations that
    landed at the search space's boundaries. A proportion must be strictly
    above the threshold in order to recommend expanding the search space along
    the corresponding parameter or parameter constraint.

    Args:
        boundary_proportions_df: A dataframe with the following columns
            * parameter_or_constraint: the parameter or constraint object
                containing this row's search space boundary.
            * boundary: a string representation of the function defining this
                boundary.
            * proportion: the proportion of provided parameterizations that
                landed at this boundary.
            * bound: whether this is an upper or lower bound.
        boundary_proportion_threshold: The proportion of suggested
            parametrizations landing at a boundary that must be exceeded for
            us to recommend expanding the search space.

    Returns:
        A string explaining what parameter or parameter constraints bounds to
        change in order to expand the search space. The string is empty when
        no row exceeds the threshold.
    """
    parts: list[str] = []
    for _, row in boundary_proportions_df.iterrows():
        target = row["parameter_or_constraint"]
        prop = row["proportion"]
        # The messages below state that the proportion "exceeds" the
        # threshold, so only rows strictly above it are reported.
        if not prop > boundary_proportion_threshold:
            continue

        if isinstance(target, Parameter):
            bound = row["bound"]
            if bound == "lower":
                parts.append(
                    f"\n - Parameter {target.name} values are at their lower bound "
                    f"in {prop * 100:.2f}% of all suggested parameters, which exceeds "
                    f"the threshold of {boundary_proportion_threshold * 100:.2f}%. "
                    "Consider decreasing this lower bound of the search space and "
                    "re-generating the candidates inside the expanded search space. "
                )
            if bound == "upper":
                parts.append(
                    f"\n - Parameter {target.name} values are at its upper bound "
                    f"in {prop * 100:.2f}% of all suggested parameters, which exceeds "
                    f"the threshold of {boundary_proportion_threshold * 100:.2f}%. "
                    "Consider increasing this upper bound of the search space and "
                    "re-generating the candidates inside the expanded search space. "
                )
        elif isinstance(target, ParameterConstraint):
            parts.append(
                f"\n - Parameter constraint {target} is binding for {prop * 100:.2f}% "
                " of all suggested parameters, which exceeds the threshold of "
                f"{boundary_proportion_threshold * 100:.2f}%. "
                "Consider increasing this constraint bound and re-generating the "
                "candidates inside the expanded search space. "
            )
    return "".join(parts)