Source code for ax.benchmark.benchmark_method
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# pyre-strict

from dataclasses import dataclass

from ax.core.experiment import Experiment
from ax.core.optimization_config import (
    MultiObjectiveOptimizationConfig,
    OptimizationConfig,
)
from ax.core.types import TParameterization
from ax.modelbridge.generation_strategy import GenerationStrategy
from ax.service.utils.best_point_mixin import BestPointMixin
from ax.service.utils.scheduler_options import SchedulerOptions, TrialType
from ax.utils.common.base import Base
from pyre_extensions import none_throws
@dataclass(frozen=True)
class BenchmarkMethod(Base):
    """Benchmark method, represented in terms of an Ax generation strategy
    (which tells us which models to use when) and scheduler options (which tell
    us extra execution information like maximum parallelism, early stopping
    configuration, etc.).

    Note: If `BenchmarkMethod.scheduler_options.total_trials` is less than
    `BenchmarkProblem.num_trials`, then only the number of trials specified in
    the former will be run.

    Args:
        name: String description.
        generation_strategy: The `GenerationStrategy` to use.
        scheduler_options: `SchedulerOptions` that specify options such as
            `max_pending_trials`, `timeout_hours`, and `batch_size`. Can be
            generated with sensible defaults for benchmarking with
            `get_benchmark_scheduler_options`.
        distribute_replications: Indicates whether the replications should be
            run in a distributed manner. Ax itself does not use this attribute.
        use_model_predictions_for_best_point: Whether to use model predictions
            with `get_pareto_optimal_parameters` (if multi-objective) or
            `BestPointMixin._get_best_trial` (if single-objective). Note,
            however, that best-point selection is not currently supported for
            multi-objective problems, and `get_pareto_optimal_parameters` will
            raise a `NotImplementedError`.
    """

    name: str
    generation_strategy: GenerationStrategy
    scheduler_options: SchedulerOptions
    distribute_replications: bool = False
    use_model_predictions_for_best_point: bool = False
    def get_best_parameters(
        self,
        experiment: Experiment,
        optimization_config: OptimizationConfig,
        n_points: int,
    ) -> list[TParameterization]:
        """
        Get ``n_points`` promising points. NOTE: Only SOO with ``n_points = 1``
        is supported.

        The expected use case is that these points will be evaluated against an
        oracle for hypervolume (if multi-objective) or for the value of the
        best parameter (if single-objective).

        For multi-objective cases, ``n_points > 1`` is needed. For SOO,
        ``n_points > 1`` reflects setups where we can choose some points, which
        will then be evaluated noiselessly or at high fidelity, and then use
        the best one.

        Args:
            experiment: The experiment to get the data from. This should
                contain values that would be observed in a realistic setting
                and not contain oracle values.
            optimization_config: The ``optimization_config`` for the
                corresponding ``BenchmarkProblem``.
            n_points: The number of points to return.
        """
        if isinstance(optimization_config, MultiObjectiveOptimizationConfig):
            raise NotImplementedError(
                "BenchmarkMethod.get_pareto_optimal_parameters is not currently "
                "supported for multi-objective problems."
            )

        if n_points != 1:
            raise NotImplementedError(
                f"Currently only n_points=1 is supported. Got {n_points=}."
            )

        # SOO, n=1 case.
        # Note: This has the same effect as Scheduler.get_best_parameters
        result = BestPointMixin._get_best_trial(
            experiment=experiment,
            generation_strategy=self.generation_strategy,
            optimization_config=optimization_config,
            use_model_predictions=self.use_model_predictions_for_best_point,
        )

        if result is None:
            # This can happen if no points are predicted to satisfy all outcome
            # constraints.
            return []

        i, params, prediction = none_throws(result)
        return [params]
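

# Illustrative sketch (not part of the original module): extracting the single
# recommended point after a benchmark replication has run. `experiment` is
# assumed to be an already-run, single-objective Ax `Experiment` with an
# attached optimization config; the helper name below is hypothetical.
def _example_extract_best_point(
    method: BenchmarkMethod, experiment: Experiment
) -> TParameterization | None:
    best = method.get_best_parameters(
        experiment=experiment,
        optimization_config=none_throws(experiment.optimization_config),
        n_points=1,  # only SOO with n_points=1 is supported
    )
    # An empty list means no point was predicted to satisfy all outcome
    # constraints.
    return best[0] if best else None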
def get_benchmark_scheduler_options(
    timeout_hours: int = 4,
    batch_size: int = 1,
) -> SchedulerOptions:
    """The typical ``SchedulerOptions`` used in benchmarking.

    Currently, regardless of batch size, all pending trials must complete
    before new ones are generated. That is, when ``batch_size > 1``, the design
    is "batch sequential", and when ``batch_size = 1``, the design is "fully
    sequential."

    Args:
        timeout_hours: The maximum amount of time (in hours) to run each
            benchmark replication. Defaults to 4 hours.
        batch_size: Number of trials to generate at once.
    """
    return SchedulerOptions(
        # No new candidates can be generated while any are pending.
        # If batched, an entire batch must finish before the next can be
        # generated.
        max_pending_trials=1,
        # Do not throttle, as is often necessary when polling real endpoints.
        init_seconds_between_polls=0,
        min_seconds_before_poll=0,
        timeout_hours=timeout_hours,
        trial_type=TrialType.TRIAL if batch_size == 1 else TrialType.BATCH_TRIAL,
        batch_size=batch_size,
    )
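

# Illustrative sketch (not part of the original module): assembling a
# `BenchmarkMethod` from a `GenerationStrategy` and the default benchmarking
# scheduler options. The Sobol-only strategy and the factory name below are
# placeholders chosen for illustration, not a recommended benchmark setup.
def _example_sobol_benchmark_method() -> BenchmarkMethod:
    from ax.modelbridge.generation_strategy import GenerationStep
    from ax.modelbridge.registry import Models

    generation_strategy = GenerationStrategy(
        name="Sobol",
        steps=[GenerationStep(model=Models.SOBOL, num_trials=-1)],
    )
    return BenchmarkMethod(
        name=generation_strategy.name,
        generation_strategy=generation_strategy,
        # Fully sequential (batch_size=1), 4-hour replication timeout.
        scheduler_options=get_benchmark_scheduler_options(),
        distribute_replications=False,
    )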