Source code for ax.service.utils.scheduler_options

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from dataclasses import dataclass, field
from enum import Enum
from logging import INFO
from typing import Any, Dict, Optional

from ax.early_stopping.strategies import BaseEarlyStoppingStrategy
from ax.global_stopping.strategies.base import BaseGlobalStoppingStrategy


[docs]class TrialType(Enum):
    TRIAL = 0
    BATCH_TRIAL = 1


[docs]@dataclass(frozen=True)
class SchedulerOptions:
    """Settings for a scheduler instance.

    Attributes:
        max_pending_trials: Maximum number of pending trials the scheduler
            can have ``STAGED`` or ``RUNNING`` at once, required. If looking
            to use ``Runner.poll_available_capacity`` as a primary guide for
            how many trials should be pending at a given time, set this limit
            to a high number, as an upper bound on number of trials that
            should not be exceeded.
        trial_type: Type of trials (1-arm ``Trial`` or multi-arm ``Batch
            Trial``) that will be deployed using the scheduler. Defaults
            to 1-arm `Trial`. NOTE: use ``BatchTrial`` only if need to
            evaluate multiple arms *together*, e.g. in an A/B-test
            influenced by data nonstationarity. For cases where just
            deploying multiple arms at once is beneficial but the trials
            are evaluated *independently*, implement ``run_trials`` method
            in scheduler subclass, to deploy multiple 1-arm trials at
            the same time.
        batch_size: If using BatchTrial the number of arms to be generated and
            deployed per trial.
        total_trials: Limit on number of trials a given ``Scheduler``
            should run. If no stopping criteria are implemented on
            a given scheduler, exhaustion of this number of trials
            will be used as default stopping criterion in
            ``Scheduler.run_all_trials``. Required to be non-null if
            using ``Scheduler.run_all_trials`` (not required for
            ``Scheduler.run_n_trials``).
        tolerated_trial_failure_rate: Fraction of trials in this
            optimization that are allowed to fail without the whole
            optimization ending. Expects value between 0 and 1.
            NOTE: Failure rate checks begin once
            min_failed_trials_for_failure_rate_check trials have
            failed; after that point if the ratio of failed trials
            to total trials ran so far exceeds the failure rate,
            the optimization will halt.
        min_failed_trials_for_failure_rate_check: The minimum number
            of trials that must fail in `Scheduler` in order to start
            checking failure rate.
        log_filepath: File, to which to write optimization logs.
        logging_level: Minimum level of logging statements to log,
            defaults to ``logging.INFO``.
        ttl_seconds_for_trials: Optional TTL for all trials created
            within this ``Scheduler``, in seconds. Trials that remain
            ``RUNNING`` for more than their TTL seconds will be marked
            ``FAILED`` once the TTL elapses and may be re-suggested by
            the Ax optimization models.
        init_seconds_between_polls: Initial wait between rounds of
            polling, in seconds. Relevant if using the default wait-
            for-completed-runs functionality of the base ``Scheduler``
            (if ``wait_for_completed_trials_and_report_results`` is not
            overridden). With the default waiting, every time a poll
            returns that no trial evaluations completed, wait
            time will increase; once some completed trial evaluations
            are found, it will reset back to this value. Specify 0
            to not introduce any wait between polls.
        min_seconds_before_poll: Minimum number of seconds between
            beginning to run a trial and the first poll to check
            trial status.
        timeout_hours: Number of hours after which the optimization will abort.
        seconds_between_polls_backoff_factor: The rate at which the poll
            interval increases.
        run_trials_in_batches: If True and ``poll_available_capacity`` is
            implemented to return non-null results, trials will be dispatched
            in groups via `run_trials` instead of one-by-one via ``run_trial``.
            This allows to save time, IO calls or computation in cases where
            dispatching trials in groups is more efficient then sequential
            deployment. The size of the groups will be determined as
            the minimum of ``self.poll_available_capacity()`` and the number
            of generator runs that the generation strategy is able to produce
            without more data or reaching its allowed max paralellism limit.
        debug_log_run_metadata: Whether to log run_metadata for debugging purposes.
        early_stopping_strategy: A ``BaseEarlyStoppingStrategy`` that determines
            whether a trial should be stopped given the current state of
            the experiment. Used in ``should_stop_trials_early``.
        global_stopping_strategy: A ``BaseGlobalStoppingStrategy`` that determines
            whether the full optimization should be stopped or not.
        suppress_storage_errors_after_retries: Whether to fully suppress SQL
            storage-related errors if encountered, after retrying the call
            multiple times. Only use if SQL storage is not important for the given
            use case, since this will only log, but not raise, an exception if
            it's encountered while saving to DB or loading from it.
        wait_for_running_trials: Whether the scheduler should wait for running trials
            or exit.
        fetch_kwargs: Kwargs to be used when fetching data.
        validate_metrics: Whether to raise an error if there is a problem with the
            metrics attached to the experiment.
        status_quo_weight: The weight of the status quo arm. This is only used
            if the scheduler is using a BatchTrial. This requires that the status_quo
            be set on the experiment.
        enforce_immutable_search_space_and_opt_config: Whether to enforce that the
            search space and optimization config are immutable.  If true, will add
            `"immutable_search_space_and_opt_config": True` to experiment properties
    """

    max_pending_trials: int = 10
    trial_type: TrialType = TrialType.TRIAL
    batch_size: Optional[int] = None
    total_trials: Optional[int] = None
    tolerated_trial_failure_rate: float = 0.5
    min_failed_trials_for_failure_rate_check: int = 5
    log_filepath: Optional[str] = None
    logging_level: int = INFO
    ttl_seconds_for_trials: Optional[int] = None
    init_seconds_between_polls: Optional[int] = 1
    min_seconds_before_poll: float = 1.0
    seconds_between_polls_backoff_factor: float = 1.5
    timeout_hours: Optional[float] = None
    run_trials_in_batches: bool = False
    debug_log_run_metadata: bool = False
    early_stopping_strategy: Optional[BaseEarlyStoppingStrategy] = None
    global_stopping_strategy: Optional[BaseGlobalStoppingStrategy] = None
    suppress_storage_errors_after_retries: bool = False
    wait_for_running_trials: bool = True
    fetch_kwargs: Dict[str, Any] = field(default_factory=dict)
    validate_metrics: bool = True
    status_quo_weight: float = 0.0
    enforce_immutable_search_space_and_opt_config: bool = True
Ax

0.3.7

Source code for ax.service.utils.scheduler_options

Ax

Navigation

Related Topics