Source code for ax.benchmark.benchmark_result

# Copyright (c) Meta Platforms, Inc. and affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from dataclasses import dataclass
from typing import List, Tuple

import numpy as np
import pandas as pd
from ax.core.experiment import Experiment
from ax.utils.common.base import Base
from ax.utils.common.equality import equality_typechecker

# NOTE: Do not add `from __future__ import annotations` to this file. Adding
# `annotations` postpones evaluation of types and will break FBLearner's usage of
# `BenchmarkResult` as return type annotation, used for serialization and rendering
# in the UI.

@dataclass(frozen=True)
class BenchmarkResult(Base):
    """The result of a single optimization loop from one
    (BenchmarkProblem, BenchmarkMethod) pair. More information will be added to the
    BenchmarkResult as the suite develops.
    """

    name: str
    experiment: Experiment

    # Tracks best point if single-objective problem, max hypervolume if MOO
    optimization_trace: np.ndarray
    fit_time: float
    gen_time: float

    @equality_typechecker
    def __eq__(self, other: Base) -> bool:
        """Return True when ``other`` is a BenchmarkResult whose fields all match.

        Fields are compared in declaration order and the comparison
        short-circuits on the first mismatch.
        """
        if not isinstance(other, BenchmarkResult):
            return False

        return (
            self.name == other.name
            and self.experiment == other.experiment
            # `np.array_equal` returns False for traces of different shapes
            # instead of broadcasting (or raising), so `__eq__` stays total.
            and np.array_equal(self.optimization_trace, other.optimization_trace)
            and self.fit_time == other.fit_time
            and self.gen_time == other.gen_time
        )
@dataclass(frozen=True)
class AggregatedBenchmarkResult(Base):
    """The result of a benchmark test, or series of replications. Scalar data
    present in the BenchmarkResult is here represented as (mean, sem) pairs. More
    information will be added to the AggregatedBenchmarkResult as the suite develops.
    """

    name: str
    experiments: List[Experiment]

    # median, mean, sem columns; one row per optimization-trace entry
    optimization_trace: pd.DataFrame

    # (mean, sem) pairs
    fit_time: Tuple[float, float]
    gen_time: Tuple[float, float]

    @equality_typechecker
    def __eq__(self, other: Base) -> bool:
        """Return True when ``other`` is an AggregatedBenchmarkResult whose
        fields all match.

        Fields are compared in declaration order and the comparison
        short-circuits on the first mismatch.
        """
        if not isinstance(other, AggregatedBenchmarkResult):
            return False

        return (
            self.name == other.name
            and self.experiments == other.experiments
            # Element-wise comparison reduced over rows, then over columns.
            and self.optimization_trace.eq(other.optimization_trace).all().all()
            and self.fit_time == other.fit_time
            and self.gen_time == other.gen_time
        )

    @classmethod
    def from_benchmark_results(
        cls,
        results: List[BenchmarkResult],
    ) -> "AggregatedBenchmarkResult":
        """Aggregate replications of one benchmark into a single result.

        Args:
            results: The individual replications. The name and the trace
                length are taken from the first element, so the list must be
                non-empty and every trace must be at least as long as the
                first one.

        Returns:
            An AggregatedBenchmarkResult whose ``optimization_trace`` holds the
            per-entry median, mean and standard error across replications,
            and whose ``fit_time`` / ``gen_time`` are (mean, sem) pairs.

        Raises:
            IndexError: If ``results`` is empty, or a trace is shorter than
                the first replication's trace.
        """
        trace_length = len(results[0].optimization_trace)

        # values_per_step[i] collects entry ``i`` of every replication's
        # trace. It is built once and reused for all three summary statistics.
        values_per_step = [
            [result.optimization_trace[i] for result in results]
            for i in range(trace_length)
        ]

        return cls(
            name=results[0].name,
            experiments=[result.experiment for result in results],
            optimization_trace=pd.DataFrame(
                {
                    "median": [np.median(step) for step in values_per_step],
                    "mean": [np.mean(step) for step in values_per_step],
                    "sem": [
                        cls._series_to_sem(series=step) for step in values_per_step
                    ],
                }
            ),
            fit_time=cls._series_to_mean_sem(
                series=[result.fit_time for result in results]
            ),
            gen_time=cls._series_to_mean_sem(
                series=[result.gen_time for result in results]
            ),
        )

    @staticmethod
    def _series_to_mean_sem(series: List[float]) -> Tuple[float, float]:
        """Return the (mean, standard error of the mean) pair for ``series``."""
        return (
            np.mean(series),
            AggregatedBenchmarkResult._series_to_sem(series=series),
        )

    @staticmethod
    def _series_to_sem(series: List[float]) -> float:
        """Return the standard error of the mean of ``series``.

        Computed as the sample standard deviation (``ddof=1``) divided by the
        square root of the number of observations.
        """
        return np.std(series, ddof=1) / np.sqrt(len(series))
@dataclass(frozen=True)
class ScoredBenchmarkResult(AggregatedBenchmarkResult):
    """An AggregatedBenchmarkResult normalized against some baseline method (for the
    same problem), typically Sobol. The score is calculated in such a way that 0
    corresponds to performance equivalent with the baseline and 100 indicates the
    true optimum was found.
    """

    baseline_result: AggregatedBenchmarkResult
    score: np.ndarray

    @equality_typechecker
    def __eq__(self, other: Base) -> bool:
        """Return True when ``other`` is a ScoredBenchmarkResult that matches on
        the inherited fields as well as on ``baseline_result`` and ``score``.
        """
        if not isinstance(other, ScoredBenchmarkResult):
            return False

        return (
            super().__eq__(other)
            and self.baseline_result == other.baseline_result
            # `np.array_equal` returns False for score arrays of different
            # shapes instead of broadcasting (or raising).
            and np.array_equal(self.score, other.score)
        )

    @classmethod
    def from_result_and_baseline(
        cls,
        aggregated_result: AggregatedBenchmarkResult,
        baseline_result: AggregatedBenchmarkResult,
        optimum: float,
    ) -> "ScoredBenchmarkResult":
        """Score an aggregated result against a baseline and a known optimum.

        Args:
            aggregated_result: The result to score; its name, experiments,
                optimization trace and timings are copied into the returned
                object unchanged.
            baseline_result: The baseline to normalize against.
            optimum: The optimal objective value for the problem.

        Returns:
            A ScoredBenchmarkResult whose ``score`` is
            ``100 * (1 - (mean - optimum) / (baseline - optimum))`` evaluated
            on the "mean" columns of the two optimization traces.
        """
        result_mean = aggregated_result.optimization_trace["mean"]

        # Only use as many baseline entries as the scored trace has.
        baseline = baseline_result.optimization_trace["mean"][: len(result_mean)]

        score = (
            100 * (1 - (result_mean - optimum) / (baseline - optimum))
        ).to_numpy()

        return cls(
            name=aggregated_result.name,
            experiments=aggregated_result.experiments,
            optimization_trace=aggregated_result.optimization_trace,
            fit_time=aggregated_result.fit_time,
            gen_time=aggregated_result.gen_time,
            baseline_result=baseline_result,
            score=score,
        )