Source code for ax.utils.stats.model_fit_stats

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import Dict, Mapping, Optional, Protocol

import numpy as np
from scipy.stats import fisher_exact, norm, pearsonr, spearmanr

"""
################################ Model Fit Metrics ###############################
"""


class ModelFitMetricProtocol(Protocol):
    """Structural type for model fit metrics."""

    def __call__(
        self, y_obs: np.ndarray, y_pred: np.ndarray, se_pred: np.ndarray
    ) -> float: ...
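
# Example (illustrative sketch): because the protocol is structural, any plain
# function with a matching signature conforms; `_mean_abs_error` below is a
# hypothetical metric, not part of this module.
#
#     >>> def _mean_abs_error(
#     ...     y_obs: np.ndarray, y_pred: np.ndarray, se_pred: np.ndarray
#     ... ) -> float:
#     ...     return float(np.mean(np.abs(y_pred - y_obs)))
#     >>> metric: ModelFitMetricProtocol = _mean_abs_error  # passes type checking
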
def compute_model_fit_metrics(
    y_obs: Mapping[str, np.ndarray],
    y_pred: Mapping[str, np.ndarray],
    se_pred: Mapping[str, np.ndarray],
    fit_metrics_dict: Mapping[str, ModelFitMetricProtocol],
) -> Dict[str, Dict[str, float]]:
    """Computes the model fit metrics for each experimental metric in the input dicts.

    Args:
        y_obs: A dictionary mapping from experimental metric name to observed values.
        y_pred: A dictionary mapping from experimental metric name to predicted
            values.
        se_pred: A dictionary mapping from experimental metric name to predicted
            standard errors.
        fit_metrics_dict: A dictionary mapping from *model fit* metric name to a
            ModelFitMetricProtocol function that evaluates a model fit metric.

    Returns:
        A nested dictionary mapping from *model fit* and *experimental* metric names
        to their corresponding model fit metric values.
    """
    metric_names = list(y_obs.keys())
    return {
        fit_metric_name: {
            exp_metric_name: fit_metric(
                y_obs=y_obs[exp_metric_name],
                y_pred=y_pred[exp_metric_name],
                se_pred=se_pred[exp_metric_name],
            )
            for exp_metric_name in metric_names
        }
        for fit_metric_name, fit_metric in fit_metrics_dict.items()
    }
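
# Example usage (illustrative sketch with hypothetical data): evaluating two
# fit metrics for a single experimental metric named "latency". The result is
# keyed first by fit metric name, then by experimental metric name.
#
#     >>> y_obs = {"latency": np.array([1.0, 2.0, 3.0])}
#     >>> y_pred = {"latency": np.array([1.1, 1.9, 3.2])}
#     >>> se_pred = {"latency": np.array([0.1, 0.1, 0.1])}
#     >>> fit_metrics = {
#     ...     "coefficient_of_determination": coefficient_of_determination,
#     ...     "std_of_the_standardized_error": std_of_the_standardized_error,
#     ... }
#     >>> results = compute_model_fit_metrics(y_obs, y_pred, se_pred, fit_metrics)
#     >>> r2 = results["coefficient_of_determination"]["latency"]
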
def coefficient_of_determination(
    y_obs: np.ndarray,
    y_pred: np.ndarray,
    se_pred: Optional[np.ndarray] = None,
    eps: float = 1e-12,
) -> float:
    """Computes the coefficient of determination, the proportion of variance in
    `y_obs` accounted for by the predictions `y_pred`.

    Args:
        y_obs: An array of observations for a single metric.
        y_pred: An array of the predicted values corresponding to y_obs.
        se_pred: Not used, kept for API compatibility.
        eps: A small constant added to the denominator for numerical stability.

    Returns:
        The scalar coefficient of determination, "R squared".
    """
    ss_res = ((y_obs - y_pred) ** 2).sum()
    ss_tot = ((y_obs - y_obs.mean()) ** 2).sum()
    return 1 - (ss_res / (ss_tot + eps))
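
# Worked example (hypothetical values): with y_obs = [1, 2, 3] and
# y_pred = [1.1, 1.9, 3.2], ss_res = 0.01 + 0.01 + 0.04 = 0.06 and
# ss_tot = 1 + 0 + 1 = 2, so R^2 = 1 - 0.06 / 2 = 0.97 (up to `eps`).
#
#     >>> y_obs = np.array([1.0, 2.0, 3.0])
#     >>> y_pred = np.array([1.1, 1.9, 3.2])
#     >>> round(coefficient_of_determination(y_obs, y_pred), 3)
#     0.97
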
def mean_of_the_standardized_error(  # i.e. standardized bias
    y_obs: np.ndarray,
    y_pred: np.ndarray,
    se_pred: np.ndarray,
) -> float:
    """Computes the mean of the error standardized by the predictive standard
    deviation of the model `se_pred`. If the model makes good predictions and its
    uncertainty is quantified well, this quantity should be close to 0 and
    normally distributed.

    NOTE: This assumes that `se_pred` is the predictive standard deviation of
    the *observations* of the objective `y`, not the predictive standard
    deviation of the objective `f` itself. In practice, this will matter for
    very noisy observations.

    Args:
        y_obs: An array of observations for a single metric.
        y_pred: An array of the predicted values corresponding to y_obs.
        se_pred: An array of the standard errors of the predicted values.

    Returns:
        The scalar mean of the standardized error.
    """
    return ((y_obs - y_pred) / se_pred).mean()
def std_of_the_standardized_error(
    y_obs: np.ndarray,
    y_pred: np.ndarray,
    se_pred: np.ndarray,
) -> float:
    """Computes the standard deviation of the error standardized by the
    predictive standard deviation of the model `se_pred`. If the uncertainty is
    quantified well, this quantity should be close to 1.

    NOTE: This assumes that `se_pred` is the predictive standard deviation of
    the *observations* of the objective `y`, not the predictive standard
    deviation of the objective `f` itself. In practice, this will matter for
    very noisy observations.

    Args:
        y_obs: An array of observations for a single metric.
        y_pred: An array of the predicted values corresponding to y_obs.
        se_pred: An array of the standard errors of the predicted values.

    Returns:
        The scalar standard deviation of the standardized error.
    """
    return ((y_obs - y_pred) / se_pred).std()
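
# Calibration sketch (hypothetical data): for a well-calibrated model the
# standardized errors (y_obs - y_pred) / se_pred behave like draws from a
# standard normal, so `mean_of_the_standardized_error` should be near 0 and
# `std_of_the_standardized_error` near 1.
#
#     >>> rng = np.random.default_rng(0)
#     >>> y_pred = np.zeros(1000)
#     >>> se_pred = np.ones(1000)
#     >>> y_obs = y_pred + se_pred * rng.standard_normal(1000)
#     >>> m = mean_of_the_standardized_error(y_obs, y_pred, se_pred)  # approx. 0
#     >>> s = std_of_the_standardized_error(y_obs, y_pred, se_pred)  # approx. 1
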
def _mean_prediction_ci(
    y_obs: np.ndarray, y_pred: np.ndarray, se_pred: np.ndarray
) -> float:
    # Pyre does not allow float * np.ndarray.
    return float(np.mean(1.96 * 2 * se_pred / np.abs(y_obs)))


def _log_likelihood(
    y_obs: np.ndarray, y_pred: np.ndarray, se_pred: np.ndarray
) -> float:
    return float(np.sum(norm.logpdf(y_obs, loc=y_pred, scale=se_pred)))


def _mape(y_obs: np.ndarray, y_pred: np.ndarray, se_pred: np.ndarray) -> float:
    """Mean absolute percentage error"""
    eps = np.finfo(y_obs.dtype).eps
    return float(np.mean(np.abs(y_pred - y_obs) / np.abs(y_obs).clip(min=eps)))


def _mse(y_obs: np.ndarray, y_pred: np.ndarray, se_pred: np.ndarray) -> float:
    """Mean squared error"""
    return float(np.mean((y_pred - y_obs) ** 2))


def _wmape(y_obs: np.ndarray, y_pred: np.ndarray, se_pred: np.ndarray) -> float:
    """Weighted mean absolute percentage error"""
    eps = np.finfo(y_obs.dtype).eps
    return float(np.sum(np.abs(y_pred - y_obs)) / np.sum(np.abs(y_obs)).clip(min=eps))


def _total_raw_effect(
    y_obs: np.ndarray, y_pred: np.ndarray, se_pred: np.ndarray
) -> float:
    min_y_obs = np.min(y_obs)
    return float((np.max(y_obs) - min_y_obs) / min_y_obs)


def _correlation_coefficient(
    y_obs: np.ndarray, y_pred: np.ndarray, se_pred: np.ndarray
) -> float:
    with np.errstate(invalid="ignore"):
        rho, _ = pearsonr(y_pred, y_obs)
    return float(rho)


def _rank_correlation(
    y_obs: np.ndarray, y_pred: np.ndarray, se_pred: np.ndarray
) -> float:
    with np.errstate(invalid="ignore"):
        rho, _ = spearmanr(y_pred, y_obs)
    return float(rho)


def _fisher_exact_test_p(
    y_obs: np.ndarray, y_pred: np.ndarray, se_pred: np.ndarray
) -> float:
    """Perform a Fisher exact test on the contingency table constructed from
    agreement/disagreement between the predicted and observed data.

    Null hypothesis: agreement between the predicted and observed data is
    consistent with random chance.

    A p-value < 0.05 indicates that the null hypothesis may be rejected at the
    5% level of significance, i.e. the model's predictive performance would be
    unlikely to arise through random chance.

    Args:
        y_obs: NumPy array of observed data of shape (n_obs,).
        y_pred: NumPy array of predicted data of shape (n_obs,).
        se_pred: NumPy array of standard errors of shape (n_obs,), not used by
            the calculation but required for interface compatibility.

    Returns:
        The p-value of the Fisher exact test on the contingency table.
    """
    if y_obs.ndim != 1 or y_pred.ndim != 1:
        raise ValueError("y_obs and y_pred must be 1-dimensional.")
    n_half = len(y_obs) // 2
    top_obs = y_obs.argsort(axis=0)[-n_half:]
    top_est = y_pred.argsort(axis=0)[-n_half:]
    # Construct contingency table. Note that fp = fn by construction: each
    # true-top point missing from the predicted top half displaces exactly
    # one non-top point into it.
    tp = len(set(top_est).intersection(top_obs))
    fp = n_half - tp
    fn = n_half - tp
    tn = (len(y_obs) - n_half) - (n_half - tp)
    table = np.array([[tp, fp], [fn, tn]])
    # Compute the test statistic
    _, p = fisher_exact(table, alternative="greater")
    return float(p)
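
# Worked sketch of the contingency table in `_fisher_exact_test_p`
# (hypothetical values): with 6 observations, the "top half" is the 3 largest.
# The predictions below place 2 of the 3 true top points (indices 1 and 3) in
# the predicted top half, so tp = 2, fp = fn = 1, tn = 2; Fisher's exact test
# on [[2, 1], [1, 2]] with alternative="greater" gives p = 0.5, i.e. a 2-of-3
# overlap is no better than chance.
#
#     >>> y_obs = np.array([0.1, 0.9, 0.3, 0.8, 0.2, 0.7])
#     >>> y_pred = np.array([0.2, 0.8, 0.9, 0.7, 0.1, 0.3])
#     >>> p = _fisher_exact_test_p(y_obs, y_pred, se_pred=np.ones_like(y_obs))
#     >>> round(p, 3)
#     0.5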