Source code for ax.metrics.sklearn

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

from copy import deepcopy
from enum import Enum
from functools import lru_cache
from math import sqrt
from typing import Any, Dict, Tuple

import numpy as np
import pandas as pd
from ax.core.arm import Arm
from ax.core.base_trial import BaseTrial
from ax.core.data import Data
from ax.core.metric import Metric
from ax.utils.common.typeutils import checked_cast
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier, MLPRegressor


class SklearnModelType(Enum):
    RF: str = "rf"
    NN: str = "nn"
class SklearnDataset(Enum):
    DIGITS: str = "digits"
    BOSTON: str = "boston"
    CANCER: str = "cancer"
@lru_cache(maxsize=8)
def _get_data(dataset: SklearnDataset) -> Dict[str, np.ndarray]:
    """Return sklearn dataset, loading and caching if necessary."""
    if dataset is SklearnDataset.DIGITS:
        return datasets.load_digits()
    elif dataset is SklearnDataset.BOSTON:
        # NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in
        # 1.2, so this branch requires an older scikit-learn release.
        return datasets.load_boston()
    elif dataset is SklearnDataset.CANCER:
        return datasets.load_breast_cancer()
    else:
        raise NotImplementedError(
            f"{dataset.value} is not a currently supported dataset."
        )
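# Note: because _get_data is decorated with lru_cache, repeated calls with the
# same SklearnDataset member return the same cached object rather than
# reloading the data from disk. For example:
#
#     first = _get_data(SklearnDataset.DIGITS)
#     second = _get_data(SklearnDataset.DIGITS)
#     first is second  # True; the second call is served from the cache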
class SklearnMetric(Metric):
    """A metric that trains and evaluates an sklearn model.

    The evaluation metric is the k-fold cross-validation "score". The scoring
    function depends on the model type and task type (e.g. classification vs.
    regression), but higher scores are better. See the scikit-learn
    documentation for supported parameters. In addition, this metric supports
    tuning the hidden layer width (hidden_layer_size) and the number of hidden
    layers (num_hidden_layers) of a NN model. A usage sketch appears at the
    end of this module.
    """

    def __init__(
        self,
        name: str,
        lower_is_better: bool = False,
        model_type: SklearnModelType = SklearnModelType.RF,
        dataset: SklearnDataset = SklearnDataset.DIGITS,
        observed_noise: bool = False,
        num_folds: int = 5,
    ) -> None:
        """Initialize metric.

        Args:
            name: Name of the metric.
            lower_is_better: Flag for metrics which should be minimized.
            model_type: Sklearn model type.
            dataset: Sklearn dataset for training/evaluating the model.
            observed_noise: A boolean indicating whether to return the SE of
                the mean k-fold cross-validation score.
            num_folds: The number of cross-validation folds.
        """
        super().__init__(name=name, lower_is_better=lower_is_better)
        self.dataset = dataset
        self.model_type = model_type
        self.num_folds = num_folds
        self.observed_noise = observed_noise
        # Only the Boston housing dataset is a regression task; digits and
        # breast cancer are classification tasks.
        if self.dataset is SklearnDataset.DIGITS:
            regression = False
        elif self.dataset is SklearnDataset.BOSTON:
            regression = True
        elif self.dataset is SklearnDataset.CANCER:
            regression = False
        else:
            raise NotImplementedError(
                f"{self.dataset.value} is not a currently supported dataset."
            )
        if model_type is SklearnModelType.NN:
            if regression:
                self._model_cls = MLPRegressor
            else:
                self._model_cls = MLPClassifier
        elif model_type is SklearnModelType.RF:
            if regression:
                self._model_cls = RandomForestRegressor
            else:
                self._model_cls = RandomForestClassifier
        else:
            raise NotImplementedError(
                f"{model_type.value} is not a currently supported model type."
            )
    def clone(self) -> SklearnMetric:
        # Pass along noise/fold settings so the clone evaluates identically.
        return self.__class__(
            name=self._name,
            lower_is_better=checked_cast(bool, self.lower_is_better),
            model_type=self.model_type,
            dataset=self.dataset,
            observed_noise=self.observed_noise,
            num_folds=self.num_folds,
        )
    def fetch_trial_data(
        self, trial: BaseTrial, noisy: bool = True, **kwargs: Any
    ) -> Data:
        arm_names = []
        means = []
        sems = []
        for name, arm in trial.arms_by_name.items():
            arm_names.append(name)
            # TODO: Consider parallelizing evaluation of large batches
            # (e.g. via ProcessPoolExecutor)
            mean, sem = self.train_eval(arm=arm)
            means.append(mean)
            sems.append(sem)
        df = pd.DataFrame(
            {
                "arm_name": arm_names,
                "metric_name": self._name,
                "mean": means,
                "sem": sems,
                "trial_index": trial.index,
            }
        )
        return Data(df=df)
    def train_eval(self, arm: Arm) -> Tuple[float, float]:
        """Train and evaluate model.

        Args:
            arm: An arm specifying the parameters to evaluate.

        Returns:
            A two-element tuple containing:
                - The mean k-fold CV score.
                - The SE of the mean k-fold CV score if observed_noise is
                  True and 'nan' otherwise.
        """
        data = _get_data(self.dataset)  # cached
        X, y = data["data"], data["target"]
        params: Dict[str, Any] = deepcopy(arm.parameters)
        if self.model_type == SklearnModelType.NN:
            # Translate the tunable hidden_layer_size / num_hidden_layers
            # parameters into sklearn's hidden_layer_sizes argument.
            hidden_layer_size = params.pop("hidden_layer_size", None)
            if hidden_layer_size is not None:
                hidden_layer_size = checked_cast(int, hidden_layer_size)
                num_hidden_layers = checked_cast(
                    int, params.pop("num_hidden_layers", 1)
                )
                params["hidden_layer_sizes"] = [hidden_layer_size] * num_hidden_layers
        model = self._model_cls(**params)
        cv_scores = cross_val_score(model, X, y, cv=self.num_folds)
        mean = cv_scores.mean()
        # SE of the mean CV score: std of fold scores / sqrt(number of folds).
        sem = (
            cv_scores.std() / sqrt(cv_scores.shape[0])
            if self.observed_noise
            else float("nan")
        )
        return mean, sem
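# Minimal usage sketch (illustrative only: the metric name and parameter
# values below are arbitrary assumptions, not defaults of this module). It
# evaluates a two-hidden-layer MLP on the digits dataset by calling
# train_eval directly, outside of a full Ax experiment.
if __name__ == "__main__":
    metric = SklearnMetric(
        name="nn_digits_cv_score",  # hypothetical name
        model_type=SklearnModelType.NN,
        dataset=SklearnDataset.DIGITS,
        observed_noise=True,  # also report the SE of the mean CV score
    )
    # hidden_layer_size and num_hidden_layers are the NN-specific parameters
    # that train_eval translates into sklearn's hidden_layer_sizes.
    arm = Arm(
        name="0_0",
        parameters={"hidden_layer_size": 32, "num_hidden_layers": 2},
    )
    mean, sem = metric.train_eval(arm=arm)
    print(f"Mean {metric.num_folds}-fold CV score: {mean:.4f} (SEM: {sem:.4f})")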