Source code for ax.plot.feature_importances

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

from logging import Logger
from typing import Any, Dict, Optional, Union

import numpy as np
import pandas as pd
import plotly.graph_objs as go
from ax.core.parameter import ChoiceParameter
from ax.exceptions.core import NoDataError
from ax.modelbridge import ModelBridge
from ax.plot.base import AxPlotConfig, AxPlotTypes
from ax.plot.helper import compose_annotation
from ax.utils.common.logger import get_logger
from plotly import subplots

logger: Logger = get_logger(__name__)


[docs]def plot_feature_importance_plotly(df: pd.DataFrame, title: str) -> go.Figure: if df.empty: raise NoDataError("No Data on Feature Importances found.") df.set_index(df.columns[0], inplace=True) data = [ go.Bar(y=df.index, x=df[column_name], name=column_name, orientation="h") for column_name in df.columns ] fig = subplots.make_subplots( rows=len(df.columns), cols=1, subplot_titles=df.columns, print_grid=False, shared_xaxes=True, ) for idx, item in enumerate(data): fig.append_trace(item, idx + 1, 1) # pyre-ignore[16] fig.layout.showlegend = False fig.layout.margin = go.layout.Margin( l=8 * min(max(len(idx) for idx in df.index), 75) # noqa E741 ) fig.layout.title = title return fig
[docs]def plot_feature_importance(df: pd.DataFrame, title: str) -> AxPlotConfig: """Wrapper method to convert plot_feature_importance_plotly to AxPlotConfig""" return AxPlotConfig( data=plot_feature_importance_plotly(df, title), plot_type=AxPlotTypes.GENERIC )
[docs]def plot_feature_importance_by_metric_plotly(model: ModelBridge) -> go.Figure: """One plot per feature, showing importances by metric.""" importances = [] for metric_name in sorted(model.metric_names): try: vals: Dict[str, Any] = model.feature_importances(metric_name) vals["index"] = metric_name importances.append(vals) except NotImplementedError: logger.warning( f"Model for {metric_name} does not support feature importances." ) if not importances: raise NotImplementedError( "Feature importances could not be calculated for any metric" ) df = pd.DataFrame(importances) # plot_feature_importance expects index in first column df = df.reindex(columns=(["index"] + [a for a in df.columns if a != "index"])) plot_fi = plot_feature_importance_plotly(df, "Parameter Sensitivity by Metric") num_subplots = len(df.columns) num_features = len(df) # Include per-subplot margin for subplot titles (feature names). plot_fi["layout"]["height"] = num_subplots * (num_features + 1) * 50 return plot_fi
[docs]def plot_feature_importance_by_metric(model: ModelBridge) -> AxPlotConfig: """Wrapper method to convert plot_feature_importance_by_metric_plotly to AxPlotConfig""" return AxPlotConfig( data=plot_feature_importance_by_metric_plotly(model), plot_type=AxPlotTypes.GENERIC, )
[docs]def plot_feature_importance_by_feature_plotly( model: Optional[ModelBridge] = None, sensitivity_values: Optional[Dict[str, Dict[str, Union[float, np.ndarray]]]] = None, relative: bool = False, caption: str = "", importance_measure: str = "", label_dict: Optional[Dict[str, str]] = None, ) -> go.Figure: """One plot per metric, showing importances by feature. If sensitivity values are not all positive, the absolute value will be shown and color will indicate positive or negative sign. Args: model: A model with a ``feature_importances`` method. sensitivity_values: The sensitivity values for each metric in a dict format. It takes the following format if only the sensitivity value is plotted: `{"metric1":{"parameter1":value1,"parameter2":value2 ...} ...}` It takes the following format if the sensitivity value and standard error are plotted: `{"metric1":{"parameter1":[value1,var,se], "parameter2":[[value2,var,se]]...}...}}`. relative: Whether to normalize feature importances so that they add to 1. caption: An HTML-formatted string to place at the bottom of the plot. importance_measure: The name of the importance metric to be added to the title. label_dict: A dictionary mapping metric names to short labels. Returns a go.Figure of feature importances. """ if sensitivity_values is None: if model is None: raise ValueError( "A model is required when sensitivity values are not provided" ) try: sensitivity_values = { metric_name: model.feature_importances(metric_name) for i, metric_name in enumerate(sorted(model.metric_names)) } except NotImplementedError: raise NotImplementedError( "Feature importances cannot be computed by the model." ) if label_dict is not None: sensitivity_values = { # pyre-ignore label_dict.get(metric_name, metric_name): v for metric_name, v in sensitivity_values.items() } traces = [] dropdown = [] categorical_features = [] if model is not None: categorical_features = [ name for name, par in model.model_space.parameters.items() if isinstance(par, ChoiceParameter) and not par.is_ordered ] for i, metric_name in enumerate(sorted(sensitivity_values.keys())): importances = sensitivity_values[metric_name] factor_col = "Factor" importance_col = "Importance" sign_col = "Sign" error_plot = np.asarray(next(iter(importances.values()))).size > 1 if error_plot: importance_col_se = "SE" df = pd.DataFrame( [ { factor_col: factor, importance_col: np.asarray(importance)[0], importance_col_se: np.asarray(importance)[2], sign_col: ( 0 if factor in categorical_features else 2 * (np.asarray(importance)[0] >= 0).astype(int) - 1 ), } for factor, importance in importances.items() ] ) df[importance_col] = df[importance_col].abs() df = df.sort_values(importance_col) error_x = {"type": "data", "array": df[importance_col_se], "visible": True} else: df = pd.DataFrame( [ { factor_col: factor, importance_col: importance, sign_col: ( 0 if factor in categorical_features else 2 * (importance >= 0).astype(int) - 1 ), } for factor, importance in importances.items() ] ) df[importance_col] = df[importance_col].abs() df = df.sort_values(importance_col) error_x = None if relative: df[importance_col] = df[importance_col].div(df[importance_col].sum()) colors = {-1: "darkorange", 0: "gray", 1: "steelblue"} names = { -1: "Decreases metric", 0: "Affects metric (categorical choice)", 1: "Increases metric", } legend_counter = {-1: 0, 0: 0, 1: 0} all_positive = all(df[sign_col] >= 0) for _, row in df.iterrows(): traces.append( go.Bar( name=names[row[sign_col]], orientation="h", visible=i == 0, x=np.array([row[importance_col]]), y=np.array([row[factor_col]]), error_x=error_x, opacity=0.8, marker_color=colors[row[sign_col]], showlegend=(not all_positive) and (legend_counter[row[sign_col]] == 0), legendgroup=str(row[sign_col]), ) ) legend_counter[row[sign_col]] += 1 is_visible = [False] * (len(sensitivity_values) * len(df)) for j in range(i * len(df), (i + 1) * len(df)): is_visible[j] = True dropdown.append( {"args": ["visible", is_visible], "label": metric_name, "method": "restyle"} ) if not traces: raise NotImplementedError("No traces found for metric") updatemenus = [ { "x": 0, "y": 1, "yanchor": "top", "xanchor": "left", "buttons": dropdown, "pad": { "t": -40 }, # hack to put dropdown below title regardless of number of features } ] features = list(list(sensitivity_values.values())[0].keys()) title = "Normalized parameter sensitivity" if relative else "Parameter sensitivity" if importance_measure: title = title + " using " + importance_measure longest_label = max(len(f) for f in features) longest_metric = max(len(m) for m in sensitivity_values.keys()) layout = go.Layout( height=200 + len(features) * 20, width=10 * longest_label + max(10 * longest_metric, 400), hovermode="closest", margin=go.layout.Margin( l=8 * min(max(len(idx) for idx in features), 75) ), # noqa E741 title=title, updatemenus=updatemenus, annotations=compose_annotation(caption=caption), ) if relative: layout.update({"xaxis": {"tickformat": ".0%"}}) return go.Figure(data=traces, layout=layout)
[docs]def plot_feature_importance_by_feature( model: Optional[ModelBridge] = None, sensitivity_values: Optional[Dict[str, Dict[str, Union[float, np.ndarray]]]] = None, relative: bool = False, caption: str = "", importance_measure: str = "", label_dict: Optional[Dict[str, str]] = None, ) -> AxPlotConfig: """Wrapper method to convert `plot_feature_importance_by_feature_plotly` to AxPlotConfig""" return AxPlotConfig( data=plot_feature_importance_by_feature_plotly( model=model, sensitivity_values=sensitivity_values, relative=relative, caption=caption, importance_measure=importance_measure, label_dict=label_dict, ), plot_type=AxPlotTypes.GENERIC, )
[docs]def plot_relative_feature_importance_plotly(model: ModelBridge) -> go.Figure: """Create a stacked bar chart of feature importances per metric""" importances = [] for metric_name in sorted(model.metric_names): try: vals: Dict[str, Any] = model.feature_importances(metric_name) vals["index"] = metric_name importances.append(vals) except Exception: logger.warning( "Model for {} does not support feature importances.".format(metric_name) ) df = pd.DataFrame(importances) df.set_index("index", inplace=True) df = df.div(df.sum(axis=1), axis=0) data = [ go.Bar(y=df.index, x=df[column_name], name=column_name, orientation="h") for column_name in df.columns ] layout = go.Layout( margin=go.layout.Margin(l=250), # noqa E741 barmode="group", yaxis={"title": ""}, xaxis={"title": "Relative Parameter importance"}, showlegend=False, title="Relative Parameter Importance per Metric", ) return go.Figure(data=data, layout=layout)
[docs]def plot_relative_feature_importance(model: ModelBridge) -> AxPlotConfig: """Wrapper method to convert plot_relative_feature_importance_plotly to AxPlotConfig""" return AxPlotConfig( data=plot_relative_feature_importance_plotly(model), plot_type=AxPlotTypes.GENERIC, )