Source code for ax.analysis.plotly.scatter

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

from typing import Optional

import pandas as pd
from ax.analysis.analysis import AnalysisCardLevel

from ax.analysis.plotly.plotly_analysis import PlotlyAnalysis, PlotlyAnalysisCard
from ax.core.experiment import Experiment
from ax.core.generation_strategy_interface import GenerationStrategyInterface
from ax.exceptions.core import DataRequiredError, UserInputError
from plotly import express as px, graph_objects as go



[docs]
class ScatterPlot(PlotlyAnalysis):
    """
    Plotly scatter plot of one metric against another.

    Every arm contributes exactly one point, colored by the index of the trial the
    arm belongs to. The Pareto frontier can optionally be drawn on top of the
    points. Use this plot to inspect how two metrics relate to, or trade off
    against, each other.

    The DataFrame attached to the resulting card has one row per arm and these
    columns:
        - trial_index: The trial index of the arm
        - arm_name: The name of the arm
        - X_METRIC_NAME: The observed mean of the x-axis metric
        - Y_METRIC_NAME: The observed mean of the y-axis metric
        - is_optimal: Whether the arm is on the Pareto frontier
    """

    def __init__(
        self, x_metric_name: str, y_metric_name: str, show_pareto_frontier: bool = False
    ) -> None:
        """
        Args:
            x_metric_name: Name of the metric shown on the x-axis.
            y_metric_name: Name of the metric shown on the y-axis.
            show_pareto_frontier: If True, draw the Pareto frontier of the two
                metrics. The optimization direction is read from the Experiment.
        """

        self.show_pareto_frontier = show_pareto_frontier

        self.x_metric_name = x_metric_name
        self.y_metric_name = y_metric_name

    def compute(
        self,
        experiment: Optional[Experiment] = None,
        generation_strategy: Optional[GenerationStrategyInterface] = None,
    ) -> PlotlyAnalysisCard:
        """
        Build the scatter plot card for the given experiment.

        Args:
            experiment: The experiment whose attached data is plotted. Required.
            generation_strategy: Not used by this analysis.

        Returns:
            A PlotlyAnalysisCard holding the per-arm DataFrame and the figure.

        Raises:
            UserInputError: If no experiment is provided.
        """
        if experiment is None:
            raise UserInputError("ScatterPlot requires an Experiment")

        # Both helpers take the same pair of metric names, so bundle them once.
        metric_names = {
            "x_metric_name": self.x_metric_name,
            "y_metric_name": self.y_metric_name,
        }

        arm_df = _prepare_data(experiment=experiment, **metric_names)

        # A metric without directional guidance (None) is treated as "not lower
        # is better". This lookup intentionally happens after the data has been
        # prepared.
        minimize_x = experiment.metrics[self.x_metric_name].lower_is_better or False

        figure = _prepare_plot(
            df=arm_df,
            show_pareto_frontier=self.show_pareto_frontier,
            x_lower_is_better=minimize_x,
            **metric_names,
        )

        return self._create_plotly_analysis_card(
            title=f"Observed {self.x_metric_name} vs. {self.y_metric_name}",
            subtitle="Compare arms by their observed metric values",
            level=AnalysisCardLevel.HIGH,
            df=arm_df,
            fig=figure,
        )




def _prepare_data(
    experiment: Experiment, x_metric_name: str, y_metric_name: str
) -> pd.DataFrame:
    """
    Extract the relevant data from the experiment and prepare it into a dataframe
    formatted in the way expected by _prepare_plot.

    Args:
        experiment: The experiment to extract data from.
        x_metric_name: The name of the metric to plot on the x-axis.
        y_metric_name: The name of the metric to plot on the y-axis.

    Returns:
        A DataFrame with one row per (trial_index, arm_name) pair that has a value
        for both metrics, with the columns trial_index, arm_name, one column per
        metric holding its mean, and a boolean is_optimal column marking the arms
        that are not dominated by any other arm.

    Raises:
        DataRequiredError: If no arm has data for both metrics, including the case
            where one of the two metrics has no data at all.
    """

    # Lookup the data that has already been fetched and attached to the experiment
    data = experiment.lookup_data().df

    # Filter for only rows with the relevant metric names
    metric_name_mask = data["metric_name"].isin([x_metric_name, y_metric_name])
    filtered = data[metric_name_mask][
        ["trial_index", "arm_name", "metric_name", "mean"]
    ]

    # Pivot the data so that each row is an arm and the columns are the metric names
    pivoted: pd.DataFrame = filtered.pivot_table(
        index=["trial_index", "arm_name"], columns="metric_name", values="mean"
    ).dropna()
    pivoted.reset_index(inplace=True)
    pivoted.columns.name = None

    # The pivot only creates a column for a metric that actually occurs in the
    # data. If one of the two metrics has no rows at all, its column is absent,
    # dropna() has nothing to drop, and the frame is non-empty even though no arm
    # has both values. Check for the columns explicitly so this case is reported
    # as missing data rather than failing later with a KeyError.
    has_both_metrics = {x_metric_name, y_metric_name}.issubset(pivoted.columns)

    if pivoted.empty or not has_both_metrics:
        raise DataRequiredError(
            f"No observations have data for both {x_metric_name} and {y_metric_name}. "
            "Please ensure that the data has been fetched and attached to the "
            "experiment."
        )

    # Add a column indicating whether the arm is on the Pareto frontier. This is
    # calculated by comparing each arm to all other arms in the experiment and
    # creating a mask.
    # If directional guidance is not specified, we assume that we intend to maximize
    # the metric.
    x_lower_is_better: bool = experiment.metrics[x_metric_name].lower_is_better or False
    y_lower_is_better: bool = experiment.metrics[y_metric_name].lower_is_better or False

    def is_optimal(row: pd.Series) -> bool:
        # Arms that are strictly better than this row on the x metric.
        x_mask = (
            (pivoted[x_metric_name] < row[x_metric_name])
            if x_lower_is_better
            else (pivoted[x_metric_name] > row[x_metric_name])
        )
        # Arms that are strictly better than this row on the y metric.
        y_mask = (
            (pivoted[y_metric_name] < row[y_metric_name])
            if y_lower_is_better
            else (pivoted[y_metric_name] > row[y_metric_name])
        )
        # The row is optimal unless some arm is strictly better on both metrics.
        return not (x_mask & y_mask).any()

    pivoted["is_optimal"] = pivoted.apply(
        is_optimal,
        axis=1,
    )

    return pivoted


def _prepare_plot(
    df: pd.DataFrame,
    x_metric_name: str,
    y_metric_name: str,
    show_pareto_frontier: bool,
    x_lower_is_better: bool,
) -> go.Figure:
    """
    Build the scatter figure for an already-prepared DataFrame.

    Args:
        df: The DataFrame to plot. It must provide these columns:
            - trial_index: The trial index of the arm
            - arm_name: The name of the arm
            - X_METRIC_NAME: The observed mean of the metric on the x-axis
            - Y_METRIC_NAME: The observed mean of the metric on the y-axis
            - is_optimal: Whether the arm is on the Pareto frontier (only read
                when show_pareto_frontier=True)
        x_metric_name: The name of the metric to plot on the x-axis
        y_metric_name: The name of the metric to plot on the y-axis
        show_pareto_frontier: Whether to draw the Pareto frontier for the two metrics
        x_lower_is_better: Whether the metric on the x-axis is being minimized (only
            relevant if show_pareto_frontier=True)

    Returns:
        The scatter figure, with a frontier line trace added when requested.
    """
    hover_columns = ["trial_index", "arm_name", x_metric_name, y_metric_name]
    figure = px.scatter(
        df,
        x=x_metric_name,
        y=y_metric_name,
        color="trial_index",
        hover_data=hover_columns,
    )

    if not show_pareto_frontier:
        return figure

    # The line is drawn through the optimal points in the order given, so they
    # have to be sorted along the x-axis first.
    optimal_rows = df[df["is_optimal"]]
    ordered_frontier = optimal_rows.sort_values(by=x_metric_name)

    # The step direction of the line depends on whether x is being minimized.
    step_shape = "hv" if x_lower_is_better else "vh"

    frontier_trace = go.Scatter(
        x=ordered_frontier[x_metric_name],
        y=ordered_frontier[y_metric_name],
        mode="lines",
        line_shape=step_shape,
        showlegend=False,
    )
    figure.add_trace(frontier_trace)

    return figure
Ax

latest

Source code for ax.analysis.plotly.scatter

Ax

Navigation

Related Topics