Source code for ax.metrics.chemistry

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

Classes for optimizing yields from chemical reactions.


.. [Perera2018]
    D. Perera, J. W. Tucker, S. Brahmbhatt, C. Helal, A. Chong, W. Farrell,
    P. Richardson, N. W. Sach. A platform for automated nanomole-scale
    reaction screening and micromole-scale synthesis in flow. Science, 26.

.. [Shields2021]
   B. J. Shields, J. Stevens, J. Li, et al. Bayesian reaction optimization
   as a tool for chemical synthesis. Nature 590, 89–96 (2021).

"SUZUKI" involves optimizing solvent, ligand, and base combinations
in a Suzuki-Miyaura coupling to optimize carbon-carbon bond formation.
See [Perera2018]_ for details.

"DIRECT_ARYLATION" involves optimizing the solvent, base, and ligand chemicals
as well as the temperature and concentration for a direct arylation reaction.
See [Shields2021]_ for details.

from __future__ import annotations

from dataclasses import dataclass
from enum import Enum
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Tuple
from zipfile import ZipFile

import pandas as pd
from ax.core.base_trial import BaseTrial
from ax.core.data import Data
from ax.core.metric import Metric, MetricFetchE, MetricFetchResult
from ax.core.types import TParameterization, TParamValue
from ax.utils.common.result import Err, Ok
from ax.utils.common.typeutils import not_none

class ChemistryProblemType(Enum):
    """Identifiers of the tabulated chemistry problems.

    Each member's value is the base name of the CSV table that
    ``_get_data`` opens for that problem (``"<value>.csv"``).
    """

    # Members are deliberately left unannotated: type checkers infer the
    # member type themselves and treat an explicit ``: str`` as an error.
    SUZUKI = "suzuki"
    DIRECT_ARYLATION = "direct_arylation"
@dataclass(frozen=True)
class ChemistryData:
    """Immutable lookup table mapping parameter combinations to outcomes."""

    # Parameter names, in the order used to build the lookup keys.
    param_names: List[str]
    # Outcome for each combination; keys are tuples of parameter values
    # ordered like ``param_names``.
    objective_dict: Dict[Tuple[TParamValue, ...], float]

    def evaluate(self, params: TParameterization) -> float:
        """Return the tabulated outcome for ``params``.

        Args:
            params: Mapping from parameter name to value. It must contain
                every name in ``param_names``; other entries are ignored.

        Returns:
            The value stored in ``objective_dict`` for this combination.

        Raises:
            KeyError: If a name in ``param_names`` is absent from ``params``
                or the combination is not present in ``objective_dict``.
        """
        key = tuple(params[pname] for pname in self.param_names)
        try:
            return self.objective_dict[key]
        except KeyError:
            # Same exception type as a plain dict lookup, but the message
            # names each parameter instead of showing an anonymous tuple.
            requested = dict(zip(self.param_names, key))
            raise KeyError(
                f"No tabulated outcome for parameters {requested}."
            ) from None
@lru_cache(maxsize=8)
def _get_data(problem_type: ChemistryProblemType) -> ChemistryData:
    """Load the tabulated outcomes for ``problem_type``.

    Reads ``"<problem_type.value>.csv"`` from a zip archive located next to
    this module. Every column other than ``"yield"`` is treated as a
    parameter. Results are memoized per problem type via ``lru_cache``.

    Args:
        problem_type: The problem whose table should be loaded.

    Returns:
        A ``ChemistryData`` whose ``param_names`` are the sorted parameter
        column names and whose ``objective_dict`` maps parameter values to
        the ``"yield"`` column.
    """
    # NOTE(review): the archive file name was missing here (the call read
    # ``joinpath("")``); "chemistry_data.zip" is assumed -- confirm against
    # the data file shipped alongside this module.
    archive_path = Path(__file__).parent.joinpath("chemistry_data.zip").absolute()
    member_name = f"{problem_type.value}.csv"
    with ZipFile(archive_path) as archive, archive.open(member_name) as csv_file:
        df = pd.read_csv(csv_file, index_col=0)
    # Sorting fixes the key order, so lookups do not depend on the column
    # order in the CSV.
    param_names = sorted(col for col in df.columns if col != "yield")
    return ChemistryData(
        param_names=param_names,
        objective_dict=df.set_index(param_names)["yield"].to_dict(),
    )
class ChemistryMetric(Metric):
    """Metric for modeling chemical reactions.

    Metric describing the outcomes of chemical reactions. Based on tabulated
    data. Problems typically contain many discrete and categorical parameters.

    Args:
        name: The name of the metric.
        noiseless: If True, consider observations noiseless, otherwise
            assume unknown Gaussian observation noise.
        problem_type: The problem type.
        lower_is_better: If True, the metric should be minimized.

    Attributes:
        noiseless: If True, consider observations noiseless, otherwise
            assume unknown Gaussian observation noise.
        problem_type: The problem type whose table is used for evaluation.
        lower_is_better: If True, the metric should be minimized.
    """

    def __init__(
        self,
        name: str,
        noiseless: bool = False,
        problem_type: ChemistryProblemType = ChemistryProblemType.SUZUKI,
        lower_is_better: bool = False,
    ) -> None:
        self.noiseless = noiseless
        self.problem_type = problem_type
        super().__init__(name=name, lower_is_better=lower_is_better)

    def clone(self) -> ChemistryMetric:
        """Return a new metric of the same class with the same settings."""
        return self.__class__(
            name=self._name,
            noiseless=self.noiseless,
            problem_type=self.problem_type,
            lower_is_better=not_none(self.lower_is_better),
        )

    def fetch_trial_data(self, trial: BaseTrial, **kwargs: Any) -> MetricFetchResult:
        """Look up the tabulated outcome for every arm of ``trial``.

        Args:
            trial: The trial whose arms are evaluated.
            **kwargs: Accepted but not used.

        Returns:
            ``Ok`` wrapping a ``Data`` object with one row per arm, or
            ``Err`` wrapping a ``MetricFetchE`` if anything raised.
        """
        try:
            # The "sem" column is 0.0 when noiseless and NaN otherwise.
            noise_sd = 0.0 if self.noiseless else float("nan")
            data = _get_data(self.problem_type)
            arms_by_name = trial.arms_by_name
            df = pd.DataFrame(
                {
                    "arm_name": list(arms_by_name),
                    "metric_name": self.name,
                    "mean": [
                        data.evaluate(params=arm.parameters)
                        for arm in arms_by_name.values()
                    ],
                    "sem": noise_sd,
                    "trial_index": trial.index,
                }
            )
            return Ok(value=Data(df=df))
        except Exception as e:
            # Report the failure as an Err result instead of raising.
            return Err(
                MetricFetchE(message=f"Failed to fetch {self.name}", exception=e)
            )