"""Base functionality for all BayBE surrogates."""

from __future__ import annotations

import gc
from abc import ABC, abstractmethod
from collections.abc import Sequence
from enum import Enum, auto
from typing import TYPE_CHECKING, ClassVar, Literal, Protocol, TypeAlias

import pandas as pd
from attrs import define, field
from joblib.hashing import hash
from typing_extensions import override

from baybe.exceptions import IncompatibleSurrogateError, ModelNotTrainedError
from baybe.objectives.base import Objective
from baybe.parameters.base import Parameter
from baybe.searchspace import SearchSpace
from baybe.serialization.mixin import SerialMixin
from baybe.utils.conversion import to_string
from baybe.utils.dataframe import handle_missing_values, to_tensor
from baybe.utils.scaling import ColumnTransformer

if TYPE_CHECKING:
    from botorch.models.model import Model
    from botorch.models.transforms.input import InputTransform
    from botorch.models.transforms.outcome import OutcomeTransform
    from botorch.posteriors import GPyTorchPosterior, Posterior
    from torch import Tensor

    from baybe.surrogates.composite import CompositeSurrogate

PosteriorStatistic: TypeAlias = float | Literal["mean", "std", "var", "mode"]
"""Type alias for requestable statistics (a float yields the corresponding quantile)."""


class _NoTransform(Enum):
    """Sentinel class."""

    IDENTITY_TRANSFORM = auto()


_IDENTITY_TRANSFORM = _NoTransform.IDENTITY_TRANSFORM
"""Sentinel to indicate the absence of a transform where `None` is ambiguous."""


class SurrogateProtocol(Protocol):
    """Type protocol specifying the interface surrogate models need to implement."""

    # Use slots so that derived classes also remain slotted
    # See also: https://www.attrs.org/en/stable/glossary.html#term-slotted-classes
    __slots__ = ()

    # TODO: Final layout still to be optimized. For example, shall we require a
    #   `posterior` method?

    def fit(
        self,
        searchspace: SearchSpace,
        objective: Objective,
        measurements: pd.DataFrame,
    ) -> None:
        """Fit the surrogate to training data in a given modelling context.

        For details on the expected method arguments, see
        :meth:`baybe.recommenders.base.RecommenderProtocol`.
        """

    def to_botorch(self) -> Model:
        """Create the botorch-ready representation of the fitted model.

        The :class:`botorch.models.model.Model` created by this method needs to be
        configured such that it can be called with candidate points in
        **computational representation**, that is, input of the form as obtained via
        :meth:`baybe.searchspace.core.SearchSpace.transform`.
        """
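

# Illustrative sketch (not part of the library): because `SurrogateProtocol` is a
# structural protocol, any class exposing compatible `fit` and `to_botorch`
# methods conforms to it for static type checkers without subclassing
# `Surrogate`. The class below and its trivial "model" are hypothetical; a real
# implementation would return a fully configured BoTorch model from `to_botorch`.
class _MinimalProtocolSketch:
    """Hypothetical minimal surrogate conforming to :class:`SurrogateProtocol`."""

    __slots__ = ("_target_mean",)

    def fit(
        self,
        searchspace: SearchSpace,
        objective: Objective,
        measurements: pd.DataFrame,
    ) -> None:
        # "Train" by memorizing the mean of the first target
        self._target_mean = measurements[objective.targets[0].name].mean()

    def to_botorch(self) -> Model:
        # A conforming implementation must return a BoTorch model operating on
        # candidates in computational representation (omitted in this sketch)
        raise NotImplementedError("Sketch only - no BoTorch model is constructed.")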


@define
class Surrogate(ABC, SurrogateProtocol, SerialMixin):
    """Abstract base class for all surrogate models."""

    supports_transfer_learning: ClassVar[bool]
    """Class variable encoding whether or not the surrogate supports transfer
    learning."""

    supports_multi_output: ClassVar[bool] = False
    """Class variable encoding whether or not the surrogate is multi-output
    compatible."""

    _searchspace: SearchSpace | None = field(init=False, default=None, eq=False)
    """The search space on which the surrogate operates. Available after fitting."""

    _objective: Objective | None = field(init=False, default=None, eq=False)
    """The objective for which the surrogate was trained. Available after fitting."""

    _measurements_hash: str | None = field(init=False, default=None, eq=False)
    """The hash of the data the surrogate was trained on."""

    _input_scaler: ColumnTransformer | None = field(init=False, default=None, eq=False)
    """Scaler for transforming input values. Available after fitting.

    Scales a tensor containing parameter configurations in computational
    representation to make them digestible for the model-specific, scale-agnostic
    posterior logic."""

    # TODO: The type should be
    #   `botorch.models.transforms.outcome.Standardize | _NoTransform | None`
    #   but the annotation is currently omitted due to:
    #   https://github.com/python-attrs/cattrs/issues/531
    _output_scaler = field(init=False, default=None, eq=False)
    """Scaler for transforming output values. Available after fitting.

    Scales a tensor containing target measurements in computational representation
    to make them digestible for the model-specific, scale-agnostic posterior
    logic."""

    @override
    def to_botorch(self) -> Model:
        from baybe.surrogates._adapter import AdapterModel

        return AdapterModel(self)

    def replicate(self) -> CompositeSurrogate:
        """Make the surrogate handle multiple targets via replication.

        If the surrogate only supports single targets, this method turns it into a
        multi-target surrogate by replicating the model architecture for each
        observed target. The resulting copies are trained independently, but share
        the same architecture.

        If the surrogate is itself already multi-target compatible, this operation
        effectively disables the model's inherent multi-target mechanism by treating
        it as a single-target surrogate and applying the same replication mechanism.
        """
        from baybe.surrogates.composite import CompositeSurrogate

        return CompositeSurrogate.from_replication(self)

    @staticmethod
    def _make_parameter_scaler_factory(
        parameter: Parameter,
    ) -> type[InputTransform] | None:
        """Return the scaler factory to be used for the given parameter.

        This method is supposed to be overridden by subclasses to implement their
        custom parameter scaling logic. Otherwise, parameters will be normalized.
        (An illustrative override is sketched after `IndependentGaussianSurrogate`
        below.)
        """
        from botorch.models.transforms.input import Normalize

        return Normalize

    @staticmethod
    def _make_target_scaler_factory() -> type[OutcomeTransform] | None:
        """Return the scaler factory to be used for target scaling.

        This method is supposed to be overridden by subclasses to implement their
        custom target scaling logic. Otherwise, targets will be standardized.
        """
        from botorch.models.transforms.outcome import Standardize

        return Standardize

    def _make_input_scaler(self, searchspace: SearchSpace) -> ColumnTransformer:
        """Make and fit the input scaler for transforming computational dataframes."""
        # Create a composite scaler from parameter-wise scaler objects
        mapping: dict[tuple[int, ...], InputTransform] = {}
        for p in searchspace.parameters:
            if (factory := self._make_parameter_scaler_factory(p)) is None:
                continue
            idxs = searchspace.get_comp_rep_parameter_indices(p.name)
            transformer = factory(len(idxs))
            mapping[idxs] = transformer
        scaler = ColumnTransformer(mapping)

        # Fit the scaler to the parameter bounds
        scaler.fit(to_tensor(searchspace.scaling_bounds))

        return scaler

    def _make_output_scaler(
        self, objective: Objective, measurements: pd.DataFrame
    ) -> OutcomeTransform | _NoTransform:
        """Make and fit the output scaler for transforming computational dataframes."""
        if (factory := self._make_target_scaler_factory()) is None:
            return _IDENTITY_TRANSFORM

        if objective.n_outputs != 1:
            # There is no execution path yet that could lead to this situation
            raise NotImplementedError(
                "Output scalers for multi-output models are not available."
            )
        scaler = factory(1)

        # TODO: Consider taking into account target boundaries when available
        scaler(to_tensor(objective._pre_transform(measurements, allow_extra=True)))
        scaler.eval()

        return scaler

    def posterior(self, candidates: pd.DataFrame) -> Posterior:
        """Compute the posterior for candidates in experimental representation.

        Takes a dataframe of parameter configurations in **experimental
        representation** and returns the corresponding posterior object. Therefore,
        the method serves as the user-facing entry point for accessing model
        predictions.

        Args:
            candidates: A dataframe containing parameter configurations in
                **experimental representation**.

        Raises:
            ModelNotTrainedError: When called before the model has been trained.

        Returns:
            A :class:`botorch.posteriors.Posterior` object representing the
            posterior distribution at the given candidate points, where the
            posterior is also described in **experimental representation**. That is,
            the posterior values lie in the same domain as the modelled
            targets/objective on which the surrogate was trained via
            :meth:`baybe.surrogates.base.Surrogate.fit`.
        """
        if self._searchspace is None:
            raise ModelNotTrainedError(
                "The surrogate must be trained before a posterior can be computed."
            )
        return self._posterior_comp(
            to_tensor(self._searchspace.transform(candidates, allow_extra=True))
        )

    def _posterior_comp(self, candidates_comp: Tensor, /) -> Posterior:
        """Compute the posterior for candidates in computational representation.

        Takes a tensor of parameter configurations in **computational
        representation** and returns the corresponding posterior object. Therefore,
        the method provides the entry point for queries coming from computational
        layers, for instance, BoTorch's ``optimize_*`` functions.

        Args:
            candidates_comp: A tensor containing parameter configurations in
                **computational representation**.

        Returns:
            The same :class:`botorch.posteriors.Posterior` object as returned via
            :meth:`baybe.surrogates.base.Surrogate.posterior`.
        """
        # FIXME[typing]: It seems there is currently no better way to inform the
        #   type checker that the attribute is available at the time of the call
        assert self._input_scaler is not None

        p = self._posterior(self._input_scaler.transform(candidates_comp))
        if self._output_scaler is not _IDENTITY_TRANSFORM:
            p = self._output_scaler.untransform_posterior(p)
        return p

    @abstractmethod
    def _posterior(self, candidates_comp_scaled: Tensor, /) -> Posterior:
        """Perform the actual model-specific posterior evaluation logic.

        This method is supposed to be overridden by subclasses to implement their
        model-specific surrogate architecture. Internally, the method is called by
        the base class with a **scaled** tensor of candidates in **computational
        representation**, where the scaling is configurable by the subclass by
        overriding the default scaler factory methods of the base class. The base
        class also takes care of transforming the returned posterior back to the
        original scale according to the defined scalers.

        This means that subclasses implementing this method do not have to bother
        about pre-/postprocessing of the in-/output. Instead, they only need to
        implement the mathematical operation of computing the posterior for the
        given input according to their model specifications and can implicitly
        assume that scaling is handled appropriately outside. In short: the
        returned posterior simply needs to be on the same scale as the given input.

        Args:
            candidates_comp_scaled: A tensor containing **scaled** parameter
                configurations in **computational representation**, as defined
                through the input scaler obtained via
                :meth:`baybe.surrogates.base.Surrogate._make_input_scaler`.

        Returns:
            A :class:`botorch.posteriors.Posterior` object representing the
            **scale-transformed** posterior distribution at the given candidate
            points, where the posterior is described on the scale dictated by the
            output scaler obtained via
            :meth:`baybe.surrogates.base.Surrogate._make_output_scaler`.
        """

    def posterior_stats(
        self,
        candidates: pd.DataFrame,
        stats: Sequence[PosteriorStatistic] = ("mean", "std"),
    ) -> pd.DataFrame:
        """Return posterior statistics for each target.

        Args:
            candidates: The candidate points in experimental representation.
                For details, see :meth:`baybe.surrogates.base.Surrogate.posterior`.
            stats: Sequence indicating which statistics to compute. Also accepts
                floats, for which the corresponding quantile point will be computed.

        Raises:
            ModelNotTrainedError: When called before the model has been trained.
            ValueError: If a requested quantile is outside the open interval (0, 1).
            TypeError: If the posterior utilized by the surrogate does not support
                a requested statistic.

        Returns:
            A dataframe with posterior statistics for each target and candidate.
        """
        if self._objective is None:
            raise ModelNotTrainedError(
                "The surrogate must be trained before a posterior can be computed."
            )
        stat: PosteriorStatistic
        for stat in (x for x in stats if isinstance(x, float)):
            if not 0.0 < stat < 1.0:
                raise ValueError(
                    f"Posterior quantile statistics can only be computed for "
                    f"quantiles between 0 and 1 (non-inclusive). Provided value: "
                    f"'{stat}' as part of '{stats=}'."
                )
        posterior = self.posterior(candidates)

        import torch

        result = pd.DataFrame(index=candidates.index)
        with torch.no_grad():
            for stat in stats:
                try:
                    if isinstance(stat, float):
                        # Calculate quantile statistic
                        stat_name = f"Q_{stat}"
                        vals = posterior.quantile(torch.tensor(stat))
                    else:
                        # Calculate non-quantile statistic
                        stat_name = stat
                        vals = getattr(
                            posterior,
                            stat if stat not in ["std", "var"] else "variance",
                        )
                except (AttributeError, NotImplementedError) as e:
                    # We could arrive here because an invalid statistic string has
                    # been requested or because a quantile point has been requested
                    # but the posterior type does not implement quantiles.
                    raise TypeError(
                        f"The utilized posterior of type "
                        f"'{posterior.__class__.__name__}' does not support the "
                        f"statistic associated with the requested input '{stat}'."
                    ) from e
                if stat == "std":
                    vals = torch.sqrt(vals)

                # Enforce a consistent shape
                # https://github.com/pytorch/botorch/issues/2958
                vals = vals.reshape((len(candidates), 1))
                result[
                    [
                        f"{name}_{stat_name}"
                        for name in self._objective._modeled_quantity_names
                    ]
                ] = vals.cpu().numpy()

        return result

    @override
    def fit(
        self,
        searchspace: SearchSpace,
        objective: Objective,
        measurements: pd.DataFrame,
    ) -> None:
        """Train the surrogate model on the provided data.

        Args:
            searchspace: The search space in which experiments are conducted.
            objective: The objective to be optimized.
            measurements: The training data in experimental representation.

        Raises:
            IncompatibleSurrogateError: If a single-output surrogate is trained in
                a multi-output context.
            ValueError: If the search space contains task parameters but the
                selected surrogate model type does not support transfer learning.
            NotImplementedError: When using a continuous search space and a non-GP
                model.
        """
        # TODO: consider adding a validation step for `measurements`

        # Validate multi-target compatibility
        if objective.is_multi_output and not self.supports_multi_output:
            raise IncompatibleSurrogateError(
                f"You attempted to train a single-output surrogate in a "
                f"{len(objective.targets)}-target multi-output context. Either use "
                f"a proper multi-output surrogate or consider explicitly "
                f"replicating the current surrogate model using its "
                f"'.{self.replicate.__name__}' method."
            )

        # When the context is unchanged, no retraining is necessary
        if (
            searchspace == self._searchspace
            and objective == self._objective
            and hash(measurements) == self._measurements_hash
        ):
            return

        # Check if transfer learning capabilities are needed
        if (searchspace.n_tasks > 1) and (not self.supports_transfer_learning):
            raise ValueError(
                f"The search space contains task parameters but the selected "
                f"surrogate model type ({self.__class__.__name__}) does not "
                f"support transfer learning."
            )
        if (not searchspace.continuous.is_empty) and (
            "GaussianProcess" not in self.__class__.__name__
        ):
            raise NotImplementedError(
                "Continuous search spaces are currently only supported by GPs."
            )

        # Block partial measurements
        handle_missing_values(measurements, [t.name for t in objective.targets])

        # Remember the training context
        self._searchspace = searchspace
        self._objective = objective
        self._measurements_hash = hash(measurements)

        # Create context-specific transformations
        self._input_scaler = self._make_input_scaler(searchspace)
        self._output_scaler = self._make_output_scaler(objective, measurements)

        # Transform and fit
        # Note: The targets are only pre-transformed here. The remaining
        #   transformations are applied in the form of BoTorch objectives. This has
        #   the consequence that:
        #   * The trained surrogate model can be called with pre-transformed target
        #     values, enabling predictions with input from the pre-transformed
        #     domain (this allows us to control precisely on which level the model
        #     is placed)
        #   * The main transformation is part of the computational backpropagation
        #     graph
        pre_transformed = objective._pre_transform(measurements, allow_extra=True)
        train_x_comp_rep, train_y_tensor = to_tensor(
            searchspace.transform(measurements, allow_extra=True), pre_transformed
        )
        train_x = self._input_scaler.transform(train_x_comp_rep)
        train_y = (
            train_y_tensor
            if self._output_scaler is _IDENTITY_TRANSFORM
            else self._output_scaler(train_y_tensor)[0]
        )
        self._fit(train_x, train_y)

    @abstractmethod
    def _fit(self, train_x: Tensor, train_y: Tensor) -> None:
        """Perform the actual fitting logic."""

    @override
    def __str__(self) -> str:
        fields = [
            to_string(
                "Supports Transfer Learning",
                self.supports_transfer_learning,
                single_line=True,
            ),
        ]
        return to_string(self.__class__.__name__, *fields)
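

# Illustrative usage sketch (not part of the library): once fitted, a surrogate
# exposes predictions through `posterior` and `posterior_stats`. The surrogate
# type, parameter setup, and data below are assumptions made purely for
# demonstration purposes.
def _posterior_stats_example() -> pd.DataFrame:
    from baybe.objectives import SingleTargetObjective
    from baybe.parameters import NumericalDiscreteParameter
    from baybe.surrogates import GaussianProcessSurrogate
    from baybe.targets import NumericalTarget

    searchspace = SearchSpace.from_product(
        [NumericalDiscreteParameter(name="x", values=(0.0, 0.5, 1.0))]
    )
    objective = SingleTargetObjective(NumericalTarget("y"))
    measurements = pd.DataFrame({"x": [0.0, 0.5, 1.0], "y": [0.1, 0.4, 0.9]})

    surrogate = GaussianProcessSurrogate()
    surrogate.fit(searchspace, objective, measurements)

    # Request the posterior mean, standard deviation, and 5%/95% quantiles
    candidates = pd.DataFrame({"x": [0.25, 0.75]})
    return surrogate.posterior_stats(candidates, stats=("mean", "std", 0.05, 0.95))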


@define
class IndependentGaussianSurrogate(Surrogate, ABC):
    """A surrogate base class providing independent Gaussian posteriors."""

    @override
    def _posterior(self, candidates_comp_scaled: Tensor, /) -> GPyTorchPosterior:
        import torch
        from botorch.posteriors import GPyTorchPosterior
        from gpytorch.distributions import MultivariateNormal

        # Construct the Gaussian posterior from the estimated first and second moment
        mean, var = self._estimate_moments(candidates_comp_scaled)
        mvn = MultivariateNormal(mean, torch.diag_embed(var))
        return GPyTorchPosterior(mvn)

    @abstractmethod
    def _estimate_moments(
        self, candidates_comp_scaled: Tensor, /
    ) -> tuple[Tensor, Tensor]:
        """Estimate first and second moments of the Gaussian posterior."""
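

# Illustrative sketch (not part of the library): a hypothetical minimal subclass
# demonstrating the `IndependentGaussianSurrogate` contract. It predicts the
# training-data mean and variance for every candidate and disables input scaling
# by returning `None` from the parameter scaler factory, as permitted by
# `_make_parameter_scaler_factory` above. All names below are assumptions.
@define
class _MeanVarianceSketch(IndependentGaussianSurrogate):
    """Hypothetical surrogate predicting a constant mean and variance."""

    supports_transfer_learning: ClassVar[bool] = False

    _mean: Tensor | None = field(init=False, default=None, eq=False)
    _var: Tensor | None = field(init=False, default=None, eq=False)

    @staticmethod
    @override
    def _make_parameter_scaler_factory(
        parameter: Parameter,
    ) -> type[InputTransform] | None:
        # Returning `None` skips scaling for the given parameter entirely
        return None

    @override
    def _fit(self, train_x: Tensor, train_y: Tensor) -> None:
        # Memorize the first and second moments of the training targets
        self._mean = train_y.mean()
        self._var = train_y.var()

    @override
    def _estimate_moments(
        self, candidates_comp_scaled: Tensor, /
    ) -> tuple[Tensor, Tensor]:
        # Broadcast the constant moments to the number of candidates
        assert self._mean is not None and self._var is not None
        n = len(candidates_comp_scaled)
        return self._mean.expand(n), self._var.expand(n)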


# Collect leftover original slotted classes processed by `attrs.define`
gc.collect()