"""Base functionality for all BayBE surrogates."""
from __future__ import annotations
import gc
from abc import ABC, abstractmethod
from collections.abc import Sequence
from enum import Enum, auto
from typing import TYPE_CHECKING, ClassVar, Literal, Protocol, TypeAlias
import pandas as pd
from attrs import define, field
from joblib.hashing import hash
from typing_extensions import override
from baybe.exceptions import IncompatibleSurrogateError, ModelNotTrainedError
from baybe.objectives.base import Objective
from baybe.parameters.base import Parameter
from baybe.searchspace import SearchSpace
from baybe.serialization.mixin import SerialMixin
from baybe.utils.conversion import to_string
from baybe.utils.dataframe import handle_missing_values, to_tensor
from baybe.utils.scaling import ColumnTransformer
if TYPE_CHECKING:
from botorch.models.model import Model
from botorch.models.transforms.input import InputTransform
from botorch.models.transforms.outcome import OutcomeTransform
from botorch.posteriors import GPyTorchPosterior, Posterior
from torch import Tensor
from baybe.surrogates.composite import CompositeSurrogate
PosteriorStatistic: TypeAlias = float | Literal["mean", "std", "var", "mode"]
"""Type alias for requestable statistics (a float yields the corresponding quantile)."""
class _NoTransform(Enum):
"""Sentinel class."""
IDENTITY_TRANSFORM = auto()
_IDENTITY_TRANSFORM = _NoTransform.IDENTITY_TRANSFORM
"""Sentinel to indicate the absence of a transform where `None` is ambiguous."""
class SurrogateProtocol(Protocol):
"""Type protocol specifying the interface surrogate models need to implement."""
# Use slots so that derived classes also remain slotted
# See also: https://www.attrs.org/en/stable/glossary.html#term-slotted-classes
__slots__ = ()
# TODO: Final layout still to be optimized. For example, shall we require a
# `posterior` method?
def fit(
self,
searchspace: SearchSpace,
objective: Objective,
measurements: pd.DataFrame,
) -> None:
"""Fit the surrogate to training data in a given modelling context.
        For details on the expected method arguments, see
        :class:`baybe.recommenders.base.RecommenderProtocol`.
"""
def to_botorch(self) -> Model:
"""Create the botorch-ready representation of the fitted model.
The :class:`botorch.models.model.Model` created by this method needs to be
configured such that it can be called with candidate points in **computational
representation**, that is, input of the form as obtained via
:meth:`baybe.searchspace.core.SearchSpace.transform`.
"""
@define
class Surrogate(ABC, SurrogateProtocol, SerialMixin):
"""Abstract base class for all surrogate models."""
supports_transfer_learning: ClassVar[bool]
"""Class variable encoding whether or not the surrogate supports transfer
learning."""
supports_multi_output: ClassVar[bool] = False
"""Class variable encoding whether or not the surrogate is multi-output
compatible."""
_searchspace: SearchSpace | None = field(init=False, default=None, eq=False)
"""The search space on which the surrogate operates. Available after fitting."""
_objective: Objective | None = field(init=False, default=None, eq=False)
"""The objective for which the surrogate was trained. Available after fitting."""
    _measurements_hash: str | None = field(init=False, default=None, eq=False)
    """The hash of the data the surrogate was trained on. Available after fitting."""
_input_scaler: ColumnTransformer | None = field(init=False, default=None, eq=False)
"""Scaler for transforming input values. Available after fitting.
Scales a tensor containing parameter configurations in computational representation
to make them digestible for the model-specific, scale-agnostic posterior logic."""
# TODO: type should be
# `botorch.models.transforms.outcome.Standardize | _NoTransform` | None
# but is currently omitted due to:
# https://github.com/python-attrs/cattrs/issues/531
_output_scaler = field(init=False, default=None, eq=False)
"""Scaler for transforming output values. Available after fitting.
Scales a tensor containing target measurements in computational representation
to make them digestible for the model-specific, scale-agnostic posterior logic."""
@override
def to_botorch(self) -> Model:
from baybe.surrogates._adapter import AdapterModel
return AdapterModel(self)
def replicate(self) -> CompositeSurrogate:
"""Make the surrogate handle multiple targets via replication.
If the surrogate only supports single targets, this method turns it into a
multi-target surrogate by replicating the model architecture for each observed
target. The resulting copies are trained independently, but share the same
architecture.
        If the surrogate is itself already multi-target compatible, this operation
        effectively disables the model's inherent multi-target mechanism by treating
        it as a single-target surrogate and applying the same replication scheme.
"""
from baybe.surrogates.composite import CompositeSurrogate
return CompositeSurrogate.from_replication(self)
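    # Illustrative usage, with `GaussianProcessSurrogate` as an example model:
    #
    #     from baybe.surrogates import GaussianProcessSurrogate
    #     composite = GaussianProcessSurrogate().replicate()  # one copy per target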
@staticmethod
def _make_parameter_scaler_factory(
parameter: Parameter,
) -> type[InputTransform] | None:
"""Return the scaler factory to be used for the given parameter.
        This method is supposed to be overridden by subclasses to implement their
        custom parameter scaling logic. Otherwise, parameters will be normalized.
        Returning ``None`` disables scaling for the given parameter.
        """
from botorch.models.transforms.input import Normalize
return Normalize
@staticmethod
def _make_target_scaler_factory() -> type[OutcomeTransform] | None:
"""Return the scaler factory to be used for target scaling.
        This method is supposed to be overridden by subclasses to implement their
        custom target scaling logic. Otherwise, targets will be standardized.
        Returning ``None`` disables target scaling.
        """
from botorch.models.transforms.outcome import Standardize
return Standardize
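    # Hypothetical sketch: a subclass can opt out of scaling entirely by returning
    # `None` from the factory hooks:
    #
    #     class MyUnscaledSurrogate(Surrogate):
    #         @staticmethod
    #         def _make_parameter_scaler_factory(parameter):
    #             return None  # leave this parameter's columns unscaled
    #
    #         @staticmethod
    #         def _make_target_scaler_factory():
    #             return None  # leave targets unscaled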
def _make_input_scaler(self, searchspace: SearchSpace) -> ColumnTransformer:
"""Make and fit the input scaler for transforming computational dataframes."""
# Create a composite scaler from parameter-wise scaler objects
mapping: dict[tuple[int, ...], InputTransform] = {}
for p in searchspace.parameters:
if (factory := self._make_parameter_scaler_factory(p)) is None:
continue
idxs = searchspace.get_comp_rep_parameter_indices(p.name)
transformer = factory(len(idxs))
mapping[idxs] = transformer
scaler = ColumnTransformer(mapping)
# Fit the scaler to the parameter bounds
scaler.fit(to_tensor(searchspace.scaling_bounds))
return scaler
def _make_output_scaler(
self, objective: Objective, measurements: pd.DataFrame
) -> OutcomeTransform | _NoTransform:
"""Make and fit the output scaler for transforming computational dataframes."""
if (factory := self._make_target_scaler_factory()) is None:
return _IDENTITY_TRANSFORM
if objective.n_outputs != 1:
# There is no execution path yet that could lead to this situation
raise NotImplementedError(
"Output scalers for multi-output models are not available."
)
scaler = factory(1)
# TODO: Consider taking into account target boundaries when available
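        # Calling the scaler fits it to the pre-transformed targets; switching to
        # eval mode afterwards freezes it for pure transformation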
scaler(to_tensor(objective._pre_transform(measurements, allow_extra=True)))
scaler.eval()
return scaler
def posterior(self, candidates: pd.DataFrame) -> Posterior:
"""Compute the posterior for candidates in experimental representation.
        Takes a dataframe of parameter configurations in **experimental
        representation** and returns the corresponding posterior object. The method
        thus serves as the user-facing entry point for accessing model predictions.
Args:
candidates: A dataframe containing parameter configurations in
**experimental representation**.
Raises:
ModelNotTrainedError: When called before the model has been trained.
Returns:
A :class:`botorch.posteriors.Posterior` object representing the posterior
distribution at the given candidate points, where the posterior is also
described in **experimental representation**. That is, the posterior values
lie in the same domain as the modelled targets/objective on which the
surrogate was trained via :meth:`baybe.surrogates.base.Surrogate.fit`.
"""
if self._searchspace is None:
raise ModelNotTrainedError(
"The surrogate must be trained before a posterior can be computed."
)
return self._posterior_comp(
to_tensor(self._searchspace.transform(candidates, allow_extra=True))
)
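    # Illustrative usage (assuming a fitted surrogate and a candidate dataframe
    # `df` in experimental representation; `mean`/`variance` are available on
    # typical botorch posteriors such as `GPyTorchPosterior`):
    #
    #     post = surrogate.posterior(df)
    #     mean, var = post.mean, post.variance  # in the modelled target domain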
def _posterior_comp(self, candidates_comp: Tensor, /) -> Posterior:
"""Compute the posterior for candidates in computational representation.
Takes a tensor of parameter configurations in **computational representation**
        and returns the corresponding posterior object. The method thus provides
        the entry point for queries originating from computational layers, for
        instance, BoTorch's `optimize_*` functions.
Args:
candidates_comp: A tensor containing parameter configurations in
**computational representation**.
Returns:
The same :class:`botorch.posteriors.Posterior` object as returned via
:meth:`baybe.surrogates.base.Surrogate.posterior`.
"""
# FIXME[typing]: It seems there is currently no better way to inform the type
# checker that the attribute is available at the time of the function call
assert self._input_scaler is not None
p = self._posterior(self._input_scaler.transform(candidates_comp))
if self._output_scaler is not _IDENTITY_TRANSFORM:
p = self._output_scaler.untransform_posterior(p)
return p
@abstractmethod
def _posterior(self, candidates_comp_scaled: Tensor, /) -> Posterior:
"""Perform the actual model-specific posterior evaluation logic.
This method is supposed to be overridden by subclasses to implement their
model-specific surrogate architecture. Internally, the method is called by the
base class with a **scaled** tensor of candidates in **computational
representation**, where the scaling is configurable by the subclass by
overriding the default scaler factory methods of the base. The base class also
takes care of transforming the returned posterior back to the original scale
according to the defined scalers.
This means:
-----------
        Subclasses implementing this method do not need to handle any pre- or
        postprocessing of inputs and outputs. Instead, they only need to implement
        the mathematical operation of computing the posterior for the given input
        according to their model specifications and can implicitly assume that
        scaling is handled appropriately outside. In short: the returned posterior
        simply needs to be on the same scale as the given input.
Args:
candidates_comp_scaled: A tensor containing **scaled** parameter
configurations in **computational representation**, as defined through
the input scaler obtained via
:meth:`baybe.surrogates.base.Surrogate._make_input_scaler`.
Returns:
A :class:`botorch.posteriors.Posterior` object representing the
**scale-transformed** posterior distributions at the given candidate points,
where the posterior is described on the scale dictated by the output scaler
obtained via :meth:`baybe.surrogates.base.Surrogate._make_output_scaler`.
"""
def posterior_stats(
self,
candidates: pd.DataFrame,
stats: Sequence[PosteriorStatistic] = ("mean", "std"),
) -> pd.DataFrame:
"""Return posterior statistics for each target.
Args:
candidates: The candidate points in experimental representation.
For details, see :meth:`baybe.surrogates.base.Surrogate.posterior`.
stats: Sequence indicating which statistics to compute. Also accepts
floats, for which the corresponding quantile point will be computed.
Raises:
ModelNotTrainedError: When called before the model has been trained.
ValueError: If a requested quantile is outside the open interval (0,1).
TypeError: If the posterior utilized by the surrogate does not support
a requested statistic.
Returns:
A dataframe with posterior statistics for each target and candidate.
"""
if self._objective is None:
raise ModelNotTrainedError(
"The surrogate must be trained before a posterior can be computed."
)
stat: PosteriorStatistic
for stat in (x for x in stats if isinstance(x, float)):
if not 0.0 < stat < 1.0:
raise ValueError(
f"Posterior quantile statistics can only be computed for quantiles "
f"between 0 and 1 (non-inclusive). Provided value: '{stat}' as "
f"part of '{stats=}'."
)
posterior = self.posterior(candidates)
import torch
result = pd.DataFrame(index=candidates.index)
with torch.no_grad():
for stat in stats:
try:
if isinstance(stat, float): # Calculate quantile statistic
stat_name = f"Q_{stat}"
vals = posterior.quantile(torch.tensor(stat))
else: # Calculate non-quantile statistic
stat_name = stat
vals = getattr(
posterior,
stat if stat not in ["std", "var"] else "variance",
)
except (AttributeError, NotImplementedError) as e:
                    # We could arrive here either because an invalid statistic string
                    # has been requested or because a quantile point has been
                    # requested but the posterior type does not implement quantiles.
raise TypeError(
f"The utilized posterior of type "
f"'{posterior.__class__.__name__}' does not support the "
f"statistic associated with the requested input '{stat}'."
) from e
if stat == "std":
vals = torch.sqrt(vals)
# Enforce a consistent shape
# https://github.com/pytorch/botorch/issues/2958
vals = vals.reshape((len(candidates), 1))
result[
[
f"{name}_{stat_name}"
for name in self._objective._modeled_quantity_names
]
] = vals.cpu().numpy()
return result
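    # Illustrative usage: request mean, standard deviation, and the 90% quantile
    # point. The result has one column per modelled quantity and statistic,
    # following the pattern `<name>_mean`, `<name>_std`, `<name>_Q_0.9`:
    #
    #     stats = surrogate.posterior_stats(df, stats=("mean", "std", 0.9))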
@override
def fit(
self,
searchspace: SearchSpace,
objective: Objective,
measurements: pd.DataFrame,
) -> None:
"""Train the surrogate model on the provided data.
Args:
searchspace: The search space in which experiments are conducted.
objective: The objective to be optimized.
measurements: The training data in experimental representation.
Raises:
ValueError: If the search space contains task parameters but the selected
surrogate model type does not support transfer learning.
NotImplementedError: When using a continuous search space and a non-GP
model.
"""
# TODO: consider adding a validation step for `measurements`
# Validate multi-target compatibility
if objective.is_multi_output and not self.supports_multi_output:
raise IncompatibleSurrogateError(
f"You attempted to train a single-output surrogate in a "
f"{len(objective.targets)}-target multi-output context. Either use "
f"a proper multi-output surrogate or consider explicitly "
f"replicating the current surrogate model using its "
f"'.{self.replicate.__name__}' method."
)
# When the context is unchanged, no retraining is necessary
if (
searchspace == self._searchspace
and objective == self._objective
and hash(measurements) == self._measurements_hash
):
return
# Check if transfer learning capabilities are needed
if (searchspace.n_tasks > 1) and (not self.supports_transfer_learning):
raise ValueError(
f"The search space contains task parameters but the selected "
f"surrogate model type ({self.__class__.__name__}) does not "
f"support transfer learning."
)
if (not searchspace.continuous.is_empty) and (
"GaussianProcess" not in self.__class__.__name__
):
raise NotImplementedError(
"Continuous search spaces are currently only supported by GPs."
)
# Block partial measurements
handle_missing_values(measurements, [t.name for t in objective.targets])
# Remember the training context
self._searchspace = searchspace
self._objective = objective
self._measurements_hash = hash(measurements)
# Create context-specific transformations
self._input_scaler = self._make_input_scaler(searchspace)
self._output_scaler = self._make_output_scaler(objective, measurements)
# Transform and fit
        # Note: The targets are only pre-transformed here. The remaining
        # transformations are applied in the form of BoTorch objectives. This has
        # the consequence that:
        # * The trained surrogate model can be called with pre-transformed target
        #   values, enabling predictions with input from the pre-transformed domain
        #   (this allows us to control precisely on which level the model is placed)
        # * The main transformation is part of the computational backpropagation graph
pre_transformed = objective._pre_transform(measurements, allow_extra=True)
train_x_comp_rep, train_y_tensor = to_tensor(
searchspace.transform(measurements, allow_extra=True), pre_transformed
)
train_x = self._input_scaler.transform(train_x_comp_rep)
train_y = (
train_y_tensor
if self._output_scaler is _IDENTITY_TRANSFORM
else self._output_scaler(train_y_tensor)[0]
)
self._fit(train_x, train_y)
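    # Illustrative usage (assuming a search space, an objective, and a dataframe
    # of measurements in experimental representation):
    #
    #     surrogate.fit(searchspace, objective, measurements)
    #     post = surrogate.posterior(candidates)  # predictions after training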
@abstractmethod
def _fit(self, train_x: Tensor, train_y: Tensor) -> None:
"""Perform the actual fitting logic."""
@override
def __str__(self) -> str:
fields = [
to_string(
"Supports Transfer Learning",
self.supports_transfer_learning,
single_line=True,
),
]
return to_string(self.__class__.__name__, *fields)
@define
class IndependentGaussianSurrogate(Surrogate, ABC):
"""A surrogate base class providing independent Gaussian posteriors."""
@override
def _posterior(self, candidates_comp_scaled: Tensor, /) -> GPyTorchPosterior:
import torch
from botorch.posteriors import GPyTorchPosterior
from gpytorch.distributions import MultivariateNormal
# Construct the Gaussian posterior from the estimated first and second moment
mean, var = self._estimate_moments(candidates_comp_scaled)
mvn = MultivariateNormal(mean, torch.diag_embed(var))
return GPyTorchPosterior(mvn)
@abstractmethod
def _estimate_moments(
self, candidates_comp_scaled: Tensor, /
) -> tuple[Tensor, Tensor]:
"""Estimate first and second moments of the Gaussian posterior."""
# Collect leftover original slotted classes processed by `attrs.define`
gc.collect()