Source code for baybe.objectives.base

"""Base classes for all objectives."""

from __future__ import annotations

import gc
import warnings
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, ClassVar

import pandas as pd
from attrs import define, field

from baybe.serialization.mixin import SerialMixin
from baybe.targets.base import Target
from baybe.targets.numerical import NumericalTarget
from baybe.utils.basic import is_all_instance
from baybe.utils.dataframe import get_transform_objects, to_tensor
from baybe.utils.dataframe import (
    handle_missing_values as df_handle_missing_values,
)
from baybe.utils.metadata import Metadata, to_metadata
from baybe.utils.validation import validate_target_input

if TYPE_CHECKING:
    from botorch.acquisition.objective import MCAcquisitionObjective, PosteriorTransform


# TODO: Reactivate slots in all classes once cached_property is supported:
#   https://github.com/python-attrs/attrs/issues/164


@define(frozen=True, slots=False)
class Objective(ABC, SerialMixin):
    """Abstract base class for all objectives."""

    is_multi_output: ClassVar[bool]
    """Class variable indicating if the objective produces multiple outputs."""

    metadata: Metadata = field(
        factory=Metadata,
        converter=lambda x: to_metadata(x, Metadata),
        kw_only=True,
    )
    """Optional metadata containing description and other information."""

    @property
    def description(self) -> str | None:
        """The description of the objective."""
        return self.metadata.description

    @property
    @abstractmethod
    def targets(self) -> tuple[Target, ...]:
        """The targets included in the objective."""

    @property
    def _modeled_quantities(self) -> tuple[Target, ...]:
        """The quantities modeled by this objective."""
        return self.targets

    @property
    def _modeled_quantity_names(self) -> tuple[str, ...]:
        """The names of the quantities returned by the pre-transformation."""
        return tuple(t.name for t in self._modeled_quantities)

    @property
    def _model_quantities_to_target_names(self) -> dict[str, list[str]]:
        """The mapping from modeled quantity names to names of the required targets."""
        return {mq.name: [mq.name] for mq in self._modeled_quantities}

    @property
    def _n_models(self) -> int:
        """The number of models used in the objective.

        Corresponds to the number of dimensions after the pre-transformation.
        """
        return len(self._modeled_quantities)

    @property
    def _is_multi_model(self) -> bool:
        """Check if the objective relies on multiple surrogate models."""
        return self._n_models > 1

    @property
    @abstractmethod
    def output_names(self) -> tuple[str, ...]:
        """The names of the outputs of the objective."""

    @property
    def n_outputs(self) -> int:
        """The number of outputs of the objective."""
        return len(self.output_names)

    @property
    @abstractmethod
    def supports_partial_measurements(self) -> bool:
        """Boolean indicating if the objective accepts partial target measurements."""

    @property
    def _oriented_targets(self) -> tuple[Target, ...]:
        """The targets with optional negation transformation for minimization."""
        return tuple(
            t.negate() if isinstance(t, NumericalTarget) and t.minimize else t
            for t in self.targets
        )

    @property
    def _full_transformation(self) -> MCAcquisitionObjective:
        """The end-to-end transformation applied, from targets to objective values."""
        return self.to_botorch()

    def handle_missing_values(
        self, measurements: pd.DataFrame
    ) -> dict[str, pd.DataFrame]:
        """Handle missing values in the given measurements for each modeled quantity.

        Args:
            measurements: Data potentially containing missing values.

        Returns:
            A dictionary with one dataframe for each modeled quantity.
        """
        # Each modeled quantity is cleaned independently, based only on the target
        # columns it requires.
        return {
            quantity: df_handle_missing_values(measurements, target_names, drop=True)
            for quantity, target_names in (
                self._model_quantities_to_target_names.items()
            )
        }

    def to_botorch(self) -> MCAcquisitionObjective:
        """Convert to BoTorch objective.

        Raises:
            NotImplementedError: If any of the (oriented) targets is not a
                :class:`~baybe.targets.numerical.NumericalTarget`.

        Returns:
            A multi-output objective that applies the transformation of the i-th
            target to the i-th entry of the last sample dimension.
        """
        if not is_all_instance(targets := self._oriented_targets, NumericalTarget):
            raise NotImplementedError(
                "Conversion to BoTorch is only supported for numerical targets."
            )

        # Imported lazily, as done for all torch/botorch usage in this module
        import torch
        from botorch.acquisition.multi_objective.objective import (
            GenericMCMultiOutputObjective,
        )

        return GenericMCMultiOutputObjective(
            lambda samples, X: torch.stack(
                [
                    t.transformation.to_botorch_objective()(samples[..., i])
                    for i, t in enumerate(targets)
                ],
                dim=-1,
            )
        )

    @abstractmethod
    def to_botorch_posterior_transform(self) -> PosteriorTransform:
        """Convert to BoTorch posterior transform, if possible.

        A representation as posterior transformation is only possible if Gaussianity
        is preserved by the involved operations, that is, when all targets are
        inherently numerical and their assigned transformations are affine.
        """

    def _pre_transform(
        self,
        df: pd.DataFrame,
        /,
        *,
        allow_missing: bool = False,
        allow_extra: bool = False,
    ) -> pd.DataFrame:
        """Pre-transform the target values prior to predictive modeling.

        For details on the method arguments, see :meth:`transform`.
        """
        # By default, we just pipe through the unmodified target values
        targets = get_transform_objects(
            df, self.targets, allow_missing=allow_missing, allow_extra=allow_extra
        )
        return df[[t.name for t in targets]]

    def transform(
        self,
        df: pd.DataFrame | None = None,
        /,
        *,
        allow_missing: bool = False,
        allow_extra: bool | None = None,
        data: pd.DataFrame | None = None,
    ) -> pd.DataFrame:
        """Evaluate the objective on the target columns of the given dataframe.

        Args:
            df: The dataframe to be transformed. The allowed columns of the
                dataframe are dictated by the ``allow_missing`` and
                ``allow_extra`` flags.
            allow_missing: If ``False``, each target of the objective must have
                exactly one corresponding column in the given dataframe. If
                ``True``, the dataframe may contain only a subset of target columns.
            allow_extra: If ``False``, each column present in the dataframe must
                correspond to exactly one target of the objective. If ``True``, the
                dataframe may contain additional non-target-related columns, which
                will be ignored.
                The ``None`` default value is for temporary backward compatibility
                only and will be removed in a future version.
            data: Ignore! For backward compatibility only.

        Raises:
            ValueError: If dataframes are passed to both ``df`` and ``data``, or
                to neither of them.

        Returns:
            A dataframe containing the objective values for the given input
            dataframe.
        """
        # >>>>>>>>>> Deprecation
        if not ((df is None) ^ (data is None)):
            raise ValueError(
                "Provide the dataframe to be transformed as first positional argument."
            )

        if data is not None:
            df = data
            warnings.warn(
                "Providing the dataframe via the `data` argument is deprecated and "
                "will be removed in a future version. Please pass your dataframe "
                "as positional argument instead.",
                DeprecationWarning,
                # Attribute the warning to the caller so that the default warning
                # filters do not hide it as a library-internal deprecation.
                stacklevel=2,
            )

        # Mypy does not infer from the above that `df` must be a dataframe here
        assert isinstance(df, pd.DataFrame)

        if allow_extra is None:
            allow_extra = True
            if set(df.columns) - {p.name for p in self.targets}:
                warnings.warn(
                    "For backward compatibility, the new `allow_extra` flag is set "
                    "to `True` when left unspecified. However, this behavior will be "
                    "changed in a future version. If you want to invoke the old "
                    "behavior, please explicitly set `allow_extra=True`.",
                    DeprecationWarning,
                    stacklevel=2,
                )
        # <<<<<<<<<< Deprecation

        targets = get_transform_objects(
            df,
            self._oriented_targets,  # <-- important to use oriented version
            allow_missing=allow_missing,
            allow_extra=allow_extra,
        )

        import torch

        # Pure evaluation: no gradients are needed for the transformed values
        with torch.no_grad():
            transformed = self._full_transformation(
                to_tensor(df[[t.name for t in targets]])
            )

        return pd.DataFrame(
            transformed.numpy(), columns=self.output_names, index=df.index
        )

    def identify_non_dominated_configurations(
        self, configurations: pd.DataFrame, /
    ) -> pd.Series:
        """Create a Boolean mask indicating non-dominated target configurations.

        In case of duplicated non-dominated points, all duplicates are marked as
        non-dominated.

        Note:
            Non-dominated configurations can be computed for any objective type, not
            just for :class:`~baybe.objectives.pareto.ParetoObjective`. For more
            details, have a look at the corresponding
            :ref:`user guide section
            <userguide/objectives:Identifying Non-Dominated Configurations>`.

        Args:
            configurations: The target configurations for which the non-dominated
                subset is identified.

        Returns:
            A Boolean series indicating which configurations are non-dominated.
            The series carries the index of ``configurations``, so it can be used
            directly as a mask on the input dataframe.
        """
        from botorch.utils.multi_objective.pareto import is_non_dominated

        validate_target_input(configurations, self.targets)

        targets = self.transform(configurations)
        non_dominated = is_non_dominated(Y=to_tensor(targets), deduplicate=False)

        # `transform` keeps the row order and index of its input, hence the mask
        # can be aligned with the original dataframe via its index.
        return pd.Series(
            non_dominated.numpy(),
            index=configurations.index,
            name="is_non_dominated",
        )
def to_objective(x: Target | Objective, /) -> Objective:
    """Return an objective for the given target or objective.

    Objectives are handed back unchanged. Any other input is converted by calling
    its ``to_objective`` method.
    """
    if isinstance(x, Objective):
        return x
    return x.to_objective()
# Collect leftover original slotted classes processed by `attrs.define` gc.collect()