"""Botorch recommender."""
import math
from typing import Any, ClassVar
import pandas as pd
from attr.converters import optional
from attrs import define, field
from baybe.exceptions import NoMCAcquisitionFunctionError
from baybe.recommenders.pure.bayesian.base import BayesianRecommender
from baybe.searchspace import (
SearchSpace,
SearchSpaceType,
SubspaceContinuous,
SubspaceDiscrete,
)
from baybe.utils.dataframe import to_tensor
from baybe.utils.sampling_algorithms import (
DiscreteSamplingMethod,
sample_numerical_df,
)
# (Sphinx viewcode "[docs]" link marker -- extraction residue, not part of the code.)
@define(kw_only=True)
class BotorchRecommender(BayesianRecommender):
    """A pure recommender utilizing Botorch's optimization machinery.

    This recommender makes use of Botorch's ``optimize_acqf_discrete``,
    ``optimize_acqf`` and ``optimize_acqf_mixed`` functions to optimize discrete,
    continuous and hybrid search spaces, respectively. Accordingly, it can be applied to
    all kinds of search spaces.

    Note:
        In hybrid search spaces, the used algorithm performs a brute-force optimization
        that can be computationally expensive. Thus, the behavior of the algorithm in
        hybrid search spaces can be controlled via two additional parameters
        (``hybrid_sampler`` and ``sampling_percentage``).
    """

    # Class variables
    compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID
    # See base class.

    # Object variables
    sequential_continuous: bool = field(default=False)
    """Flag defining whether to apply sequential greedy or batch optimization in
    **continuous** search spaces. (In discrete/hybrid spaces, sequential greedy
    optimization is applied automatically.)
    """

    hybrid_sampler: DiscreteSamplingMethod | None = field(
        converter=optional(DiscreteSamplingMethod), default=None
    )
    """Strategy used for sampling the discrete subspace when performing hybrid search
    space optimization. ``None`` disables sampling, i.e. all discrete candidates are
    considered."""

    sampling_percentage: float = field(default=1.0)
    """Fraction (between 0 and 1) of the discrete search space that is sampled when
    performing hybrid search space optimization. Ignored when
    ``hybrid_sampler=None``."""

    @sampling_percentage.validator
    def _validate_percentage(  # noqa: DOC101, DOC103
        self, _: Any, value: float
    ) -> None:
        """Validate that the given value is in fact a percentage.

        Raises:
            ValueError: If ``value`` is not between 0 and 1.
        """
        if not 0 <= value <= 1:
            raise ValueError(
                f"Hybrid sampling percentage needs to be between 0 and 1 but is {value}"
            )

    def _require_mc_for_batch(self, batch_size: int) -> None:
        """Ensure the acquisition function can handle the requested batch size.

        Args:
            batch_size: The size of the recommendation batch.

        Raises:
            NoMCAcquisitionFunctionError: If a non-Monte Carlo acquisition function is
                used with a batch size > 1.
        """
        if batch_size > 1 and not self.acquisition_function.is_mc:
            raise NoMCAcquisitionFunctionError(
                f"The '{self.__class__.__name__}' only works with Monte Carlo "
                f"acquisition functions for batch sizes > 1."
            )

    @staticmethod
    def _match_candidate_indices(
        points: Any, candidates_comp: pd.DataFrame
    ) -> pd.Index:
        """Look up the dataframe indices of optimized points among the candidates.

        The points are placed on the left side of a left join so that the result
        follows the order of ``points`` (one entry per point for unique candidates)
        rather than the order of the candidates.

        Args:
            points: The selected points, with one column per column of
                ``candidates_comp`` (anything accepted by ``pd.DataFrame``).
            candidates_comp: The computational representation of the candidates.

        Returns:
            The candidate indices, in the order of ``points``.
        """
        # IMPROVE: The merging procedure is conceptually similar to what
        #   `SearchSpace._match_measurement_with_searchspace_indices` does, though using
        #   a simpler matching logic. When refactoring the SearchSpace class to
        #   handle continuous parameters, a corresponding utility could be extracted.
        matched = pd.merge(
            pd.DataFrame(points, columns=candidates_comp.columns),
            candidates_comp.reset_index(),
            on=list(candidates_comp),
            how="left",
        )
        return pd.Index(matched["index"])

    def _recommend_discrete(
        self,
        subspace_discrete: SubspaceDiscrete,
        candidates_comp: pd.DataFrame,
        batch_size: int,
    ) -> pd.Index:
        """Generate recommendations from a discrete search space.

        Args:
            subspace_discrete: The discrete subspace from which to generate
                recommendations.
            candidates_comp: The computational representation of all discrete candidate
                points to be considered.
            batch_size: The size of the recommendation batch.

        Raises:
            NoMCAcquisitionFunctionError: If a non-Monte Carlo acquisition function is
                used with a batch size > 1.

        Returns:
            The dataframe indices of the recommended points in the provided
            computational representation.
        """
        # For batch size > 1, this optimizer needs a MC acquisition function
        self._require_mc_for_batch(batch_size)

        from botorch.optim import optimize_acqf_discrete

        # determine the next set of points to be tested
        candidates_tensor = to_tensor(candidates_comp)
        points, _ = optimize_acqf_discrete(
            self._botorch_acqf, batch_size, candidates_tensor
        )

        # retrieve the index of the points from the input dataframe
        return self._match_candidate_indices(points, candidates_comp)

    def _recommend_continuous(
        self,
        subspace_continuous: SubspaceContinuous,
        batch_size: int,
    ) -> pd.DataFrame:
        """Generate recommendations from a continuous search space.

        Args:
            subspace_continuous: The continuous subspace from which to generate
                recommendations.
            batch_size: The size of the recommendation batch.

        Raises:
            NoMCAcquisitionFunctionError: If a non-Monte Carlo acquisition function is
                used with a batch size > 1.

        Returns:
            A dataframe containing the recommendations as individual rows.
        """
        # For batch size > 1, this optimizer needs a MC acquisition function
        self._require_mc_for_batch(batch_size)

        import torch
        from botorch.optim import optimize_acqf

        points, _ = optimize_acqf(
            acq_function=self._botorch_acqf,
            bounds=torch.from_numpy(subspace_continuous.param_bounds_comp),
            q=batch_size,
            num_restarts=5,  # TODO make choice for num_restarts
            raw_samples=10,  # TODO make choice for raw_samples
            equality_constraints=[
                c.to_botorch(subspace_continuous.parameters)
                for c in subspace_continuous.constraints_lin_eq
            ]
            or None,  # TODO: https://github.com/pytorch/botorch/issues/2042
            inequality_constraints=[
                c.to_botorch(subspace_continuous.parameters)
                for c in subspace_continuous.constraints_lin_ineq
            ]
            or None,  # TODO: https://github.com/pytorch/botorch/issues/2042
            sequential=self.sequential_continuous,
        )

        # Return optimized points as dataframe
        rec = pd.DataFrame(points, columns=subspace_continuous.param_names)
        return rec

    def _recommend_hybrid(
        self,
        searchspace: SearchSpace,
        candidates_comp: pd.DataFrame,
        batch_size: int,
    ) -> pd.DataFrame:
        """Recommend points using the ``optimize_acqf_mixed`` function of BoTorch.

        This function samples points from the discrete subspace, performs optimization
        in the continuous subspace with these points being fixed and returns the best
        found solution.

        **Important**: This performs a brute-force calculation by fixing every
        considered assignment of discrete variables and optimizing the continuous
        subspace for each of them. It is thus computationally expensive.

        Args:
            searchspace: The search space in which the recommendations should be made.
            candidates_comp: The computational representation of the candidates
                of the discrete subspace. The passed dataframe is not modified.
            batch_size: The size of the calculated batch.

        Raises:
            NoMCAcquisitionFunctionError: If a non-Monte Carlo acquisition function is
                used with a batch size > 1.

        Returns:
            The recommended points.
        """
        # For batch size > 1, this optimizer needs a MC acquisition function
        self._require_mc_for_batch(batch_size)

        import torch
        from botorch.optim import optimize_acqf_mixed

        if len(candidates_comp) > 0:
            # Potential sampling of discrete candidates
            if self.hybrid_sampler is not None:
                # Calculate the number of samples from the given percentage
                n_candidates = math.ceil(
                    self.sampling_percentage * len(candidates_comp.index)
                )
                candidates_comp = sample_numerical_df(
                    candidates_comp, n_candidates, method=self.hybrid_sampler
                )

            # Prepare all considered discrete configurations in the
            # List[Dict[int, float]] format expected by BoTorch.
            # The columns are relabeled on a copy so that the dataframe handed in by
            # the caller keeps its original column names.
            # TODO: Currently assumes that discrete parameters are first and continuous
            #   second. Once parameter redesign [11611] is completed, we might adjust
            #   this.
            candidates_comp = candidates_comp.copy()
            candidates_comp.columns = list(  # type: ignore
                range(len(candidates_comp.columns))
            )
            fixed_features_list = candidates_comp.to_dict("records")
        else:
            fixed_features_list = None

        # Number of leading (discrete) columns in the joint representation. Computed
        # outside the branch above so that it is bound on every code path.
        num_comp_columns = len(candidates_comp.columns)

        # Actual call of the BoTorch optimization routine
        continuous_parameters = searchspace.continuous.parameters
        points, _ = optimize_acqf_mixed(
            acq_function=self._botorch_acqf,
            bounds=torch.from_numpy(searchspace.param_bounds_comp),
            q=batch_size,
            num_restarts=5,  # TODO make choice for num_restarts
            raw_samples=10,  # TODO make choice for raw_samples
            fixed_features_list=fixed_features_list,
            equality_constraints=[
                c.to_botorch(continuous_parameters, idx_offset=num_comp_columns)
                for c in searchspace.continuous.constraints_lin_eq
            ]
            or None,  # TODO: https://github.com/pytorch/botorch/issues/2042
            inequality_constraints=[
                c.to_botorch(continuous_parameters, idx_offset=num_comp_columns)
                for c in searchspace.continuous.constraints_lin_ineq
            ]
            or None,  # TODO: https://github.com/pytorch/botorch/issues/2042
        )

        disc_points = points[:, :num_comp_columns]
        cont_points = points[:, num_comp_columns:]

        # Get selected candidate indices. They must follow the row order of `points`
        # because the continuous part is attached positionally below.
        idxs = self._match_candidate_indices(disc_points, candidates_comp)

        # Get experimental representation of discrete and continuous parts
        rec_disc_exp = searchspace.discrete.exp_rep.loc[idxs]
        rec_cont_exp = pd.DataFrame(
            cont_points, columns=searchspace.continuous.param_names
        )

        # Adjust the index of the continuous part and create overall recommendations
        rec_cont_exp.index = rec_disc_exp.index
        rec_exp = pd.concat([rec_disc_exp, rec_cont_exp], axis=1)

        return rec_exp