# Source code for baybe.simulation.transfer_learning
"""Functionality for transfer learning backtesting."""
from __future__ import annotations
from copy import deepcopy
from typing import Any
import pandas as pd
from baybe.campaign import Campaign
from baybe.parameters import TaskParameter
from baybe.searchspace import SearchSpaceType
from baybe.simulation.scenarios import simulate_scenarios
# [docs]
def simulate_transfer_learning(
    campaign: Campaign,
    lookup: pd.DataFrame,
    /,
    *,
    batch_size: int = 1,
    n_doe_iterations: int | None = None,
    groupby: list[str] | None = None,
    n_mc_iterations: int = 1,
) -> pd.DataFrame:
    """Simulate Bayesian optimization with transfer learning.

    A wrapper around :func:`baybe.simulation.scenarios.simulate_scenarios` that
    partitions the search space into its tasks and simulates each task with the
    training data from the remaining tasks.

    **NOTE:**
    Currently, the simulation only supports purely discrete search spaces. This is
    because ``lookup`` serves both as the loop-closing element **and** as the source
    for off-task training data. For continuous (or mixed) spaces, the lookup mechanism
    would need to be either implemented as a callable (in which case the training data
    must be provided separately) or the continuous parameters need to be effectively
    restricted to the finite number of provided lookup configurations. Neither is
    implemented at the moment.

    Args:
        campaign: See :func:`baybe.simulation.core.simulate_experiment`.
        lookup: See :func:`baybe.simulation.scenarios.simulate_scenarios`.
        batch_size: See :func:`baybe.simulation.scenarios.simulate_scenarios`.
        n_doe_iterations: See :func:`baybe.simulation.scenarios.simulate_scenarios`.
        groupby: See :func:`baybe.simulation.scenarios.simulate_scenarios`.
        n_mc_iterations: See :func:`baybe.simulation.scenarios.simulate_scenarios`.

    Returns:
        A dataframe as returned by :func:`baybe.simulation.scenarios.simulate_scenarios`
        where the different tasks are represented in the ``Scenario`` column.

    Raises:
        NotImplementedError: If a non-discrete search space is chosen.
        NotImplementedError: If the campaign contains more than one task parameter.
        ValueError: If the campaign contains no task parameter.
    """
    # TODO: Currently, we assume a purely discrete search space
    if campaign.searchspace.type != SearchSpaceType.DISCRETE:
        raise NotImplementedError(
            "Currently, only purely discrete search spaces are supported. "
            "For details, see NOTE in the function docstring."
        )

    # TODO [16932]: Currently, we assume exactly one task parameter exists
    # Extract the single task parameter
    task_params = [p for p in campaign.parameters if isinstance(p, TaskParameter)]
    if not task_params:
        # Without a task parameter, there is no basis for partitioning the search
        # space into tasks. Fail early with a clear message instead of letting the
        # subscript below raise an opaque IndexError.
        raise ValueError(
            "Transfer learning requires a task parameter, but the given campaign "
            "contains none."
        )
    if len(task_params) > 1:
        raise NotImplementedError(
            "Currently, transfer learning supports only a single task parameter."
        )
    task_param = task_params[0]

    # Create simulation objects for all tasks
    scenarios: dict[Any, Campaign] = {}
    for task in task_param.values:
        # Create a campaign that focuses only on the current task by excluding
        # off-task configurations from the candidates list
        # TODO: Reconsider if deepcopies are required once [16605] is resolved
        campaign_task = deepcopy(campaign)
        campaign_task.toggle_discrete_candidates(
            pd.DataFrame({task_param.name: [task]}), exclude=True, complement=True
        )

        # Use all off-task data as training data
        df_train = lookup[lookup[task_param.name] != task]
        campaign_task.add_measurements(df_train)

        # Add the task scenario
        scenarios[task] = campaign_task

    # Simulate all tasks; "ignore" skips lookup rows that have no matching
    # candidate instead of imputing values for them
    return simulate_scenarios(
        scenarios,
        lookup,
        batch_size=batch_size,
        n_doe_iterations=n_doe_iterations,
        groupby=groupby,
        n_mc_iterations=n_mc_iterations,
        impute_mode="ignore",
    )