"""Functionality for managing DOE campaigns. Main point of interaction via Python."""
from __future__ import annotations
import json
import cattrs
import numpy as np
import pandas as pd
from attrs import define, field
from attrs.converters import optional
from attrs.validators import instance_of
from baybe.objectives.base import Objective, to_objective
from baybe.parameters.base import Parameter
from baybe.recommenders.base import RecommenderProtocol
from baybe.recommenders.meta.sequential import TwoPhaseMetaRecommender
from baybe.searchspace.core import (
SearchSpace,
to_searchspace,
validate_searchspace_from_config,
)
from baybe.serialization import SerialMixin, converter
from baybe.targets.base import Target
from baybe.telemetry import (
TELEM_LABELS,
telemetry_record_recommended_measurement_percentage,
telemetry_record_value,
)
from baybe.utils.boolean import eq_dataframe
@define
class Campaign(SerialMixin):
    """Main class for interaction with BayBE.

    Campaigns define and record an experimentation process, i.e. the execution of a
    series of measurements and the iterative sequence of events involved.

    In particular, a campaign:
        * Defines the objective of an experimentation process.
        * Defines the search space over which the experimental parameter may vary.
        * Defines a recommender for exploring the search space.
        * Records the measurement data collected during the process.
        * Records metadata about the progress of the experimentation process.
    """

    # DOE specifications
    searchspace: SearchSpace = field(converter=to_searchspace)
    """The search space in which the experiments are conducted.
    When passing a :class:`baybe.parameters.base.Parameter`,
    a :class:`baybe.searchspace.discrete.SubspaceDiscrete`, or
    a :class:`baybe.searchspace.continuous.SubspaceContinuous`, conversion to
    :class:`baybe.searchspace.core.SearchSpace` is automatically applied."""

    objective: Objective | None = field(default=None, converter=optional(to_objective))
    """The optimization objective.
    When passing a :class:`baybe.targets.base.Target`, conversion to
    :class:`baybe.objectives.single.SingleTargetObjective` is automatically applied."""

    recommender: RecommenderProtocol = field(
        factory=TwoPhaseMetaRecommender,
        validator=instance_of(RecommenderProtocol),  # type: ignore[type-abstract]
    )
    """The employed recommender."""

    # Metadata
    n_batches_done: int = field(default=0, init=False)
    """The number of already processed batches."""

    n_fits_done: int = field(default=0, init=False)
    """The number of fits already done."""

    # Private
    _measurements_exp: pd.DataFrame = field(
        factory=pd.DataFrame, eq=eq_dataframe, init=False
    )
    """The experimental representation of the conducted experiments."""

    _cached_recommendation: pd.DataFrame = field(
        factory=pd.DataFrame, eq=eq_dataframe, init=False
    )
    """The cached recommendations."""

    def __str__(self) -> str:
        """Return a human-readable summary of the campaign and its components."""
        start_bold = "\033[1m"
        end_bold = "\033[0m"

        # Get str representation of campaign fields
        fields_to_print = [self.searchspace, self.objective, self.recommender]
        fields_str = "\n\n".join(str(x) for x in fields_to_print)

        # Put all relevant attributes of the campaign in one string
        campaign_str = (
            f"{start_bold}Campaign{end_bold}\n"
            f"\n{start_bold}Meta Data{end_bold}\nBatches Done: {self.n_batches_done}\n"
            f"\rFits Done: {self.n_fits_done}\n\n{fields_str}\n"
        )

        # Insert one space after every line feed / carriage return
        return campaign_str.replace("\n", "\n ").replace("\r", "\r ")

    @property
    def measurements(self) -> pd.DataFrame:
        """The experimental data added to the Campaign."""
        return self._measurements_exp

    @property
    def parameters(self) -> tuple[Parameter, ...]:
        """The parameters of the underlying search space."""
        return self.searchspace.parameters

    @property
    def targets(self) -> tuple[Target, ...]:
        """The targets of the underlying objective (empty if no objective is set)."""
        return self.objective.targets if self.objective is not None else ()

    @classmethod
    def from_config(cls, config_json: str) -> Campaign:
        """Create a campaign from a configuration JSON.

        Args:
            config_json: The string with the configuration JSON.

        Returns:
            The constructed campaign.
        """
        config = json.loads(config_json)
        return converter.structure(config, Campaign)

    @classmethod
    def validate_config(cls, config_json: str) -> None:
        """Validate a given campaign configuration JSON.

        The configuration is structured with the dedicated validation converter;
        the resulting object is discarded.

        Args:
            config_json: The JSON that should be validated.
        """
        config = json.loads(config_json)
        _validation_converter.structure(config, Campaign)

    def add_measurements(
        self,
        data: pd.DataFrame,
        numerical_measurements_must_be_within_tolerance: bool = True,
    ) -> None:
        """Add results from a dataframe to the internal database.

        Each addition of data is considered a new batch. Added results are checked for
        validity. Categorical values need to have an exact match. For numerical values,
        a campaign flag determines if values that lie outside a specified tolerance
        are accepted.

        Note that the provided dataframe is handed to the search space for
        bookkeeping before a copy is stored, so callers should not rely on it being
        left untouched.

        Args:
            data: The data to be added (with filled values for targets). Preferably
                created via :func:`baybe.campaign.Campaign.recommend`.
            numerical_measurements_must_be_within_tolerance: Flag indicating if
                numerical parameters need to be within their tolerances.

        Raises:
            ValueError: If one of the targets or parameters has missing values or
                NaNs in the provided dataframe.
            TypeError: If a target or a numerical parameter has non-numeric entries
                in the provided dataframe.
        """
        # Remember the recommendations that were issued before this call so that the
        # telemetry below can compare them against the incoming measurements. Reading
        # the attribute after the invalidation would always yield an empty dataframe.
        previous_recommendation = self._cached_recommendation

        # Invalidate recommendation cache first (in case of uncaught exceptions below)
        self._cached_recommendation = pd.DataFrame()

        # Check if all targets have valid values
        for target in self.targets:
            if data[target.name].isna().any():
                raise ValueError(
                    f"The target '{target.name}' has missing values or NaNs in the "
                    f"provided dataframe. Missing target values are not supported."
                )
            if data[target.name].dtype.kind not in "iufb":
                raise TypeError(
                    f"The target '{target.name}' has non-numeric entries in the "
                    f"provided dataframe. Non-numeric target values are not supported."
                )

        # Check if all parameters have valid values
        for param in self.parameters:
            if data[param.name].isna().any():
                raise ValueError(
                    f"The parameter '{param.name}' has missing values or NaNs in the "
                    f"provided dataframe. Missing parameter values are not supported."
                )
            if param.is_numerical and (data[param.name].dtype.kind not in "iufb"):
                raise TypeError(
                    f"The numerical parameter '{param.name}' has non-numeric entries in"
                    f" the provided dataframe."
                )

        # Update meta data
        # TODO: refactor responsibilities
        self.searchspace.discrete.mark_as_measured(
            data, numerical_measurements_must_be_within_tolerance
        )

        # Read in measurements and add them to the database
        self.n_batches_done += 1
        to_insert = data.copy()
        to_insert["BatchNr"] = self.n_batches_done
        # The fit number stays undefined until the batch is used in a recommendation
        to_insert["FitNr"] = np.nan

        self._measurements_exp = pd.concat(
            [self._measurements_exp, to_insert], axis=0, ignore_index=True
        )

        # Telemetry
        telemetry_record_value(TELEM_LABELS["COUNT_ADD_RESULTS"], 1)
        telemetry_record_recommended_measurement_percentage(
            previous_recommendation,
            data,
            self.parameters,
            numerical_measurements_must_be_within_tolerance,
        )

    def recommend(
        self,
        batch_size: int,
    ) -> pd.DataFrame:
        """Provide the recommendations for the next batch of experiments.

        Args:
            batch_size: Number of requested recommendations.

        Returns:
            Dataframe containing the recommendations in experimental representation.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(
                f"You must at least request one recommendation per batch, but provided "
                f"{batch_size=}."
            )

        # If there are cached recommendations and the batch size of those is equal to
        # the previously requested one, we just return those. A copy is handed out so
        # that modifications made by the caller (e.g. adding target columns) cannot
        # alter the cache, mirroring the copy made when the cache is filled.
        if len(self._cached_recommendation) == batch_size:
            return self._cached_recommendation.copy()

        # Update recommendation meta data
        if len(self._measurements_exp) > 0:
            self.n_fits_done += 1
            # Assign the current fit number to all measurements not yet used in a fit
            self._measurements_exp.fillna({"FitNr": self.n_fits_done}, inplace=True)

        # Get the recommended search space entries
        rec = self.recommender.recommend(
            batch_size,
            self.searchspace,
            self.objective,
            self._measurements_exp,
        )

        # Cache the recommendations
        self._cached_recommendation = rec.copy()

        # Telemetry
        telemetry_record_value(TELEM_LABELS["COUNT_RECOMMEND"], 1)
        telemetry_record_value(TELEM_LABELS["BATCH_SIZE"], batch_size)

        return rec
def _add_version(dict_: dict) -> dict:
    """Return a new dictionary holding the given entries plus the package version.

    The input dictionary is left unmodified. An existing ``"version"`` entry is
    overwritten in the returned dictionary.
    """
    # Function-scope import of the package version
    from baybe import __version__ as package_version

    versioned = dict(dict_)
    versioned["version"] = package_version
    return versioned
def _drop_version(dict_: dict) -> dict:
"""Drop the package version from the given dictionary."""
dict_.pop("version", None)
return dict_
# Build the campaign-specific (un)structuring functions. Attributes declared with
# ``init=False`` are included explicitly, and unknown keys are rejected when
# structuring.
unstructure_hook = cattrs.gen.make_dict_unstructure_fn(
    Campaign,
    converter,
    _cattrs_include_init_false=True,
)
structure_hook = cattrs.gen.make_dict_structure_fn(
    Campaign,
    converter,
    _cattrs_include_init_false=True,
    _cattrs_forbid_extra_keys=True,
)


def _unstructure_campaign(campaign):
    """Unstructure a campaign and add the package version to the result."""
    return _add_version(unstructure_hook(campaign))


def _structure_campaign(dict_, cls):
    """Structure a campaign from a dictionary after dropping its version entry."""
    return structure_hook(_drop_version(dict_), cls)


# Register de-/serialization hooks
converter.register_unstructure_hook(Campaign, _unstructure_campaign)
converter.register_structure_hook(Campaign, _structure_campaign)

# Converter for config validation: a copy of the main converter that structures
# search spaces via the dedicated validation function
_validation_converter = converter.copy()
_validation_converter.register_structure_hook(
    SearchSpace, validate_searchspace_from_config
)