Source code for baybe.campaign

"""Functionality for managing DOE campaigns. Main point of interaction via Python."""

from __future__ import annotations

import json

import cattrs
import numpy as np
import pandas as pd
from attrs import define, field
from attrs.converters import optional
from attrs.validators import instance_of

from baybe.objectives.base import Objective, to_objective
from baybe.parameters.base import Parameter
from baybe.recommenders.base import RecommenderProtocol
from baybe.recommenders.meta.sequential import TwoPhaseMetaRecommender
from baybe.searchspace.core import (
    SearchSpace,
    to_searchspace,
    validate_searchspace_from_config,
)
from baybe.serialization import SerialMixin, converter
from baybe.targets.base import Target
from baybe.telemetry import (
    TELEM_LABELS,
    telemetry_record_recommended_measurement_percentage,
    telemetry_record_value,
)
from baybe.utils.boolean import eq_dataframe


@define
class Campaign(SerialMixin):
    """Main class for interaction with BayBE.

    Campaigns define and record an experimentation process, i.e. the execution of a
    series of measurements and the iterative sequence of events involved.

    In particular, a campaign:
        * Defines the objective of an experimentation process.
        * Defines the search space over which the experimental parameter may vary.
        * Defines a recommender for exploring the search space.
        * Records the measurement data collected during the process.
        * Records metadata about the progress of the experimentation process.
    """

    # DOE specifications
    searchspace: SearchSpace = field(converter=to_searchspace)
    """The search space in which the experiments are conducted.
    When passing a :class:`baybe.parameters.base.Parameter`,
    a :class:`baybe.searchspace.discrete.SubspaceDiscrete`, or
    a :class:`baybe.searchspace.continuous.SubspaceContinuous`, conversion to
    :class:`baybe.searchspace.core.SearchSpace` is automatically applied."""

    objective: Objective | None = field(default=None, converter=optional(to_objective))
    """The optimization objective.
    When passing a :class:`baybe.targets.base.Target`, conversion to
    :class:`baybe.objectives.single.SingleTargetObjective` is automatically applied."""

    recommender: RecommenderProtocol = field(
        factory=TwoPhaseMetaRecommender,
        validator=instance_of(RecommenderProtocol),  # type: ignore[type-abstract]
    )
    """The employed recommender"""

    # Metadata
    n_batches_done: int = field(default=0, init=False)
    """The number of already processed batches."""

    n_fits_done: int = field(default=0, init=False)
    """The number of fits already done."""

    # Private
    _measurements_exp: pd.DataFrame = field(
        factory=pd.DataFrame, eq=eq_dataframe, init=False
    )
    """The experimental representation of the conducted experiments."""

    _cached_recommendation: pd.DataFrame = field(
        factory=pd.DataFrame, eq=eq_dataframe, init=False
    )
    """The cached recommendations."""

    def __str__(self) -> str:
        """Return a human-readable summary of the campaign.

        The summary contains the batch/fit counters followed by the string
        representations of the search space, the objective and the recommender.
        """
        start_bold = "\033[1m"
        end_bold = "\033[0m"

        # Get str representation of campaign fields
        fields_to_print = [self.searchspace, self.objective, self.recommender]
        fields_str = "\n\n".join(str(x) for x in fields_to_print)

        # Put all relevant attributes of the campaign in one string
        # NOTE(review): the literal below is reproduced exactly as it appears in the
        # extracted source; whitespace inside it may have been collapsed during
        # extraction -- verify against the repository before relying on the layout.
        campaign_str = f"""{start_bold}Campaign{end_bold} \n{start_bold}Meta Data{end_bold}\nBatches Done: {self.n_batches_done} \rFits Done: {self.n_fits_done}\n\n{fields_str}\n"""

        # Indent every line of the summary by one space
        return campaign_str.replace("\n", "\n ").replace("\r", "\r ")

    @property
    def measurements(self) -> pd.DataFrame:
        """The experimental data added to the Campaign."""
        return self._measurements_exp

    @property
    def parameters(self) -> tuple[Parameter, ...]:
        """The parameters of the underlying search space."""
        return self.searchspace.parameters

    @property
    def targets(self) -> tuple[Target, ...]:
        """The targets of the underlying objective."""
        return self.objective.targets if self.objective is not None else ()

    @classmethod
    def from_config(cls, config_json: str) -> Campaign:
        """Create a campaign from a configuration JSON.

        Args:
            config_json: The string with the configuration JSON.

        Returns:
            The constructed campaign.
        """
        config = json.loads(config_json)
        return converter.structure(config, Campaign)

    @classmethod
    def validate_config(cls, config_json: str) -> None:
        """Validate a given campaign configuration JSON.

        The configuration is structured with a dedicated validation converter; the
        resulting object is discarded, only raised errors are of interest.

        Args:
            config_json: The JSON that should be validated.
        """
        config = json.loads(config_json)
        _validation_converter.structure(config, Campaign)

    def add_measurements(
        self,
        data: pd.DataFrame,
        numerical_measurements_must_be_within_tolerance: bool = True,
    ) -> None:
        """Add results from a dataframe to the internal database.

        Each addition of data is considered a new batch. Added results are checked for
        validity. Categorical values need to have an exact match. For numerical values,
        a campaign flag determines if values that lie outside a specified tolerance
        are accepted.
        Note that this modifies the provided data in-place.

        Args:
            data: The data to be added (with filled values for targets). Preferably
                created via :func:`baybe.campaign.Campaign.recommend`.
            numerical_measurements_must_be_within_tolerance: Flag indicating if
                numerical parameters need to be within their tolerances.

        Raises:
            ValueError: If one of the targets or parameters has missing values or
                NaNs in the provided dataframe.
            TypeError: If a target or a numerical parameter has non-numeric entries
                in the provided dataframe.
        """
        # Remember the recommendations cached before this call. The telemetry at the
        # end compares them with the incoming measurements, so it must not receive
        # the freshly reset (always empty) cache.
        previous_recommendation = self._cached_recommendation

        # Invalidate recommendation cache first (in case of uncaught exceptions below)
        self._cached_recommendation = pd.DataFrame()

        # Check if all targets have valid values
        for target in self.targets:
            if data[target.name].isna().any():
                raise ValueError(
                    f"The target '{target.name}' has missing values or NaNs in the "
                    f"provided dataframe. Missing target values are not supported."
                )
            if data[target.name].dtype.kind not in "iufb":
                raise TypeError(
                    f"The target '{target.name}' has non-numeric entries in the "
                    f"provided dataframe. Non-numeric target values are not supported."
                )

        # Check if all parameters have valid values
        for param in self.parameters:
            if data[param.name].isna().any():
                raise ValueError(
                    f"The parameter '{param.name}' has missing values or NaNs in the "
                    f"provided dataframe. Missing parameter values are not supported."
                )
            if param.is_numerical and (data[param.name].dtype.kind not in "iufb"):
                raise TypeError(
                    f"The numerical parameter '{param.name}' has non-numeric entries in"
                    f" the provided dataframe."
                )

        # Update meta data
        # TODO: refactor responsibilities
        self.searchspace.discrete.mark_as_measured(
            data, numerical_measurements_must_be_within_tolerance
        )

        # Read in measurements and add them to the database
        self.n_batches_done += 1
        to_insert = data.copy()
        to_insert["BatchNr"] = self.n_batches_done
        # The fit number stays open until the next call to `recommend` fills it in
        to_insert["FitNr"] = np.nan

        self._measurements_exp = pd.concat(
            [self._measurements_exp, to_insert], axis=0, ignore_index=True
        )

        # Telemetry
        telemetry_record_value(TELEM_LABELS["COUNT_ADD_RESULTS"], 1)
        telemetry_record_recommended_measurement_percentage(
            previous_recommendation,
            data,
            self.parameters,
            numerical_measurements_must_be_within_tolerance,
        )

    def recommend(
        self,
        batch_size: int,
    ) -> pd.DataFrame:
        """Provide the recommendations for the next batch of experiments.

        Args:
            batch_size: Number of requested recommendations.

        Returns:
            Dataframe containing the recommendations in experimental representation.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(
                f"You must at least request one recommendation per batch, but provided "
                f"{batch_size=}."
            )

        # If there are cached recommendations and the batch size of those is equal to
        # the previously requested one, we just return those. A copy is handed out so
        # that in-place edits by the caller (e.g. adding target columns) cannot
        # corrupt the cache.
        if len(self._cached_recommendation) == batch_size:
            return self._cached_recommendation.copy()

        # Update recommendation meta data
        if len(self._measurements_exp) > 0:
            self.n_fits_done += 1
            # Stamp all measurements that have not been part of a fit yet
            self._measurements_exp.fillna({"FitNr": self.n_fits_done}, inplace=True)

        # Get the recommended search space entries
        rec = self.recommender.recommend(
            batch_size,
            self.searchspace,
            self.objective,
            self._measurements_exp,
        )

        # Cache the recommendations
        self._cached_recommendation = rec.copy()

        # Telemetry
        telemetry_record_value(TELEM_LABELS["COUNT_RECOMMEND"], 1)
        telemetry_record_value(TELEM_LABELS["BATCH_SIZE"], batch_size)

        return rec
def _add_version(dict_: dict) -> dict:
    """Add the package version to the given dictionary.

    Args:
        dict_: The dictionary to be extended. It is not modified.

    Returns:
        A new dictionary with an additional ``"version"`` entry.
    """
    # Imported locally, as in the original code (presumably to avoid a circular
    # import with the package root -- confirm before moving it to module level)
    from baybe import __version__

    return {**dict_, "version": __version__}


def _drop_version(dict_: dict) -> dict:
    """Drop the package version from the given dictionary.

    Args:
        dict_: The dictionary possibly containing a ``"version"`` entry. It is not
            modified, so that structuring a campaign never alters the caller's
            input.

    Returns:
        A new dictionary containing all entries except ``"version"``.
    """
    return {key: value for key, value in dict_.items() if key != "version"}


# Register de-/serialization hooks
unstructure_hook = cattrs.gen.make_dict_unstructure_fn(
    Campaign, converter, _cattrs_include_init_false=True
)
structure_hook = cattrs.gen.make_dict_structure_fn(
    Campaign, converter, _cattrs_include_init_false=True, _cattrs_forbid_extra_keys=True
)
# The version is attached on serialization and stripped again on deserialization,
# since extra keys are forbidden by the structure hook above
converter.register_unstructure_hook(
    Campaign, lambda x: _add_version(unstructure_hook(x))
)
converter.register_structure_hook(
    Campaign, lambda d, cl: structure_hook(_drop_version(d), cl)
)


# Converter for config validation
_validation_converter = converter.copy()
_validation_converter.register_structure_hook(
    SearchSpace, validate_searchspace_from_config
)