Source code for baybe.simulation.lookup

"""Target lookup mechanisms."""

from __future__ import annotations

import logging
from collections.abc import Callable, Collection
from typing import Literal

import numpy as np
import pandas as pd

from baybe.simulation._imputation import impute_target_values
from baybe.targets.base import Target
from baybe.utils.dataframe import add_fake_measurements

_logger = logging.getLogger(__name__)


def look_up_targets(
    queries: pd.DataFrame,
    targets: Collection[Target],
    lookup: pd.DataFrame | Callable[[pd.DataFrame], pd.DataFrame] | None,
    impute_mode: Literal[
        "error", "worst", "best", "mean", "random", "ignore"
    ] = "error",
) -> None:
    """Add/fill target values in a dataframe using a lookup mechanism.

    Note:
        No new dataframe is created. ``queries`` is modified in-place and
        nothing is returned.

    Args:
        queries: The dataframe to be modified. Its content must be compatible
            with the chosen lookup mechanism.
        targets: The targets whose values are to be looked up.
        lookup: The lookup mechanism. Can be one of the following choices:

            - A dataframe mapping rows of ``queries`` to the corresponding
              target values. That is, it must contain the same columns as
              ``queries`` plus one additional column for each of the given
              targets.
            - A callable, providing target values for each row of
              ``queries``.
            - ``None``. Produces fake values for all targets.
        impute_mode: Specifies how a missing lookup will be handled.
            Only relevant for dataframe lookups.
            Can be one of the following choices:

            - ``"error"``: An error will be thrown.
            - ``"worst"``: Imputes the worst available value for each target.
            - ``"best"``: Imputes the best available value for each target.
            - ``"mean"``: Imputes the mean value for each target.
            - ``"random"``: A random row will be used for the lookup.
            - ``"ignore"``: Missing lookups are not expected to occur because
              the search space is assumed to have been reduced beforehand.
              If one occurs nevertheless, an assertion error is raised.

    Raises:
        ValueError: If an unsupported lookup mechanism is provided.
        IndexError: If a dataframe lookup has no matching row and
            ``impute_mode`` is ``"error"``.
        AssertionError: If a dataframe lookup has no matching row and
            ``impute_mode`` is ``"ignore"``.

    Example:
        >>> import pandas as pd
        >>> from baybe.targets.numerical import NumericalTarget
        >>> from baybe.simulation.lookup import look_up_targets
        >>>
        >>> targets = [NumericalTarget("target")]
        >>> df = pd.DataFrame({"x": [1, 2, 3]})
        >>> lookup_df = pd.DataFrame({"x": [1, 2], "target": [10, 20]})
        >>> look_up_targets(df, targets, lookup_df, impute_mode="mean")
        >>> print(df)
           x  target
        0  1    10.0
        1  2    20.0
        2  3    15.0
    """
    # No lookup given: fall back to fake measurements for all targets.
    if lookup is None:
        add_fake_measurements(queries, targets)
        return

    # Callable lookup: the callable itself provides the target columns.
    if isinstance(lookup, Callable):
        _look_up_targets_from_callable(queries, lookup)
        return

    # Dataframe lookup: match rows and impute where no match exists.
    if isinstance(lookup, pd.DataFrame):
        _look_up_targets_from_dataframe(queries, targets, lookup, impute_mode)
        return

    raise ValueError("Unsupported lookup mechanism.")
def _look_up_targets_from_callable(
    queries: pd.DataFrame, lookup: Callable[[pd.DataFrame], pd.DataFrame]
) -> None:
    """Look up target values by querying a callable.

    Args:
        queries: The dataframe to be extended in-place.
        lookup: A callable that receives ``queries`` and returns a dataframe.
            Every column of that dataframe is written into ``queries``.
    """
    looked_up = lookup(queries)
    queries[looked_up.columns] = looked_up


def _look_up_targets_from_dataframe(
    queries: pd.DataFrame,
    targets: Collection[Target],
    lookup: pd.DataFrame,
    impute_mode: Literal[
        "error", "worst", "best", "mean", "random", "ignore"
    ] = "error",
) -> None:
    """Look up target values from a dataframe.

    Each row of ``queries`` is compared against ``lookup`` on all columns of
    ``queries``. The target values of the matching lookup row are written
    into ``queries`` in-place.

    Args:
        queries: The dataframe to be extended in-place.
        targets: The targets whose values are to be looked up. Only their
            names are used for selecting the lookup columns.
        lookup: The dataframe containing the columns of ``queries`` plus one
            column per target.
        impute_mode: How to proceed when no lookup row matches a query row.
            ``"error"`` and ``"ignore"`` raise, all other modes are forwarded
            to ``impute_target_values``.

    Raises:
        AssertionError: If no match is found and ``impute_mode`` is
            ``"ignore"``.
        IndexError: If no match is found and ``impute_mode`` is ``"error"``.
    """
    # IMPROVE: Although it's not too important for a simulation, this
    #   could also be implemented for approximate matches
    target_names = [t.name for t in targets]
    collected = []

    for _, row in queries.iterrows():
        # IMPROVE: do the entire matching at once via a merge
        is_match = (lookup.loc[:, row.index] == row).all(axis=1, skipna=False)
        hits = lookup[is_match].index.values
        n_hits = len(hits)

        if n_hits == 1:
            # Exactly one match has been found
            found = lookup.loc[hits[0], target_names]
        elif n_hits > 1:
            # More than one instance of this parameter combination
            # has been measured
            _logger.warning(
                "The lookup rows with indexes %s seem to be "
                "duplicates regarding parameter values. Choosing a "
                "random one.",
                hits,
            )
            found = lookup.loc[np.random.choice(hits), target_names]
        else:
            # Parameter combination cannot be looked up and needs to be
            # imputed.
            if impute_mode == "ignore":
                raise AssertionError(
                    "Something went wrong for impute_mode 'ignore'. "
                    "It seems the search space was not correctly "
                    "reduced before recommendations were generated."
                )
            if impute_mode == "error":
                raise IndexError(
                    f"Cannot look up target values for {row}. "
                    "No matching row found in the lookup dataframe."
                )
            found = impute_target_values(targets, lookup, impute_mode)

        # Collect the matches
        collected.append(found.values)

    # Add the lookup values
    queries.loc[:, target_names] = np.asarray(collected)