Source code for baybe.searchspace.core

"""Functionality for managing search spaces."""

from __future__ import annotations

import gc
from collections.abc import Iterable, Sequence
from enum import Enum
from typing import cast

import pandas as pd
from attrs import define, field
from typing_extensions import override

from baybe.constraints import (
    validate_constraints,
)
from baybe.constraints.base import Constraint
from baybe.parameters import TaskParameter
from baybe.parameters.base import Parameter
from baybe.searchspace.continuous import SubspaceContinuous
from baybe.searchspace.discrete import (
    MemorySize,
    SubspaceDiscrete,
    validate_simplex_subspace_from_config,
)
from baybe.searchspace.validation import validate_parameters
from baybe.serialization import SerialMixin, converter, select_constructor_hook
from baybe.utils.conversion import to_string



[docs]
class SearchSpaceType(Enum):
    """Enumeration of search space kinds and the associated compatibility flags."""

    DISCRETE = "DISCRETE"
    """Marks a discrete search space, or compatibility with discrete search
    spaces."""

    CONTINUOUS = "CONTINUOUS"
    """Marks a continuous search space, or compatibility with continuous search
    spaces."""

    EITHER = "EITHER"
    """Marks compatibility with discrete or continuous search spaces, but not
    with hybrid ones."""

    HYBRID = "HYBRID"
    """Marks a hybrid search space, or compatibility with hybrid search spaces."""




[docs]
@define
class SearchSpace(SerialMixin):
    """Class for managing the overall search space.

    The search space might be purely discrete, purely continuous, or hybrid.
    It is composed of a discrete and a continuous subspace, each of which may be
    empty. Besides direct construction from the two subspaces, the alternative
    constructors :meth:`from_parameter`, :meth:`from_product`, and
    :meth:`from_dataframe` are available. After initialization, the combined
    parameters and constraints of both subspaces are validated.

    Note that created objects related to the computational representations of parameters
    (e.g., parameter bounds, computational dataframes, etc.) may use a different
    parameter order than what is specified through the constructor: While the
    passed parameter list can contain parameters in arbitrary order, the
    aforementioned objects (by convention) list discrete parameters first, followed
    by continuous ones.
    """

    discrete: SubspaceDiscrete = field(factory=SubspaceDiscrete.empty)
    """The (potentially empty) discrete subspace of the overall search space."""

    continuous: SubspaceContinuous = field(factory=SubspaceContinuous.empty)
    """The (potentially empty) continuous subspace of the overall search space."""

    @override
    def __str__(self) -> str:
        """Render the search space type followed by all non-empty subspaces."""
        parts = [to_string("Search Space Type", self.type.name, single_line=True)]
        # Discrete subspace first, then the continuous one; empty ones are omitted
        parts.extend(
            str(subspace)
            for subspace in (self.discrete, self.continuous)
            if not subspace.is_empty
        )
        return to_string(self.__class__.__name__, *parts)

    def __attrs_post_init__(self):
        """Validate the combined parameters and constraints of both subspaces."""
        all_parameters = self.parameters
        validate_parameters(all_parameters)
        validate_constraints(self.constraints, all_parameters)


[docs]
    @classmethod
    def from_parameter(cls, parameter: Parameter) -> SearchSpace:
        """Build a search space that is spanned by exactly one parameter.

        Args:
            parameter: The single parameter defining the search space.

        Returns:
            The search space obtained by applying the product constructor to the
            given parameter.
        """
        single_parameter = [parameter]
        return cls.from_product(single_parameter)



[docs]
    @classmethod
    def from_product(
        cls,
        parameters: Sequence[Parameter],
        constraints: Sequence[Constraint] | None = None,
        empty_encoding: bool = False,
    ) -> SearchSpace:
        """Create a search space from a cartesian product.

        In the search space, optional subsequent constraints are applied.
        That is, the discrete subspace becomes the (filtered) cartesian product
        containing all discrete parameter combinations while, analogously, the
        continuous subspace represents the (filtered) cartesian product of all
        continuous parameters.

        Args:
            parameters: The parameters spanning the search space.
            constraints: An optional set of constraints restricting the valid parameter
                space. ``None`` and empty collections are both treated as
                "no constraints".
            empty_encoding: If ``True``, uses an "empty" encoding for all parameters.
                This is useful, for instance, in combination with random search
                strategies that do not read the actual parameter values, since it avoids
                the (potentially costly) transformation of the parameter values to their
                computational representation. Only forwarded to the discrete
                subspace.

        Returns:
            The constructed search space.
        """
        # IMPROVE: The arguments get pre-validated here to avoid the potentially costly
        #   creation of the subspaces. Perhaps there is an elegant way to bypass the
        #   default validation in the initializer (which is required for other
        #   ways of object creation) in this particular case.
        validate_parameters(parameters)
        if constraints:
            validate_constraints(constraints, parameters)
        else:
            # Normalize "no constraints" so that the filters below can iterate
            constraints = []

        # Route parameters and constraints to the subspace matching their kind
        discrete = SubspaceDiscrete.from_product(
            parameters=[p for p in parameters if p.is_discrete],  # type:ignore[misc]
            constraints=[c for c in constraints if c.is_discrete],  # type:ignore[misc]
            empty_encoding=empty_encoding,
        )
        continuous = SubspaceContinuous.from_product(
            parameters=[p for p in parameters if p.is_continuous],  # type:ignore[misc]
            constraints=[c for c in constraints if c.is_continuous],  # type:ignore[misc]
        )

        # Instantiate through `cls` rather than the hard-coded class name so that
        # subclasses calling this alternative constructor get an instance of their
        # own type (consistent with `from_parameter`, which dispatches via `cls`).
        return cls(discrete=discrete, continuous=continuous)



[docs]
    @classmethod
    def from_dataframe(
        cls,
        df: pd.DataFrame,
        parameters: Sequence[Parameter],
    ) -> SearchSpace:
        """Create a search space from a specified set of parameter configurations.

        The way in which the contents of the columns are interpreted depends on the
        types of the corresponding parameter objects provided. For details, see
        :meth:`baybe.searchspace.discrete.SubspaceDiscrete.from_dataframe` and
        :meth:`baybe.searchspace.continuous.SubspaceContinuous.from_dataframe`.

        Args:
            df: A dataframe whose parameter configurations are used as
                search space specification.
            parameters: The corresponding parameter objects, one for each column
                in the provided dataframe.

        Returns:
            The created search space.

        Raises:
            ValueError: If the dataframe columns do not match with the parameters.
        """
        # The set of column names must coincide with the set of parameter names
        if {p.name for p in parameters} != set(df.columns.values):
            raise ValueError(
                "The provided dataframe columns must match exactly with the specified "
                "parameter names."
            )

        disc_params = [p for p in parameters if p.is_discrete]
        cont_params = [p for p in parameters if p.is_continuous]

        # Each subspace only receives the columns of its own parameters
        discrete = SubspaceDiscrete.from_dataframe(
            df[[p.name for p in disc_params]],
            disc_params,  # type:ignore[arg-type]
        )
        continuous = SubspaceContinuous.from_dataframe(
            df[[p.name for p in cont_params]],
            cont_params,  # type:ignore[arg-type]
        )

        # Instantiate through `cls` rather than the hard-coded class name so that
        # subclasses calling this alternative constructor get an instance of their
        # own type.
        return cls(discrete=discrete, continuous=continuous)


    @property
    def parameters(self) -> tuple[Parameter, ...]:
        """The parameters of the search space as a tuple, discrete ones first."""
        discrete_params = self.discrete.parameters
        continuous_params = self.continuous.parameters
        return (*discrete_params, *continuous_params)

    @property
    def constraints(self) -> tuple[Constraint, ...]:
        """All constraints of the search space.

        The discrete constraints come first, followed by the continuous linear
        equality, linear inequality, and nonlinear constraints (in that order).
        """
        collected = [*self.discrete.constraints]
        continuous = self.continuous
        collected.extend(continuous.constraints_lin_eq)
        collected.extend(continuous.constraints_lin_ineq)
        collected.extend(continuous.constraints_nonlin)
        return tuple(collected)

    @property
    def is_constrained(self) -> bool:
        """Whether at least one of the two subspaces carries constraints."""
        discrete_flag = self.discrete.is_constrained
        if discrete_flag:
            return discrete_flag
        # Only consulted when the discrete subspace is unconstrained
        return self.continuous.is_constrained

    @property
    def type(self) -> SearchSpaceType:
        """The kind of search space, derived from which subspaces are non-empty."""
        has_discrete = not self.discrete.is_empty
        has_continuous = not self.continuous.is_empty

        if has_discrete and has_continuous:
            return SearchSpaceType.HYBRID
        if has_discrete:
            return SearchSpaceType.DISCRETE
        if has_continuous:
            return SearchSpaceType.CONTINUOUS

        # Both subspaces are empty
        raise RuntimeError("This line should be impossible to reach.")

    @property
    def comp_rep_columns(self) -> tuple[str, ...]:
        """Column names of the computational representation, discrete ones first."""
        discrete_columns = self.discrete.comp_rep_columns
        continuous_columns = self.continuous.comp_rep_columns
        return discrete_columns + continuous_columns

    @property
    def comp_rep_bounds(self) -> pd.DataFrame:
        """Minimum and maximum values of the computational representation.

        The bounds of the discrete and the continuous subspace are concatenated
        column-wise, discrete ones first.
        """
        per_subspace_bounds = [
            self.discrete.comp_rep_bounds,
            self.continuous.comp_rep_bounds,
        ]
        return pd.concat(per_subspace_bounds, axis=1)

    @property
    def scaling_bounds(self) -> pd.DataFrame:
        """Bounds used for scaling the surrogate model input.

        The bounds of the discrete and the continuous subspace are concatenated
        column-wise, discrete ones first.
        """
        per_subspace_bounds = [
            self.discrete.scaling_bounds,
            self.continuous.scaling_bounds,
        ]
        return pd.concat(per_subspace_bounds, axis=1)

    @property
    def parameter_names(self) -> tuple[str, ...]:
        """Names of all parameters, discrete ones first."""
        discrete_names = self.discrete.parameter_names
        continuous_names = self.continuous.parameter_names
        return discrete_names + continuous_names

    @property
    def task_idx(self) -> int | None:
        """Column index of the task parameter in the computational representation.

        ``None`` is returned when the search space contains no task parameter.
        """
        # TODO [16932]: Redesign metadata handling
        # Pick the first task parameter, if any
        task_param = next(
            (p for p in self.parameters if isinstance(p, TaskParameter)), None
        )
        if task_param is None:
            return None

        # TODO[11611]: The current approach has three limitations:
        #   1.  It matches by column name and thus assumes that the parameter name
        #       is used as the column name.
        #   2.  It relies on the current implementation detail that discrete parameters
        #       appear first in the computational dataframe.
        #   3.  It assumes there exists exactly one task parameter
        #   --> Fix this when refactoring the data
        column_position = self.discrete.comp_rep.columns.get_loc(task_param.name)
        return cast(int, column_position)

    @property
    def n_tasks(self) -> int:
        """Number of tasks encoded in the search space (1 without task parameter)."""
        # TODO [16932]: This approach only works for a single task parameter. For
        #  multiple task parameters, we need to align what the output should even
        #  represent (e.g. number of combinatorial task combinations, number of
        #  tasks per task parameter, etc).
        task_param = next(
            (p for p in self.parameters if isinstance(p, TaskParameter)), None
        )

        # When there are no task parameters, we effectively have a single task
        if task_param is None:
            return 1

        return len(task_param.values)


[docs]
    def get_comp_rep_parameter_indices(self, name: str, /) -> tuple[int, ...]:
        """Look up which computational representation columns belong to a parameter.

        Args:
            name: The name of the parameter whose column indices are requested.

        Raises:
            ValueError: If the search space has no parameter with the given name.
            ValueError: If several parameters match the given name.

        Returns:
            The integer positions of the parameter's columns within the columns of
            the computational representation. The tuple is empty when the parameter
            contributes no columns to the computational representation.
        """
        matches = self.get_parameters_by_name([name])

        # Exactly one match is required
        if not matches:
            raise ValueError(
                f"There exists no parameter named '{name}' in the search space."
            )
        if len(matches) > 1:
            raise ValueError(
                f"There exist multiple parameter matches for '{name}' in the search "
                f"space."
            )
        (parameter,) = matches

        positions = []
        for position, column in enumerate(self.comp_rep_columns):
            if column in parameter.comp_rep_columns:
                positions.append(position)
        return tuple(positions)



[docs]
    @staticmethod
    def estimate_product_space_size(parameters: Iterable[Parameter]) -> MemorySize:
        """Estimate an upper bound for the memory footprint of a product space.

        Only the discrete parameters enter the estimate; continuous parameters are
        skipped because creating a continuous subspace has no considerable memory
        footprint.

        Args:
            parameters: The parameters spanning the product space.

        Returns:
            The estimated memory size, as computed by the discrete subspace class.
        """
        return SubspaceDiscrete.estimate_product_space_size(
            [p for p in parameters if p.is_discrete]  # type: ignore[arg-type]
        )



[docs]
    def transform(
        self,
        df: pd.DataFrame,
        /,
        *,
        allow_missing: bool = False,
        allow_extra: bool = False,
    ) -> pd.DataFrame:
        """Convert parameter values from experimental to computational representation.

        Args:
            df: The dataframe to be transformed. Which columns it may contain is
                controlled by the ``allow_missing`` and ``allow_extra`` flags.
            allow_missing: If ``False``, each parameter of the space must have
                exactly one corresponding column in the given dataframe. If ``True``,
                the dataframe may contain only a subset of parameter columns.
            allow_extra: If ``False``, each column present in the dataframe must
                correspond to exactly one parameter of the space. If ``True``, the
                dataframe may contain additional non-parameter-related columns, which
                will be ignored.

        Returns:
            A dataframe holding the computational representation, with the columns
            of the discrete subspace followed by those of the continuous subspace.
        """
        # From the perspective of one subspace, the parameter columns of the other
        # subspace would count as "extra" columns, so they are removed up front.
        # Missing columns are tolerated at this stage (``errors="ignore"``) because
        # the subspace transformations below raise the proper error themselves.
        continuous_names = list(self.continuous.parameter_names)
        discrete_input = df.drop(columns=continuous_names, errors="ignore")
        discrete_names = list(self.discrete.parameter_names)
        continuous_input = df.drop(columns=discrete_names, errors="ignore")

        # Each subspace transforms its own share with identical flags
        flags = {"allow_missing": allow_missing, "allow_extra": allow_extra}
        discrete_output = self.discrete.transform(discrete_input, **flags)
        continuous_output = self.continuous.transform(continuous_input, **flags)

        # Join the two parts column-wise
        return pd.concat([discrete_output, continuous_output], axis=1)


    @property
    def constraints_augmentable(self) -> tuple[Constraint, ...]:
        """The constraints flagged for evaluation during augmentation."""
        augmentable = [c for c in self.constraints if c.eval_during_augmentation]
        return tuple(augmentable)


[docs]
    def get_parameters_by_name(self, names: Sequence[str]) -> tuple[Parameter, ...]:
        """Collect the parameters carrying one of the given names.

        Args:
            names: The names of the parameters to look up.

        Returns:
            The matching parameters, with matches from the discrete subspace
            listed before those from the continuous subspace.
        """
        discrete_matches = self.discrete.get_parameters_by_name(names)
        continuous_matches = self.continuous.get_parameters_by_name(names)
        return discrete_matches + continuous_matches





[docs]
def to_searchspace(
    x: Parameter | SubspaceDiscrete | SubspaceContinuous | SearchSpace, /
) -> SearchSpace:
    """Turn a parameter or subspace into a search space.

    Objects that already are search spaces are passed through unchanged; anything
    else is converted by calling its own ``to_searchspace`` method.
    """
    if isinstance(x, SearchSpace):
        return x
    return x.to_searchspace()




[docs]
def validate_searchspace_from_config(specs: dict, _) -> None:
    """Validate search space specifications while avoiding costly creation steps.

    Three cases are distinguished:

    * ``"constructor"`` is ``"from_product"``: the parameters (and constraints, if
      given) are structured and validated without building the product space.
    * The ``"discrete"`` entry uses the ``"from_simplex"`` constructor: only the
      discrete subspace specification is checked, using the dedicated simplex
      validation hook on a copy of the converter.
    * Anything else: the specification is validated by actually structuring it
      into a :class:`SearchSpace`.

    Args:
        specs: The search space specification dictionary.
        _: Unused second argument (presumably required by the hook signature).
    """
    # Validate product inputs without constructing it
    if specs.get("constructor") == "from_product":
        parameters = converter.structure(specs["parameters"], list[Parameter])
        validate_parameters(parameters)

        raw_constraints = specs.get("constraints")
        if raw_constraints:
            constraints = converter.structure(raw_constraints, list[Constraint])
            validate_constraints(constraints, parameters)
        return

    discrete_subspace_specs = specs.get("discrete", {})
    if discrete_subspace_specs.get("constructor") != "from_simplex":
        # For all other types, validate by construction
        converter.structure(specs, SearchSpace)
        return

    # Validate discrete simplex subspace
    simplex_converter = converter.copy()
    simplex_converter.register_structure_hook(
        SubspaceDiscrete, validate_simplex_subspace_from_config
    )
    simplex_converter.structure(discrete_subspace_specs, SubspaceDiscrete)



# Register deserialization hook: structuring a `SearchSpace` through the shared
# converter is delegated to `select_constructor_hook`.
converter.register_structure_hook(SearchSpace, select_constructor_hook)

# Collect leftover original slotted classes processed by `attrs.define`
# (executed once, as a module-level statement when this module is imported)
gc.collect()