# Campaign Stopping ```python # Based on the insights from # [this other example](/examples/Custom_Hooks/probability_of_improvement), we now # demonstrate how to leverage the # {func}`register_hooks ` # mechanics to interrupt a running campaign based on a simple *Probability of # Improvement (PI)* criterion. This approach could be used, for instance, to terminate # unpromising campaigns early and refine their search spaces, or to end an ongoing # optimization if the found results are sufficiently good. # The underlying use case is taken from the example shown # [here](/examples/Backtesting/full_lookup). ### Imports import math import os import warnings ``` ```python import pandas as pd import seaborn as sns import torch ``` ```python from baybe import Campaign from baybe.acquisition import ProbabilityOfImprovement from baybe.exceptions import UnusedObjectWarning from baybe.objectives import SingleTargetObjective from baybe.objectives.base import Objective from baybe.parameters import NumericalDiscreteParameter, SubstanceParameter from baybe.recommenders import ( BotorchRecommender, RandomRecommender, TwoPhaseMetaRecommender, ) from baybe.searchspace import SearchSpace, SearchSpaceType from baybe.simulation import simulate_scenarios from baybe.targets import NumericalTarget from baybe.utils import register_hooks from baybe.utils.dataframe import to_tensor from baybe.utils.plotting import create_example_plots ``` ```python ### Temporary warnings.filterwarnings( "ignore", category=UnusedObjectWarning, message="explicit objective" ) warnings.filterwarnings("ignore", category=DeprecationWarning) ``` ## Settings Let's start by defining some basic settings required for the example: ```python SMOKE_TEST = "SMOKE_TEST" in os.environ N_DOE_ITERATIONS = 2 if SMOKE_TEST else 25 N_MC_ITERATIONS = 2 if SMOKE_TEST else 20 N_INTERRUPTED_CAMPAIGNS = 2 if SMOKE_TEST else 5 BATCH_SIZE = 1 RANDOM_SEED = 1337 ``` ## Problem Definition and Lookup Functionality Following the setup 
described [here](../Backtesting/full_lookup.md), we create the building blocks for the optimization problem: ```python dict_solvent = { "DMAc": r"CC(N(C)C)=O", "Butyornitrile": r"CCCC#N", "Butyl Ester": r"CCCCOC(C)=O", "p-Xylene": r"CC1=CC=C(C)C=C1", } dict_base = { "Potassium acetate": r"O=C([O-])C.[K+]", "Potassium pivalate": r"O=C([O-])C(C)(C)C.[K+]", "Cesium acetate": r"O=C([O-])C.[Cs+]", "Cesium pivalate": r"O=C([O-])C(C)(C)C.[Cs+]", } dict_ligand = { "BrettPhos": r"CC(C)C1=CC(C(C)C)=C(C(C(C)C)=C1)C2=C(P(C3CCCCC3)C4CCCCC4)C(OC)=" "CC=C2OC", "Di-tert-butylphenylphosphine": r"CC(C)(C)P(C1=CC=CC=C1)C(C)(C)C", "(t-Bu)PhCPhos": r"CN(C)C1=CC=CC(N(C)C)=C1C2=CC=CC=C2P(C(C)(C)C)C3=CC=CC=C3", "Tricyclohexylphosphine": r"P(C1CCCCC1)(C2CCCCC2)C3CCCCC3", "PPh3": r"P(C1=CC=CC=C1)(C2=CC=CC=C2)C3=CC=CC=C3", "XPhos": r"CC(C1=C(C2=CC=CC=C2P(C3CCCCC3)C4CCCCC4)C(C(C)C)=CC(C(C)C)=C1)C", "P(2-furyl)3": r"P(C1=CC=CO1)(C2=CC=CO2)C3=CC=CO3", "Methyldiphenylphosphine": r"CP(C1=CC=CC=C1)C2=CC=CC=C2", "1268824-69-6": r"CC(OC1=C(P(C2CCCCC2)C3CCCCC3)C(OC(C)C)=CC=C1)C", "JackiePhos": r"FC(F)(F)C1=CC(P(C2=C(C3=C(C(C)C)C=C(C(C)C)C=C3C(C)C)C(OC)=CC=C2OC)" r"C4=CC(C(F)(F)F)=CC(C(F)(F)F)=C4)=CC(C(F)(F)F)=C1", "SCHEMBL15068049": r"C[C@]1(O2)O[C@](C[C@]2(C)P3C4=CC=CC=C4)(C)O[C@]3(C)C1", "Me2PPh": r"CP(C)C1=CC=CC=C1", } ``` ```python parameters = [ SubstanceParameter(name="Solvent", data=dict_solvent, encoding="MORDRED"), SubstanceParameter(name="Base", data=dict_base, encoding="MORDRED"), SubstanceParameter(name="Ligand", data=dict_ligand, encoding="MORDRED"), NumericalDiscreteParameter(name="Temp_C", values=[90, 105, 120], tolerance=2), NumericalDiscreteParameter(name="Concentration", values=[0.057, 0.1, 0.153]), ] ``` ```python searchspace = SearchSpace.from_product(parameters=parameters) ``` ```python objective = SingleTargetObjective(target=NumericalTarget(name="yield", mode="MAX")) ``` ```python recommender = TwoPhaseMetaRecommender( initial_recommender=RandomRecommender(), 
recommender=BotorchRecommender() ) ``` Also, we load the dataframe containing the lookup data for the closed-loop simulation: ```python try: lookup = pd.read_excel("./../Backtesting/lookup.xlsx") except FileNotFoundError: lookup = pd.read_excel("examples/Backtesting/lookup.xlsx") ``` ## Simulating the Uninterrupted Campaigns First, we run several Monte Carlo repetitions of the uninterrupted campaign to get a feeling for the average trajectory. For reproducibility, we also fix the random seed: ```python campaign = Campaign(searchspace, objective, recommender) results_uninterrupted = simulate_scenarios( {"Average uninterrupted": campaign}, lookup, batch_size=BATCH_SIZE, n_doe_iterations=N_DOE_ITERATIONS, n_mc_iterations=N_MC_ITERATIONS, random_seed=RANDOM_SEED, ) ``` ## Defining the Campaign-Stopping Hook In order to interrupt a running campaign, we define a custom exception to identify our stopping event: ```python class CampaignStoppedException(Exception): """The campaign should be stopped.""" ``` Based on this exception class, we can now define a hook implementing the stopping criterion. For this purpose, we count the fraction of candidates with a PI exceeding a given value and terminate the campaign once the fraction falls below a certain threshold. ```python PI_THRESHOLD = 0.01 # PI of 1% to identify promising points PI_REQUIRED_FRACTION = 0.2 # 20% of candidates must be above the threshold ``` ```python def stop_on_PI( self: BotorchRecommender, searchspace: SearchSpace, objective: Objective | None = None, measurements: pd.DataFrame | None = None, ) -> None: """Raise an exception if the PI-based stopping criterion is fulfilled.""" if searchspace.type != SearchSpaceType.DISCRETE: raise TypeError( f"Search spaces of type '{searchspace.type}' are not supported. " f"Currently, only search spaces of type '{SearchSpaceType.DISCRETE}' are " f"accepted." 
) train_x = searchspace.transform(measurements, allow_extra=True) train_y = objective.transform(measurements) acqf = ProbabilityOfImprovement() botorch_acqf = acqf.to_botorch(self.surrogate_model, searchspace, train_x, train_y) _, candidates_comp_rep = searchspace.discrete.get_candidates( allow_repeated_recommendations=self.allow_repeated_recommendations, allow_recommending_already_measured=self.allow_recommending_already_measured, ) comp_rep_tensor = to_tensor(candidates_comp_rep).unsqueeze(1) acqf_values = botorch_acqf(comp_rep_tensor) n_pis_over = torch.sum(acqf_values > PI_THRESHOLD) n_pis_over_required = math.ceil(len(candidates_comp_rep) * PI_REQUIRED_FRACTION) if n_pis_over < n_pis_over_required: raise CampaignStoppedException( f"Less than {PI_REQUIRED_FRACTION*100:.0f}% of candidates are above the PI " f"threshold of {PI_THRESHOLD*100:.0f}% - Stopping the campaign." ) ``` Now, we attach the hook to the ``recommend`` function of our recommender class: ```python BotorchRecommender.recommend = register_hooks( BotorchRecommender.recommend, post_hooks=[stop_on_PI] ) ``` ```{admonition} Monkeypatching :class: note The above monkeypatch registers the hook with all future instances of the recommender class. While it is possible to attach the hook only to a specific instance via ``MethodType`` (see [here](./basics.md)), this approach does not work well with the simulation utilities because they internally create deep copies of the simulated campaign, effectively bypassing the patch. ``` ## Simulating the Interrupted Campaigns With the hook attached to the class, we again run several Monte Carlo repetitions of the same campaign. 
For this purpose, we instantiate a new recommender with the active hook and assign
it to a fresh copy of the campaign:

```python
recommender_with_hook = TwoPhaseMetaRecommender(
    initial_recommender=RandomRecommender(), recommender=BotorchRecommender()
)

# NOTE: the campaign must receive the recommender carrying the hook — passing the
# original `recommender` would silently re-simulate the uninterrupted scenario.
campaign_with_hook = Campaign(searchspace, objective, recommender_with_hook)
```

Now, we can simply re-trigger the simulation loop. In order to establish a 1:1
comparison, we use the same random seed as before so that the initial states of all
trajectories are aligned with the previous runs:

```python
results_interrupted = simulate_scenarios(
    {"Interrupted": campaign_with_hook},
    lookup,
    batch_size=BATCH_SIZE,
    n_doe_iterations=N_DOE_ITERATIONS,
    n_mc_iterations=N_INTERRUPTED_CAMPAIGNS,
    random_seed=RANDOM_SEED,
)
```

```{note}
If an exception is thrown inside the loop, the function still returns the partial
trajectory, which effectively implements early stopping.
```

## Plotting the Results

Finally, we plot both the interrupted and the uninterrupted results. To display the
former in terms of individual trajectories, we can leverage the column that keeps
track of the Monte Carlo iterations:

```python
results_interrupted = results_interrupted.drop("Scenario", axis=1)
results_interrupted["Scenario"] = results_interrupted["Monte_Carlo_Run"].apply(
    lambda k: f"PI-stopped, run {k}"
)
```

Now, we can easily create the plot from a single combined dataframe:

```python
results = pd.concat([results_uninterrupted, results_interrupted])
ax = sns.lineplot(
    data=results,
    marker="o",
    markersize=10,
    x="Num_Experiments",
    y="yield_CumBest",
    hue="Scenario",
)
for line in ax.get_lines()[1:]:
    line.set_dashes([5, 2])
create_example_plots(ax=ax, base_name="campaign_stopping")
```

```{image} campaign_stopping_light.svg
:align: center
:class: only-light
```

```{image} campaign_stopping_dark.svg
:align: center
:class: only-dark
```