emdgroup · Scienfitz · Mar 6, 2025 · Mar 6, 2025 · Mar 6, 2025 · Mar 6, 2025
@@ -36,6 +36,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `SubstanceParameter`, `CustomDiscreteParameter` and `CategoricalParameter` now also 
   support restricting the search space via `active_values`, while `values` continue to 
   identify allowed measurement inputs
+- `Campaign.posterior_stats` and `Surrogate.posterior_stats` as convenience methods for
+  providing statistical measures about the target predictions of a given set of
+  candidates
 
 ### Changed
 - Acquisition function indicator `is_mc` has been removed in favor of new indicators 

@@ -4,7 +4,7 @@
 
 import gc
 import json
-from collections.abc import Callable, Collection
+from collections.abc import Callable, Collection, Sequence
 from functools import reduce
 from typing import TYPE_CHECKING, Any
 
@@ -33,7 +33,7 @@
     validate_searchspace_from_config,
 )
 from baybe.serialization import SerialMixin, converter
-from baybe.surrogates.base import SurrogateProtocol
+from baybe.surrogates.base import PosteriorStatistic, SurrogateProtocol
 from baybe.targets.base import Target
 from baybe.telemetry import (
     TELEM_LABELS,
@@ -506,12 +506,14 @@ def recommend(
 
         return rec
 
-    def posterior(self, candidates: pd.DataFrame) -> Posterior:
+    def posterior(self, candidates: pd.DataFrame | None = None) -> Posterior:
         """Get the posterior predictive distribution for the given candidates.
 
         Args:
-            candidates: The candidate points in experimental recommendations.
-                For details, see :meth:`baybe.surrogates.base.Surrogate.posterior`.
+            candidates: The candidate points in experimental representation. If not
+                provided, the posterior for the existing campaign measurements is
+                returned. For details, see
+                :meth:`baybe.surrogates.base.Surrogate.posterior`.
 
         Raises:
             IncompatibilityError: If the underlying surrogate model exposes no
@@ -521,17 +523,52 @@ def posterior(self, candidates: pd.DataFrame) -> Posterior:
             Posterior: The corresponding posterior object.
             For details, see :meth:`baybe.surrogates.base.Surrogate.posterior`.
         """
+        if candidates is None:
+            candidates = self.measurements[[p.name for p in self.parameters]]
+
         surrogate = self.get_surrogate()
         if not hasattr(surrogate, method_name := "posterior"):
             raise IncompatibilityError(
                 f"The used surrogate type '{surrogate.__class__.__name__}' does not "
                 f"provide a '{method_name}' method."
             )
 
-        import torch
+        return surrogate.posterior(candidates)
+
+    def posterior_stats(
+        self,
+        candidates: pd.DataFrame | None = None,
+        stats: Sequence[PosteriorStatistic] = ("mean", "std"),
+    ) -> pd.DataFrame:
+        """Return posterior statistics for each target.
+
+        Args:
+            candidates: The candidate points in experimental representation. If not
+                provided, the statistics of the existing campaign measurements are
+                calculated. For details, see
+                :meth:`baybe.surrogates.base.Surrogate.posterior_stats`.
+            stats: Sequence indicating which statistics to compute. Also accepts
+                floats, for which the corresponding quantile point will be computed.
+
+        Raises:
+            IncompatibilityError: If the underlying surrogate model exposes no
+                method for computing posterior statistics.
+            ValueError: If a requested quantile is outside the open interval (0,1).
+            TypeError: If the posterior utilized by the surrogate does not support
+                a requested statistic.
+
+        Returns:
+            A dataframe with posterior statistics for each target and candidate.
+        """
+        # Without explicit candidates, fall back to the parameter columns of the
+        # measurements recorded so far.
+        if candidates is None:
+            candidates = self.measurements[[p.name for p in self.parameters]]
+
+        surrogate = self.get_surrogate()
+        # Guard against surrogate types that do not implement the method.
+        if not hasattr(surrogate, method_name := "posterior_stats"):
+            raise IncompatibilityError(
+                f"The used surrogate type '{surrogate.__class__.__name__}' does not "
+                f"provide a '{method_name}' method."
+            )
 
-        with torch.no_grad():
-            return surrogate.posterior(candidates)
+        return surrogate.posterior_stats(candidates, stats)
 
     def get_surrogate(
         self,

@@ -71,7 +71,7 @@ class DiscreteSumConstraint(DiscreteConstraint):
 
     # class variables
     numerical_only: ClassVar[bool] = True
-    # see base class.
+    # See base class.
 
     # object variables
     condition: ThresholdCondition = field()
@@ -99,7 +99,7 @@ class DiscreteProductConstraint(DiscreteConstraint):
 
     # class variables
     numerical_only: ClassVar[bool] = True
-    # see base class.
+    # See base class.
 
     # object variables
     condition: ThresholdCondition = field()

@@ -4,8 +4,9 @@
 
 import gc
 from abc import ABC, abstractmethod
+from collections.abc import Sequence
 from enum import Enum, auto
-from typing import TYPE_CHECKING, ClassVar, Protocol
+from typing import TYPE_CHECKING, ClassVar, Literal, Protocol, TypeAlias
 
 import cattrs
 import pandas as pd
@@ -21,6 +22,7 @@
 from typing_extensions import override
 
 from baybe.exceptions import IncompatibleSurrogateError, ModelNotTrainedError
+from baybe.objectives import DesirabilityObjective
 from baybe.objectives.base import Objective
 from baybe.parameters.base import Parameter
 from baybe.searchspace import SearchSpace
@@ -43,6 +45,11 @@
 
     from baybe.surrogates.composite import CompositeSurrogate
 
+PosteriorStatistic: TypeAlias = float | Literal["mean", "std", "var", "mode"]
+"""Type alias for requestable posterior statistics.
+
+A float will result in the corresponding quantile points."""
+
 _ONNX_ENCODING = "latin-1"
 """Constant signifying the encoding for onnx byte strings in pretrained models.
 
@@ -218,7 +225,7 @@ def _make_output_scaler(
 
         return scaler
 
-    def posterior(self, candidates: pd.DataFrame, /) -> Posterior:
+    def posterior(self, candidates: pd.DataFrame) -> Posterior:
         """Compute the posterior for candidates in experimental representation.
 
         Takes a dataframe of parameter configurations in **experimental representation**
@@ -306,6 +313,86 @@ def _posterior(self, candidates_comp_scaled: Tensor, /) -> Posterior:
             obtained via :meth:`baybe.surrogates.base.Surrogate._make_output_scaler`.
         """
 
+    def posterior_stats(
+        self,
+        candidates: pd.DataFrame,
+        stats: Sequence[PosteriorStatistic] = ("mean", "std"),
+    ) -> pd.DataFrame:
+        """Return posterior statistics for each target.
+
+        Args:
+            candidates: The candidate points in experimental representation.
+                For details, see :meth:`baybe.surrogates.base.Surrogate.posterior`.
+            stats: Sequence indicating which statistics to compute. Also accepts
+                floats, for which the corresponding quantile point will be computed.
+
+        Raises:
+            ModelNotTrainedError: When called before the model has been trained.
+            ValueError: If a requested quantile is outside the open interval (0,1).
+            TypeError: If the posterior utilized by the surrogate does not support
+                a requested statistic.
+
+        Returns:
+            A dataframe with posterior statistics for each target and candidate.
+        """
+        if self._objective is None:
+            raise ModelNotTrainedError(
+                "The surrogate must be trained before a posterior can be computed."
+            )
+
+        stat: PosteriorStatistic
+        for stat in (x for x in stats if isinstance(x, float)):
+            if not 0.0 < stat < 1.0:
+                raise ValueError(
+                    f"Posterior quantile statistics can only be computed for quantiles "
+                    f"between 0 and 1 (non-inclusive). Provided value: '{stat}' as "
+                    f"part of '{stats=}'."
+                )
+        posterior = self.posterior(candidates)
+
+        match self._objective:
+            case DesirabilityObjective():
+                # TODO: Once desirability also supports posterior transforms this check
+                #  here will have to depend on the configuration of the objective and
+                #  whether it uses the transforms or not.
+                targets = ["Desirability"]
+            case _:
+                targets = [t.name for t in self._objective.targets]
+
+        import torch
+
+        result = pd.DataFrame(index=candidates.index)
+        with torch.no_grad():
+            for k, target_name in enumerate(targets):
+                for stat in stats:
+                    try:
+                        if isinstance(stat, float):  # Calculate quantile statistic
+                            stat_name = f"Q_{stat}"
+                            vals = posterior.quantile(torch.tensor(stat))
+                        else:  # Calculate non-quantile statistic
+                            stat_name = stat
+                            vals = getattr(
+                                posterior,
+                                stat if stat not in ["std", "var"] else "variance",
+                            )
+                    except (AttributeError, NotImplementedError) as e:
+                        # We could arrive here because an invalid statistics string has
+                        # been requested or because a quantile point has been requested,
+                        # but the posterior type does not implement quantiles.
+                        raise TypeError(
+                            f"The utilized posterior of type "
+                            f"'{posterior.__class__.__name__}' does not support the "
+                            f"statistic associated with the requested input '{stat}'."
+                        ) from e
+
+                    if stat == "std":
+                        vals = torch.sqrt(vals)
+
+                    numpyvals = vals.cpu().numpy().reshape((len(result), len(targets)))
+                    result[f"{target_name}_{stat_name}"] = numpyvals[:, k]
+
+        return result
+
     @override
     def fit(
         self,

@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+from collections.abc import Sequence
 from copy import deepcopy
 from typing import TYPE_CHECKING, Any, Generic, Protocol, TypeVar
 
@@ -14,7 +15,7 @@
 from baybe.searchspace.core import SearchSpace
 from baybe.serialization import converter
 from baybe.serialization.mixin import SerialMixin
-from baybe.surrogates.base import SurrogateProtocol
+from baybe.surrogates.base import PosteriorStatistic, SurrogateProtocol
 from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate
 from baybe.utils.basic import is_all_instance
 
@@ -113,7 +114,7 @@ def to_botorch(self) -> ModelList:
         )
         return cls(*(s.to_botorch() for s in self._surrogates_flat))
 
-    def posterior(self, candidates: pd.DataFrame, /) -> PosteriorList:
+    def posterior(self, candidates: pd.DataFrame) -> PosteriorList:
         """Compute the posterior for candidates in experimental representation.
 
         The (independent joint) posterior is represented as a collection of individual
@@ -133,6 +134,21 @@ def posterior(self, candidates: pd.DataFrame, /) -> PosteriorList:
         posteriors = [s.posterior(candidates) for s in self._surrogates_flat]  # type: ignore[attr-defined]
         return PosteriorList(*posteriors)
 
+    def posterior_stats(
+        self,
+        candidates: pd.DataFrame,
+        stats: Sequence[PosteriorStatistic] = ("mean", "std"),
+    ) -> pd.DataFrame:
+        """See :meth:`baybe.surrogates.base.Surrogate.posterior_stats`."""
+        if not all(hasattr(s, "posterior_stats") for s in self._surrogates_flat):
+            raise IncompatibleSurrogateError(
+                "Posterior statistics can only be computed if all involved surrogates "
+                "offer this computation."
+            )
+
+        dfs = [s.posterior_stats(candidates, stats) for s in self._surrogates_flat]  # type: ignore[attr-defined]
+        return pd.concat(dfs, axis=1)
+
 
 def _structure_surrogate_getter(obj: dict, _) -> _SurrogateGetter:
     """Resolve the object type."""