Skip to content

from cluster_experiments.power_config import *

PowerConfig dataclass

Dataclass to create a power analysis from.

Parameters:

Name Type Description Default
splitter str

Splitter object to use

required
perturbator str

Perturbator object to use, defaults to "" for normal power analysis

''
analysis str

ExperimentAnalysis object to use

required
washover str

Washover object to use, defaults to ""

''
cupac_model str

CUPAC model to use

''
n_simulations int

number of simulations to run

100
cluster_cols Optional[List[str]]

list of columns to use as clusters

None
target_col str

column to use as target

'target'
treatment_col str

column to use as treatment

'treatment'
treatment str

what value of treatment_col should be considered as treatment

'B'
control str

what value of treatment_col should be considered as control

'A'
strata_cols Optional[List[str]]

columns to stratify with

None
splitter_weights Optional[List[float]]

weights to use for the splitter; the list should have the same length as treatments, and each weight should correspond to an element in treatments

None
switch_frequency Optional[str]

how often to switch treatments

None
time_col Optional[str]

column to use as time in switchback splitter

None
washover_time_delta Optional[Union[datetime.timedelta, int]]

optional, int indicating the washover time in minutes or datetime.timedelta object

None
covariates Optional[List[str]]

list of columns to use as covariates

None
average_effect Optional[float]

average effect to use in the perturbator

None
scale Optional[float]

scale to use in stochastic perturbators

None
range_min Optional[float]

minimum value of the target range for relative beta perturbator, must be > -1

None
range_max Optional[float]

maximum value of the target range for relative beta perturbator

None
reduce_variance Optional[bool]

whether to reduce variance in the BetaRelative perturbator

None
segment_cols Optional[List[str]]

list of segmentation columns for segmented perturbator

None
treatments Optional[List[str]]

list of treatments to use

None
alpha float

alpha value to use in the power analysis

0.05
agg_col str

column to use for aggregation in the CUPAC model

''
smoothing_factor float

smoothing value to use in the CUPAC model

20
features_cupac_model Optional[List[str]]

list of features to use in the CUPAC model

None
seed Optional[int]

seed to make the power analysis reproducible

None

Usage:

from cluster_experiments.power_config import PowerConfig
from cluster_experiments.power_analysis import PowerAnalysis, NormalPowerAnalysis

# Describe the power analysis once: which analysis, splitter and perturbator
# to use, the cluster columns, the number of simulations and alpha.
p = PowerConfig(
    analysis="gee",
    splitter="clustered_balance",
    perturbator="constant",
    cluster_cols=["city"],
    n_simulations=100,
    alpha=0.05,
)
# Build a PowerAnalysis object from the config.
power_analysis = PowerAnalysis.from_config(p)

# The same config object is also accepted by NormalPowerAnalysis.from_config.
normal_power_analysis = NormalPowerAnalysis.from_config(p)
Source code in cluster_experiments/power_config.py
class PowerConfig:
    """
    Dataclass to create a power analysis from.

    After initialisation, ``__post_init__`` normalises the configuration:
    every option that has no effect for the selected splitter, perturbator,
    analysis or cupac_model is reset to its default value and a warning is
    logged for each override. A ``MissingArgumentError`` is raised when a
    segmented perturbator is selected without ``segment_cols``.

    Arguments:
        splitter: Splitter object to use
        perturbator: Perturbator object to use, defaults to "" for normal power analysis
        analysis: ExperimentAnalysis object to use
        washover: Washover object to use, defaults to ""
        cupac_model: CUPAC model to use
        n_simulations: number of simulations to run
        cluster_cols: list of columns to use as clusters
        target_col: column to use as target
        treatment_col: column to use as treatment
        treatment: what value of treatment_col should be considered as treatment
        control: what value of treatment_col should be considered as control
        strata_cols: columns to stratify with
        splitter_weights: weights to use for the splitter; the list should have the same length as treatments, and each weight should correspond to an element in treatments
        switch_frequency: how often to switch treatments
        time_col: column to use as time in switchback splitter
        washover_time_delta: optional, int indicating the washover time in minutes or datetime.timedelta object
        covariates: list of columns to use as covariates
        hypothesis: hypothesis setting for the analysis, defaults to "two-sided" (NOTE(review): the accepted values are not defined in this class; confirm against the analysis classes)
        average_effect: average effect to use in the perturbator
        scale: scale to use in stochastic perturbators
        range_min: minimum value of the target range for relative beta perturbator, must be > -1
        range_max: maximum value of the target range for relative beta perturbator
        reduce_variance: whether to reduce variance in the BetaRelative perturbator
        segment_cols: list of segmentation columns for segmented perturbator
        treatments: list of treatments to use
        alpha: alpha value to use in the power analysis
        agg_col: column to use for aggregation in the CUPAC model
        smoothing_factor: smoothing value to use in the CUPAC model
        features_cupac_model: list of features to use in the CUPAC model
        seed: seed to make the power analysis reproducible

    Usage:

    ```python
    from cluster_experiments.power_config import PowerConfig
    from cluster_experiments.power_analysis import PowerAnalysis, NormalPowerAnalysis

    p = PowerConfig(
        analysis="gee",
        splitter="clustered_balance",
        perturbator="constant",
        cluster_cols=["city"],
        n_simulations=100,
        alpha=0.05,
    )
    power_analysis = PowerAnalysis.from_config(p)

    normal_power_analysis = NormalPowerAnalysis.from_config(p)
    ```
    """

    # mappings
    splitter: str
    analysis: str
    perturbator: str = ""
    washover: str = ""

    # Needed
    cluster_cols: Optional[List[str]] = None

    # optional mappings
    cupac_model: str = ""

    # Shared
    target_col: str = "target"
    treatment_col: str = "treatment"
    treatment: str = "B"

    # Perturbator
    average_effect: Optional[float] = None
    scale: Optional[float] = None
    range_min: Optional[float] = None
    range_max: Optional[float] = None
    reduce_variance: Optional[bool] = None
    segment_cols: Optional[List[str]] = None

    # Splitter
    treatments: Optional[List[str]] = None
    strata_cols: Optional[List[str]] = None
    splitter_weights: Optional[List[float]] = None
    switch_frequency: Optional[str] = None
    # Switchback
    time_col: Optional[str] = None
    washover_time_delta: Optional[Union[datetime.timedelta, int]] = None

    # Analysis
    covariates: Optional[List[str]] = None
    hypothesis: str = "two-sided"

    # Power analysis
    n_simulations: int = 100
    alpha: float = 0.05
    control: str = "A"

    # Cupac
    agg_col: str = ""
    smoothing_factor: float = 20
    features_cupac_model: Optional[List[str]] = None

    seed: Optional[int] = None

    def __post_init__(self):
        """
        Reset options that have no effect for the selected components.

        Each reset is logged as a warning through ``_set_and_log``.

        Raises:
            MissingArgumentError: if the perturbator name contains "segmented"
                and ``segment_cols`` is None.
        """
        reset = self._reset_if_different

        # Switchback-only options
        if "switchback" not in self.splitter:
            reset("switch_frequency", None, "splitter")
            reset("washover_time_delta", None, "splitter")
            reset("washover", "", "splitter")
            # an exception is made when we have no perturbator (normal power analysis)
            if self.perturbator != "":
                reset("time_col", None, "splitter")

        # Perturbator-specific options
        if self.perturbator not in {"normal", "beta_relative_positive"}:
            reset("scale", None, "perturbator")

        if self.perturbator not in {"beta_relative", "segmented_beta_relative"}:
            reset("range_min", None, "perturbator")
            reset("range_max", None, "perturbator")
            reset("reduce_variance", None, "perturbator")

        if self.perturbator not in {"segmented_beta_relative"}:
            reset("segment_cols", None, "perturbator")

        # strata_cols are kept for stratified splitters and paired t-test analyses
        if "stratified" not in self.splitter and "paired_ttest" not in self.analysis:
            reset("strata_cols", None, "splitter")

        # Match on "balance" rather than "balanced": splitter names such as
        # "clustered_balance" (see Usage in the class docstring) do not contain
        # "balanced", so the previous check never fired for them. "balance"
        # still matches any name containing "balanced".
        if "stratified" in self.splitter or "balance" in self.splitter:
            reset("splitter_weights", None, "splitter")

        # CUPAC options
        if self.cupac_model != "mean_cupac_model":
            reset("agg_col", "", "cupac_model")
            reset("smoothing_factor", 20, "cupac_model")
        # for now, features_cupac_model are not used
        reset("features_cupac_model", None, "cupac_model")

        # Analysis options
        if "ttest" in self.analysis:
            reset("covariates", None, "analysis")

        # Required options
        if "segmented" in self.perturbator:
            self._raise_error_if_missing("segment_cols", "perturbator")

    def _are_different(self, arg1, arg2) -> bool:
        """Return True when arg1 and arg2 compare unequal."""
        return arg1 != arg2

    def _reset_if_different(self, attr, default, other_attr):
        """Reset attr to default (with a warning) if its current value differs from default."""
        if self._are_different(getattr(self, attr), default):
            self._set_and_log(attr, default, other_attr)

    def _set_and_log(self, attr, value, other_attr):
        """
        Log a warning that attr has no effect given other_attr, then set attr to value.

        Arguments:
            attr: name of the attribute being overridden
            value: value the attribute is overridden with
            other_attr: name of the attribute that makes attr ineffective
        """
        logging.warning(
            f"{attr} = {getattr(self, attr)} has no effect with "
            f"{other_attr} = {getattr(self, other_attr)}. "
            f"Overriding {attr} to {value}."
        )
        setattr(self, attr, value)

    def _raise_error_if_missing(self, attr, other_attr):
        """
        Raise MissingArgumentError when attr is None.

        Arguments:
            attr: name of the required attribute
            other_attr: name of the attribute that makes attr required
        """
        if getattr(self, attr) is None:
            raise MissingArgumentError(
                f"{attr} is required when using "
                f"{other_attr} = {getattr(self, other_attr)}."
            )