Skip to content

Cluster Experiments Docs

Power config

`from cluster_experiments.power_config import *`¶

`PowerConfig` `dataclass` ¶

Dataclass to create a power analysis from.

Parameters:

Name	Type	Description	Default
`splitter`	`str`	Splitter object to use	required
`perturbator`	`str`	Perturbator object to use, defaults to "" for normal power analysis	`''`
`analysis`	`str`	ExperimentAnalysis object to use	required
`washover`	`str`	Washover object to use, defaults to ""	`''`
`cupac_model`	`str`	CUPAC model to use	`''`
`n_simulations`	`int`	number of simulations to run	`100`
`cluster_cols`	`Optional[List[str]]`	list of columns to use as clusters	`None`
`target_col`	`str`	column to use as target	`'target'`
`treatment_col`	`str`	column to use as treatment	`'treatment'`
`treatment`	`str`	what value of treatment_col should be considered as treatment	`'B'`
`control`	`str`	what value of treatment_col should be considered as control	`'A'`
`strata_cols`	`Optional[List[str]]`	columns to stratify with	`None`
`splitter_weights`	`Optional[List[float]]`	weights to use for the splitter; must have the same length as treatments, with each weight corresponding to one element of treatments	`None`
`switch_frequency`	`Optional[str]`	how often to switch treatments	`None`
`time_col`	`Optional[str]`	column to use as time in switchback splitter	`None`
`washover_time_delta`	`Optional[Union[timedelta, int]]`	optional washover time, given either as an int (minutes) or as a datetime.timedelta object	`None`
`covariates`	`Optional[List[str]]`	list of columns to use as covariates	`None`
`average_effect`	`Optional[float]`	average effect to use in the perturbator	`None`
`scale`	`Optional[float]`	scale to use in stochastic perturbators	`None`
`range_min`	`Optional[float]`	minimum value of the target range for relative beta perturbator, must be >-1	`None`
`range_max`	`Optional[float]`	maximum value of the target range for relative beta perturbator	`None`
`reduce_variance`	`Optional[bool]`	whether to reduce variance in the BetaRelative perturbator	`None`
`segment_cols`	`Optional[List[str]]`	list of segmentation columns for segmented perturbator	`None`
`treatments`	`Optional[List[str]]`	list of treatments to use	`None`
`alpha`	`float`	alpha value to use in the power analysis	`0.05`
`agg_col`	`str`	column to use for aggregation in the CUPAC model	`''`
`smoothing_factor`	`float`	smoothing value to use in the CUPAC model	`20`
`features_cupac_model`	`Optional[List[str]]`	list of features to use in the CUPAC model	`None`
`seed`	`Optional[int]`	seed to make the power analysis reproducible	`None`

Usage:

from cluster_experiments.power_config import PowerConfig
from cluster_experiments.power_analysis import PowerAnalysis, NormalPowerAnalysis

# Describe the setup once: splitter, perturbator and analysis are selected by name
p = PowerConfig(
    analysis="gee",
    splitter="clustered_balance",
    perturbator="constant",
    cluster_cols=["city"],
    n_simulations=100,
    alpha=0.05,
)
# The same config object is used to build both power analysis classes
power_analysis = PowerAnalysis.from_config(p)

normal_power_analysis = NormalPowerAnalysis.from_config(p)

Source code in cluster_experiments/power_config.py

@dataclass(eq=True)
class PowerConfig:
    """
    Dataclass to create a power analysis from.

    After initialisation, arguments that have no effect for the selected
    splitter / perturbator / analysis / cupac_model are reset to their default
    with a logged warning, and inconsistent combinations raise an error.

    Arguments:
        splitter: Splitter object to use
        perturbator: Perturbator object to use, defaults to "" for normal power analysis
        analysis: ExperimentAnalysis object to use
        washover: Washover object to use, defaults to ""
        cupac_model: CUPAC model to use
        scale_col: optional column name, only accepted when analysis contains "delta"; with any other analysis an UnexpectedArgumentError is raised
        n_simulations: number of simulations to run
        cluster_cols: list of columns to use as clusters
        target_col: column to use as target
        treatment_col: column to use as treatment
        treatment: what value of treatment_col should be considered as treatment
        control: what value of treatment_col should be considered as control
        strata_cols: columns to stratify with
        splitter_weights: weights to use for the splitter; must have the same length as treatments, with each weight corresponding to one element of treatments
        switch_frequency: how often to switch treatments
        time_col: column to use as time in switchback splitter
        washover_time_delta: optional washover time, given either as an int (minutes) or as a datetime.timedelta object
        covariates: list of columns to use as covariates
        hypothesis: hypothesis option of the analysis, defaults to "two-sided"
        cov_type: optional analysis option, one of "nonrobust", "fixed scale", "HC0", "HC1", "HC2", "HC3", "HAC", "hac-panel", "hac-groupsum", "cluster"
        add_covariate_interaction: boolean analysis option, defaults to False
        average_effect: average effect to use in the perturbator
        scale: scale to use in stochastic perturbators
        range_min: minimum value of the target range for relative beta perturbator, must be >-1
        range_max: maximum value of the target range for relative beta perturbator
        reduce_variance: whether to reduce variance in the BetaRelative perturbator
        segment_cols: list of segmentation columns for segmented perturbator
        treatments: list of treatments to use
        alpha: alpha value to use in the power analysis
        agg_col: column to use for aggregation in the CUPAC model
        smoothing_factor: smoothing value to use in the CUPAC model
        features_cupac_model: list of features to use in the CUPAC model
        seed: seed to make the power analysis reproducible

    Usage:

    ```python
    from cluster_experiments.power_config import PowerConfig
    from cluster_experiments.power_analysis import PowerAnalysis, NormalPowerAnalysis

    p = PowerConfig(
        analysis="gee",
        splitter="clustered_balance",
        perturbator="constant",
        cluster_cols=["city"],
        n_simulations=100,
        alpha=0.05,
    )
    power_analysis = PowerAnalysis.from_config(p)

    normal_power_analysis = NormalPowerAnalysis.from_config(p)
    ```
    """

    # mappings
    splitter: str
    analysis: str
    perturbator: str = ""
    washover: str = ""

    # Needed
    cluster_cols: Optional[List[str]] = None

    # optional mappings
    cupac_model: str = ""
    scale_col: Optional[str] = None

    # Shared
    target_col: str = "target"
    treatment_col: str = "treatment"
    treatment: str = "B"

    # Perturbator
    average_effect: Optional[float] = None
    scale: Optional[float] = None
    range_min: Optional[float] = None
    range_max: Optional[float] = None
    reduce_variance: Optional[bool] = None
    segment_cols: Optional[List[str]] = None

    # Splitter
    treatments: Optional[List[str]] = None
    strata_cols: Optional[List[str]] = None
    splitter_weights: Optional[List[float]] = None
    switch_frequency: Optional[str] = None
    # Switchback
    time_col: Optional[str] = None
    washover_time_delta: Optional[Union[datetime.timedelta, int]] = None

    # Analysis
    covariates: Optional[List[str]] = None
    hypothesis: str = "two-sided"
    cov_type: Optional[
        Literal[
            "nonrobust",
            "fixed scale",
            "HC0",
            "HC1",
            "HC2",
            "HC3",
            "HAC",
            "hac-panel",
            "hac-groupsum",
            "cluster",
        ]
    ] = None
    add_covariate_interaction: bool = False

    # Power analysis
    n_simulations: int = 100
    alpha: float = 0.05
    control: str = "A"

    # Cupac
    agg_col: str = ""
    smoothing_factor: float = 20
    features_cupac_model: Optional[List[str]] = None

    seed: Optional[int] = None

    def __post_init__(self):
        """
        Reconcile the arguments with the selected components.

        Arguments that have no effect with the chosen splitter, perturbator,
        analysis or cupac_model are reset to their default and a warning is
        logged. Missing required arguments and unexpected arguments raise.

        Raises:
            MissingArgumentError: if a segmented perturbator is used without segment_cols
            UnexpectedArgumentError: if scale_col is given with a non-delta analysis
        """
        if "switchback" not in self.splitter:
            self._reset_if_ineffective("switch_frequency", None, "splitter")
            self._reset_if_ineffective("washover_time_delta", None, "splitter")
            self._reset_if_ineffective("washover", "", "splitter")
            # an exception is made when we have no perturbator (normal power analysis)
            if self.perturbator != "":
                self._reset_if_ineffective("time_col", None, "splitter")

        if self.perturbator not in {"normal", "beta_relative_positive"}:
            self._reset_if_ineffective("scale", None, "perturbator")

        if self.perturbator not in {"beta_relative", "segmented_beta_relative"}:
            self._reset_if_ineffective("range_min", None, "perturbator")
            self._reset_if_ineffective("range_max", None, "perturbator")
            self._reset_if_ineffective("reduce_variance", None, "perturbator")

        if self.perturbator not in {"segmented_beta_relative"}:
            self._reset_if_ineffective("segment_cols", None, "perturbator")

        if "stratified" not in self.splitter and "paired_ttest" not in self.analysis:
            self._reset_if_ineffective("strata_cols", None, "splitter")

        # NOTE(review): the documented splitter name "clustered_balance" does not
        # contain "balanced", so this branch does not trigger for it -- confirm
        # whether "balance" was intended here.
        if "stratified" in self.splitter or "balanced" in self.splitter:
            self._reset_if_ineffective("splitter_weights", None, "splitter")

        if self.cupac_model != "mean_cupac_model":
            self._reset_if_ineffective("agg_col", "", "cupac_model")
            self._reset_if_ineffective("smoothing_factor", 20, "cupac_model")
        # for now, features_cupac_model are not used
        self._reset_if_ineffective("features_cupac_model", None, "cupac_model")

        if "ttest" in self.analysis:
            self._reset_if_ineffective("covariates", None, "analysis")

        if "segmented" in self.perturbator:
            self._raise_error_if_missing("segment_cols", "perturbator")

        # scale_col is only meaningful for delta analyses; reject it otherwise.
        # (Previously this called _raise_error_if_missing behind an
        # "is not None" guard, which could never raise.)
        if "delta" not in self.analysis:
            self._raise_error_if_present("scale_col", "analysis")

    def _are_different(self, arg1, arg2) -> bool:
        """Return True when arg1 and arg2 compare unequal."""
        return arg1 != arg2

    def _reset_if_ineffective(self, attr, default, other_attr):
        """
        Reset attr to default, logging a warning, if it currently differs from default.

        Arguments:
            attr: name of the attribute to check and possibly override
            default: value the attribute is reset to
            other_attr: name of the attribute that makes attr ineffective (used in the warning)
        """
        if self._are_different(getattr(self, attr), default):
            self._set_and_log(attr, default, other_attr)

    def _set_and_log(self, attr, value, other_attr):
        """Log a warning that attr has no effect given other_attr, then set attr to value."""
        logging.warning(
            f"{attr} = {getattr(self, attr)} has no effect with "
            f"{other_attr} = {getattr(self, other_attr)}. "
            f"Overriding {attr} to {value}."
        )
        setattr(self, attr, value)

    def _raise_error_if_missing(self, attr, other_attr):
        """Raise MissingArgumentError when attr is None but required by other_attr."""
        if getattr(self, attr) is None:
            raise MissingArgumentError(
                f"{attr} is required when using "
                f"{other_attr} = {getattr(self, other_attr)}."
            )

    def _raise_error_if_present(self, attr, other_attr):
        """Raise UnexpectedArgumentError when attr is set but not expected with other_attr."""
        # The attribute is "present" when it is not None.
        if getattr(self, attr) is not None:
            raise UnexpectedArgumentError(
                f"{attr} is not expected when using "
                f"{other_attr} = {getattr(self, other_attr)}."
            )