Skip to content

from cluster_experiments.power_config import *

PowerConfig dataclass

Dataclass to create a power analysis from.

Parameters:

Name Type Description Default
splitter str

Splitter object to use

required
perturbator str

Perturbator object to use, defaults to "" for normal power analysis

''
analysis str

ExperimentAnalysis object to use

required
washover str

Washover object to use, defaults to ""

''
cupac_model str

CUPAC model to use

''
n_simulations int

number of simulations to run

100
cluster_cols Optional[List[str]]

list of columns to use as clusters

None
target_col str

column to use as target

'target'
treatment_col str

column to use as treatment

'treatment'
treatment str

what value of treatment_col should be considered as treatment

'B'
control str

what value of treatment_col should be considered as control

'A'
strata_cols Optional[List[str]]

columns to stratify with

None
splitter_weights Optional[List[float]]

weights to use for the splitter, should have the same length as treatments, each weight should correspond to an element in treatments

None
switch_frequency Optional[str]

how often to switch treatments

None
time_col Optional[str]

column to use as time in switchback splitter

None
washover_time_delta Optional[Union[timedelta, int]]

optional, int indicating the washover time in minutes or datetime.timedelta object

None
covariates Optional[List[str]]

list of columns to use as covariates

None
average_effect Optional[float]

average effect to use in the perturbator

None
scale Optional[float]

scale to use in stochastic perturbators

None
range_min Optional[float]

minimum value of the target range for relative beta perturbator, must be >-1

None
range_max Optional[float]

maximum value of the target range for relative beta perturbator

None
reduce_variance Optional[bool]

whether to reduce variance in the BetaRelative perturbator

None
segment_cols Optional[List[str]]

list of segmentation columns for segmented perturbator

None
treatments Optional[List[str]]

list of treatments to use

None
alpha float

alpha value to use in the power analysis

0.05
agg_col str

column to use for aggregation in the CUPAC model

''
smoothing_factor float

smoothing value to use in the CUPAC model

20
features_cupac_model Optional[List[str]]

list of features to use in the CUPAC model

None
seed Optional[int]

seed to make the power analysis reproducible

None

Usage:

from cluster_experiments.power_config import PowerConfig
from cluster_experiments.power_analysis import PowerAnalysis, NormalPowerAnalysis

p = PowerConfig(
    analysis="gee",
    splitter="clustered_balance",
    perturbator="constant",
    cluster_cols=["city"],
    n_simulations=100,
    alpha=0.05,
)
power_analysis = PowerAnalysis.from_config(p)

normal_power_analysis = NormalPowerAnalysis.from_config(p)
Source code in cluster_experiments/power_config.py
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
@dataclass(eq=True)
class PowerConfig:
    """
    Dataclass to create a power analysis from.

    After construction, ``__post_init__`` resets (with a logged warning) every
    option that has no effect for the chosen splitter / perturbator / analysis /
    cupac_model, and raises if a required companion option is missing.

    Arguments:
        splitter: Splitter object to use
        perturbator: Perturbator object to use, defaults to "" for normal power analysis
        analysis: ExperimentAnalysis object to use
        washover: Washover object to use, defaults to ""
        cupac_model: CUPAC model to use
        scale_col: optional column name; only meaningful when the analysis name contains "delta"
        n_simulations: number of simulations to run
        cluster_cols: list of columns to use as clusters
        target_col: column to use as target
        treatment_col: column to use as treatment
        treatment: what value of treatment_col should be considered as treatment
        control: what value of treatment_col should be considered as control
        strata_cols: columns to stratify with
        splitter_weights: weights to use for the splitter, should have the same length as treatments, each weight should correspond to an element in treatments
        switch_frequency: how often to switch treatments
        time_col: column to use as time in switchback splitter
        washover_time_delta: optional, int indicating the washover time in minutes or datetime.timedelta object
        covariates: list of columns to use as covariates
        hypothesis: hypothesis setting of the analysis, defaults to "two-sided"
        cov_type: optional covariance type of the analysis, one of the values listed in the field annotation
        add_covariate_interaction: analysis flag, defaults to False (presumably adds covariate interaction terms; confirm against the analysis class)
        average_effect: average effect to use in the perturbator
        scale: scale to use in stochastic perturbators
        range_min: minimum value of the target range for relative beta perturbator, must be >-1
        range_max: maximum value of the target range for relative beta perturbator
        reduce_variance: whether to reduce variance in the BetaRelative perturbator
        segment_cols: list of segmentation columns for segmented perturbator
        treatments: list of treatments to use
        alpha: alpha value to use in the power analysis
        agg_col: column to use for aggregation in the CUPAC model
        smoothing_factor: smoothing value to use in the CUPAC model
        features_cupac_model: list of features to use in the CUPAC model
        seed: seed to make the power analysis reproducible

    Raises:
        MissingArgumentError: if a "segmented" perturbator is used without segment_cols.

    Usage:

    ```python
    from cluster_experiments.power_config import PowerConfig
    from cluster_experiments.power_analysis import PowerAnalysis, NormalPowerAnalysis

    p = PowerConfig(
        analysis="gee",
        splitter="clustered_balance",
        perturbator="constant",
        cluster_cols=["city"],
        n_simulations=100,
        alpha=0.05,
    )
    power_analysis = PowerAnalysis.from_config(p)

    normal_power_analysis = NormalPowerAnalysis.from_config(p)
    ```
    """

    # NOTE: field order is part of the public constructor signature
    # (positional arguments); do not reorder.

    # mappings
    splitter: str
    analysis: str
    perturbator: str = ""
    washover: str = ""

    # Needed
    cluster_cols: Optional[List[str]] = None

    # optional mappings
    cupac_model: str = ""
    scale_col: Optional[str] = None

    # Shared
    target_col: str = "target"
    treatment_col: str = "treatment"
    treatment: str = "B"

    # Perturbator
    average_effect: Optional[float] = None
    scale: Optional[float] = None
    range_min: Optional[float] = None
    range_max: Optional[float] = None
    reduce_variance: Optional[bool] = None
    segment_cols: Optional[List[str]] = None

    # Splitter
    treatments: Optional[List[str]] = None
    strata_cols: Optional[List[str]] = None
    splitter_weights: Optional[List[float]] = None
    switch_frequency: Optional[str] = None
    # Switchback
    time_col: Optional[str] = None
    washover_time_delta: Optional[Union[datetime.timedelta, int]] = None

    # Analysis
    covariates: Optional[List[str]] = None
    hypothesis: str = "two-sided"
    cov_type: Optional[
        Literal[
            "nonrobust",
            "fixed scale",
            "HC0",
            "HC1",
            "HC2",
            "HC3",
            "HAC",
            "hac-panel",
            "hac-groupsum",
            "cluster",
        ]
    ] = None
    add_covariate_interaction: bool = False

    # Power analysis
    n_simulations: int = 100
    alpha: float = 0.05
    control: str = "A"

    # Cupac
    agg_col: str = ""
    smoothing_factor: float = 20
    features_cupac_model: Optional[List[str]] = None

    seed: Optional[int] = None

    def __post_init__(self) -> None:
        """Normalise the configuration right after dataclass construction.

        Options that cannot influence the selected components are reset to
        their defaults with a warning; a missing required option raises.
        """
        # Switchback-only options are dropped for every other splitter.
        if "switchback" not in self.splitter:
            if self._are_different(self.switch_frequency, None):
                self._set_and_log("switch_frequency", None, "splitter")
            if self._are_different(self.washover_time_delta, None):
                self._set_and_log("washover_time_delta", None, "splitter")
            if self._are_different(self.washover, ""):
                self._set_and_log("washover", "", "splitter")
            # an exception is made when we have no perturbator (normal power analysis)
            if self._are_different(self.time_col, None) and self.perturbator != "":
                self._set_and_log("time_col", None, "splitter")

        # `scale` is only kept for these two perturbators.
        if self.perturbator not in {"normal", "beta_relative_positive"}:
            if self._are_different(self.scale, None):
                self._set_and_log("scale", None, "perturbator")

        # Range / variance options are only kept for the beta-relative family.
        if self.perturbator not in {"beta_relative", "segmented_beta_relative"}:
            if self._are_different(self.range_min, None):
                self._set_and_log("range_min", None, "perturbator")
            if self._are_different(self.range_max, None):
                self._set_and_log("range_max", None, "perturbator")
            if self._are_different(self.reduce_variance, None):
                self._set_and_log("reduce_variance", None, "perturbator")

        if self.perturbator not in {"segmented_beta_relative"}:
            if self._are_different(self.segment_cols, None):
                self._set_and_log("segment_cols", None, "perturbator")

        # strata_cols survive for stratified splitters or a paired t-test analysis.
        if "stratified" not in self.splitter and "paired_ttest" not in self.analysis:
            if self._are_different(self.strata_cols, None):
                self._set_and_log("strata_cols", None, "splitter")

        # NOTE(review): the usage example names the splitter "clustered_balance",
        # which does not contain "balanced" -- confirm the intended substring.
        if "stratified" in self.splitter or "balanced" in self.splitter:
            if self._are_different(self.splitter_weights, None):
                self._set_and_log("splitter_weights", None, "splitter")

        # CUPAC aggregation options are only kept for the mean CUPAC model.
        if self.cupac_model != "mean_cupac_model":
            if self._are_different(self.agg_col, ""):
                self._set_and_log("agg_col", "", "cupac_model")
            if self._are_different(self.smoothing_factor, 20):
                self._set_and_log("smoothing_factor", 20, "cupac_model")
        # for now, features_cupac_model are not used
        if self._are_different(self.features_cupac_model, None):
            self._set_and_log("features_cupac_model", None, "cupac_model")

        if "ttest" in self.analysis:
            if self._are_different(self.covariates, None):
                self._set_and_log("covariates", None, "analysis")

        if "segmented" in self.perturbator:
            self._raise_error_if_missing("segment_cols", "perturbator")

        if "delta" not in self.analysis:
            if self.scale_col is not None:
                # NOTE(review): this call can never raise, because scale_col is
                # known to be set here. Kept as-is so existing configurations
                # keep constructing; decide whether this should warn/raise.
                self._raise_error_if_missing("scale_col", "analysis")

    def _are_different(self, arg1, arg2) -> bool:
        """Return True when ``arg1 != arg2``."""
        return arg1 != arg2

    def _set_and_log(self, attr, value, other_attr) -> None:
        """Warn that ``attr`` is ineffective given ``other_attr`` and set it to ``value``."""
        logging.warning(
            f"{attr} = {getattr(self, attr)} has no effect with "
            f"{other_attr} = {getattr(self, other_attr)}. "
            f"Overriding {attr} to {value}."
        )
        setattr(self, attr, value)

    def _raise_error_if_missing(self, attr, other_attr) -> None:
        """Raise MissingArgumentError when ``attr`` is None."""
        if getattr(self, attr) is None:
            raise MissingArgumentError(
                f"{attr} is required when using "
                f"{other_attr} = {getattr(self, other_attr)}."
            )

    def _raise_error_if_present(self, attr, other_attr) -> None:
        """Raise UnexpectedArgumentError when ``attr`` is set (not None)."""
        # Fixed: the check used to be `is None`, which raised for an absent
        # attribute and stayed silent for a present one.
        if getattr(self, attr) is not None:
            raise UnexpectedArgumentError(
                f"{attr} is not expected when using "
                f"{other_attr} = {getattr(self, other_attr)}."
            )