from cluster_experiments.power_config import *
¶
PowerConfig
dataclass
¶
Dataclass to create a power analysis from.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
splitter |
str |
Splitter object to use |
required |
perturbator |
str |
Perturbator object to use, defaults to "" for normal power analysis |
'' |
analysis |
str |
ExperimentAnalysis object to use |
required |
washover |
str |
Washover object to use, defaults to "" |
'' |
cupac_model |
str |
CUPAC model to use |
'' |
n_simulations |
int |
number of simulations to run |
100 |
cluster_cols |
Optional[List[str]] |
list of columns to use as clusters |
None |
target_col |
str |
column to use as target |
'target' |
treatment_col |
str |
column to use as treatment |
'treatment' |
treatment |
str |
what value of treatment_col should be considered as treatment |
'B' |
control |
str |
what value of treatment_col should be considered as control |
'A' |
strata_cols |
Optional[List[str]] |
columns to stratify with |
None |
splitter_weights |
Optional[List[float]] |
weights to use for the splitter; must have the same length as treatments, with each weight corresponding to one element of treatments |
None |
switch_frequency |
Optional[str] |
how often to switch treatments |
None |
time_col |
Optional[str] |
column to use as time in switchback splitter |
None |
washover_time_delta |
Optional[Union[datetime.timedelta, int]] |
optional; either an int giving the washover time in minutes or a datetime.timedelta object |
None |
covariates |
Optional[List[str]] |
list of columns to use as covariates |
None |
average_effect |
Optional[float] |
average effect to use in the perturbator |
None |
scale |
Optional[float] |
scale to use in stochastic perturbators |
None |
range_min |
Optional[float] |
minimum value of the target range for relative beta perturbator, must be >-1 |
None |
range_max |
Optional[float] |
maximum value of the target range for relative beta perturbator |
None |
reduce_variance |
Optional[bool] |
whether to reduce variance in the BetaRelative perturbator |
None |
segment_cols |
Optional[List[str]] |
list of segmentation columns for segmented perturbator |
None |
treatments |
Optional[List[str]] |
list of treatments to use |
None |
alpha |
float |
alpha value to use in the power analysis |
0.05 |
agg_col |
str |
column to use for aggregation in the CUPAC model |
'' |
smoothing_factor |
float |
smoothing value to use in the CUPAC model |
20 |
features_cupac_model |
Optional[List[str]] |
list of features to use in the CUPAC model |
None |
seed |
Optional[int] |
seed to make the power analysis reproducible |
None |
Usage:
# Import the config class and the two power-analysis classes built from it.
from cluster_experiments.power_config import PowerConfig
from cluster_experiments.power_analysis import PowerAnalysis, NormalPowerAnalysis
# Describe the setup with string keys: "gee" analysis, "clustered_balance"
# splitter, "constant" perturbator, clustering on the "city" column.
p = PowerConfig(
analysis="gee",
splitter="clustered_balance",
perturbator="constant",
cluster_cols=["city"],
n_simulations=100,
alpha=0.05,
)
# The same config object is passed to both from_config constructors.
power_analysis = PowerAnalysis.from_config(p)
normal_power_analysis = NormalPowerAnalysis.from_config(p)
Source code in cluster_experiments/power_config.py
class PowerConfig:
    """
    Dataclass to create a power analysis from.

    After the fields are set, ``__post_init__`` resets every argument that has
    no effect with the selected splitter / perturbator / analysis / cupac model
    back to its default (logging a warning each time), and raises if a
    segmented perturbator is requested without ``segment_cols``.

    Arguments:
        splitter: Splitter object to use
        perturbator: Perturbator object to use, defaults to "" for normal power analysis
        analysis: ExperimentAnalysis object to use
        washover: Washover object to use, defaults to ""
        cupac_model: CUPAC model to use
        n_simulations: number of simulations to run
        cluster_cols: list of columns to use as clusters
        target_col: column to use as target
        treatment_col: column to use as treatment
        treatment: what value of treatment_col should be considered as treatment
        control: what value of treatment_col should be considered as control
        strata_cols: columns to stratify with
        splitter_weights: weights to use for the splitter, should have the same length as treatments, each weight should correspond to an element in treatments
        switch_frequency: how often to switch treatments
        time_col: column to use as time in switchback splitter
        washover_time_delta: optional, int indicating the washover time in minutes or datetime.timedelta object
        covariates: list of columns to use as covariates
        hypothesis: alternative hypothesis for the analysis, defaults to "two-sided" (the other accepted values are not visible here; confirm against the analysis classes)
        average_effect: average effect to use in the perturbator
        scale: scale to use in stochastic perturbators
        range_min: minimum value of the target range for relative beta perturbator, must be >-1
        range_max: maximum value of the target range for relative beta perturbator
        reduce_variance: whether to reduce variance in the BetaRelative perturbator
        segment_cols: list of segmentation columns for segmented perturbator
        treatments: list of treatments to use
        alpha: alpha value to use in the power analysis
        agg_col: column to use for aggregation in the CUPAC model
        smoothing_factor: smoothing value to use in the CUPAC model
        features_cupac_model: list of features to use in the CUPAC model
        seed: seed to make the power analysis reproducible

    Usage:

    ```python
    from cluster_experiments.power_config import PowerConfig
    from cluster_experiments.power_analysis import PowerAnalysis, NormalPowerAnalysis

    p = PowerConfig(
        analysis="gee",
        splitter="clustered_balance",
        perturbator="constant",
        cluster_cols=["city"],
        n_simulations=100,
        alpha=0.05,
    )
    power_analysis = PowerAnalysis.from_config(p)
    normal_power_analysis = NormalPowerAnalysis.from_config(p)
    ```
    """

    # NOTE(review): this listing does not show a decorator; the class is
    # presented as a dataclass, so __post_init__ is presumably invoked by the
    # generated __init__ -- confirm against the module source.

    # mappings
    splitter: str
    analysis: str
    perturbator: str = ""
    washover: str = ""

    # Needed
    cluster_cols: Optional[List[str]] = None

    # optional mappings
    cupac_model: str = ""

    # Shared
    target_col: str = "target"
    treatment_col: str = "treatment"
    treatment: str = "B"

    # Perturbator
    average_effect: Optional[float] = None
    scale: Optional[float] = None
    range_min: Optional[float] = None
    range_max: Optional[float] = None
    reduce_variance: Optional[bool] = None
    segment_cols: Optional[List[str]] = None

    # Splitter
    treatments: Optional[List[str]] = None
    strata_cols: Optional[List[str]] = None
    splitter_weights: Optional[List[float]] = None
    switch_frequency: Optional[str] = None

    # Switchback
    time_col: Optional[str] = None
    washover_time_delta: Optional[Union[datetime.timedelta, int]] = None

    # Analysis
    covariates: Optional[List[str]] = None
    hypothesis: str = "two-sided"

    # Power analysis
    n_simulations: int = 100
    alpha: float = 0.05
    control: str = "A"

    # Cupac
    agg_col: str = ""
    smoothing_factor: float = 20
    features_cupac_model: Optional[List[str]] = None

    seed: Optional[int] = None

    def __post_init__(self):
        """Reset arguments that are ignored by the chosen components.

        Each reset goes through ``_set_and_log`` so the user is warned that
        the value they passed has no effect.

        Raises:
            MissingArgumentError: if the perturbator name contains
                "segmented" and ``segment_cols`` is None.
        """
        # Switchback-only arguments are meaningless for any other splitter.
        if "switchback" not in self.splitter:
            if self._are_different(self.switch_frequency, None):
                self._set_and_log("switch_frequency", None, "splitter")
            if self._are_different(self.washover_time_delta, None):
                self._set_and_log("washover_time_delta", None, "splitter")
            if self._are_different(self.washover, ""):
                self._set_and_log("washover", "", "splitter")
            # an exception is made when we have no perturbator (normal power analysis)
            if self._are_different(self.time_col, None) and self.perturbator != "":
                self._set_and_log("time_col", None, "splitter")

        # Perturbator-specific arguments.
        if self.perturbator not in {"normal", "beta_relative_positive"}:
            if self._are_different(self.scale, None):
                self._set_and_log("scale", None, "perturbator")

        if self.perturbator not in {"beta_relative", "segmented_beta_relative"}:
            if self._are_different(self.range_min, None):
                self._set_and_log("range_min", None, "perturbator")
            if self._are_different(self.range_max, None):
                self._set_and_log("range_max", None, "perturbator")
            if self._are_different(self.reduce_variance, None):
                self._set_and_log("reduce_variance", None, "perturbator")

        if self.perturbator not in {"segmented_beta_relative"}:
            if self._are_different(self.segment_cols, None):
                self._set_and_log("segment_cols", None, "perturbator")

        # strata_cols is kept for stratified splitters and paired t-tests only.
        if "stratified" not in self.splitter and "paired_ttest" not in self.analysis:
            if self._are_different(self.strata_cols, None):
                self._set_and_log("strata_cols", None, "splitter")

        # Match on "balance" rather than "balanced": the balanced splitter key
        # used in the usage example is "clustered_balance", which does not
        # contain the substring "balanced". "balance" covers both spellings.
        if "stratified" in self.splitter or "balance" in self.splitter:
            if self._are_different(self.splitter_weights, None):
                self._set_and_log("splitter_weights", None, "splitter")

        # CUPAC arguments only apply to the mean cupac model.
        if self.cupac_model != "mean_cupac_model":
            if self._are_different(self.agg_col, ""):
                self._set_and_log("agg_col", "", "cupac_model")
            if self._are_different(self.smoothing_factor, 20):
                self._set_and_log("smoothing_factor", 20, "cupac_model")

        # for now, features_cupac_model are not used
        if self._are_different(self.features_cupac_model, None):
            self._set_and_log("features_cupac_model", None, "cupac_model")

        # Any analysis whose name contains "ttest" gets its covariates cleared.
        if "ttest" in self.analysis:
            if self._are_different(self.covariates, None):
                self._set_and_log("covariates", None, "analysis")

        if "segmented" in self.perturbator:
            self._raise_error_if_missing("segment_cols", "perturbator")

    def _are_different(self, arg1, arg2) -> bool:
        """Return True when ``arg1 != arg2``."""
        return arg1 != arg2

    def _set_and_log(self, attr, value, other_attr):
        """Warn that ``attr`` has no effect given ``other_attr``, then set it to ``value``.

        The warning is emitted before the assignment so that it reports the
        value the user originally supplied.
        """
        logging.warning(
            f"{attr} = {getattr(self, attr)} has no effect with "
            f"{other_attr} = {getattr(self, other_attr)}. "
            f"Overriding {attr} to {value}."
        )
        setattr(self, attr, value)

    def _raise_error_if_missing(self, attr, other_attr):
        """Raise MissingArgumentError if ``attr`` is None on this config.

        ``other_attr`` names the field whose value makes ``attr`` mandatory;
        it is only used to build the error message.
        """
        if getattr(self, attr) is None:
            raise MissingArgumentError(
                f"{attr} is required when using "
                f"{other_attr} = {getattr(self, other_attr)}."
            )