Skip to content

from cluster_experiments.inference.analysis_plan import *

AnalysisPlan

A class used to represent an Analysis Plan with a list of hypothesis tests and a list of variants. All the hypothesis tests in the same analysis plan will be analysed with the same dataframe, which will need to be passed in the analyze() method.

Attributes

- tests : List[HypothesisTest] — A list of HypothesisTest instances
- variants : List[Variant] — A list of Variant instances
- variant_col : str — Name of the column with the experiment groups
- alpha : float — Significance level used to construct confidence intervals

Source code in cluster_experiments/inference/analysis_plan.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
class AnalysisPlan:
    """
    A class used to represent an Analysis Plan with a list of hypothesis tests and a list of variants.
    All the hypothesis tests in the same analysis plan will be analysed with the same dataframe, which will need to be passed in the analyze() method.

    Attributes
    ----------
    tests : List[HypothesisTest]
        A list of HypothesisTest instances
    variants : List[Variant]
        A list of Variant instances
    variant_col : str
        name of the column with the experiment groups
    alpha : float
        significance level used to construct confidence intervals
    """

    def __init__(
        self,
        tests: List[HypothesisTest],
        variants: List[Variant],
        variant_col: str = "treatment",
        alpha: float = 0.05,
    ):
        """
        Parameters
        ----------
        tests : List[HypothesisTest]
            A list of HypothesisTest instances
        variants : List[Variant]
            A list of Variant instances
        variant_col : str
            The name of the column containing the variant names.
        alpha : float
            significance level used to construct confidence intervals

        Raises
        ------
        TypeError
            If tests, variants or variant_col have the wrong type (see _validate_inputs).
        ValueError
            If tests or variants are empty lists.
        """

        self.tests = tests
        self.variants = variants
        self.variant_col = variant_col
        self.alpha = alpha

        # Reject a malformed plan at construction time rather than in analyze().
        self._validate_inputs()

    def _validate_inputs(self):
        """
        Validates the inputs for the AnalysisPlan class.

        Type checks run before emptiness checks, so an empty list passes the
        isinstance/all() checks and is then rejected with a ValueError.

        Raises
        ------
        TypeError
            If tests is not a list of HypothesisTest instances, if variants is not a list of Variant instances,
            or if variant_col is not a string.
        ValueError
            If tests or variants are empty lists.
        """
        if not isinstance(self.tests, list) or not all(
            isinstance(test, HypothesisTest) for test in self.tests
        ):
            raise TypeError("Tests must be a list of HypothesisTest instances")
        if not isinstance(self.variants, list) or not all(
            isinstance(variant, Variant) for variant in self.variants
        ):
            raise TypeError("Variants must be a list of Variant instances")
        if not isinstance(self.variant_col, str):
            raise TypeError("Variant_col must be a string")
        if not self.tests:
            raise ValueError("Tests list cannot be empty")
        if not self.variants:
            raise ValueError("Variants list cannot be empty")

    def analyze(
        self,
        exp_data: pd.DataFrame,
        pre_exp_data: Optional[pd.DataFrame] = None,
        verbose: bool = False,
    ) -> AnalysisPlanResults:
        """
        Method to run the experiment analysis.

        For every test, every treatment variant, every dimension of the test and
        every value yielded by dimension.iterate_dimension_values(), the test's
        get_test_results() is called and its output is added to the accumulated
        results.

        Parameters
        ----------
        exp_data : pd.DataFrame
            The experimental data. Must be a non-empty DataFrame.
        pre_exp_data : Optional[pd.DataFrame]
            The pre-experimental data (optional). Passed to each test's add_covariates();
            when provided it must be a non-empty DataFrame.
        verbose : bool
            If True, logs the metric alias, treatment name, dimension name and dimension
            value before each individual test is computed.

        Returns
        -------
        AnalysisPlanResults
            The accumulated results of all the computed tests

        Raises
        ------
        ValueError
            If the input dataframes are invalid, if no treatment variants are found
            or if no control variant is found.
        """

        # Validate input data at the beginning
        self._validate_data(exp_data, pre_exp_data)

        # Both properties scan self.variants and raise on a misconfigured plan.
        # Resolve them once, before any per-test work, so the error surfaces
        # immediately and the scan is not repeated inside the nested loops.
        treatment_variants = self.treatment_variants
        control_variant = self.control_variant

        analysis_results = AnalysisPlanResults()

        for test in self.tests:
            # The returned frame replaces exp_data, so subsequent tests receive
            # the frame produced by the previous test's add_covariates() call.
            exp_data = test.add_covariates(exp_data, pre_exp_data)

            for treatment_variant in treatment_variants:
                for dimension in test.dimensions:
                    for dimension_value in dimension.iterate_dimension_values():

                        if verbose:
                            logger.info(
                                f"Metric: {test.metric.alias}, "
                                f"Treatment: {treatment_variant.name}, "
                                f"Dimension: {dimension.name}, "
                                f"Value: {dimension_value}"
                            )

                        test_results = test.get_test_results(
                            exp_data=exp_data,
                            control_variant=control_variant,
                            treatment_variant=treatment_variant,
                            variant_col=self.variant_col,
                            dimension=dimension,
                            dimension_value=dimension_value,
                            alpha=self.alpha,
                        )

                        analysis_results = analysis_results + test_results

        return analysis_results

    def _validate_data(
        self, exp_data: pd.DataFrame, pre_exp_data: Optional[pd.DataFrame] = None
    ):
        """
        Validates the input dataframes for the analyze method.

        Parameters
        ----------
        exp_data : pd.DataFrame
            The experimental data
        pre_exp_data : Optional[pd.DataFrame]
            The pre-experimental data (optional)

        Raises
        ------
        ValueError
            If exp_data is not a DataFrame or is empty
            If pre_exp_data is provided and is not a DataFrame or is empty
        """
        if not isinstance(exp_data, pd.DataFrame):
            raise ValueError("exp_data must be a pandas DataFrame")
        if exp_data.empty:
            raise ValueError("exp_data cannot be empty")
        # pre_exp_data is optional; it is only checked when actually supplied.
        if pre_exp_data is not None:
            if not isinstance(pre_exp_data, pd.DataFrame):
                raise ValueError("pre_exp_data must be a pandas DataFrame if provided")
            if pre_exp_data.empty:
                raise ValueError("pre_exp_data cannot be empty if provided")

    @property
    def control_variant(self) -> Variant:
        """
        Returns the control variant from the list of variants. Raises an error if no control variant is found.
        If several variants are flagged as control, the first one in the list is returned.

        Returns
        -------
        Variant
            The control variant

        Raises
        ------
        ValueError
            If no control variant is found
        """
        for variant in self.variants:
            if variant.is_control:
                return variant
        raise ValueError("No control variant found")

    @property
    def treatment_variants(self) -> List[Variant]:
        """
        Returns the treatment variants from the list of variants. Raises an error if no treatment variants are found.
        A treatment variant is any variant whose is_control flag is falsy; list order is preserved.

        Returns
        -------
        List[Variant]
            A list of treatment variants

        Raises
        ------
        ValueError
            If no treatment variants are found
        """
        treatments = [variant for variant in self.variants if not variant.is_control]
        if not treatments:
            raise ValueError("No treatment variants found")
        return treatments

    @classmethod
    def from_metrics(
        cls,
        metrics: List[Metric],
        variants: List[Variant],
        variant_col: str = "treatment",
        alpha: float = 0.05,
        dimensions: Optional[List[Dimension]] = None,
        analysis_type: str = "default",
        analysis_config: Optional[Dict[str, Any]] = None,
        custom_analysis_type_mapper: Optional[Dict[str, ExperimentAnalysis]] = None,
    ) -> "AnalysisPlan":
        """
        Creates a simplified AnalysisPlan instance from a list of metrics. It will create HypothesisTest objects under the hood.
        This shortcut does not support cupac, and uses the same dimensions, analysis type and analysis config for all metrics.

        Parameters
        ----------
        metrics : List[Metric]
            A list of Metric instances
        variants : List[Variant]
            A list of Variant instances
        variant_col : str
            The name of the column containing the variant names.
        alpha : float
            Significance level used to construct confidence intervals
        dimensions : Optional[List[Dimension]]
            A list of Dimension instances (optional)
        analysis_type : str
            The type of analysis to be conducted (default: "default")
        analysis_config : Optional[Dict[str, Any]]
            A dictionary containing analysis configuration options (optional)
        custom_analysis_type_mapper : Optional[Dict[str, ExperimentAnalysis]]
            An optional dictionary mapping the names of custom analysis types to the corresponding ExperimentAnalysis classes

        Returns
        -------
        AnalysisPlan
            An instance of AnalysisPlan
        """
        # One HypothesisTest per metric. The `or` fallbacks are evaluated per
        # metric, so omitted arguments yield a fresh empty container each time.
        tests = [
            HypothesisTest(
                metric=metric,
                dimensions=dimensions or [],
                analysis_type=analysis_type,
                analysis_config=analysis_config or {},
                custom_analysis_type_mapper=custom_analysis_type_mapper or {},
            )
            for metric in metrics
        ]

        return cls(
            tests=tests,
            variants=variants,
            variant_col=variant_col,
            alpha=alpha,
        )

    @classmethod
    def from_metrics_config(cls, config: AnalysisPlanConfig) -> "AnalysisPlan":
        """
        Creates an AnalysisPlan instance from a configuration object.

        Each entry of config.metrics, config.variants and config.dimensions is
        converted with the from_metrics_config() constructor of Metric, Variant
        and Dimension respectively; the rest is forwarded to from_metrics().

        Parameters
        ----------
        config : AnalysisPlanConfig
            An instance of AnalysisPlanConfig

        Returns
        -------
        AnalysisPlan
            An instance of AnalysisPlan
        """
        metrics = [
            Metric.from_metrics_config(metric_config)
            for metric_config in config.metrics
        ]
        variants = [
            Variant.from_metrics_config(variant_config)
            for variant_config in config.variants
        ]
        dimensions = [
            Dimension.from_metrics_config(dimension_config)
            for dimension_config in config.dimensions
        ]
        return cls.from_metrics(
            metrics=metrics,
            variants=variants,
            variant_col=config.variant_col,
            alpha=config.alpha,
            dimensions=dimensions,
            analysis_type=config.analysis_type,
            analysis_config=config.analysis_config,
            custom_analysis_type_mapper=config.custom_analysis_type_mapper,
        )

    @classmethod
    def from_metrics_dict(cls, d: Dict[str, Any]) -> "AnalysisPlan":
        """
        Creates an AnalysisPlan instance from a dictionary.

        The dictionary is unpacked as keyword arguments into AnalysisPlanConfig
        and the resulting config is passed to from_metrics_config().

        Parameters
        ----------
        d : Dict[str, Any]
            A dictionary containing the analysis plan configuration

        Returns
        -------
        AnalysisPlan
            An instance of AnalysisPlan
        """
        config = AnalysisPlanConfig(**d)
        return cls.from_metrics_config(config)
control_variant: Variant property

Returns the control variant from the list of variants. Raises an error if no control variant is found.

Returns

Variant The control variant

Raises

ValueError If no control variant is found

treatment_variants: List[Variant] property

Returns the treatment variants from the list of variants. Raises an error if no treatment variants are found.

Returns

List[Variant] A list of treatment variants

Raises

ValueError If no treatment variants are found

__init__(tests, variants, variant_col='treatment', alpha=0.05)

Parameters

- tests : List[HypothesisTest] — A list of HypothesisTest instances
- variants : List[Variant] — A list of Variant instances
- variant_col : str — The name of the column containing the variant names.
- alpha : float — Significance level used to construct confidence intervals

Source code in cluster_experiments/inference/analysis_plan.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def __init__(
    self,
    tests: List[HypothesisTest],
    variants: List[Variant],
    variant_col: str = "treatment",
    alpha: float = 0.05,
):
    """
    Store the plan configuration on the instance, then validate it.

    Parameters
    ----------
    tests : List[HypothesisTest]
        The hypothesis tests that make up the plan
    variants : List[Variant]
        The experiment variants
    variant_col : str
        The name of the column containing the variant names.
    alpha : float
        Significance level used to construct confidence intervals
    """
    self.tests, self.variants = tests, variants
    self.variant_col, self.alpha = variant_col, alpha

    # Validation reads the attributes set above, so it has to run last.
    self._validate_inputs()

analyze(exp_data, pre_exp_data=None, verbose=False)

Method to run the experiment analysis.

Source code in cluster_experiments/inference/analysis_plan.py
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def analyze(
    self,
    exp_data: pd.DataFrame,
    pre_exp_data: Optional[pd.DataFrame] = None,
    verbose: bool = False,
) -> AnalysisPlanResults:
    """
    Method to run the experiment analysis.

    For every test, every treatment variant, every dimension of the test and
    every value yielded by dimension.iterate_dimension_values(), the test's
    get_test_results() is called and its output is added to the accumulated
    results.

    Parameters
    ----------
    exp_data : pd.DataFrame
        The experimental data
    pre_exp_data : Optional[pd.DataFrame]
        The pre-experimental data (optional). Passed to each test's add_covariates().
    verbose : bool
        If True, logs the metric alias, treatment name, dimension name and dimension
        value before each individual test is computed.

    Returns
    -------
    AnalysisPlanResults
        The accumulated results of all the computed tests
    """

    # Validate input data at the beginning
    self._validate_data(exp_data, pre_exp_data)

    # Look the variants up once, before any per-test work: the lookups do not
    # depend on the loop variables, and resolving them here means a plan with
    # missing variants is reported before add_covariates() is ever called.
    treatment_variants = self.treatment_variants
    control_variant = self.control_variant

    analysis_results = AnalysisPlanResults()

    for test in self.tests:
        # The returned frame replaces exp_data, so subsequent tests receive
        # the frame produced by the previous test's add_covariates() call.
        exp_data = test.add_covariates(exp_data, pre_exp_data)

        for treatment_variant in treatment_variants:
            for dimension in test.dimensions:
                for dimension_value in dimension.iterate_dimension_values():

                    if verbose:
                        logger.info(
                            f"Metric: {test.metric.alias}, "
                            f"Treatment: {treatment_variant.name}, "
                            f"Dimension: {dimension.name}, "
                            f"Value: {dimension_value}"
                        )

                    test_results = test.get_test_results(
                        exp_data=exp_data,
                        control_variant=control_variant,
                        treatment_variant=treatment_variant,
                        variant_col=self.variant_col,
                        dimension=dimension,
                        dimension_value=dimension_value,
                        alpha=self.alpha,
                    )

                    analysis_results = analysis_results + test_results

    return analysis_results

from_metrics(metrics, variants, variant_col='treatment', alpha=0.05, dimensions=None, analysis_type='default', analysis_config=None, custom_analysis_type_mapper=None) classmethod

Creates a simplified AnalysisPlan instance from a list of metrics. It will create HypothesisTest objects under the hood. This shortcut does not support cupac, and uses the same dimensions, analysis type and analysis config for all metrics.

Parameters

- metrics : List[Metric] — A list of Metric instances
- variants : List[Variant] — A list of Variant instances
- variant_col : str — The name of the column containing the variant names.
- alpha : float — Significance level used to construct confidence intervals
- dimensions : Optional[List[Dimension]] — A list of Dimension instances (optional)
- analysis_type : str — The type of analysis to be conducted (default: "default")
- analysis_config : Optional[Dict[str, Any]] — A dictionary containing analysis configuration options (optional)
- custom_analysis_type_mapper : Optional[Dict[str, ExperimentAnalysis]] — An optional dictionary mapping the names of custom analysis types to the corresponding ExperimentAnalysis classes

Returns

AnalysisPlan An instance of AnalysisPlan

Source code in cluster_experiments/inference/analysis_plan.py
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
@classmethod
def from_metrics(
    cls,
    metrics: List[Metric],
    variants: List[Variant],
    variant_col: str = "treatment",
    alpha: float = 0.05,
    dimensions: Optional[List[Dimension]] = None,
    analysis_type: str = "default",
    analysis_config: Optional[Dict[str, Any]] = None,
    custom_analysis_type_mapper: Optional[Dict[str, ExperimentAnalysis]] = None,
) -> "AnalysisPlan":
    """
    Build an AnalysisPlan directly from metrics, creating one HypothesisTest per metric.
    This shortcut does not support cupac; every metric shares the same dimensions,
    analysis type and analysis config.

    Parameters
    ----------
    metrics : List[Metric]
        The metrics to test
    variants : List[Variant]
        The experiment variants
    variant_col : str
        The name of the column containing the variant names.
    alpha : float
        Significance level used to construct confidence intervals
    dimensions : Optional[List[Dimension]]
        Dimensions applied to every metric (optional)
    analysis_type : str
        The type of analysis to be conducted (default: "default")
    analysis_config : Optional[Dict[str, Any]]
        Analysis configuration options applied to every metric (optional)
    custom_analysis_type_mapper : Optional[Dict[str, ExperimentAnalysis]]
        Optional mapping from custom analysis type names to the corresponding ExperimentAnalysis classes

    Returns
    -------
    AnalysisPlan
        The plan wrapping the generated hypothesis tests
    """
    hypothesis_tests = []
    for metric in metrics:
        # The `or` fallbacks stay inside the loop on purpose: when an argument
        # is omitted, each test receives its own new empty list/dict.
        hypothesis_test = HypothesisTest(
            metric=metric,
            dimensions=dimensions or [],
            analysis_type=analysis_type,
            analysis_config=analysis_config or {},
            custom_analysis_type_mapper=custom_analysis_type_mapper or {},
        )
        hypothesis_tests.append(hypothesis_test)

    return cls(
        tests=hypothesis_tests,
        variants=variants,
        variant_col=variant_col,
        alpha=alpha,
    )

from_metrics_config(config) classmethod

Creates an AnalysisPlan instance from a configuration object.

Parameters

- config : AnalysisPlanConfig — An instance of AnalysisPlanConfig

Returns

AnalysisPlan An instance of AnalysisPlan

Source code in cluster_experiments/inference/analysis_plan.py
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
@classmethod
def from_metrics_config(cls, config: AnalysisPlanConfig) -> "AnalysisPlan":
    """
    Build an AnalysisPlan from an AnalysisPlanConfig.

    The metric, variant and dimension entries of the config are converted with
    the from_metrics_config() constructor of Metric, Variant and Dimension;
    the remaining config fields are forwarded unchanged to from_metrics().

    Parameters
    ----------
    config : AnalysisPlanConfig
        The configuration object describing the plan

    Returns
    -------
    AnalysisPlan
        The plan described by the configuration
    """
    # Converted in the same order as the config sections: metrics, variants, dimensions.
    parsed_metrics = list(map(Metric.from_metrics_config, config.metrics))
    parsed_variants = list(map(Variant.from_metrics_config, config.variants))
    parsed_dimensions = list(map(Dimension.from_metrics_config, config.dimensions))

    return cls.from_metrics(
        metrics=parsed_metrics,
        variants=parsed_variants,
        variant_col=config.variant_col,
        alpha=config.alpha,
        dimensions=parsed_dimensions,
        analysis_type=config.analysis_type,
        analysis_config=config.analysis_config,
        custom_analysis_type_mapper=config.custom_analysis_type_mapper,
    )

from_metrics_dict(d) classmethod

Creates an AnalysisPlan instance from a dictionary.

Parameters

- d : Dict[str, Any] — A dictionary containing the analysis plan configuration

Returns

AnalysisPlan An instance of AnalysisPlan

Source code in cluster_experiments/inference/analysis_plan.py
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
@classmethod
def from_metrics_dict(cls, d: Dict[str, Any]) -> "AnalysisPlan":
    """
    Build an AnalysisPlan from a plain dictionary.

    The dictionary is unpacked as keyword arguments into AnalysisPlanConfig,
    and the resulting config is handed to from_metrics_config().

    Parameters
    ----------
    d : Dict[str, Any]
        The analysis plan configuration as a dictionary

    Returns
    -------
    AnalysisPlan
        The plan described by the dictionary
    """
    return cls.from_metrics_config(AnalysisPlanConfig(**d))