Experiment Analysis¶
This notebook demonstrates how to analyze the results of an experiment and get a useful scorecard as a summary of the analysis.
The experiment is an A/B/C test where we randomise users of a food delivery app to test a new ranking algorithm, and we compare the monetary value and the delivery time of the orders created.
First of all we import the necessary libraries and generate some fake data about our experiment.
import pandas as pd
import numpy as np

from cluster_experiments import (
    AnalysisPlan,
    SimpleMetric,
    Dimension,
    Variant,
    HypothesisTest,
    TargetAggregation,
)
def generate_fake_data():
    # Constants
    NUM_ORDERS = 10_000
    NUM_CUSTOMERS = 3_000
    EXPERIMENT_GROUPS = ['control', 'treatment_1', 'treatment_2']
    GROUP_SIZE = NUM_CUSTOMERS // len(EXPERIMENT_GROUPS)

    # Generate customer ids
    customer_ids = np.arange(1, NUM_CUSTOMERS + 1)

    # Shuffle and split customer ids into experiment groups;
    # any remainder after the even split is assigned at random
    np.random.shuffle(customer_ids)
    experiment_group = np.repeat(EXPERIMENT_GROUPS, GROUP_SIZE)
    experiment_group = np.concatenate(
        (experiment_group,
         np.random.choice(EXPERIMENT_GROUPS, NUM_CUSTOMERS - len(experiment_group)))
    )

    # Assign customers to groups
    customer_group_mapping = dict(zip(customer_ids, experiment_group))

    # Generate orders
    order_ids = np.arange(1, NUM_ORDERS + 1)
    customers = np.random.choice(customer_ids, NUM_ORDERS)
    order_values = np.abs(np.random.normal(loc=10, scale=2, size=NUM_ORDERS))  # positive, centred around 10
    order_delivery_times = np.abs(np.random.normal(loc=30, scale=5, size=NUM_ORDERS))  # positive, centred around 30 minutes
    order_city_codes = np.random.randint(1, 3, NUM_ORDERS)  # random city codes, either 1 or 2

    # Create the experiment DataFrame
    data = {
        'order_id': order_ids,
        'customer_id': customers,
        'experiment_group': [customer_group_mapping[customer_id] for customer_id in customers],
        'order_value': order_values,
        'order_delivery_time_in_minutes': order_delivery_times,
        'order_city_code': order_city_codes
    }
    df = pd.DataFrame(data)
    df.order_city_code = df.order_city_code.astype(str)

    # Build a noisy pre-experiment dataset from a third of the orders
    pre_exp_df = df.assign(
        order_value=lambda df: df['order_value'] + np.random.normal(loc=0, scale=1, size=NUM_ORDERS),
        order_delivery_time_in_minutes=lambda df: df['order_delivery_time_in_minutes'] + np.random.normal(loc=0, scale=2, size=NUM_ORDERS)
    ).sample(int(NUM_ORDERS / 3))

    return df, pre_exp_df


df, pre_exp_df = generate_fake_data()
# Show the first few rows of both DataFrames
display(df.head())
display(pre_exp_df.head())
|   | order_id | customer_id | experiment_group | order_value | order_delivery_time_in_minutes | order_city_code |
|---|---|---|---|---|---|---|
| 0 | 1 | 753 | treatment_1 | 8.044906 | 26.518285 | 1 |
| 1 | 2 | 1477 | treatment_1 | 12.340373 | 35.784601 | 1 |
| 2 | 3 | 2257 | treatment_2 | 8.976782 | 28.306930 | 2 |
| 3 | 4 | 2931 | treatment_2 | 8.718898 | 33.730940 | 2 |
| 4 | 5 | 1550 | treatment_2 | 11.877033 | 33.375767 | 2 |
|   | order_id | customer_id | experiment_group | order_value | order_delivery_time_in_minutes | order_city_code |
|---|---|---|---|---|---|---|
| 3380 | 3381 | 504 | control | 13.536650 | 23.508819 | 1 |
| 3828 | 3829 | 2453 | treatment_2 | 6.571081 | 31.107911 | 1 |
| 2227 | 2228 | 1181 | treatment_1 | 10.060709 | 33.062087 | 2 |
| 820 | 821 | 860 | control | 9.676720 | 21.155648 | 2 |
| 4593 | 4594 | 2440 | treatment_2 | 11.923139 | 34.952969 | 2 |
Now that we have a sample experiment dataset, plus a pre-experiment dataset we can use to showcase cupac-style variance reduction in the analysis flow, we can define the building blocks of the analysis plan: metrics, dimensions and variants.
Metrics:
- AOV (Average Order Value)
- AVG DT (Average Delivery Time)
Dimensions:
- order_city_code
Variants:
- control
- treatment_1
- treatment_2
dimension__city_code = Dimension(
    name='order_city_code',
    values=['1', '2']
)

metric__order_value = SimpleMetric(
    alias='AOV',
    name='order_value'
)

metric__delivery_time = SimpleMetric(
    alias='AVG DT',
    name='order_delivery_time_in_minutes'
)

variants = [
    Variant('control', is_control=True),
    Variant('treatment_1', is_control=False),
    Variant('treatment_2', is_control=False)
]
Now we can define the hypothesis tests that we want to run on the data. We will run two tests:
- A clustered OLS test for the order value:
- no variance reduction
- slice results by the city code of the orders
- A GEE test for the delivery time:
- with variance reduction using cupac (target aggregation)
- no slicing
As you can see, each hypothesis test is flexible enough to carry its own configuration.
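To make the slicing idea concrete before wiring it into the library objects, here is a rough pure-pandas sketch (our own illustration, not the library's implementation): slicing by a dimension simply repeats the same control-vs-treatment comparison within each dimension value. We use naive differences in means here, whereas the library additionally handles clustered inference.

```python
import pandas as pd

# Toy data (not the notebook's df): two variants across two cities
toy_df = pd.DataFrame({
    "experiment_group": ["control", "treatment_1"] * 4,
    "order_city_code": ["1", "1", "1", "1", "2", "2", "2", "2"],
    "order_value": [10.0, 11.0, 9.0, 12.0, 8.0, 8.5, 9.0, 9.5],
})

def diff_in_means(data):
    # Naive (unclustered) treatment-minus-control difference in means
    means = data.groupby("experiment_group")["order_value"].mean()
    return means["treatment_1"] - means["control"]

overall = diff_in_means(toy_df)
per_city = {
    city: diff_in_means(city_df)
    for city, city_df in toy_df.groupby("order_city_code")
}
print(overall)   # effect estimate over all orders (the "total" slice)
print(per_city)  # the same comparison within each city slice
```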
test__order_value = HypothesisTest(
    metric=metric__order_value,
    analysis_type="clustered_ols",
    analysis_config={"cluster_cols": ["customer_id"]},
    dimensions=[dimension__city_code]
)

cupac__model = TargetAggregation(
    agg_col="customer_id",
    target_col="order_delivery_time_in_minutes"
)

test__delivery_time = HypothesisTest(
    metric=metric__delivery_time,
    analysis_type="gee",
    analysis_config={
        "cluster_cols": ["customer_id"],
        "covariates": ["estimate_order_delivery_time_in_minutes"],
    },
    cupac_config={
        "cupac_model": cupac__model,
        "target_col": "order_delivery_time_in_minutes",
    }
)
Finally, we define the analysis plan, which packs all the tests and runs them on the data. The results will be displayed in a DataFrame.
Note that all tests included in a single analysis plan must run on exactly the same dataset (or datasets, when pre-experiment data is provided and used). If different datasets are needed, create a separate analysis plan for each dataset.
analysis_plan = AnalysisPlan(
    tests=[test__order_value, test__delivery_time],
    variants=variants,
    variant_col='experiment_group',
    alpha=0.01
)

results = analysis_plan.analyze(exp_data=df, pre_exp_data=pre_exp_df)
results_df = results.to_dataframe()
display(results_df)
# Summary of the analysis plan results (table + key stats)
print(results.summary())
|   | metric_alias | control_variant_name | treatment_variant_name | control_variant_mean | treatment_variant_mean | analysis_type | ate | ate_ci_lower | ate_ci_upper | p_value | std_error | dimension_name | dimension_value | alpha |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AOV | control | treatment_1 | 10.014383 | 10.017749 | clustered_ols | 0.003367 | -0.128481 | 0.135214 | 0.947560 | 0.051186 | __total_dimension | total | 0.01 |
| 1 | AOV | control | treatment_1 | 10.040822 | 10.008632 | clustered_ols | -0.032189 | -0.216810 | 0.152432 | 0.653358 | 0.071674 | order_city_code | 1 | 0.01 |
| 2 | AOV | control | treatment_1 | 9.987747 | 10.026601 | clustered_ols | 0.038854 | -0.141662 | 0.219370 | 0.579297 | 0.070081 | order_city_code | 2 | 0.01 |
| 3 | AOV | control | treatment_2 | 10.014383 | 9.966632 | clustered_ols | -0.047751 | -0.179076 | 0.083574 | 0.348965 | 0.050984 | __total_dimension | total | 0.01 |
| 4 | AOV | control | treatment_2 | 10.040822 | 9.959410 | clustered_ols | -0.081412 | -0.267607 | 0.104784 | 0.260059 | 0.072286 | order_city_code | 1 | 0.01 |
| 5 | AOV | control | treatment_2 | 9.987747 | 9.974018 | clustered_ols | -0.013729 | -0.191069 | 0.163611 | 0.841937 | 0.068848 | order_city_code | 2 | 0.01 |
| 6 | AVG DT | control | treatment_1 | 30.010740 | 29.890785 | gee | -0.132541 | -0.411551 | 0.146469 | 0.221096 | 0.108319 | __total_dimension | total | 0.01 |
| 7 | AVG DT | control | treatment_2 | 30.010740 | 29.942641 | gee | -0.020008 | -0.292616 | 0.252601 | 0.850055 | 0.105833 | __total_dimension | total | 0.01 |
Analysis plan results
Number of tests: 8
metric_alias control_variant_name treatment_variant_name control_variant_mean treatment_variant_mean analysis_type ate ate_ci_lower ate_ci_upper p_value std_error dimension_name dimension_value alpha
AOV control treatment_1 10.014383 10.017749 clustered_ols 0.003367 -0.128481 0.135214 0.947560 0.051186 __total_dimension total 0.01
AOV control treatment_1 10.040822 10.008632 clustered_ols -0.032189 -0.216810 0.152432 0.653358 0.071674 order_city_code 1 0.01
AOV control treatment_1 9.987747 10.026601 clustered_ols 0.038854 -0.141662 0.219370 0.579297 0.070081 order_city_code 2 0.01
AOV control treatment_2 10.014383 9.966632 clustered_ols -0.047751 -0.179076 0.083574 0.348965 0.050984 __total_dimension total 0.01
AOV control treatment_2 10.040822 9.959410 clustered_ols -0.081412 -0.267607 0.104784 0.260059 0.072286 order_city_code 1 0.01
AOV control treatment_2 9.987747 9.974018 clustered_ols -0.013729 -0.191069 0.163611 0.841937 0.068848 order_city_code 2 0.01
AVG DT control treatment_1 30.010740 29.890785 gee -0.132541 -0.411551 0.146469 0.221096 0.108319 __total_dimension total 0.01
AVG DT control treatment_2 30.010740 29.942641 gee -0.020008 -0.292616 0.252601 0.850055 0.105833 __total_dimension total 0.01
Single-run inference and model fit summary¶
When you need the full regression output (e.g. coefficient table, R-squared) for a single run, such as one metric and one variant comparison, use get_inference_results on the analysis object. The returned InferenceResults includes:
- .summary(): a high-level summary (ATE, std error, p-value, CI) and, when available, the full model fit table.
- .model_summary(): the underlying model fit (e.g. the statsmodels GEE/OLS table) as a string, or None if no fitted model was attached.
This is separate from power analysis, which runs many simulations and only uses p-values and point estimates; it does not use InferenceResults.
from cluster_experiments.experiment_analysis import (
    ClusteredOLSAnalysis,
    GeeExperimentAnalysis,
)

# Prepare data: one comparison (control vs treatment_1), total dimension
df_single = df[df["experiment_group"].isin(["control", "treatment_1"])].copy()

# Example 1: clustered OLS for order value
analysis_ols = ClusteredOLSAnalysis(
    cluster_cols=["customer_id"],
    target_col="order_value",
    treatment_col="experiment_group",
    treatment="treatment_1",
)
inference_ols = analysis_ols.get_inference_results(df_single, alpha=0.01)

print("=== InferenceResults summary (includes model fit when available) ===\n")
print(inference_ols.summary())

# Example 2: GEE for delivery time, showing model_summary() only
analysis_gee = GeeExperimentAnalysis(
    cluster_cols=["customer_id"],
    target_col="order_delivery_time_in_minutes",
    treatment_col="experiment_group",
    treatment="treatment_1",
)
inference_gee = analysis_gee.get_inference_results(df_single, alpha=0.01)
if inference_gee.model_summary():
    print("\n=== GEE model fit (statsmodels) ===\n")
    print(inference_gee.model_summary())
=== InferenceResults summary (includes model fit when available) ===
Inference results
ATE: 0.00336656
Std error: 0.0511863
p-value: 0.94756
Confidence interval (1 - alpha = 99.00%)
Lower: -0.128481
Upper: 0.135214
Model fit:
OLS Regression Results
==============================================================================
Dep. Variable: order_value R-squared: 0.000
Model: OLS Adj. R-squared: -0.000
Method: Least Squares F-statistic: 0.004326
Date: dom, 01 mar 2026 Prob (F-statistic): 0.948
Time: 07:43:00 Log-Likelihood: -14041.
No. Observations: 6622 AIC: 2.809e+04
Df Residuals: 6620 BIC: 2.810e+04
Df Model: 1
Covariance Type: cluster
====================================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------------
Intercept 10.0144 0.037 267.461 0.000 9.941 10.088
experiment_group 0.0034 0.051 0.066 0.948 -0.097 0.104
==============================================================================
Omnibus: 2.202 Durbin-Watson: 2.007
Prob(Omnibus): 0.333 Jarque-Bera (JB): 2.209
Skew: -0.029 Prob(JB): 0.331
Kurtosis: 2.933 Cond. No. 2.65
==============================================================================
Notes:
[1] Standard Errors are robust to cluster correlation (cluster)
=== GEE model fit (statsmodels) ===
GEE Regression Results
=============================================================================================
Dep. Variable: order_delivery_time_in_minutes No. Observations: 6622
Model: GEE No. clusters: 1932
Method: Generalized Min. cluster size: 1
Estimating Equations Max. cluster size: 12
Family: Gaussian Mean cluster size: 3.4
Dependence structure: Exchangeable Num. iterations: 4
Date: dom, 01 mar 2026 Scale: 25.322
Covariance type: robust Time: 07:43:00
====================================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------------
Intercept 30.0108 0.090 334.078 0.000 29.835 30.187
experiment_group -0.1200 0.124 -0.971 0.332 -0.362 0.122
==============================================================================
Skew: -0.0277 Kurtosis: 0.0673
Centered skew: 0.0106 Centered kurtosis: 0.2621
==============================================================================
Shortcut: creating a simple analysis plan without defining hypothesis tests¶
If you do not need custom hypothesis tests, you can use the AnalysisPlan.from_metrics method to create a simple analysis plan by defining only the metrics, dimensions and variants; the method automatically creates the necessary hypothesis tests and runs them on the data.
This works when all desired tests share the same analysis type, configuration and slicing dimensions. If the tests differ in any of these components, define the analysis plan explicitly as illustrated in the previous section.
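A from_metrics plan produces one comparison per combination of metric, treatment variant, and slice (the total slice plus each dimension value). A quick sanity check of the expected number of result rows for this notebook's setup, using only the standard library:

```python
from itertools import product

metrics = ["AVG DT", "AOV"]
treatments = ["treatment_1", "treatment_2"]  # control is the shared baseline
# the "total" slice plus one slice per dimension value
slices = [
    ("__total_dimension", "total"),
    ("order_city_code", "1"),
    ("order_city_code", "2"),
]

comparisons = list(product(metrics, treatments, slices))
print(len(comparisons))  # 2 metrics x 2 treatments x 3 slices = 12 result rows
```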
Below is an example of how to create a simple analysis plan using the same metrics, dimensions and variants as before. The results will be displayed in a DataFrame. Additionally, setting verbose=True logs each comparison performed when running the analysis plan.
simple_analysis_plan = AnalysisPlan.from_metrics(
    metrics=[metric__delivery_time, metric__order_value],
    variants=variants,
    variant_col='experiment_group',
    alpha=0.01,
    dimensions=[dimension__city_code],
    analysis_type="clustered_ols",
    analysis_config={"cluster_cols": ["customer_id"]},
)

simple_results = simple_analysis_plan.analyze(exp_data=df, verbose=True)
simple_results_df = simple_results.to_dataframe()
display(simple_results_df)
2026-03-01 07:43:01,702 - Metric: AVG DT, Treatment: treatment_1, Dimension: __total_dimension, Value: total
2026-03-01 07:43:01,731 - Metric: AVG DT, Treatment: treatment_1, Dimension: order_city_code, Value: 1
2026-03-01 07:43:01,747 - Metric: AVG DT, Treatment: treatment_1, Dimension: order_city_code, Value: 2
2026-03-01 07:43:01,762 - Metric: AVG DT, Treatment: treatment_2, Dimension: __total_dimension, Value: total
2026-03-01 07:43:01,779 - Metric: AVG DT, Treatment: treatment_2, Dimension: order_city_code, Value: 1
2026-03-01 07:43:01,792 - Metric: AVG DT, Treatment: treatment_2, Dimension: order_city_code, Value: 2
2026-03-01 07:43:01,806 - Metric: AOV, Treatment: treatment_1, Dimension: __total_dimension, Value: total
2026-03-01 07:43:01,824 - Metric: AOV, Treatment: treatment_1, Dimension: order_city_code, Value: 1
2026-03-01 07:43:01,837 - Metric: AOV, Treatment: treatment_1, Dimension: order_city_code, Value: 2
2026-03-01 07:43:01,850 - Metric: AOV, Treatment: treatment_2, Dimension: __total_dimension, Value: total
2026-03-01 07:43:01,868 - Metric: AOV, Treatment: treatment_2, Dimension: order_city_code, Value: 1
2026-03-01 07:43:01,881 - Metric: AOV, Treatment: treatment_2, Dimension: order_city_code, Value: 2
|   | metric_alias | control_variant_name | treatment_variant_name | control_variant_mean | treatment_variant_mean | analysis_type | ate | ate_ci_lower | ate_ci_upper | p_value | std_error | dimension_name | dimension_value | alpha |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AVG DT | control | treatment_1 | 30.010740 | 29.890785 | clustered_ols | -0.119954 | -0.438521 | 0.198612 | 0.332089 | 0.123675 | __total_dimension | total | 0.01 |
| 1 | AVG DT | control | treatment_1 | 29.933167 | 29.829952 | clustered_ols | -0.103215 | -0.561116 | 0.354685 | 0.561498 | 0.177768 | order_city_code | 1 | 0.01 |
| 2 | AVG DT | control | treatment_1 | 30.088890 | 29.949848 | clustered_ols | -0.139041 | -0.587720 | 0.309637 | 0.424740 | 0.174188 | order_city_code | 2 | 0.01 |
| 3 | AVG DT | control | treatment_2 | 30.010740 | 29.942641 | clustered_ols | -0.068099 | -0.383801 | 0.247603 | 0.578470 | 0.122563 | __total_dimension | total | 0.01 |
| 4 | AVG DT | control | treatment_2 | 29.933167 | 30.126895 | clustered_ols | 0.193728 | -0.261590 | 0.649046 | 0.273097 | 0.176766 | order_city_code | 1 | 0.01 |
| 5 | AVG DT | control | treatment_2 | 30.088890 | 29.754195 | clustered_ols | -0.334695 | -0.786060 | 0.116669 | 0.056130 | 0.175231 | order_city_code | 2 | 0.01 |
| 6 | AOV | control | treatment_1 | 10.014383 | 10.017749 | clustered_ols | 0.003367 | -0.128481 | 0.135214 | 0.947560 | 0.051186 | __total_dimension | total | 0.01 |
| 7 | AOV | control | treatment_1 | 10.040822 | 10.008632 | clustered_ols | -0.032189 | -0.216810 | 0.152432 | 0.653358 | 0.071674 | order_city_code | 1 | 0.01 |
| 8 | AOV | control | treatment_1 | 9.987747 | 10.026601 | clustered_ols | 0.038854 | -0.141662 | 0.219370 | 0.579297 | 0.070081 | order_city_code | 2 | 0.01 |
| 9 | AOV | control | treatment_2 | 10.014383 | 9.966632 | clustered_ols | -0.047751 | -0.179076 | 0.083574 | 0.348965 | 0.050984 | __total_dimension | total | 0.01 |
| 10 | AOV | control | treatment_2 | 10.040822 | 9.959410 | clustered_ols | -0.081412 | -0.267607 | 0.104784 | 0.260059 | 0.072286 | order_city_code | 1 | 0.01 |
| 11 | AOV | control | treatment_2 | 9.987747 | 9.974018 | clustered_ols | -0.013729 | -0.191069 | 0.163611 | 0.841937 | 0.068848 | order_city_code | 2 | 0.01 |
Bonus: Plugging in a custom analysis method¶
If you need a custom analysis method that is not covered by the standard analysis types provided by the library, you can define a custom analysis class and plug it into the analysis plan. Below is an example of how to do this.
In this example, we define a custom analysis class that extends the ClusteredOLSAnalysis class provided by the library; it would carry whatever custom logic the clustered OLS analysis needs.
The analysis plan is then created with a custom analysis type mapper that maps the custom analysis type string to the custom analysis class.
from cluster_experiments.experiment_analysis import ClusteredOLSAnalysis


# Assuming we define a meaningful custom ExperimentAnalysis class
class CustomExperimentAnalysis(ClusteredOLSAnalysis):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)


custom_simple_analysis_plan = AnalysisPlan.from_metrics(
    metrics=[metric__order_value],
    variants=variants,
    variant_col='experiment_group',
    alpha=0.01,
    dimensions=[dimension__city_code],
    analysis_type="custom_clustered_ols",
    analysis_config={"cluster_cols": ["customer_id"]},
    custom_analysis_type_mapper={"custom_clustered_ols": CustomExperimentAnalysis}
)

custom_simple_results = custom_simple_analysis_plan.analyze(exp_data=df)
custom_simple_results_df = custom_simple_results.to_dataframe()
display(custom_simple_results_df)
|   | metric_alias | control_variant_name | treatment_variant_name | control_variant_mean | treatment_variant_mean | analysis_type | ate | ate_ci_lower | ate_ci_upper | p_value | std_error | dimension_name | dimension_value | alpha |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AOV | control | treatment_1 | 10.014383 | 10.017749 | custom_clustered_ols | 0.003367 | -0.128481 | 0.135214 | 0.947560 | 0.051186 | __total_dimension | total | 0.01 |
| 1 | AOV | control | treatment_1 | 10.040822 | 10.008632 | custom_clustered_ols | -0.032189 | -0.216810 | 0.152432 | 0.653358 | 0.071674 | order_city_code | 1 | 0.01 |
| 2 | AOV | control | treatment_1 | 9.987747 | 10.026601 | custom_clustered_ols | 0.038854 | -0.141662 | 0.219370 | 0.579297 | 0.070081 | order_city_code | 2 | 0.01 |
| 3 | AOV | control | treatment_2 | 10.014383 | 9.966632 | custom_clustered_ols | -0.047751 | -0.179076 | 0.083574 | 0.348965 | 0.050984 | __total_dimension | total | 0.01 |
| 4 | AOV | control | treatment_2 | 10.040822 | 9.959410 | custom_clustered_ols | -0.081412 | -0.267607 | 0.104784 | 0.260059 | 0.072286 | order_city_code | 1 | 0.01 |
| 5 | AOV | control | treatment_2 | 9.987747 | 9.974018 | custom_clustered_ols | -0.013729 | -0.191069 | 0.163611 | 0.841937 | 0.068848 | order_city_code | 2 | 0.01 |
Now it's your turn! Have fun experimenting with the library and analyzing your data!