End-to-end example¶

We'll show the delta method functionality of cluster_experiments, which covers:

MDE calculation in cluster-randomized experiments with delta method and clustered standard errors.
MDE calculation with simple covariate adjustment using delta method.
MDE calculation with cupac (adjustment via ML models) using delta method.
Inference for all the cases above using delta method, and comparing with OLS with clustered standard errors.

Data generation¶

We create some pre-experimental data that we could use to run power analysis.

We have a dataframe with orders and customers, each customer may have many orders, and the two target metrics are delivery time and order value.

In [ ]:

Copied!





import random
import warnings
import time

import numpy as np
import pandas as pd
import seaborn as sns

from cluster_experiments import NormalPowerAnalysis


warnings.filterwarnings('ignore')
np.random.seed(42)
random.seed(42)

# Constants
N = 100000  # Number of orders
NUM_CUSTOMERS = 6000  # Unique customers

def generate_customers(num_customers):
    """Create a customer table with age, order history and a noisy mean order value.

    Args:
        num_customers: number of customers to create; ids run from 1 to
            ``num_customers``.

    Returns:
        DataFrame with columns ``customer_id``, ``customer_age``,
        ``mean_order_value`` and ``historical_orders`` (one row per customer).
    """
    ids = np.arange(1, num_customers + 1)
    # Random draws happen in this exact order (ages, history, noise) so that a
    # fixed global NumPy seed always reproduces the same customers.
    ages = np.random.randint(20, 60, size=num_customers)
    past_orders = np.random.poisson(5, size=num_customers)

    # Mean order value grows with age, is slightly lower for customers aged
    # 30 or less and slightly higher for customers with 8+ historical orders.
    young_penalty = 2 * (ages <= 30)
    frequent_bonus = 2 * (past_orders >= 8)
    customer_noise = np.random.normal(0, 15, size=num_customers)
    mean_values = 50 + 2.5 * ages - young_penalty + frequent_bonus + customer_noise

    columns = {
        "customer_id": ids,
        "customer_age": ages,
        "mean_order_value": mean_values,
        "historical_orders": past_orders,
    }
    return pd.DataFrame(columns)

def sample_orders(customers, num_orders):
    """Draw ``num_orders`` customer ids at random and attach the customer attributes.

    Args:
        customers: DataFrame with a ``customer_id`` column plus any attributes.
        num_orders: number of order rows to produce.

    Returns:
        DataFrame with one row per sampled order, holding ``customer_id`` and
        every other column of ``customers`` (left-joined on ``customer_id``).
    """
    drawn_ids = np.random.choice(customers["customer_id"], size=num_orders)
    orders = pd.DataFrame({"customer_id": drawn_ids})
    # Left join keeps one row per drawn id, in draw order.
    return orders.merge(customers, on="customer_id", how="left")

def generate_orders(customers, num_orders):
    """Build the order-level dataset: sampled customers plus simulated order fields.

    Args:
        customers: customer table; must contain ``customer_id`` and
            ``mean_order_value``.
        num_orders: number of orders to simulate.

    Returns:
        DataFrame with the sampled customer columns (minus ``mean_order_value``)
        plus ``order_value``, ``delivery_time``, ``city`` and ``date``.
    """
    possible_dates = pd.date_range(start="2024-01-01", end="2024-03-31")
    orders = sample_orders(customers, num_orders)
    n_rows = len(orders)

    # Draw in the same sequence the columns are added, so a fixed global seed
    # yields the same data: order noise, delivery noise, city, date.
    order_noise = np.random.normal(0, 15, size=n_rows)
    delivery_noise = np.random.normal(0, 0.5, size=n_rows)
    cities = np.random.choice(["NYC", "LA"], size=n_rows)
    dates = np.random.choice(possible_dates, size=n_rows)

    orders = orders.assign(
        order_value=orders["mean_order_value"] + order_noise,
        # Delivery time oscillates with the customer id around 8.
        delivery_time=8 + np.sin(orders["customer_id"] / 10) + delivery_noise,
        city=cities,
        date=dates,
    )
    # The per-customer mean was only needed to simulate order_value.
    return orders.drop(columns=["mean_order_value"])


def plot_mdes(mdes, x_lim=40, y_value=3):
    """Plot MDE against experiment length with a dashed red reference line.

    Args:
        mdes: records convertible to a DataFrame with ``experiment_length``
            and ``mde`` columns.
        x_lim: right end of the horizontal reference line (it starts at 0).
        y_value: height of the horizontal reference line.
    """
    mde_frame = pd.DataFrame(mdes)
    sns.lineplot(data=mde_frame, x="experiment_length", y="mde")

    # Horizontal reference line from x=0 to x=x_lim at the target value.
    reference_style = {"color": "red", "linestyle": "--"}
    sns.lineplot(x=[0, x_lim], y=[y_value, y_value], **reference_style)

def get_length(mdes, mde_value):
    """Return the shortest experiment length whose MDE is at most ``mde_value``.

    Args:
        mdes: iterable of mappings, each with ``"experiment_length"`` and
            ``"mde"`` keys.
        mde_value: largest acceptable MDE (inclusive).

    Returns:
        The smallest ``"experiment_length"`` among the entries whose ``"mde"``
        is less than or equal to ``mde_value``, or ``None`` when no entry
        qualifies (including when ``mdes`` is empty).
    """
    # A single inclusive filter: the previous version checked `<=` in the guard
    # but `<` when taking the minimum, so an entry exactly equal to the target
    # made min() run on an empty sequence and raise ValueError.
    return min(
        (
            entry["experiment_length"]
            for entry in mdes
            if entry["mde"] <= mde_value
        ),
        default=None,
    )

def get_length_print(mdes, mde_value):
    """Print the minimum experiment length found by ``get_length`` for ``mde_value``.

    Args:
        mdes: MDE records passed through to ``get_length``.
        mde_value: target MDE passed through to ``get_length``.
    """
    shortest = get_length(mdes, mde_value)
    message = f"Minimum experiment length to detect MDE of {mde_value}: {shortest}"
    print(message)

# Run the pipeline
customers = generate_customers(NUM_CUSTOMERS)
experiment_data = generate_orders(customers, N).assign(
    one=1
)

print(experiment_data.head())
import random
import warnings
import time

import numpy as np
import pandas as pd
import seaborn as sns

from cluster_experiments import NormalPowerAnalysis


warnings.filterwarnings('ignore')
np.random.seed(42)
random.seed(42)

# Constants
N = 100000  # Number of orders
NUM_CUSTOMERS = 6000  # Unique customers

def generate_customers(num_customers):
    """Create a customer table with age, order history and a noisy mean order value.

    Args:
        num_customers: number of customers to create; ids run from 1 to
            ``num_customers``.

    Returns:
        DataFrame with columns ``customer_id``, ``customer_age``,
        ``mean_order_value`` and ``historical_orders`` (one row per customer).
    """
    ids = np.arange(1, num_customers + 1)
    # Random draws happen in this exact order (ages, history, noise) so that a
    # fixed global NumPy seed always reproduces the same customers.
    ages = np.random.randint(20, 60, size=num_customers)
    past_orders = np.random.poisson(5, size=num_customers)

    # Mean order value grows with age, is slightly lower for customers aged
    # 30 or less and slightly higher for customers with 8+ historical orders.
    young_penalty = 2 * (ages <= 30)
    frequent_bonus = 2 * (past_orders >= 8)
    customer_noise = np.random.normal(0, 15, size=num_customers)
    mean_values = 50 + 2.5 * ages - young_penalty + frequent_bonus + customer_noise

    columns = {
        "customer_id": ids,
        "customer_age": ages,
        "mean_order_value": mean_values,
        "historical_orders": past_orders,
    }
    return pd.DataFrame(columns)

def sample_orders(customers, num_orders):
    """Draw ``num_orders`` customer ids at random and attach the customer attributes.

    Args:
        customers: DataFrame with a ``customer_id`` column plus any attributes.
        num_orders: number of order rows to produce.

    Returns:
        DataFrame with one row per sampled order, holding ``customer_id`` and
        every other column of ``customers`` (left-joined on ``customer_id``).
    """
    drawn_ids = np.random.choice(customers["customer_id"], size=num_orders)
    orders = pd.DataFrame({"customer_id": drawn_ids})
    # Left join keeps one row per drawn id, in draw order.
    return orders.merge(customers, on="customer_id", how="left")

def generate_orders(customers, num_orders):
    """Build the order-level dataset: sampled customers plus simulated order fields.

    Args:
        customers: customer table; must contain ``customer_id`` and
            ``mean_order_value``.
        num_orders: number of orders to simulate.

    Returns:
        DataFrame with the sampled customer columns (minus ``mean_order_value``)
        plus ``order_value``, ``delivery_time``, ``city`` and ``date``.
    """
    possible_dates = pd.date_range(start="2024-01-01", end="2024-03-31")
    orders = sample_orders(customers, num_orders)
    n_rows = len(orders)

    # Draw in the same sequence the columns are added, so a fixed global seed
    # yields the same data: order noise, delivery noise, city, date.
    order_noise = np.random.normal(0, 15, size=n_rows)
    delivery_noise = np.random.normal(0, 0.5, size=n_rows)
    cities = np.random.choice(["NYC", "LA"], size=n_rows)
    dates = np.random.choice(possible_dates, size=n_rows)

    orders = orders.assign(
        order_value=orders["mean_order_value"] + order_noise,
        # Delivery time oscillates with the customer id around 8.
        delivery_time=8 + np.sin(orders["customer_id"] / 10) + delivery_noise,
        city=cities,
        date=dates,
    )
    # The per-customer mean was only needed to simulate order_value.
    return orders.drop(columns=["mean_order_value"])


def plot_mdes(mdes, x_lim=40, y_value=3):
    """Plot MDE against experiment length with a dashed red reference line.

    Args:
        mdes: records convertible to a DataFrame with ``experiment_length``
            and ``mde`` columns.
        x_lim: right end of the horizontal reference line (it starts at 0).
        y_value: height of the horizontal reference line.
    """
    mde_frame = pd.DataFrame(mdes)
    sns.lineplot(data=mde_frame, x="experiment_length", y="mde")

    # Horizontal reference line from x=0 to x=x_lim at the target value.
    reference_style = {"color": "red", "linestyle": "--"}
    sns.lineplot(x=[0, x_lim], y=[y_value, y_value], **reference_style)

def get_length(mdes, mde_value):
    """Return the shortest experiment length whose MDE is at most ``mde_value``.

    Args:
        mdes: iterable of mappings, each with ``"experiment_length"`` and
            ``"mde"`` keys.
        mde_value: largest acceptable MDE (inclusive).

    Returns:
        The smallest ``"experiment_length"`` among the entries whose ``"mde"``
        is less than or equal to ``mde_value``, or ``None`` when no entry
        qualifies (including when ``mdes`` is empty).
    """
    # A single inclusive filter: the previous version checked `<=` in the guard
    # but `<` when taking the minimum, so an entry exactly equal to the target
    # made min() run on an empty sequence and raise ValueError.
    return min(
        (
            entry["experiment_length"]
            for entry in mdes
            if entry["mde"] <= mde_value
        ),
        default=None,
    )

def get_length_print(mdes, mde_value):
    """Print the minimum experiment length found by ``get_length`` for ``mde_value``.

    Args:
        mdes: MDE records passed through to ``get_length``.
        mde_value: target MDE passed through to ``get_length``.
    """
    shortest = get_length(mdes, mde_value)
    message = f"Minimum experiment length to detect MDE of {mde_value}: {shortest}"
    print(message)

# Run the pipeline
customers = generate_customers(NUM_CUSTOMERS)
experiment_data = generate_orders(customers, N).assign(
    one=1
)

print(experiment_data.head())

   customer_id  customer_age  historical_orders  order_value  delivery_time  \
0         4162            49                  3   177.806483       9.875481   
1         1076            58                  5   194.904074       8.637389   
2         5361            38                  2   153.913243       9.020768   
3         2036            35                  4   138.692537       8.016001   
4          390            22                  6    92.457381       8.753228   

  city       date  one  
0   LA 2024-03-08    1  
1   LA 2024-03-08    1  
2  NYC 2024-02-06    1  
3  NYC 2024-02-05    1  
4  NYC 2024-03-29    1

Power analysis¶

Customer-level split¶

Assume we randomize at customer level. In this case we need to use the delta method to run the power analysis. The following code shows the MDE on order_value for several experiment lengths (1, 2, 3 and 4 weeks).

In [2]:

Copied!





mde = NormalPowerAnalysis.from_dict({
    "splitter": "clustered",
    "analysis": "delta",
    "time_col": "date",
    "target_col": "order_value",
    "scale_col": "one",
    "cluster_cols": ["customer_id"]
})
mde = NormalPowerAnalysis.from_dict({
    "splitter": "clustered",
    "analysis": "delta",
    "time_col": "date",
    "target_col": "order_value",
    "scale_col": "one",
    "cluster_cols": ["customer_id"]
})

In [3]:

Copied!





start = time.time()
mdes = mde.mde_time_line(
    experiment_data,
    powers=[0.8],
    experiment_length=(7, 14, 21, 28),
    n_simulations=5
)
print(f"Time taken for MDE calculation: {time.time() - start:.2f} seconds")
start = time.time()
mdes = mde.mde_time_line(
    experiment_data,
    powers=[0.8],
    experiment_length=(7, 14, 21, 28),
    n_simulations=5
)
print(f"Time taken for MDE calculation: {time.time() - start:.2f} seconds")

Time taken for MDE calculation: 0.44 seconds

In [4]:

Copied!

mde_value = 1.4
plot_mdes(mdes, y_value=mde_value, x_lim=30)
mde_value = 1.4
plot_mdes(mdes, y_value=mde_value, x_lim=30)

No description has been provided for this image

In [5]:

Copied!

get_length_print(mdes, mde_value)
get_length_print(mdes, mde_value)

Minimum experiment length to detect MDE of 1.4: None

Check that results with clustered ols are the same

In [6]:

Copied!





mde = NormalPowerAnalysis.from_dict({
    "splitter": "clustered",
    "analysis": "clustered_ols",
    "time_col": "date",
    "target_col": "order_value",
    "cluster_cols": ["customer_id"]
})

start = time.time()
mdes = mde.mde_time_line(
    experiment_data,
    powers=[0.8],
    experiment_length=(7, 14, 21, 28),
    n_simulations=5
)
print(f"Time taken for OLS MDE calculation: {time.time() - start:.2f} seconds")

plot_mdes(mdes, y_value=mde_value, x_lim=30)
get_length_print(mdes, mde_value)
mde = NormalPowerAnalysis.from_dict({
    "splitter": "clustered",
    "analysis": "clustered_ols",
    "time_col": "date",
    "target_col": "order_value",
    "cluster_cols": ["customer_id"]
})

start = time.time()
mdes = mde.mde_time_line(
    experiment_data,
    powers=[0.8],
    experiment_length=(7, 14, 21, 28),
    n_simulations=5
)
print(f"Time taken for OLS MDE calculation: {time.time() - start:.2f} seconds")

plot_mdes(mdes, y_value=mde_value, x_lim=30)
get_length_print(mdes, mde_value)

Time taken for OLS MDE calculation: 0.63 seconds
Minimum experiment length to detect MDE of 1.4: None

Results with clustered ols are the same as with delta method, but the delta method is faster and more efficient.

The age of the customer is a good predictor of the order value, and it is not impacted by the treatment. We can use this covariate to adjust our analysis and decrease the MDE. We see that the MDE is smaller in this case, because we are using a covariate that is not impacted by the treatment.

In delta method, we use this form of CUPED for variance reduction:

$$ ATE = \frac{\sum_{i=1}^N T_i (Y_i - \sum_{j=1}^m \theta_j (Z_{ij} - E[Z_j]) N_i)}{\sum_{i=1}^N T_i N_i} - \frac{\sum_{i=1}^N (1 - T_i) (Y_i - \sum_{j=1}^m \theta_j (Z_{ij} - E[Z_j]) N_i)}{\sum_{i=1}^N (1 - T_i) N_i} $$

that is, we apply variance reduction to the numerator, but not the denominator. An issue is open to add the same variance reduction to the denominator, but it is not implemented yet. This follows the same notation as the Deng et al. paper on CUPED.

In [7]:

Copied!





mde_variance_reduction = NormalPowerAnalysis.from_dict({
    "splitter": "clustered",
    "analysis": "delta",
    "time_col": "date",
    "target_col": "order_value",
    "scale_col": "one",
    "cluster_cols": ["customer_id"],
    "covariates": ["customer_age"]
})
mde_variance_reduction = NormalPowerAnalysis.from_dict({
    "splitter": "clustered",
    "analysis": "delta",
    "time_col": "date",
    "target_col": "order_value",
    "scale_col": "one",
    "cluster_cols": ["customer_id"],
    "covariates": ["customer_age"]
})

In [8]:

Copied!





# Compute the covariate-adjusted delta-method MDE for each experiment length,
# timing the whole loop.
start = time.time()
mdes = []
# should be simplified with aggregation logic in NormalPowerAnalysis
for experiment_length in [7, 14, 21, 28]:
    # Keep orders dated up to and including 2024-01-01 + experiment_length days.
    filtered_data = experiment_data[experiment_data["date"] <= pd.Timestamp("2024-01-01") + pd.Timedelta(days=experiment_length)]
    # Collapse to one row per customer: total order value, number of orders
    # (sum of "one"), mean age and the date of the first order in the window.
    aggregated_data = filtered_data.groupby("customer_id").agg({
        "order_value": "sum",
        "one": "sum",
        "customer_age": "mean",
        "date": "min"
    }).reset_index()

    # NOTE: this rebinds `mde`, which earlier cells used for the
    # NormalPowerAnalysis object; from here on it holds the list of results.
    mde = mde_variance_reduction.mde_time_line(
        aggregated_data,
        powers=[0.8],
        experiment_length=[experiment_length],
        n_simulations=5
    )
    # A single length was requested, so only the first result is kept.
    mdes.append(mde[0])
print(f"Time taken for MDE with covariate calculation: {time.time() - start:.2f} seconds")
# Compute the covariate-adjusted delta-method MDE for each experiment length,
# timing the whole loop.
start = time.time()
mdes = []
# should be simplified with aggregation logic in NormalPowerAnalysis
for experiment_length in [7, 14, 21, 28]:
    # Keep orders dated up to and including 2024-01-01 + experiment_length days.
    filtered_data = experiment_data[experiment_data["date"] <= pd.Timestamp("2024-01-01") + pd.Timedelta(days=experiment_length)]
    # Collapse to one row per customer: total order value, number of orders
    # (sum of "one"), mean age and the date of the first order in the window.
    aggregated_data = filtered_data.groupby("customer_id").agg({
        "order_value": "sum",
        "one": "sum",
        "customer_age": "mean",
        "date": "min"
    }).reset_index()

    # NOTE: this rebinds `mde`, which earlier cells used for the
    # NormalPowerAnalysis object; from here on it holds the list of results.
    mde = mde_variance_reduction.mde_time_line(
        aggregated_data,
        powers=[0.8],
        experiment_length=[experiment_length],
        n_simulations=5
    )
    # A single length was requested, so only the first result is kept.
    mdes.append(mde[0])
print(f"Time taken for MDE with covariate calculation: {time.time() - start:.2f} seconds")

Time taken for MDE with covariate calculation: 0.20 seconds

In [9]:

Copied!

plot_mdes(mdes, y_value=mde_value, x_lim=30)
plot_mdes(mdes, y_value=mde_value, x_lim=30)

In [10]:

Copied!

get_length_print(mdes, mde_value)
get_length_print(mdes, mde_value)

Minimum experiment length to detect MDE of 1.4: 21

Now we repeat the same using clustered standard errors with OLS. We see that the results are the same as with delta method.

In [11]:

Copied!





mde_variance_reduction_ols = NormalPowerAnalysis.from_dict({
    "splitter": "clustered",
    "analysis": "clustered_ols",
    "time_col": "date",
    "target_col": "order_value",
    "cluster_cols": ["customer_id"],
    "covariates": ["customer_age"]
})

start = time.time()
mdes_ols_covariates = mde_variance_reduction_ols.mde_time_line(
    experiment_data,
    powers=[0.8],
    experiment_length=[7, 14, 21, 28],
    n_simulations=5
)
print(f"Time taken for OLS with covariate MDE calculation: {time.time() - start:.2f} seconds")
mde_variance_reduction_ols = NormalPowerAnalysis.from_dict({
    "splitter": "clustered",
    "analysis": "clustered_ols",
    "time_col": "date",
    "target_col": "order_value",
    "cluster_cols": ["customer_id"],
    "covariates": ["customer_age"]
})

start = time.time()
mdes_ols_covariates = mde_variance_reduction_ols.mde_time_line(
    experiment_data,
    powers=[0.8],
    experiment_length=[7, 14, 21, 28],
    n_simulations=5
)
print(f"Time taken for OLS with covariate MDE calculation: {time.time() - start:.2f} seconds")

Time taken for OLS with covariate MDE calculation: 0.66 seconds

In [12]:

Copied!

plot_mdes(mdes_ols_covariates, y_value=mde_value, x_lim=30)
plot_mdes(mdes_ols_covariates, y_value=mde_value, x_lim=30)

This is the last example of power analysis.

We assume data from Feb onwards is used to run the power analysis (it's the data we take as experimental, though it is actually pre-experimental data). Data from Jan is taken as pre-experimental data. We simulate that the experiment happened after Feb. We do this because we need pre-experimental data to train the Cupac Model, which we didn't do before.

We use a cupac model with customer id, historical orders and age as features (order value has a non-linear relationship with customer id, which is why we don't add it as a single covariate). We use the same data as before, but we train the model with pre-experimental data. We see that the MDE is smaller in this case, because we are using a better covariate that is not impacted by the treatment.

In this case we cannot init by dict because we're using cupac, but happy to review a PR that includes this :) In this case we need to create splitter and analysis classes.

In [13]:

Copied!

pre_experiment_df = experiment_data.query("date < '2024-02-01'")
experiment_df = experiment_data.query("date >= '2024-02-01'")
pre_experiment_df = experiment_data.query("date < '2024-02-01'")
experiment_df = experiment_data.query("date >= '2024-02-01'")

In [14]:

Copied!





from cluster_experiments import ClusteredSplitter, DeltaMethodAnalysis, ClusteredOLSAnalysis
from sklearn.ensemble import HistGradientBoostingRegressor


splitter = ClusteredSplitter(
    cluster_cols=["customer_id"],
)
delta = DeltaMethodAnalysis(
    target_col="order_value",
    scale_col="one",
    covariates=["estimate_order_value"],
    cluster_cols=["customer_id"],
)
ols = ClusteredOLSAnalysis(
    target_col="order_value",
    covariates=["estimate_order_value"],
    cluster_cols=["customer_id"],
)

pwr = NormalPowerAnalysis(
    splitter=splitter,
    analysis=delta,
    cupac_model=HistGradientBoostingRegressor(),
    time_col="date",
    target_col="order_value",
    scale_col="one",
    features_cupac_model=["customer_id", "customer_age", "historical_orders"],
)

pwr_ols = NormalPowerAnalysis(
    splitter=splitter,
    analysis=ols,
    time_col="date",
    target_col="order_value",
    cupac_model=HistGradientBoostingRegressor(),
    features_cupac_model=["customer_id", "customer_age", "historical_orders"],
)
from cluster_experiments import ClusteredSplitter, DeltaMethodAnalysis, ClusteredOLSAnalysis
from sklearn.ensemble import HistGradientBoostingRegressor


splitter = ClusteredSplitter(
    cluster_cols=["customer_id"],
)
delta = DeltaMethodAnalysis(
    target_col="order_value",
    scale_col="one",
    covariates=["estimate_order_value"],
    cluster_cols=["customer_id"],
)
ols = ClusteredOLSAnalysis(
    target_col="order_value",
    covariates=["estimate_order_value"],
    cluster_cols=["customer_id"],
)

pwr = NormalPowerAnalysis(
    splitter=splitter,
    analysis=delta,
    cupac_model=HistGradientBoostingRegressor(),
    time_col="date",
    target_col="order_value",
    scale_col="one",
    features_cupac_model=["customer_id", "customer_age", "historical_orders"],
)

pwr_ols = NormalPowerAnalysis(
    splitter=splitter,
    analysis=ols,
    time_col="date",
    target_col="order_value",
    cupac_model=HistGradientBoostingRegressor(),
    features_cupac_model=["customer_id", "customer_age", "historical_orders"],
)

In [15]:

Copied!





# Collect CUPAC-adjusted MDEs per experiment length for the delta-method
# analysis (mdes_cupac) and the clustered-OLS analysis (mde_cupac_ols).
mdes_cupac = []
mde_cupac_ols = []

# range(7, 35, 7) yields 7, 14, 21 and 28 days.
for experiment_length in range(7, 35, 7):
    # Keep orders dated up to and including 2024-02-01 + experiment_length days.
    filtered_data = experiment_df[experiment_df["date"] <= pd.Timestamp("2024-02-01") + pd.Timedelta(days=experiment_length)]
    # One row per customer for the delta-method analysis.
    aggregated_data = filtered_data.groupby("customer_id").agg({
        "order_value": "sum",
        "one": "sum",
        "customer_age": "mean",
        "historical_orders": "mean",
        "date": "min"
    }).reset_index()
    # Same aggregation on the pre-experiment data. It does not depend on
    # experiment_length, yet it is recomputed on every iteration.
    pre_experiment_aggregated_data = pre_experiment_df.groupby("customer_id").agg({
        "order_value": "sum",
        "one": "sum",
        "customer_age": "mean",
        "historical_orders": "mean",
        "date": "min"
    }).reset_index()

    # Delta method: customer-level (aggregated) experiment and pre-experiment data.
    timer_cupac_start = time.time()
    mde = pwr.mde_time_line(
        aggregated_data,
        pre_experiment_aggregated_data,
        powers=[0.8],
        experiment_length=[experiment_length],
        n_simulations=5
    )
    timer_cupac_end = time.time()
    print(f"Cupac MDE calculation for length {experiment_length} took {timer_cupac_end - timer_cupac_start:.2f} seconds")

    # Clustered OLS: order-level (non-aggregated) experiment and pre-experiment data.
    timer_ols_start = time.time()
    mde_ols = pwr_ols.mde_time_line(
        filtered_data,
        pre_experiment_df,
        powers=[0.8],
        experiment_length=[experiment_length],
        n_simulations=5
    )
    timer_ols_end = time.time()
    print(f"Cupac OLS MDE calculation for length {experiment_length} took {timer_ols_end - timer_ols_start:.2f} seconds")
    # A single length was requested per call, so keep the first result of each.
    mdes_cupac.append(mde[0])
    mde_cupac_ols.append(mde_ols[0])


# Only the delta-method results are plotted here.
plot_mdes(mdes_cupac, y_value=mde_value, x_lim=30)
# Collect CUPAC-adjusted MDEs per experiment length for the delta-method
# analysis (mdes_cupac) and the clustered-OLS analysis (mde_cupac_ols).
mdes_cupac = []
mde_cupac_ols = []

# range(7, 35, 7) yields 7, 14, 21 and 28 days.
for experiment_length in range(7, 35, 7):
    # Keep orders dated up to and including 2024-02-01 + experiment_length days.
    filtered_data = experiment_df[experiment_df["date"] <= pd.Timestamp("2024-02-01") + pd.Timedelta(days=experiment_length)]
    # One row per customer for the delta-method analysis.
    aggregated_data = filtered_data.groupby("customer_id").agg({
        "order_value": "sum",
        "one": "sum",
        "customer_age": "mean",
        "historical_orders": "mean",
        "date": "min"
    }).reset_index()
    # Same aggregation on the pre-experiment data. It does not depend on
    # experiment_length, yet it is recomputed on every iteration.
    pre_experiment_aggregated_data = pre_experiment_df.groupby("customer_id").agg({
        "order_value": "sum",
        "one": "sum",
        "customer_age": "mean",
        "historical_orders": "mean",
        "date": "min"
    }).reset_index()

    # Delta method: customer-level (aggregated) experiment and pre-experiment data.
    timer_cupac_start = time.time()
    mde = pwr.mde_time_line(
        aggregated_data,
        pre_experiment_aggregated_data,
        powers=[0.8],
        experiment_length=[experiment_length],
        n_simulations=5
    )
    timer_cupac_end = time.time()
    print(f"Cupac MDE calculation for length {experiment_length} took {timer_cupac_end - timer_cupac_start:.2f} seconds")

    # Clustered OLS: order-level (non-aggregated) experiment and pre-experiment data.
    timer_ols_start = time.time()
    mde_ols = pwr_ols.mde_time_line(
        filtered_data,
        pre_experiment_df,
        powers=[0.8],
        experiment_length=[experiment_length],
        n_simulations=5
    )
    timer_ols_end = time.time()
    print(f"Cupac OLS MDE calculation for length {experiment_length} took {timer_ols_end - timer_ols_start:.2f} seconds")
    # A single length was requested per call, so keep the first result of each.
    mdes_cupac.append(mde[0])
    mde_cupac_ols.append(mde_ols[0])


# Only the delta-method results are plotted here.
plot_mdes(mdes_cupac, y_value=mde_value, x_lim=30)

Cupac MDE calculation for length 7 took 0.62 seconds
Cupac OLS MDE calculation for length 7 took 0.73 seconds
Cupac MDE calculation for length 14 took 0.06 seconds
Cupac OLS MDE calculation for length 14 took 0.15 seconds
Cupac MDE calculation for length 21 took 0.06 seconds
Cupac OLS MDE calculation for length 21 took 0.21 seconds
Cupac MDE calculation for length 28 took 0.06 seconds
Cupac OLS MDE calculation for length 28 took 0.26 seconds

In [16]:

Copied!

plot_mdes(mde_cupac_ols, y_value=mde_value, x_lim=30)
plot_mdes(mde_cupac_ols, y_value=mde_value, x_lim=30)

In [17]:

Copied!

get_length_print(mdes_cupac, mde_value)
get_length_print(mdes_cupac, mde_value)

Minimum experiment length to detect MDE of 1.4: 14

Analysis¶

Now we run the analysis assuming that the experiment ran from 2024-03-01 for about 3 weeks. We simulate some fake effects (1.5 in order value and 0.2 in delivery time). We use functionalities in cluster_experiments to simulate the experiment.

In [18]:

Copied!





real_experiment_data = experiment_data.query("date >= '2024-03-01' and date < '2024-03-21'")
real_pre_experiment_data = experiment_data.query("date < '2024-03-01'")
real_experiment_data

from cluster_experiments import ClusteredSplitter, ConstantPerturbator


# Add effect on the order value
splitter = ClusteredSplitter(
    cluster_cols=["customer_id"],
)
perturbator = ConstantPerturbator(
    target_col="order_value",
)

real_experiment_data = splitter.assign_treatment_df(real_experiment_data)
real_experiment_data = perturbator.perturbate(real_experiment_data, average_effect=1.5)

# Add effect on the delivery time
perturbator = ConstantPerturbator(
    target_col="delivery_time",
)
real_experiment_data = perturbator.perturbate(real_experiment_data, average_effect=.2)
real_experiment_data
real_experiment_data = experiment_data.query("date >= '2024-03-01' and date < '2024-03-21'")
real_pre_experiment_data = experiment_data.query("date < '2024-03-01'")
real_experiment_data

from cluster_experiments import ClusteredSplitter, ConstantPerturbator


# Add effect on the order value
splitter = ClusteredSplitter(
    cluster_cols=["customer_id"],
)
perturbator = ConstantPerturbator(
    target_col="order_value",
)

real_experiment_data = splitter.assign_treatment_df(real_experiment_data)
real_experiment_data = perturbator.perturbate(real_experiment_data, average_effect=1.5)

# Add effect on the delivery time
perturbator = ConstantPerturbator(
    target_col="delivery_time",
)
real_experiment_data = perturbator.perturbate(real_experiment_data, average_effect=.2)
real_experiment_data

Out[18]:

	customer_id	customer_age	historical_orders	order_value	delivery_time	city	date	one	treatment
0	4162	49	3	179.306483	10.075481	LA	2024-03-08	1	B
1	1076	58	5	196.404074	8.837389	LA	2024-03-08	1	B
2	5142	31	6	139.203140	7.468698	LA	2024-03-14	1	A
3	4825	54	2	184.653481	7.267328	LA	2024-03-16	1	A
4	2569	36	4	154.003198	7.125005	NYC	2024-03-14	1	A
...	...	...	...	...	...	...	...	...	...
22071	3869	38	3	171.082504	7.156785	LA	2024-03-13	1	B
22072	2598	41	4	147.810428	9.690305	NYC	2024-03-08	1	A
22073	2672	46	5	180.692472	7.126516	LA	2024-03-13	1	B
22074	3529	29	6	103.223538	9.117135	NYC	2024-03-15	1	B
22075	1834	38	3	142.912978	8.840475	NYC	2024-03-02	1	A

22076 rows × 9 columns

We run the analysis with and without covariate adjustment. We use city as a dimension, showing results in the overall population and by city.

We compare with OLS with clustered standard errors. We see that results are exactly the same.

In [19]:

Copied!





from cluster_experiments import (
    AnalysisPlan,
    HypothesisTest,
    Variant,
    RatioMetric,
    SimpleMetric,
)

# Delta-method analysis plan, clustered by customer_id.
# Both metrics are declared as ratios (numerator / denominator) using the
# "one" column as denominator; that column is 1 on the rows displayed above,
# so each ratio reads as a per-order average.
# The "city" dimension makes the plan report results for the total
# population and separately for NYC and LA.
plan = AnalysisPlan.from_metrics_dict({
    "metrics": [
        {"alias": "AOV", "numerator_name": "order_value", "denominator_name": "one"},
        {"alias": "delivery_time", "numerator_name": "delivery_time", "denominator_name": "one"},
    ],
    "variants": [
        {"name": "A", "is_control": True},
        {"name": "B", "is_control": False},
    ],
    "variant_col": "treatment",
    "alpha": 0.05,
    "dimensions": [
        {"name": "city", "values": ["NYC", "LA"]},
    ],
    "analysis_type": "delta",
    "analysis_config": {"cluster_cols": ["customer_id"]},
})

We see that the CIs contain the true effects, although the CIs are fairly wide.

In [20]:

Copied!

# Run the analysis plan once on the order-level experiment data and keep the
# results as a dataframe; the trailing bare expression displays it.
df_plan = plan.analyze(real_experiment_data).to_dataframe()
df_plan

Out[20]:

	metric_alias	control_variant_name	treatment_variant_name	control_variant_mean	treatment_variant_mean	analysis_type	ate	ate_ci_lower	ate_ci_upper	p_value	std_error	dimension_name	dimension_value	alpha
0	AOV	A	B	149.944691	150.758469	delta	0.813778	-1.102842	2.730398	0.405307	0.977885	__total_dimension	total	0.05
1	AOV	A	B	150.312273	151.164539	delta	0.852266	-1.279811	2.984343	0.433353	1.087814	city	NYC	0.05
2	AOV	A	B	149.584125	150.363999	delta	0.779873	-1.365121	2.924867	0.476093	1.094405	city	LA	0.05
3	delivery_time	A	B	7.996075	8.209871	delta	0.213796	0.171630	0.255962	0.000000	0.021514	__total_dimension	total	0.05
4	delivery_time	A	B	7.991050	8.202004	delta	0.210954	0.163212	0.258696	0.000000	0.024359	city	NYC	0.05
5	delivery_time	A	B	8.001005	8.217514	delta	0.216509	0.168266	0.264753	0.000000	0.024615	city	LA	0.05

In [21]:

Copied!





# Same two metrics analysed with clustered OLS (clusters = customer_id) as a
# comparison for the delta-method results. No dimensions are configured, so
# only the total-population rows are reported.
AnalysisPlan.from_metrics_dict({
    "metrics": [
        {"alias": "AOV", "name": "order_value"},
        {"alias": "delivery_time", "name": "delivery_time"},
    ],
    "variants": [
        {"name": "A", "is_control": True},
        {"name": "B", "is_control": False},
    ],
    "variant_col": "treatment",
    "alpha": 0.05,
    "analysis_type": "clustered_ols",
    "analysis_config": {"cluster_cols": ["customer_id"]},
}).analyze(real_experiment_data).to_dataframe()

Out[21]:

	metric_alias	control_variant_name	treatment_variant_name	control_variant_mean	treatment_variant_mean	analysis_type	ate	ate_ci_lower	ate_ci_upper	p_value	std_error	dimension_name	dimension_value	alpha
0	AOV	A	B	149.944691	150.758469	clustered_ols	0.813778	-1.103050	2.730606	4.053575e-01	0.977992	__total_dimension	total	0.05
1	delivery_time	A	B	7.996075	8.209871	clustered_ols	0.213796	0.171626	0.255967	2.885569e-23	0.021516	__total_dimension	total	0.05

Now we use covariate adjustment with customer age. We see that the estimated effect on order value is closer to the true effect, while the estimate for delivery time is essentially unchanged. This is because in our data generation process we used customer age as a covariate for order value but not for delivery time.

In [22]:

Copied!





# Delta-method plan with covariate adjustment: identical metrics and variants
# to the unadjusted plan, plus customer_age passed as a covariate in the
# analysis config. No dimensions are configured here.
plan_covariates = AnalysisPlan.from_metrics_dict({
    "metrics": [
        {"alias": "AOV", "numerator_name": "order_value", "denominator_name": "one"},
        {"alias": "delivery_time", "numerator_name": "delivery_time", "denominator_name": "one"},
    ],
    "variants": [
        {"name": "A", "is_control": True},
        {"name": "B", "is_control": False},
    ],
    "variant_col": "treatment",
    "alpha": 0.05,
    "analysis_type": "delta",
    "analysis_config": {"cluster_cols": ["customer_id"], "covariates": ["customer_age"]},
})

In [23]:

Copied!





# Run the analysis plan -> Need to aggregate by customer_id for cuped to work with variance reduction
# Experiment data, one row per customer: ratio numerators/denominators
# (order_value, one, delivery_time) are summed, customer attributes are
# averaged, and treatment/city are taken from the customer's first row.
# NOTE(review): "first" assumes treatment is constant within a customer,
# which is expected when randomising by customer_id -- confirm.
real_experiment_data_aggregated = real_experiment_data.groupby("customer_id").agg({
    "order_value": "sum",
    "one": "sum",
    "delivery_time": "sum",
    "customer_age": "mean",
    "historical_orders": "mean",
    "date": "min",
    "treatment": "first",
    "city": "first"
}).reset_index()

# Pre-experiment data aggregated the same way; it has no treatment column in
# the aggregation. It is passed as the second argument to plan_cupac.analyze
# in a later cell.
pre_real_experiment_data_aggregated = real_pre_experiment_data.groupby("customer_id").agg({
    "order_value": "sum",
    "one": "sum",
    "delivery_time": "sum",
    "customer_age": "mean",
    "historical_orders": "mean",
    "date": "min",
    "city": "first"
}).reset_index()


# Covariate-adjusted delta analysis on the customer-level data.
plan_covariates.analyze(real_experiment_data_aggregated).to_dataframe()

Out[23]:

	metric_alias	control_variant_name	treatment_variant_name	control_variant_mean	treatment_variant_mean	analysis_type	ate	ate_ci_lower	ate_ci_upper	p_value	std_error	dimension_name	dimension_value	alpha
0	AOV	A	B	149.944691	150.758469	delta	1.247213	0.297971	2.196455	0.010018	0.484316	__total_dimension	total	0.05
1	delivery_time	A	B	7.996075	8.209871	delta	0.213711	0.171539	0.255884	0.000000	0.021517	__total_dimension	total	0.05

Comparing with clustered OLS, pretty much the same results.

In [24]:

Copied!





# Clustered OLS with customer_age as a covariate, for comparison with the
# covariate-adjusted delta results. Unlike the delta run, this is executed on
# the order-level (non-aggregated) experiment data.
AnalysisPlan.from_metrics_dict({
    "metrics": [
        {"alias": "AOV", "name": "order_value"},
        {"alias": "delivery_time", "name": "delivery_time"},
    ],
    "variants": [
        {"name": "A", "is_control": True},
        {"name": "B", "is_control": False},
    ],
    "variant_col": "treatment",
    "alpha": 0.05,
    "analysis_type": "clustered_ols",
    "analysis_config": {"cluster_cols": ["customer_id"], "covariates": ["customer_age"]},
}).analyze(real_experiment_data).to_dataframe()

Out[24]:

	metric_alias	control_variant_name	treatment_variant_name	control_variant_mean	treatment_variant_mean	analysis_type	ate	ate_ci_lower	ate_ci_upper	p_value	std_error	dimension_name	dimension_value	alpha
0	AOV	A	B	149.944691	150.758469	clustered_ols	1.247367	0.298029	2.196706	1.001642e-02	0.484365	__total_dimension	total	0.05
1	delivery_time	A	B	7.996075	8.209871	clustered_ols	0.213768	0.171596	0.255940	2.933129e-23	0.021517	__total_dimension	total	0.05

Now we run the analysis using a CUPAC model. We see that the CIs are narrower, and the results from OLS and the delta method are similar.

In [25]:

Copied!





# Delta-method plan with CUPAC: one HypothesisTest per metric, each with its
# own cupac_config (model, feature columns, target column, scale column).
# NOTE(review): the "estimate_<target_col>" covariate names presumably refer
# to the column holding the CUPAC model's predictions -- confirm against the
# library documentation.
plan_cupac = AnalysisPlan(
    tests=[
        HypothesisTest(
            metric=RatioMetric(alias="AOV", numerator_name="order_value", denominator_name="one"),
            analysis_type="delta",
            analysis_config={
                "cluster_cols": ["customer_id"],
                "covariates": ["estimate_order_value"],
            },
            cupac_config={
                "cupac_model": HistGradientBoostingRegressor(),
                "features_cupac_model": ["customer_id", "customer_age", "historical_orders"],
                "target_col": "order_value",
                "scale_col": "one",
            },
        ),
        HypothesisTest(
            metric=RatioMetric(alias="delivery_time", numerator_name="delivery_time", denominator_name="one"),
            analysis_type="delta",
            analysis_config={
                "cluster_cols": ["customer_id"],
                "covariates": ["estimate_delivery_time", "customer_age"], # adding to show how you can add more covariates
            },
            cupac_config={
                "cupac_model": HistGradientBoostingRegressor(),
                "features_cupac_model": ["customer_id", "customer_age", "historical_orders"],
                "target_col": "delivery_time",
                "scale_col": "one",
            },
        ),
    ],
    variants=[
        Variant(name="A", is_control=True),
        Variant(name="B", is_control=False),
    ],
    variant_col="treatment",
)

In [26]:

Copied!





# Run the CUPAC delta plan once: first argument is the customer-level
# experiment data, second is the customer-level pre-experiment data.
plan_cupac.analyze(
    real_experiment_data_aggregated,
    pre_real_experiment_data_aggregated
).to_dataframe()

Out[26]:

	metric_alias	control_variant_name	treatment_variant_name	control_variant_mean	treatment_variant_mean	analysis_type	ate	ate_ci_lower	ate_ci_upper	p_value	std_error	dimension_name	dimension_value	alpha
0	AOV	A	B	149.944691	150.758469	delta	1.242198	0.361807	2.122589	0.005685	0.449187	__total_dimension	total	0.05
1	delivery_time	A	B	7.996075	8.209871	delta	0.221000	0.192007	0.249993	0.000000	0.014793	__total_dimension	total	0.05

Comparison with clustered OLS

In [27]:

Copied!





# Clustered-OLS counterpart of the CUPAC delta plan: the metrics are
# SimpleMetric on the raw columns and the cupac_config has no scale_col.
# NOTE(review): the "estimate_<target_col>" covariate names presumably refer
# to the column holding the CUPAC model's predictions -- confirm against the
# library documentation.
plan_cupac_ols = AnalysisPlan(
    tests=[
        HypothesisTest(
            metric=SimpleMetric(alias="AOV", name="order_value"),
            analysis_type="clustered_ols",
            analysis_config={
                "cluster_cols": ["customer_id"],
                "covariates": ["estimate_order_value"],
            },
            cupac_config={
                "cupac_model": HistGradientBoostingRegressor(),
                "features_cupac_model": ["customer_id", "customer_age", "historical_orders"],
                "target_col": "order_value",
            },
        ),
        HypothesisTest(
            metric=SimpleMetric(alias="delivery_time", name="delivery_time"),
            analysis_type="clustered_ols",
            analysis_config={
                "cluster_cols": ["customer_id"],
                "covariates": ["customer_age", "estimate_delivery_time"], # adding to show how you can add more covariates
            },
            cupac_config={
                "cupac_model": HistGradientBoostingRegressor(),
                "features_cupac_model": ["customer_id", "customer_age", "historical_orders"],
                "target_col": "delivery_time",
            },
        ),
    ],
    variants=[
        Variant(name="A", is_control=True),
        Variant(name="B", is_control=False),
    ],
    variant_col="treatment",
)

In [28]:

Copied!





# Run the CUPAC clustered-OLS plan once on the order-level (non-aggregated)
# experiment data, with the order-level pre-experiment data as second argument.
plan_cupac_ols.analyze(
    real_experiment_data,
    real_pre_experiment_data
).to_dataframe()

Out[28]:

	metric_alias	control_variant_name	treatment_variant_name	control_variant_mean	treatment_variant_mean	analysis_type	ate	ate_ci_lower	ate_ci_upper	p_value	std_error	dimension_name	dimension_value	alpha
0	AOV	A	B	149.944691	150.758469	clustered_ols	1.174836	0.315865	2.033807	7.347039e-03	0.438259	__total_dimension	total	0.05
1	delivery_time	A	B	7.996075	8.209871	clustered_ols	0.200532	0.171478	0.229585	1.066636e-41	0.014823	__total_dimension	total	0.05

In [ ]: