End-to-end example¶
We'll show the different delta-method functionalities of cluster_experiments, which are:
- MDE calculation in cluster-randomized experiments with delta method and clustered standard errors.
- MDE calculation with simple covariate adjustment using delta method.
- MDE calculation with cupac (adjustment via ML models) using delta method.
- Inference for all the cases above using delta method, and comparing with OLS with clustered standard errors.
Data generation¶
We create some pre-experimental data that we could use to run power analysis.
We have a dataframe with orders and customers, each customer may have many orders, and the two target metrics are delivery time and order value.
import random
import warnings
import time
import numpy as np
import pandas as pd
import seaborn as sns
from cluster_experiments import NormalPowerAnalysis
# Silence all warnings so the notebook output stays readable.
warnings.filterwarnings('ignore')
# Seed both NumPy's global RNG and the stdlib RNG so the synthetic data is reproducible.
np.random.seed(42)
random.seed(42)
# Constants
N = 100000 # Number of orders
NUM_CUSTOMERS = 6000 # Unique customers
def generate_customers(num_customers):
    """Build a customer table whose mean order value is driven mainly by age.

    Draws from NumPy's global RNG, in this order: an age in [20, 60), a
    Poisson(5) historical order count, and Normal(0, 15) noise per customer.

    Args:
        num_customers: Number of customers to create; ids run from 1 to
            ``num_customers``.

    Returns:
        DataFrame with columns ``customer_id``, ``customer_age``,
        ``mean_order_value`` and ``historical_orders``.
    """
    ids = np.arange(1, num_customers + 1)
    ages = np.random.randint(20, 60, size=num_customers)
    past_orders = np.random.poisson(5, size=num_customers)

    # Linear age trend, a small penalty for customers aged 30 or less, and a
    # small bonus for customers with 8 or more historical orders.
    age_trend = 50 + 2.5 * ages
    young_penalty = 2 * (ages <= 30)
    frequent_bonus = 2 * (past_orders >= 8)
    noise = np.random.normal(0, 15, size=num_customers)
    mean_values = age_trend - young_penalty + frequent_bonus + noise

    columns = {
        "customer_id": ids,
        "customer_age": ages,
        "mean_order_value": mean_values,
        "historical_orders": past_orders,
    }
    return pd.DataFrame(columns)
def sample_orders(customers, num_orders):
    """Draw one customer id per order and attach that customer's attributes.

    Args:
        customers: DataFrame with a ``customer_id`` column.
        num_orders: Number of order rows to produce.

    Returns:
        DataFrame with ``num_orders`` rows: the sampled ``customer_id``
        left-joined with the columns of ``customers``.
    """
    drawn_ids = np.random.choice(customers["customer_id"], size=num_orders)
    orders = pd.DataFrame({"customer_id": drawn_ids})
    return orders.merge(customers, on="customer_id", how="left")
def generate_orders(customers, num_orders):
    """Generate order-level data for the sampled customers.

    Each order gets a noisy ``order_value`` around the customer's mean order
    value, a ``delivery_time`` that varies with ``customer_id`` through a sine
    term, a random ``city`` and a random ``date`` between 2024-01-01 and
    2024-03-31.

    Args:
        customers: Customer table with ``customer_id`` and
            ``mean_order_value`` columns.
        num_orders: Number of orders to generate.

    Returns:
        Order-level DataFrame without the intermediate ``mean_order_value``
        column.
    """
    orders = sample_orders(customers, num_orders)
    n_rows = len(orders)
    days = pd.date_range(start="2024-01-01", end="2024-03-31")

    # Columns are filled in a fixed order so the random draws stay reproducible.
    orders["order_value"] = orders["mean_order_value"] + np.random.normal(0, 15, size=n_rows)
    orders["delivery_time"] = (
        8 + np.sin(orders["customer_id"] / 10) + np.random.normal(0, 0.5, size=n_rows)
    )
    orders["city"] = np.random.choice(["NYC", "LA"], size=n_rows)
    orders["date"] = np.random.choice(days, size=n_rows)

    # mean_order_value was only needed to derive order_value.
    return orders.drop(columns=["mean_order_value"])
def plot_mdes(mdes, x_lim=40, y_value=3):
    """Plot MDE against experiment length with a horizontal reference line.

    Args:
        mdes: Records with ``experiment_length`` and ``mde`` fields, in any
            form accepted by ``pd.DataFrame``.
        x_lim: Right end of the reference line (it starts at x=0).
        y_value: Height of the dashed red reference line.
    """
    mde_frame = pd.DataFrame(mdes)
    sns.lineplot(data=mde_frame, x="experiment_length", y="mde")

    # Dashed red line marking the target MDE.
    sns.lineplot(x=[0, x_lim], y=[y_value] * 2, color="red", linestyle="--")
def get_length(mdes, mde_value):
    """Return the shortest experiment length whose MDE is at most ``mde_value``.

    The original guard used ``<=`` while the selection used ``<``, so an MDE
    exactly equal to the target raised ``ValueError`` from ``min()`` on an
    empty sequence. Both now use the same ``<=`` comparison.

    Args:
        mdes: Iterable of mappings with ``mde`` and ``experiment_length`` keys.
        mde_value: Target minimum detectable effect.

    Returns:
        The smallest qualifying ``experiment_length``, or ``None`` when no
        entry reaches the target (including when ``mdes`` is empty).
    """
    return min(
        (x["experiment_length"] for x in mdes if x["mde"] <= mde_value),
        default=None,
    )
def get_length_print(mdes, mde_value):
    """Print the minimum experiment length that reaches the target MDE.

    Args:
        mdes: Records with ``mde`` and ``experiment_length`` keys.
        mde_value: Target minimum detectable effect.
    """
    length = get_length(mdes, mde_value)
    message = f"Minimum experiment length to detect MDE of {mde_value}: {length}"
    print(message)
# Run the pipeline
customers = generate_customers(NUM_CUSTOMERS)
# Add a constant column `one`; later cells pass it as `scale_col` / the ratio-metric denominator.
experiment_data = generate_orders(customers, N).assign(
one=1
)
print(experiment_data.head())
customer_id customer_age historical_orders order_value delivery_time \ 0 4162 49 3 177.806483 9.875481 1 1076 58 5 194.904074 8.637389 2 5361 38 2 153.913243 9.020768 3 2036 35 4 138.692537 8.016001 4 390 22 6 92.457381 8.753228 city date one 0 LA 2024-03-08 1 1 LA 2024-03-08 1 2 NYC 2024-02-06 1 3 NYC 2024-02-05 1 4 NYC 2024-03-29 1
Power analysis¶
Customer-level split¶
Assume we randomize at customer level. In this case we need to use the delta method to run the power analysis. The following code shows the MDE on order_value for several experiment lengths (1, 2, 3 and 4 weeks).
# Power analysis built from a dict: clustered splitter on customer_id and
# delta-method analysis on order_value, with `one` as the scale column.
mde = NormalPowerAnalysis.from_dict({
"splitter": "clustered",
"analysis": "delta",
"time_col": "date",
"target_col": "order_value",
"scale_col": "one",
"cluster_cols": ["customer_id"]
})
start = time.time()
# MDE at power 0.8 for experiment lengths 7, 14, 21 and 28 (1 to 4 weeks).
mdes = mde.mde_time_line(
experiment_data,
powers=[0.8],
experiment_length=(7, 14, 21, 28),
n_simulations=5
)
print(f"Time taken for MDE calculation: {time.time() - start:.2f} seconds")
Time taken for MDE calculation: 0.44 seconds
# Target MDE; plot_mdes draws it as the dashed red reference line.
mde_value = 1.4
plot_mdes(mdes, y_value=mde_value, x_lim=30)
get_length_print(mdes, mde_value)
Minimum experiment length to detect MDE of 1.4: None
Check that results with clustered ols are the same
# Same power analysis as the delta-method cell, but with clustered OLS as the
# analysis; no scale column is passed here.
mde = NormalPowerAnalysis.from_dict({
"splitter": "clustered",
"analysis": "clustered_ols",
"time_col": "date",
"target_col": "order_value",
"cluster_cols": ["customer_id"]
})
start = time.time()
# Same powers, experiment lengths and number of simulations as the delta-method run.
mdes = mde.mde_time_line(
experiment_data,
powers=[0.8],
experiment_length=(7, 14, 21, 28),
n_simulations=5
)
print(f"Time taken for OLS MDE calculation: {time.time() - start:.2f} seconds")
plot_mdes(mdes, y_value=mde_value, x_lim=30)
get_length_print(mdes, mde_value)
Time taken for OLS MDE calculation: 0.63 seconds Minimum experiment length to detect MDE of 1.4: None
Results with clustered ols are the same as with delta method, but the delta method is faster and more efficient.
The age of the customer is a good predictor of the order value and is not impacted by the treatment. We can use this covariate to adjust our analysis and decrease the MDE. We see that the MDE is smaller in this case, because we are using a covariate that is not impacted by the treatment.
In delta method, we use this form of CUPED for variance reduction:
$$ ATE = \frac{\sum_{i=1}^N T_i (Y_i - \sum_{j=1}^m \theta_j (Z_{ij} - E[Z_j]) N_i)}{\sum_{i=1}^N T_i N_i} - \frac{\sum_{i=1}^N (1 - T_i) (Y_i - \sum_{j=1}^m \theta_j (Z_{ij} - E[Z_j]) N_i)}{\sum_{i=1}^N (1 - T_i) N_i} $$
that is, we apply variance reduction to the numerator, but not the denominator. An issue is open to add the same variance reduction to the denominator, but it is not implemented yet. This follows the same notation as the Deng et al. paper on CUPED.
# Delta-method power analysis with customer_age as an adjustment covariate.
mde_variance_reduction = NormalPowerAnalysis.from_dict(
    {
        "splitter": "clustered",
        "analysis": "delta",
        "time_col": "date",
        "target_col": "order_value",
        "scale_col": "one",
        "cluster_cols": ["customer_id"],
        "covariates": ["customer_age"],
    }
)

start = time.time()
mdes = []
# should be simplified with aggregation logic in NormalPowerAnalysis
for experiment_length in (7, 14, 21, 28):
    # NOTE(review): the bound is inclusive, so with day-granular dates this
    # keeps experiment_length + 1 calendar days starting at 2024-01-01.
    cutoff = pd.Timestamp("2024-01-01") + pd.Timedelta(days=experiment_length)
    filtered_data = experiment_data.loc[experiment_data["date"] <= cutoff]

    # Collapse to one row per customer: totals for the target and the scale
    # column, the customer's age, and the date of their first order.
    per_customer = filtered_data.groupby("customer_id")
    aggregated_data = per_customer.agg(
        {
            "order_value": "sum",
            "one": "sum",
            "customer_age": "mean",
            "date": "min",
        }
    ).reset_index()

    mde = mde_variance_reduction.mde_time_line(
        aggregated_data,
        powers=[0.8],
        experiment_length=[experiment_length],
        n_simulations=5,
    )
    # A single experiment length was requested, so keep the single result.
    mdes.append(mde[0])

print(f"Time taken for MDE with covariate calculation: {time.time() - start:.2f} seconds")
Time taken for MDE with covariate calculation: 0.20 seconds
plot_mdes(mdes, y_value=mde_value, x_lim=30)
get_length_print(mdes, mde_value)
Minimum experiment length to detect MDE of 1.4: 21
Now we repeat the same using clustered standard errors with OLS. We see that the results are the same as with delta method.
# Clustered-OLS counterpart of the covariate-adjusted delta analysis:
# same splitter, target, clusters and customer_age covariate.
mde_variance_reduction_ols = NormalPowerAnalysis.from_dict({
"splitter": "clustered",
"analysis": "clustered_ols",
"time_col": "date",
"target_col": "order_value",
"cluster_cols": ["customer_id"],
"covariates": ["customer_age"]
})
start = time.time()
# Runs on the order-level data directly; no per-customer aggregation is done in this cell.
mdes_ols_covariates = mde_variance_reduction_ols.mde_time_line(
experiment_data,
powers=[0.8],
experiment_length=[7, 14, 21, 28],
n_simulations=5
)
print(f"Time taken for OLS with covariate MDE calculation: {time.time() - start:.2f} seconds")
Time taken for OLS with covariate MDE calculation: 0.66 seconds
plot_mdes(mdes_ols_covariates, y_value=mde_value, x_lim=30)
This is the last example of power analysis.
We assume data from Feb onwards is used to run the power analysis (it's the data we take as experimental, though it is actually pre-experimental data). Data from Jan is taken as pre-experimental data. We simulate that the experiment happened after Feb. We do this because we need pre-experimental data to train the Cupac Model, which we didn't do before.
We use a cupac model with customer id, historical orders and age as features (order value has a non-linear relationship with customer id, which is why we don't add it as a single covariate). We use the same data as before, but we train the model with pre-experimental data. We see that the MDE is smaller in this case, because we are using a better covariate that is not impacted by the treatment.
In this case we cannot init by dict because we're using cupac, but happy to review a PR that includes this :) In this case we need to create splitter and analysis classes.
# January orders are the pre-experiment data; orders from February onwards
# are treated as the experiment period.
pre_experiment_df = experiment_data.query("date < '2024-02-01'")
experiment_df = experiment_data.query("date >= '2024-02-01'")
from cluster_experiments import ClusteredSplitter, DeltaMethodAnalysis, ClusteredOLSAnalysis
from sklearn.ensemble import HistGradientBoostingRegressor
# Randomize at customer level.
splitter = ClusteredSplitter(
cluster_cols=["customer_id"],
)
# NOTE(review): "estimate_order_value" is presumably the column holding the CUPAC
# model's prediction of order_value — confirm against the cluster_experiments docs.
delta = DeltaMethodAnalysis(
target_col="order_value",
scale_col="one",
covariates=["estimate_order_value"],
cluster_cols=["customer_id"],
)
ols = ClusteredOLSAnalysis(
target_col="order_value",
covariates=["estimate_order_value"],
cluster_cols=["customer_id"],
)
# Delta-method power analysis with a gradient-boosting CUPAC model.
pwr = NormalPowerAnalysis(
splitter=splitter,
analysis=delta,
cupac_model=HistGradientBoostingRegressor(),
time_col="date",
target_col="order_value",
scale_col="one",
features_cupac_model=["customer_id", "customer_age", "historical_orders"],
)
# Clustered-OLS power analysis with the same CUPAC model type and features.
pwr_ols = NormalPowerAnalysis(
splitter=splitter,
analysis=ols,
time_col="date",
target_col="order_value",
cupac_model=HistGradientBoostingRegressor(),
features_cupac_model=["customer_id", "customer_age", "historical_orders"],
)
mdes_cupac = []
mde_cupac_ols = []

# How order rows are collapsed to one row per customer; shared by the
# experiment and pre-experiment aggregations.
customer_aggregation = {
    "order_value": "sum",
    "one": "sum",
    "customer_age": "mean",
    "historical_orders": "mean",
    "date": "min",
}

# The pre-experiment aggregate does not depend on the experiment length, so it
# is computed once here instead of on every loop iteration.
pre_experiment_aggregated_data = (
    pre_experiment_df.groupby("customer_id").agg(customer_aggregation).reset_index()
)

for experiment_length in range(7, 35, 7):
    # Keep the orders from the start of February up to the cutoff (inclusive).
    cutoff = pd.Timestamp("2024-02-01") + pd.Timedelta(days=experiment_length)
    filtered_data = experiment_df[experiment_df["date"] <= cutoff]
    aggregated_data = (
        filtered_data.groupby("customer_id").agg(customer_aggregation).reset_index()
    )

    # Delta method: runs on the per-customer aggregates.
    timer_cupac_start = time.time()
    mde = pwr.mde_time_line(
        aggregated_data,
        pre_experiment_aggregated_data,
        powers=[0.8],
        experiment_length=[experiment_length],
        n_simulations=5,
    )
    timer_cupac_end = time.time()
    print(f"Cupac MDE calculation for length {experiment_length} took {timer_cupac_end - timer_cupac_start:.2f} seconds")

    # Clustered OLS: runs on the order-level rows.
    timer_ols_start = time.time()
    mde_ols = pwr_ols.mde_time_line(
        filtered_data,
        pre_experiment_df,
        powers=[0.8],
        experiment_length=[experiment_length],
        n_simulations=5,
    )
    timer_ols_end = time.time()
    print(f"Cupac OLS MDE calculation for length {experiment_length} took {timer_ols_end - timer_ols_start:.2f} seconds")

    # One experiment length per call, so each result list has a single entry.
    mdes_cupac.append(mde[0])
    mde_cupac_ols.append(mde_ols[0])

plot_mdes(mdes_cupac, y_value=mde_value, x_lim=30)
Cupac MDE calculation for length 7 took 0.62 seconds Cupac OLS MDE calculation for length 7 took 0.73 seconds Cupac MDE calculation for length 14 took 0.06 seconds Cupac OLS MDE calculation for length 14 took 0.15 seconds Cupac MDE calculation for length 21 took 0.06 seconds Cupac OLS MDE calculation for length 21 took 0.21 seconds Cupac MDE calculation for length 28 took 0.06 seconds Cupac OLS MDE calculation for length 28 took 0.26 seconds
plot_mdes(mde_cupac_ols, y_value=mde_value, x_lim=30)
get_length_print(mdes_cupac, mde_value)
Minimum experiment length to detect MDE of 1.4: 14
Analysis¶
Now we run the analysis assuming that the experiment ran from 2024-03-01 for 3 weeks. We simulate some fake effects (1.5 in order value and 0.2 in delivery time). We use functionalities in cluster_experiments to simulate the experiment.
# NOTE(review): with day-granular dates this window covers 2024-03-01 to 2024-03-20,
# i.e. 20 days; use "date < '2024-03-22'" if a full 3 weeks is intended.
real_experiment_data = experiment_data.query("date >= '2024-03-01' and date < '2024-03-21'")
# Everything before the experiment start is kept as pre-experiment data.
real_pre_experiment_data = experiment_data.query("date < '2024-03-01'")
real_experiment_data
from cluster_experiments import ClusteredSplitter, ConstantPerturbator
# Add effect on the order value
splitter = ClusteredSplitter(
cluster_cols=["customer_id"],
)
perturbator = ConstantPerturbator(
target_col="order_value",
)
# Assign treatment at customer level, then apply the simulated effect of 1.5 on order_value.
real_experiment_data = splitter.assign_treatment_df(real_experiment_data)
real_experiment_data = perturbator.perturbate(real_experiment_data, average_effect=1.5)
# Add effect on the delivery time
perturbator = ConstantPerturbator(
target_col="delivery_time",
)
real_experiment_data = perturbator.perturbate(real_experiment_data, average_effect=.2)
real_experiment_data
customer_id | customer_age | historical_orders | order_value | delivery_time | city | date | one | treatment | |
---|---|---|---|---|---|---|---|---|---|
0 | 4162 | 49 | 3 | 179.306483 | 10.075481 | LA | 2024-03-08 | 1 | B |
1 | 1076 | 58 | 5 | 196.404074 | 8.837389 | LA | 2024-03-08 | 1 | B |
2 | 5142 | 31 | 6 | 139.203140 | 7.468698 | LA | 2024-03-14 | 1 | A |
3 | 4825 | 54 | 2 | 184.653481 | 7.267328 | LA | 2024-03-16 | 1 | A |
4 | 2569 | 36 | 4 | 154.003198 | 7.125005 | NYC | 2024-03-14 | 1 | A |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
22071 | 3869 | 38 | 3 | 171.082504 | 7.156785 | LA | 2024-03-13 | 1 | B |
22072 | 2598 | 41 | 4 | 147.810428 | 9.690305 | NYC | 2024-03-08 | 1 | A |
22073 | 2672 | 46 | 5 | 180.692472 | 7.126516 | LA | 2024-03-13 | 1 | B |
22074 | 3529 | 29 | 6 | 103.223538 | 9.117135 | NYC | 2024-03-15 | 1 | B |
22075 | 1834 | 38 | 3 | 142.912978 | 8.840475 | NYC | 2024-03-02 | 1 | A |
22076 rows × 9 columns
We run the analysis with and without covariate adjustment. We use city as a dimension, showing results in the overall population and by city.
We compare with OLS with clustered standard errors. We see that results are practically the same: point estimates match and standard errors differ only marginally.
from cluster_experiments import AnalysisPlan, HypothesisTest, Variant, RatioMetric, SimpleMetric
# Delta-method plan: both metrics are ratios over the constant `one` column,
# A is the control variant, and results are also broken down by city.
plan = AnalysisPlan.from_metrics_dict({
"metrics": [
{"alias": "AOV", "numerator_name": "order_value", "denominator_name": "one"},
{"alias": "delivery_time", "numerator_name": "delivery_time", "denominator_name": "one"},
],
"variants": [
{"name": "A", "is_control": True},
{"name": "B", "is_control": False},
],
"variant_col": "treatment",
"alpha": 0.05,
"dimensions": [
{"name": "city", "values": ["NYC", "LA"]},
],
"analysis_type": "delta",
"analysis_config": {"cluster_cols": ["customer_id"]},
})
We see that CIs contain the true effect, and CIs are pretty big.
# Run the analysis plan
df_plan = plan.analyze(real_experiment_data).to_dataframe()
df_plan
metric_alias | control_variant_name | treatment_variant_name | control_variant_mean | treatment_variant_mean | analysis_type | ate | ate_ci_lower | ate_ci_upper | p_value | std_error | dimension_name | dimension_value | alpha | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AOV | A | B | 149.944691 | 150.758469 | delta | 0.813778 | -1.102842 | 2.730398 | 0.405307 | 0.977885 | __total_dimension | total | 0.05 |
1 | AOV | A | B | 150.312273 | 151.164539 | delta | 0.852266 | -1.279811 | 2.984343 | 0.433353 | 1.087814 | city | NYC | 0.05 |
2 | AOV | A | B | 149.584125 | 150.363999 | delta | 0.779873 | -1.365121 | 2.924867 | 0.476093 | 1.094405 | city | LA | 0.05 |
3 | delivery_time | A | B | 7.996075 | 8.209871 | delta | 0.213796 | 0.171630 | 0.255962 | 0.000000 | 0.021514 | __total_dimension | total | 0.05 |
4 | delivery_time | A | B | 7.991050 | 8.202004 | delta | 0.210954 | 0.163212 | 0.258696 | 0.000000 | 0.024359 | city | NYC | 0.05 |
5 | delivery_time | A | B | 8.001005 | 8.217514 | delta | 0.216509 | 0.168266 | 0.264753 | 0.000000 | 0.024615 | city | LA | 0.05 |
# Clustered-OLS counterpart of the delta-method plan: metrics are given by
# column name, no dimensions are configured, and the order-level data is analyzed.
AnalysisPlan.from_metrics_dict({
"metrics": [
{"alias": "AOV", "name": "order_value"},
{"alias": "delivery_time", "name": "delivery_time"},
],
"variants": [
{"name": "A", "is_control": True},
{"name": "B", "is_control": False},
],
"variant_col": "treatment",
"alpha": 0.05,
"analysis_type": "clustered_ols",
"analysis_config": {"cluster_cols": ["customer_id"]},
}).analyze(real_experiment_data).to_dataframe()
metric_alias | control_variant_name | treatment_variant_name | control_variant_mean | treatment_variant_mean | analysis_type | ate | ate_ci_lower | ate_ci_upper | p_value | std_error | dimension_name | dimension_value | alpha | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AOV | A | B | 149.944691 | 150.758469 | clustered_ols | 0.813778 | -1.103050 | 2.730606 | 4.053575e-01 | 0.977992 | __total_dimension | total | 0.05 |
1 | delivery_time | A | B | 7.996075 | 8.209871 | clustered_ols | 0.213796 | 0.171626 | 0.255967 | 2.885569e-23 | 0.021516 | __total_dimension | total | 0.05 |
Now we use covariate adjustment with customer age. We see that the effect in order value is closer to the true effect, but the effect in delivery time is just as bad. This is because in our data generation process we used customer age as a covariate for order value but not for delivery time.
# Delta-method plan with customer_age as an adjustment covariate.
plan_covariates = AnalysisPlan.from_metrics_dict({
"metrics": [
{"alias": "AOV", "numerator_name": "order_value", "denominator_name": "one"},
{"alias": "delivery_time", "numerator_name": "delivery_time", "denominator_name": "one"},
],
"variants": [
{"name": "A", "is_control": True},
{"name": "B", "is_control": False},
],
"variant_col": "treatment",
"alpha": 0.05,
"analysis_type": "delta",
"analysis_config": {"cluster_cols": ["customer_id"], "covariates": ["customer_age"]},
})
# Run the analysis plan -> Need to aggregate by customer_id for cuped to work with variance reduction
# One row per customer: numerators and the `one` denominator are summed, customer
# attributes are averaged, and treatment/city take the customer's first value.
real_experiment_data_aggregated = real_experiment_data.groupby("customer_id").agg({
"order_value": "sum",
"one": "sum",
"delivery_time": "sum",
"customer_age": "mean",
"historical_orders": "mean",
"date": "min",
"treatment": "first",
"city": "first"
}).reset_index()
# Same aggregation for the pre-experiment period, without a treatment column.
pre_real_experiment_data_aggregated = real_pre_experiment_data.groupby("customer_id").agg({
"order_value": "sum",
"one": "sum",
"delivery_time": "sum",
"customer_age": "mean",
"historical_orders": "mean",
"date": "min",
"city": "first"
}).reset_index()
plan_covariates.analyze(real_experiment_data_aggregated).to_dataframe()
metric_alias | control_variant_name | treatment_variant_name | control_variant_mean | treatment_variant_mean | analysis_type | ate | ate_ci_lower | ate_ci_upper | p_value | std_error | dimension_name | dimension_value | alpha | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AOV | A | B | 149.944691 | 150.758469 | delta | 1.247213 | 0.297971 | 2.196455 | 0.010018 | 0.484316 | __total_dimension | total | 0.05 |
1 | delivery_time | A | B | 7.996075 | 8.209871 | delta | 0.213711 | 0.171539 | 0.255884 | 0.000000 | 0.021517 | __total_dimension | total | 0.05 |
Comparing with clustered OLS, pretty much the same results.
# Clustered-OLS plan with the same customer_age covariate, run on the
# order-level (non-aggregated) experiment data.
AnalysisPlan.from_metrics_dict({
"metrics": [
{"alias": "AOV", "name": "order_value"},
{"alias": "delivery_time", "name": "delivery_time"},
],
"variants": [
{"name": "A", "is_control": True},
{"name": "B", "is_control": False},
],
"variant_col": "treatment",
"alpha": 0.05,
"analysis_type": "clustered_ols",
"analysis_config": {"cluster_cols": ["customer_id"], "covariates": ["customer_age"]},
}).analyze(real_experiment_data).to_dataframe()
metric_alias | control_variant_name | treatment_variant_name | control_variant_mean | treatment_variant_mean | analysis_type | ate | ate_ci_lower | ate_ci_upper | p_value | std_error | dimension_name | dimension_value | alpha | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AOV | A | B | 149.944691 | 150.758469 | clustered_ols | 1.247367 | 0.298029 | 2.196706 | 1.001642e-02 | 0.484365 | __total_dimension | total | 0.05 |
1 | delivery_time | A | B | 7.996075 | 8.209871 | clustered_ols | 0.213768 | 0.171596 | 0.255940 | 2.933129e-23 | 0.021517 | __total_dimension | total | 0.05 |
Now we run analysis using cupac model. We see that the CIs are smaller, and results in OLS and delta are kind of similar.
# Delta-method plan with one CUPAC model per metric. Each test configures its own
# HistGradientBoostingRegressor with that metric's column as target_col.
# NOTE(review): the "estimate_<target>" covariates are presumably the columns the
# CUPAC models write their predictions to — confirm against the library docs.
plan_cupac = AnalysisPlan(
tests=[
HypothesisTest(
metric=RatioMetric(alias="AOV", numerator_name="order_value", denominator_name="one"),
analysis_type="delta",
analysis_config={
"cluster_cols": ["customer_id"],
"covariates": ["estimate_order_value"],
},
cupac_config={
"cupac_model": HistGradientBoostingRegressor(),
"features_cupac_model": ["customer_id", "customer_age", "historical_orders"],
"target_col": "order_value",
"scale_col": "one",
},
),
HypothesisTest(
metric=RatioMetric(alias="delivery_time", numerator_name="delivery_time", denominator_name="one"),
analysis_type="delta",
analysis_config={
"cluster_cols": ["customer_id"],
"covariates": ["estimate_delivery_time", "customer_age"], # adding to show how you can add more covariates
},
cupac_config={
"cupac_model": HistGradientBoostingRegressor(),
"features_cupac_model": ["customer_id", "customer_age", "historical_orders"],
"target_col": "delivery_time",
"scale_col": "one",
},
),
],
variants=[
Variant(name="A", is_control=True),
Variant(name="B", is_control=False),
],
variant_col="treatment",
)
# Analyze the per-customer aggregates, passing the aggregated pre-experiment data as second argument.
plan_cupac.analyze(
real_experiment_data_aggregated,
pre_real_experiment_data_aggregated
).to_dataframe()
metric_alias | control_variant_name | treatment_variant_name | control_variant_mean | treatment_variant_mean | analysis_type | ate | ate_ci_lower | ate_ci_upper | p_value | std_error | dimension_name | dimension_value | alpha | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AOV | A | B | 149.944691 | 150.758469 | delta | 1.242198 | 0.361807 | 2.122589 | 0.005685 | 0.449187 | __total_dimension | total | 0.05 |
1 | delivery_time | A | B | 7.996075 | 8.209871 | delta | 0.221000 | 0.192007 | 0.249993 | 0.000000 | 0.014793 | __total_dimension | total | 0.05 |
Comparison with clustered OLS
# Clustered-OLS counterpart of the CUPAC delta plan: SimpleMetric instead of
# RatioMetric, and no scale_col in the cupac configuration.
plan_cupac_ols = AnalysisPlan(
tests=[
HypothesisTest(
metric=SimpleMetric(alias="AOV", name="order_value"),
analysis_type="clustered_ols",
analysis_config={
"cluster_cols": ["customer_id"],
"covariates": ["estimate_order_value"],
},
cupac_config={
"cupac_model": HistGradientBoostingRegressor(),
"features_cupac_model": ["customer_id", "customer_age", "historical_orders"],
"target_col": "order_value",
},
),
HypothesisTest(
metric=SimpleMetric(alias="delivery_time", name="delivery_time"),
analysis_type="clustered_ols",
analysis_config={
"cluster_cols": ["customer_id"],
"covariates": ["customer_age", "estimate_delivery_time"], # adding to show how you can add more covariates
},
cupac_config={
"cupac_model": HistGradientBoostingRegressor(),
"features_cupac_model": ["customer_id", "customer_age", "historical_orders"],
"target_col": "delivery_time",
},
),
],
variants=[
Variant(name="A", is_control=True),
Variant(name="B", is_control=False),
],
variant_col="treatment",
)
# Analyze the order-level data, passing the order-level pre-experiment data as second argument.
plan_cupac_ols.analyze(
real_experiment_data,
real_pre_experiment_data
).to_dataframe()
metric_alias | control_variant_name | treatment_variant_name | control_variant_mean | treatment_variant_mean | analysis_type | ate | ate_ci_lower | ate_ci_upper | p_value | std_error | dimension_name | dimension_value | alpha | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AOV | A | B | 149.944691 | 150.758469 | clustered_ols | 1.174836 | 0.315865 | 2.033807 | 7.347039e-03 | 0.438259 | __total_dimension | total | 0.05 |
1 | delivery_time | A | B | 7.996075 | 8.209871 | clustered_ols | 0.200532 | 0.171478 | 0.229585 | 1.066636e-41 | 0.014823 | __total_dimension | total | 0.05 |