Custom classes
Examples on how to create:
- a custom perturbator
- a custom splitter
- a custom hypothesis test
The names of your custom classes don't need to be CustomX; you are free to choose them. The only requirement is that each class inherits from the corresponding base class. For example, to create a custom perturbator, you need to inherit from the Perturbator base class. The same applies to the other classes.
In [1]:
Copied!
from cluster_experiments import ExperimentAnalysis
import pandas as pd
from scipy.stats import ttest_ind
class CustomExperimentAnalysis(ExperimentAnalysis):
    """Custom hypothesis test: compare treatment and control with a t-test."""

    def analysis_pvalue(self, df: pd.DataFrame, verbose: bool = True) -> float:
        """Return the p-value of an unequal-variance t-test on the target.

        Args:
            df: Data containing ``self.treatment_col`` (rows equal to 1 are
                used as treatment, rows equal to 0 as control) and
                ``self.target_col`` (the values being compared).
            verbose: Kept for signature compatibility; not used here.

        Returns:
            The ``pvalue`` attribute of the ``scipy.stats.ttest_ind`` result.
        """
        # Boolean masks instead of df.query(f"{col} == 1"): building a query
        # string from the column name fails for names that are not valid
        # Python identifiers (e.g. containing spaces or dots).
        treatment_flag = df[self.treatment_col]
        treatment_data = df.loc[treatment_flag == 1, self.target_col]
        control_data = df.loc[treatment_flag == 0, self.target_col]

        # equal_var=False: do not assume both groups share the same variance.
        t_test_results = ttest_ind(treatment_data, control_data, equal_var=False)
        return t_test_results.pvalue
from cluster_experiments import ExperimentAnalysis
import pandas as pd
from scipy.stats import ttest_ind
class CustomExperimentAnalysis(ExperimentAnalysis):
    """Custom hypothesis test: compare treatment and control with a t-test."""

    def analysis_pvalue(self, df: pd.DataFrame, verbose: bool = True) -> float:
        """Return the p-value of an unequal-variance t-test on the target.

        Args:
            df: Data containing ``self.treatment_col`` (rows equal to 1 are
                used as treatment, rows equal to 0 as control) and
                ``self.target_col`` (the values being compared).
            verbose: Kept for signature compatibility; not used here.

        Returns:
            The ``pvalue`` attribute of the ``scipy.stats.ttest_ind`` result.
        """
        # Boolean masks instead of df.query(f"{col} == 1"): building a query
        # string from the column name fails for names that are not valid
        # Python identifiers (e.g. containing spaces or dots).
        treatment_flag = df[self.treatment_col]
        treatment_data = df.loc[treatment_flag == 1, self.target_col]
        control_data = df.loc[treatment_flag == 0, self.target_col]

        # equal_var=False: do not assume both groups share the same variance.
        t_test_results = ttest_ind(treatment_data, control_data, equal_var=False)
        return t_test_results.pvalue
In [2]:
Copied!
from cluster_experiments import RandomSplitter
import numpy as np
class CustomRandomSplitter(RandomSplitter):
    """Custom splitter whose assignment probability depends on ``power_user``."""

    def assign_treatment_df(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with a randomly assigned treatment column.

        Rows where ``power_user`` is true receive "B" with probability 0.9,
        all other rows receive "B" with probability 0.1; the remaining rows
        receive "A". The input frame is not modified, and the returned frame
        keeps the input's row order and index.

        Args:
            df: Data with a boolean ``power_user`` column.

        Returns:
            A copy of ``df`` with ``self.treatment_col`` set to "A" or "B".
        """
        df = df.copy()

        # Per-row probability of landing in "B" (the treatment arm in this
        # example): 90% for power users, 10% for everyone else.
        is_power_user = df["power_user"].to_numpy(dtype=bool)
        prob_b = np.where(is_power_user, 0.9, 0.1)

        # One vectorised assignment on the single copy. This avoids writing a
        # column onto df.query(...) results (chained assignment, which can
        # raise SettingWithCopyWarning) and avoids the row reordering that
        # concatenating the two sub-frames caused.
        draws = np.random.random(len(df))
        df[self.treatment_col] = np.where(draws < prob_b, "B", "A")
        return df
from cluster_experiments import RandomSplitter
import numpy as np
class CustomRandomSplitter(RandomSplitter):
    """Custom splitter whose assignment probability depends on ``power_user``."""

    def assign_treatment_df(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with a randomly assigned treatment column.

        Rows where ``power_user`` is true receive "B" with probability 0.9,
        all other rows receive "B" with probability 0.1; the remaining rows
        receive "A". The input frame is not modified, and the returned frame
        keeps the input's row order and index.

        Args:
            df: Data with a boolean ``power_user`` column.

        Returns:
            A copy of ``df`` with ``self.treatment_col`` set to "A" or "B".
        """
        df = df.copy()

        # Per-row probability of landing in "B" (the treatment arm in this
        # example): 90% for power users, 10% for everyone else.
        is_power_user = df["power_user"].to_numpy(dtype=bool)
        prob_b = np.where(is_power_user, 0.9, 0.1)

        # One vectorised assignment on the single copy. This avoids writing a
        # column onto df.query(...) results (chained assignment, which can
        # raise SettingWithCopyWarning) and avoids the row reordering that
        # concatenating the two sub-frames caused.
        draws = np.random.random(len(df))
        df[self.treatment_col] = np.where(draws < prob_b, "B", "A")
        return df
In [3]:
Copied!
from cluster_experiments import Perturbator
import pandas as pd
class CustomPerturbator(Perturbator):
    """Custom perturbator that adds a noisy effect to treated rows."""

    def perturbate(self, df: pd.DataFrame, average_effect: float) -> pd.DataFrame:
        """Return a re-indexed copy of ``df`` with the target shifted for treated rows.

        Each row whose ``self.treatment_col`` equals ``self.treatment`` has a
        draw from a normal distribution with mean ``average_effect`` and
        standard deviation 1 added to its ``self.target_col`` value. Other
        rows are left unchanged and the input frame is not modified.

        Args:
            df: Data containing the treatment and target columns.
            average_effect: Mean of the per-row effect added to treated rows.

        Returns:
            The perturbed copy, with its index reset to 0..len-1.
        """
        perturbed = df.copy().reset_index(drop=True)

        # Select treated rows once and draw exactly one effect per treated row.
        is_treated = perturbed[self.treatment_col] == self.treatment
        n_treated = is_treated.sum()
        effects = np.random.normal(average_effect, 1, size=n_treated)

        perturbed.loc[is_treated, self.target_col] += effects
        return perturbed
from cluster_experiments import Perturbator
import pandas as pd
class CustomPerturbator(Perturbator):
    """Custom perturbator that adds a noisy effect to treated rows."""

    def perturbate(self, df: pd.DataFrame, average_effect: float) -> pd.DataFrame:
        """Return a re-indexed copy of ``df`` with the target shifted for treated rows.

        Each row whose ``self.treatment_col`` equals ``self.treatment`` has a
        draw from a normal distribution with mean ``average_effect`` and
        standard deviation 1 added to its ``self.target_col`` value. Other
        rows are left unchanged and the input frame is not modified.

        Args:
            df: Data containing the treatment and target columns.
            average_effect: Mean of the per-row effect added to treated rows.

        Returns:
            The perturbed copy, with its index reset to 0..len-1.
        """
        perturbed = df.copy().reset_index(drop=True)

        # Select treated rows once and draw exactly one effect per treated row.
        is_treated = perturbed[self.treatment_col] == self.treatment
        n_treated = is_treated.sum()
        effects = np.random.normal(average_effect, 1, size=n_treated)

        perturbed.loc[is_treated, self.target_col] += effects
        return perturbed