Evaluation¶
Results of an experiment evaluation.
Source code in epstats/toolkit/experiment.py
class Evaluation:
    """
    Results of an experiment evaluation.

    Holds the three dataframes passed to the constructor:

    1. `metrics` - see `metric_columns` for its column names
    1. `checks` - see `check_columns` for its column names
    1. `exposures` - see `exposure_columns` for its column names
    """

    def __init__(self, metrics: pd.DataFrame, checks: pd.DataFrame, exposures: pd.DataFrame):
        # The dataframes are stored as given; no copy or validation happens here.
        self.metrics = metrics
        self.checks = checks
        self.exposures = exposures

    @classmethod
    def metric_columns(cls) -> List[str]:
        """
        `metrics` dataframe with columns:

        1. `timestamp` - timestamp of evaluation
        1. `exp_id` - experiment id
        1. `metric_id` - metric id as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
        1. `metric_name` - metric name as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
        1. `exp_variant_id` - variant id
        1. `count` - number of exposures, value of metric denominator
        1. `mean` - `sum_value` / `count`
        1. `std` - sample standard deviation
        1. `sum_value` - value of goals, value of metric numerator
        1. `confidence_level` - current confidence level used to calculate `p_value` and `confidence_interval`
        1. `diff` - relative diff between sample means of this and control variant
        1. `test_stat` - value of test statistic of the relative difference in means
        1. `p_value` - p-value of the test statistic under current `confidence_level`
        1. `confidence_interval` - confidence interval of the `diff` under current `confidence_level`
        1. `standard_error` - standard error of the `diff`
        1. `degrees_of_freedom` - degrees of freedom of this variant mean
        1. `minimum_effect` - minimum effect of the metric (NOTE(review): presumably the minimum effect
            of interest used for the sample size calculation - confirm)
        1. `sample_size` - current sample size
        1. `required_sample_size` - size of the sample required to reach the required power
        1. `power` - power based on the collected `sample_size`

        Returns:
            column names in the order listed above
        """
        return [
            "timestamp",
            "exp_id",
            "metric_id",
            "metric_name",
            "exp_variant_id",
            "count",
            "mean",
            "std",
            "sum_value",
            "confidence_level",
            "diff",
            "test_stat",
            "p_value",
            "confidence_interval",
            "standard_error",
            "degrees_of_freedom",
            "minimum_effect",
            "sample_size",
            "required_sample_size",
            "power",
        ]

    @classmethod
    def check_columns(cls) -> List[str]:
        """
        `checks` dataframe with columns:

        1. `timestamp` - timestamp of evaluation
        1. `exp_id` - experiment id
        1. `check_id` - check id as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
        1. `check_name` - check name (NOTE(review): presumably as in
            [`Experiment`][epstats.toolkit.experiment.Experiment] definition - confirm)
        1. `variable_id` - name of the variable in check evaluation, SRM check has following variables `p_value`,
            `test_stat`, `confidence_level`
        1. `value` - value of the variable

        Returns:
            column names in the order listed above
        """
        return ["timestamp", "exp_id", "check_id", "check_name", "variable_id", "value"]

    @classmethod
    def exposure_columns(cls) -> List[str]:
        """
        `exposures` dataframe with columns:

        1. `exp_variant_id` - variant id
        1. `exposures` - number of exposures of this variant

        NOTE(review): earlier documentation also listed `timestamp` and `exp_id`,
        but they are not part of the returned list - confirm which one is intended.

        Returns:
            column names in the order listed above
        """
        return ["exp_variant_id", "exposures"]
check_columns()
classmethod
¶
`checks` dataframe with columns:
1. `timestamp` - timestamp of evaluation
1. `exp_id` - experiment id
1. `check_id` - check id as in `Experiment` definition
1. `variable_id` - name of the variable in check evaluation, SRM check has following variables `p_value`, `test_stat`, `confidence_level`
1. `value` - value of the variable
Source code in epstats/toolkit/experiment.py
@classmethod
def check_columns(cls) -> List[str]:
    """
    `checks` dataframe with columns:

    1. `timestamp` - timestamp of evaluation
    1. `exp_id` - experiment id
    1. `check_id` - check id as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
    1. `check_name` - check name (NOTE(review): presumably as in
        [`Experiment`][epstats.toolkit.experiment.Experiment] definition - confirm)
    1. `variable_id` - name of the variable in check evaluation, SRM check has following variables `p_value`,
        `test_stat`, `confidence_level`
    1. `value` - value of the variable

    Returns:
        column names in the order listed above
    """
    return ["timestamp", "exp_id", "check_id", "check_name", "variable_id", "value"]
exposure_columns()
classmethod
¶
`exposures` dataframe with columns:
1. `timestamp` - timestamp of evaluation
1. `exp_id` - experiment id
1. `exp_variant_id` - variant id
1. `exposures` - number of exposures of this variant
Source code in epstats/toolkit/experiment.py
@classmethod
def exposure_columns(cls) -> List[str]:
    """
    `exposures` dataframe with columns:

    1. `exp_variant_id` - variant id
    1. `exposures` - number of exposures of this variant

    NOTE(review): earlier documentation also listed `timestamp` and `exp_id`,
    but they are not part of the returned list - confirm which one is intended.

    Returns:
        column names in the order listed above
    """
    return ["exp_variant_id", "exposures"]
metric_columns()
classmethod
¶
`metrics` dataframe with columns:
1. `timestamp` - timestamp of evaluation
1. `exp_id` - experiment id
1. `metric_id` - metric id as in `Experiment` definition
1. `metric_name` - metric name as in `Experiment` definition
1. `exp_variant_id` - variant id
1. `count` - number of exposures, value of metric denominator
1. `mean` - `sum_value` / `count`
1. `std` - sample standard deviation
1. `sum_value` - value of goals, value of metric numerator
1. `confidence_level` - current confidence level used to calculate `p_value` and `confidence_interval`
1. `diff` - relative diff between sample means of this and control variant
1. `test_stat` - value of test statistic of the relative difference in means
1. `p_value` - p-value of the test statistic under current `confidence_level`
1. `confidence_interval` - confidence interval of the `diff` under current `confidence_level`
1. `standard_error` - standard error of the `diff`
1. `degrees_of_freedom` - degrees of freedom of this variant mean
1. `sample_size` - current sample size
1. `required_sample_size` - size of the sample required to reach the required power
1. `power` - power based on the collected `sample_size`
Source code in epstats/toolkit/experiment.py
@classmethod
def metric_columns(cls) -> List[str]:
    """
    `metrics` dataframe with columns:

    1. `timestamp` - timestamp of evaluation
    1. `exp_id` - experiment id
    1. `metric_id` - metric id as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
    1. `metric_name` - metric name as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
    1. `exp_variant_id` - variant id
    1. `count` - number of exposures, value of metric denominator
    1. `mean` - `sum_value` / `count`
    1. `std` - sample standard deviation
    1. `sum_value` - value of goals, value of metric numerator
    1. `confidence_level` - current confidence level used to calculate `p_value` and `confidence_interval`
    1. `diff` - relative diff between sample means of this and control variant
    1. `test_stat` - value of test statistic of the relative difference in means
    1. `p_value` - p-value of the test statistic under current `confidence_level`
    1. `confidence_interval` - confidence interval of the `diff` under current `confidence_level`
    1. `standard_error` - standard error of the `diff`
    1. `degrees_of_freedom` - degrees of freedom of this variant mean
    1. `minimum_effect` - minimum effect of the metric (NOTE(review): presumably the minimum effect
        of interest used for the sample size calculation - confirm)
    1. `sample_size` - current sample size
    1. `required_sample_size` - size of the sample required to reach the required power
    1. `power` - power based on the collected `sample_size`

    Returns:
        column names in the order listed above
    """
    return [
        "timestamp",
        "exp_id",
        "metric_id",
        "metric_name",
        "exp_variant_id",
        "count",
        "mean",
        "std",
        "sum_value",
        "confidence_level",
        "diff",
        "test_stat",
        "p_value",
        "confidence_interval",
        "standard_error",
        "degrees_of_freedom",
        "minimum_effect",
        "sample_size",
        "required_sample_size",
        "power",
    ]