
Evaluation

Results of an experiment evaluation.

Source code in epstats/toolkit/experiment.py
from typing import List

import pandas as pd


class Evaluation:
    """
    Results of an experiment evaluation.
    """

    def __init__(self, metrics: pd.DataFrame, checks: pd.DataFrame, exposures: pd.DataFrame):
        self.metrics = metrics
        self.checks = checks
        self.exposures = exposures

    @classmethod
    def metric_columns(cls) -> List[str]:
        """
        `metrics` dataframe with columns:

        1. `timestamp` - timestamp of evaluation
        1. `exp_id` - experiment id
        1. `metric_id` - metric id as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
        1. `metric_name` - metric name as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
        1. `exp_variant_id` - variant id
        1. `count` - number of exposures; value of the metric denominator
        1. `mean` - `sum_value` / `count`
        1. `std` - sample standard deviation
        1. `sum_value` - value of goals; value of the metric numerator
        1. `confidence_level` - current confidence level used to calculate `p_value` and `confidence_interval`
        1. `diff` - relative difference between the sample means of this variant and the control variant
        1. `test_stat` - value of the test statistic of the relative difference in means
        1. `p_value` - p-value of the test statistic under the current `confidence_level`
        1. `confidence_interval` - confidence interval of the `diff` under the current `confidence_level`
        1. `standard_error` - standard error of the `diff`
        1. `degrees_of_freedom` - degrees of freedom of this variant mean
        1. `minimum_effect` - minimum effect of interest used to compute `required_sample_size`
        1. `sample_size` - current sample size
        1. `required_sample_size` - size of the sample required to reach the required power
        1. `power` - power based on the collected `sample_size`
        """
        return [
            "timestamp",
            "exp_id",
            "metric_id",
            "metric_name",
            "exp_variant_id",
            "count",
            "mean",
            "std",
            "sum_value",
            "confidence_level",
            "diff",
            "test_stat",
            "p_value",
            "confidence_interval",
            "standard_error",
            "degrees_of_freedom",
            "minimum_effect",
            "sample_size",
            "required_sample_size",
            "power",
        ]

    @classmethod
    def check_columns(cls) -> List[str]:
        """
        `checks` dataframe with columns:

        1. `timestamp` - timestamp of evaluation
        1. `exp_id` - experiment id
        1. `check_id` - check id as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
        1. `check_name` - check name as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
        1. `variable_id` - name of the variable in the check evaluation; the SRM check has variables `p_value`,
        `test_stat`, and `confidence_level`
        1. `value` - value of the variable
        """
        return ["timestamp", "exp_id", "check_id", "check_name", "variable_id", "value"]

    @classmethod
    def exposure_columns(cls) -> List[str]:
        """
        `exposures` dataframe with columns:

        1. `timestamp` - timestamp of evaluation
        1. `exp_id` - experiment id
        1. `exp_variant_id` - variant id
        1. `exposures` - number of exposures of this variant
        """
        return ["exp_variant_id", "exposures"]

check_columns() classmethod

checks dataframe with columns:

  1. timestamp - timestamp of evaluation
  2. exp_id - experiment id
  3. check_id - check id as in Experiment definition
  4. check_name - check name as in Experiment definition
  5. variable_id - name of the variable in the check evaluation; the SRM check has variables p_value, test_stat, and confidence_level
  6. value - value of the variable
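
Because checks is in long format (one row per variable_id), reading a check result usually means pivoting. A minimal sketch, assuming evaluation is an Evaluation whose checks dataframe is populated:

# Pivot so each check variable (p_value, test_stat, confidence_level)
# becomes a column.
srm = evaluation.checks.pivot_table(
    index=["exp_id", "check_id", "check_name"],
    columns="variable_id",
    values="value",
)

# Flag a possible sample ratio mismatch: the SRM p-value falls below the
# significance level implied by the check's confidence level.
suspect = srm[srm["p_value"] < 1 - srm["confidence_level"]]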

exposure_columns() classmethod

exposures dataframe with columns:

  1. timestamp - timestamp of evaluation
  2. exp_id - experiment id
  3. exp_variant_id - variant id
  4. exposures - number of exposures of this variant
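
A common use of exposures is sanity-checking the traffic split between variants. A minimal sketch, again assuming a populated evaluation:

# Share of traffic per variant; for an even A/B split both shares should be
# close to 0.5 (the SRM check tests this formally).
by_variant = evaluation.exposures.set_index("exp_variant_id")["exposures"]
print(by_variant / by_variant.sum())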

metric_columns() classmethod

metrics dataframe with columns:

  1. timestamp - timestamp of evaluation
  2. exp_id - experiment id
  3. metric_id - metric id as in Experiment definition
  4. metric_name - metric name as in Experiment definition
  5. exp_variant_id - variant id
  6. count - number of exposures; value of the metric denominator
  7. mean - sum_value / count
  8. std - sample standard deviation
  9. sum_value - value of goals; value of the metric numerator
  10. confidence_level - current confidence level used to calculate p_value and confidence_interval
  11. diff - relative difference between the sample means of this variant and the control variant
  12. test_stat - value of the test statistic of the relative difference in means
  13. p_value - p-value of the test statistic under the current confidence_level
  14. confidence_interval - confidence interval of the diff under the current confidence_level
  15. standard_error - standard error of the diff
  16. degrees_of_freedom - degrees of freedom of this variant mean
  17. minimum_effect - minimum effect of interest used to compute required_sample_size
  18. sample_size - current sample size
  19. required_sample_size - size of the sample required to reach the required power
  20. power - power based on the collected sample_size
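
With these columns, common readouts are simple boolean filters. A minimal sketch, assuming a populated evaluation (for example, a confidence_level of 0.95 implies a 0.05 significance threshold):

m = evaluation.metrics

# Rows where the relative diff is statistically significant at the
# configured confidence level.
significant = m[m["p_value"] < 1 - m["confidence_level"]]

# Metrics that have not yet collected enough data to reach the
# required power.
underpowered = m[m["sample_size"] < m["required_sample_size"]]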