Skip to content

Check

Perform data quality check that accompanies metric evaluation in the experiment.

See Data Quality Checks for details about data quality checks and Evaluation for description of output.

Source code in src/epstats/toolkit/check.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
class Check:
    """
    Perform data quality check that accompanies metric evaluation in the experiment.

    See [Data Quality Checks](../stats/basics.md#data-quality-checks) for details about
    data quality checks and [`Evaluation`][epstats.toolkit.experiment.Evaluation] for description of output.
    """

    def __init__(self, id: int, name: str, denominator: str, **unused_kwargs):
        """
        Constructor of the check.

        Arguments:
            id: check (order) id
            name: check name
            denominator: expression with the values to check
        """
        self.id = id
        self.name = name
        self.denominator = denominator
        # Parser of the denominator expression; it also provides the goals
        # that must be fetched to evaluate this check.
        self._denominator_parser = Parser(denominator, denominator)
        self._goals = self._denominator_parser.get_goals()

    def get_goals(self) -> List:
        """
        List of all goals needed to evaluate the check in the experiment.

        Returns:
            list of parsed structured goals
        """
        return self._goals

    def evaluate_agg(self, goals: pd.DataFrame, default_exp_variant_id: str) -> pd.DataFrame:
        """
        Evaluate this check from pre-aggregated goals.

        Arguments:
            goals: one row per experiment variant
            default_exp_variant_id: default variant

        See [`Experiment.evaluate_agg`][epstats.toolkit.experiment.Experiment.evaluate_agg] for details
        on `goals` at input.

        Returns:
            `checks` dataframe with columns:

        1. `timestamp` - timestamp of evaluation
        1. `exp_id` - experiment id
        1. `check_id` - check id as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
        1. `variable_id` - name of the variable in check evaluation, SRM check has following variables `p_value`,
        `test_stat`, `confidence_level`
        1. `value` - value of the variable
        """
        raise NotImplementedError()

    def evaluate_by_unit(self, goals: pd.DataFrame, default_exp_variant_id: str) -> pd.DataFrame:
        """
        Evaluate this check from goals aggregated by unit.

        Arguments:
            goals: one row per experiment variant
            default_exp_variant_id: default variant

        See [`Experiment.evaluate_by_unit`][epstats.toolkit.experiment.Experiment.evaluate_by_unit] for details
        on `goals` at input.

        Returns:
            `checks` dataframe with columns:

        1. `timestamp` - timestamp of evaluation
        1. `exp_id` - experiment id
        1. `check_id` - check id as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
        1. `variable_id` - name of the variable in check evaluation, SRM check has following variables `p_value`,
        `test_stat`, `confidence_level`
        1. `value` - value of the variable
        """
        raise NotImplementedError()

evaluate_agg(goals, default_exp_variant_id)

Evaluate this check from pre-aggregated goals.

Parameters:

Name Type Description Default
goals DataFrame

one row per experiment variant

required
default_exp_variant_id str

default variant

required

See Experiment.evaluate_agg for details on goals at input.

Returns:

Type Description
DataFrame

checks dataframe with columns:

  1. timestamp - timestamp of evaluation
  2. exp_id - experiment id
  3. check_id - check id as in Experiment definition
  4. variable_id - name of the variable in check evaluation, SRM check has following variables p_value, test_stat, confidence_level
  5. value - value of the variable
Source code in src/epstats/toolkit/check.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def evaluate_agg(self, goals: pd.DataFrame, default_exp_variant_id: str) -> pd.DataFrame:
    """
    Evaluate this check from pre-aggregated goals.

    Arguments:
        goals: one row per experiment variant
        default_exp_variant_id: default variant

    See [`Experiment.evaluate_agg`][epstats.toolkit.experiment.Experiment.evaluate_agg] for details
    on `goals` at input.

    Returns:
        `checks` dataframe with columns:

    1. `timestamp` - timestamp of evaluation
    1. `exp_id` - experiment id
    1. `check_id` - check id as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
    1. `variable_id` - name of the variable in check evaluation, SRM check has following variables `p_value`,
    `test_stat`, `confidence_level`
    1. `value` - value of the variable
    """
    # Abstract method: concrete checks (e.g. SrmCheck) implement the evaluation.
    raise NotImplementedError()

evaluate_by_unit(goals, default_exp_variant_id)

Evaluate this check from goals aggregated by unit.

Parameters:

Name Type Description Default
goals DataFrame

one row per experiment variant

required
default_exp_variant_id str

default variant

required

See Experiment.evaluate_by_unit for details on goals at input.

Returns:

Type Description
DataFrame

checks dataframe with columns:

  1. timestamp - timestamp of evaluation
  2. exp_id - experiment id
  3. check_id - check id as in Experiment definition
  4. variable_id - name of the variable in check evaluation, SRM check has following variables p_value, test_stat, confidence_level
  5. value - value of the variable
Source code in src/epstats/toolkit/check.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def evaluate_by_unit(self, goals: pd.DataFrame, default_exp_variant_id: str) -> pd.DataFrame:
    """
    Evaluate this check from goals aggregated by unit.

    Arguments:
        goals: one row per experiment variant
        default_exp_variant_id: default variant

    See [`Experiment.evaluate_by_unit`][epstats.toolkit.experiment.Experiment.evaluate_by_unit] for details
    on `goals` at input.

    Returns:
        `checks` dataframe with columns:

    1. `timestamp` - timestamp of evaluation
    1. `exp_id` - experiment id
    1. `check_id` - check id as in [`Experiment`][epstats.toolkit.experiment.Experiment] definition
    1. `variable_id` - name of the variable in check evaluation, SRM check has following variables `p_value`,
    `test_stat`, `confidence_level`
    1. `value` - value of the variable
    """
    # Abstract method: concrete checks (e.g. SrmCheck) implement the evaluation.
    raise NotImplementedError()

get_goals()

List of all goals needed to evaluate the check in the experiment.

Returns:

Type Description
List

list of parsed structured goals

Source code in src/epstats/toolkit/check.py
25
26
27
28
29
30
31
32
def get_goals(self) -> List:
    """
    Return all goals needed to evaluate the check in the experiment.

    Returns:
        list of parsed structured goals
    """
    goals = self._goals
    return goals

SRM Check

Bases: Check

Sample ratio mismatch check checking randomization of units to variants using Chi-square test.

Source code in src/epstats/toolkit/check.py
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
class SrmCheck(Check):
    """
    [Sample ratio mismatch check](../stats/basics.md#sample-ratio-mismatch-check) checking randomization
    of units to variants using [Chi-square test](https://en.wikipedia.org/wiki/Chi-squared_test).
    """

    def __init__(
        self,
        id: int,
        name: str,
        denominator: str,
        confidence_level: float = 0.999,
        **unused_kwargs,
    ):
        """
        Constructor of the SRM check.

        Arguments:
            id: check (order) id
            name: check name
            denominator: values to check
            confidence_level: confidence level of the statistical test

        Usage:
        ```python
        SrmCheck(1, 'SRM', 'count(test_unit_type.global.exposure)')
        ```
        """
        super().__init__(id, name, denominator)
        self.confidence_level = confidence_level

    def evaluate_agg(self, goals: pd.DataFrame, default_exp_variant_id: str) -> pd.DataFrame:
        """
        See [`Check.evaluate_agg`][epstats.toolkit.check.Check.evaluate_agg].
        """
        # input example:
        # test - srm, a, global.exposure, 10000, 10010, 10010, 0.0, 0.0
        # test - srm, b, global.exposure, 10010, 10010, 10010, 0.0, 0.0
        # test - srm, c, global.exposure, 10040, 10040, 10040, 0.0, 0.0

        # output example:
        # test - srm, 1, SRM, p_value, 0.20438
        # test - srm, 1, SRM, test_stat, 3.17552
        # test - srm, 1, SRM, confidence_level, 0.999

        # prepare data - we only need exposures
        exposures, _, _ = self._denominator_parser.evaluate_agg(goals)
        stat, pval = self._chisquare(exposures)
        return self._result_frame(pval, stat)

    def evaluate_by_unit(self, goals: pd.DataFrame, default_exp_variant_id: str) -> pd.DataFrame:
        """
        See [`Check.evaluate_by_unit`][epstats.toolkit.check.Check.evaluate_by_unit].
        """
        exposures, _, _ = self._denominator_parser.evaluate_by_unit(goals)
        # Consistency fix: previously only `evaluate_agg` guarded against
        # divide/invalid warnings; the shared helper now guards both paths.
        stat, pval = self._chisquare(exposures)
        return self._result_frame(pval, stat)

    def _chisquare(self, exposures):
        """Chi-square test of equal exposures across variants."""
        with np.errstate(divide="ignore", invalid="ignore"):
            # we fill in zeros, when goal data are missing for some variant.
            # There could be division by zero here which is expected as we return
            # nan or inf values to the caller.
            return chisquare(exposures)

    def _result_frame(self, pval, stat) -> pd.DataFrame:
        """Format SRM test results as the standard `checks` dataframe."""
        return pd.DataFrame(
            {
                "check_id": [self.id] * 3,
                "check_name": [self.name] * 3,
                "variable_id": ["p_value", "test_stat", "confidence_level"],
                "value": [pval, stat, self.confidence_level],
            }
        )

__init__(id, name, denominator, confidence_level=0.999, **unused_kwargs)

Constructor of the SRM check.

Parameters:

Name Type Description Default
id int

check (order) id

required
name str

check name

required
denominator str

values to check

required
confidence_level float

confidence level of the statistical test

0.999

Usage:

SrmCheck(1, 'SRM', 'count(test_unit_type.global.exposure)')

Source code in src/epstats/toolkit/check.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def __init__(
    self,
    id: int,
    name: str,
    denominator: str,
    confidence_level: float = 0.999,
    **unused_kwargs,
):
    """
    Create the SRM check.

    Arguments:
        id: check (order) id
        name: check name
        denominator: values to check
        confidence_level: confidence level of the statistical test

    Usage:
    ```python
    SrmCheck(1, 'SRM', 'count(test_unit_type.global.exposure)')
    ```
    """
    super().__init__(id, name, denominator)
    # Confidence level used when reporting the chi-square test result.
    self.confidence_level = confidence_level

evaluate_agg(goals, default_exp_variant_id)

See Check.evaluate_agg.

Source code in src/epstats/toolkit/check.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
def evaluate_agg(self, goals: pd.DataFrame, default_exp_variant_id: str) -> pd.DataFrame:
    """
    See [`Check.evaluate_agg`][epstats.toolkit.check.Check.evaluate_agg].
    """
    # Example input (one row per variant):
    # test - srm, a, global.exposure, 10000, 10010, 10010, 0.0, 0.0
    # test - srm, b, global.exposure, 10010, 10010, 10010, 0.0, 0.0
    # test - srm, c, global.exposure, 10040, 10040, 10040, 0.0, 0.0

    # Example output (one row per reported variable):
    # test - srm, 1, SRM, p_value, 0.20438
    # test - srm, 1, SRM, test_stat, 3.17552
    # test - srm, 1, SRM, confidence_level, 0.999

    # Only the exposure counts are needed for the SRM test.
    exposures, _, _ = self._denominator_parser.evaluate_agg(goals)

    # Chi-square goodness-of-fit test against equal expected exposures.
    with np.errstate(divide="ignore", invalid="ignore"):
        # Zeros are filled in when goal data are missing for some variant;
        # the resulting division by zero is expected and nan/inf values are
        # returned to the caller.
        stat, pval = chisquare(exposures)

    variable_ids = ["p_value", "test_stat", "confidence_level"]
    values = [pval, stat, self.confidence_level]
    return pd.DataFrame(
        {
            "check_id": [self.id] * len(variable_ids),
            "check_name": [self.name] * len(variable_ids),
            "variable_id": variable_ids,
            "value": values,
        }
    )

evaluate_by_unit(goals, default_exp_variant_id)

See Check.evaluate_by_unit.

Source code in src/epstats/toolkit/check.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
def evaluate_by_unit(self, goals: pd.DataFrame, default_exp_variant_id: str) -> pd.DataFrame:
    """
    See [`Check.evaluate_by_unit`][epstats.toolkit.check.Check.evaluate_by_unit].
    """

    # Only the exposure counts are needed for the SRM test.
    exposures, _, _ = self._denominator_parser.evaluate_by_unit(goals)

    # chi-square test; guard divide/invalid warnings consistently with
    # `evaluate_agg` - missing variant data may yield nan/inf which is
    # expected and returned to the caller.
    with np.errstate(divide="ignore", invalid="ignore"):
        stat, pval = chisquare(exposures)

    r = pd.DataFrame(
        {
            "check_id": [self.id, self.id, self.id],
            "check_name": [self.name, self.name, self.name],
            "variable_id": ["p_value", "test_stat", "confidence_level"],
            "value": [pval, stat, self.confidence_level],
        }
    )
    return r

Simple SRM Check

Bases: SrmCheck

Simplified definition of SRM check.

Source code in src/epstats/toolkit/check.py
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
class SimpleSrmCheck(SrmCheck):
    """Simplified definition of SRM check."""

    def __init__(
        self,
        id: int,
        name: str,
        denominator: str,
        confidence_level: float = 0.999,
        unit_type: str = "test_unit_type",
    ):
        """
        Constructor of the simplified SRM check.

        It modifies parameter denominator in a way that it is in line with general SRM Check definition. It adds all
        the niceties necessary for proper SrmCheck format. Finally it calls constructor of the parent SrmCheck class.

        Arguments:
            id: check (order) id
            name: check name
            denominator: value (column) of the denominator
            confidence_level: confidence level of the statistical test
            unit_type: unit type

        Usage:
        ```python
        SimpleSrmCheck(1, 'SRM', 'exposures')
        ```
        """
        # Expand the bare column name into the full expression expected by
        # SrmCheck, e.g. "exposures" -> "value(test_unit_type.global.exposures)".
        den = f"value({unit_type}.global.{denominator})"
        super().__init__(id, name, den, confidence_level)

__init__(id, name, denominator, confidence_level=0.999, unit_type='test_unit_type')

Constructor of the simplified SRM check.

It modifies parameter denominator in a way that it is in line with general SRM Check definition. It adds all the niceties necessary for proper SrmCheck format. Finally it calls constructor of the parent SrmCheck class.

Parameters:

Name Type Description Default
id int

check (order) id

required
name str

check name

required
denominator str

value (column) of the denominator

required
confidence_level float

confidence level of the statistical test

0.999
unit_type str

unit type

'test_unit_type'

Usage:

SimpleSrmCheck(1, 'SRM', 'exposures')

Source code in src/epstats/toolkit/check.py
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def __init__(
    self,
    id: int,
    name: str,
    denominator: str,
    confidence_level: float = 0.999,
    unit_type: str = "test_unit_type",
):
    """
    Constructor of the simplified SRM check.

    It modifies parameter denominator in a way that it is in line with general SRM Check definition. It adds all
    the niceties necessary for proper SrmCheck format. Finally it calls constructor of the parent SrmCheck class.

    Arguments:
        id: check (order) id
        name: check name
        denominator: value (column) of the denominator
        confidence_level: confidence level of the statistical test
        unit_type: unit type

    Usage:
    ```python
    SimpleSrmCheck(1, 'SRM', 'exposures')
    ```
    """
    # Expand the bare column name into the full expression expected by
    # SrmCheck, e.g. "exposures" -> "value(test_unit_type.global.exposures)".
    den = f"value({unit_type}.global.{denominator})"
    super().__init__(id, name, den, confidence_level)

SumRatio Check

Bases: Check

Computes the ratio of nominator, denominator goal counts summed across all variants.

Sum ratio check.

Source code in src/epstats/toolkit/check.py
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
class SumRatioCheck(Check):
    """
    Computes the ratio of `nominator`, `denominator` goal counts summed across all variants.

    [Sum ratio check](../stats/basics.md#sum-ratio-check).
    """

    def __init__(
        self,
        id: int,
        name: str,
        nominator: str,
        denominator: str,
        max_sum_ratio: float = 0.01,
        confidence_level: float = 0.999,
        **unused_kwargs,
    ):
        """
        Constructor of the check.

        Arguments:
            id: check (order) id
            name: check name
            nominator: goal in the ratio numerator
            denominator: goal in the ratio denominator
            max_sum_ratio: maximum allowed sum_ratio value
            confidence_level: confidence level of the statistical test

        Usage:
        ```python
        SumRatioCheck(
            1,
            "SumRatio",
            "count(test_unit_type.global.inconsistent_exposure)",
            "count(test_unit_type.global.exposure)"
        )
        ```
        """
        super().__init__(id, name, denominator)
        self.max_sum_ratio = max_sum_ratio
        self.confidence_level = confidence_level
        self.nominator = nominator
        # Separate parser for the nominator expression; its goals are merged
        # into the goals required by the denominator so both get fetched.
        self._nominator_parser = Parser(nominator, nominator)
        self._goals = self._goals.union(self._nominator_parser.get_goals())

    def evaluate_agg(self, goals: pd.DataFrame, default_exp_variant_id: str) -> pd.DataFrame:
        """
        See [`Check.evaluate_agg`][epstats.toolkit.check.Check.evaluate_agg].
        """

        denominator_counts, _, _ = self._denominator_parser.evaluate_agg(goals)
        nominator_counts, _, _ = self._nominator_parser.evaluate_agg(goals)

        # chi-square test
        with np.errstate(divide="ignore", invalid="ignore"):
            # Division by zero is expected when denominator counts are missing;
            # nan/inf is then returned to the caller.
            sum_ratio = nominator_counts.sum() / denominator_counts.sum()

            # NOTE(review): tests observed counts [total, total - nominator_total]
            # against equal expected counts - confirm this is the intended test.
            stat, pval = chisquare([denominator_counts.sum(), denominator_counts.sum() - nominator_counts.sum()])

        r = pd.DataFrame(
            {
                "check_id": self.id,
                "check_name": self.name,
                "variable_id": [
                    "sum_ratio",
                    "max_sum_ratio",
                    "p_value",
                    "test_stat",
                    "confidence_level",
                ],
                "value": [
                    sum_ratio,
                    self.max_sum_ratio,
                    pval,
                    stat,
                    self.confidence_level,
                ],
            }
        )
        return r

__init__(id, name, nominator, denominator, max_sum_ratio=0.01, confidence_level=0.999, **unused_kwargs)

Constructor of the check.

Parameters:

Name Type Description Default
id int

check (order) id

required
name str

check name

required
nominator str

goal in the ratio numerator

required
denominator str

goal in the ratio denominator

required
max_sum_ratio float

maximum allowed sum_ratio value

0.01
confidence_level float

confidence level of the statistical test

0.999

Usage:

SumRatioCheck(
    1,
    "SumRatio",
    "count(test_unit_type.global.inconsistent_exposure)",
    "count(test_unit_type.global.exposure)"
)

Source code in src/epstats/toolkit/check.py
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
def __init__(
    self,
    id: int,
    name: str,
    nominator: str,
    denominator: str,
    max_sum_ratio: float = 0.01,
    confidence_level: float = 0.999,
    **unused_kwargs,
):
    """
    Constructor of the check.

    Arguments:
        id: check (order) id
        name: check name
        nominator: goal in the ratio numerator
        denominator: goal in the ratio denominator
        max_sum_ratio: maximum allowed sum_ratio value
        confidence_level: confidence level of the statistical test

    Usage:
    ```python
    SumRatioCheck(
        1,
        "SumRatio",
        "count(test_unit_type.global.inconsistent_exposure)",
        "count(test_unit_type.global.exposure)"
    )
    ```
    """
    super().__init__(id, name, denominator)
    self.max_sum_ratio = max_sum_ratio
    self.confidence_level = confidence_level
    self.nominator = nominator
    # Separate parser for the nominator expression; its goals are merged
    # into the goals required by the denominator so both get fetched.
    self._nominator_parser = Parser(nominator, nominator)
    self._goals = self._goals.union(self._nominator_parser.get_goals())

evaluate_agg(goals, default_exp_variant_id)

See Check.evaluate_agg.

Source code in src/epstats/toolkit/check.py
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
def evaluate_agg(self, goals: pd.DataFrame, default_exp_variant_id: str) -> pd.DataFrame:
    """
    See [`Check.evaluate_agg`][epstats.toolkit.check.Check.evaluate_agg].
    """

    denominator_counts, _, _ = self._denominator_parser.evaluate_agg(goals)
    nominator_counts, _, _ = self._nominator_parser.evaluate_agg(goals)

    denominator_total = denominator_counts.sum()
    nominator_total = nominator_counts.sum()

    # chi-square test; division by zero is expected when denominator counts
    # are missing and nan/inf is returned to the caller.
    with np.errstate(divide="ignore", invalid="ignore"):
        sum_ratio = nominator_total / denominator_total
        stat, pval = chisquare([denominator_total, denominator_total - nominator_total])

    variable_ids = [
        "sum_ratio",
        "max_sum_ratio",
        "p_value",
        "test_stat",
        "confidence_level",
    ]
    values = [
        sum_ratio,
        self.max_sum_ratio,
        pval,
        stat,
        self.confidence_level,
    ]
    return pd.DataFrame(
        {
            "check_id": self.id,
            "check_name": self.name,
            "variable_id": variable_ids,
            "value": values,
        }
    )