Skip to content

Test Data

Utility methods to load sample (test) data that are used in unit tests throughout this project.

Source code in src/epstats/toolkit/testing/test_data.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
class TestData:
    """
    Utility methods to load sample (test) data that are used in unit tests throughout this
    project.

    Every public loader reads one CSV file located via `files(resources)` and returns it
    as a `pandas.DataFrame`. Loaders that accept `exp_id` return only the rows whose
    `exp_id` column equals the given value; when `exp_id` is `None` the whole file is returned.
    """

    @classmethod
    def _load_resource_csv(cls, file_name: str, exp_id: str = None) -> pd.DataFrame:
        """
        Read the CSV file `file_name` from the `resources` location and optionally
        restrict it to a single experiment.

        This is the single place where all public loaders read and filter their data,
        so they cannot drift apart.

        Arguments:
            file_name: name of the CSV file to read, e.g. `goals_agg.csv`
            exp_id: experiment id; `None` disables filtering and returns all rows
        """
        frame = pd.read_csv(files(resources).joinpath(file_name))
        if exp_id is None:
            return frame
        # Boolean mask keeps only rows of the requested experiment.
        return frame[frame["exp_id"] == exp_id]

    @classmethod
    def load_goals_agg(cls, exp_id: str = None) -> pd.DataFrame:
        """
        Load sample of aggregated test data to evaluate metrics. We use this dataset
        in unit testing and we are making it available here for other possible use-cases too.

        See `load_evaluations` set of functions to load corresponding evaluation results.

        Arguments:
            exp_id: experiment id; when `None`, data of all experiments are returned
        """
        return cls._load_resource_csv("goals_agg.csv", exp_id)

    @classmethod
    def load_goals_simple_agg(cls) -> pd.DataFrame:
        """
        Load sample of aggregated test data in simple wide format. File `goals_simple_agg.csv` contains only one
        experiment, so it is sufficient to just open it.

        We use this dataset in unit testing and we are making it available here for other possible use-cases too.

        See `load_evaluations` set of functions to load corresponding evaluation results.
        """
        return cls._load_resource_csv("goals_simple_agg.csv")

    @classmethod
    def load_goals_by_unit(cls, exp_id: str = None) -> pd.DataFrame:
        """
        Load sample of test data by unit to evaluate metrics. We use this dataset
        in unit testing and we are making it available here for other possible use-cases too.

        See `load_evaluations` set of functions to load corresponding evaluation results.

        Arguments:
            exp_id: experiment id; when `None`, data of all experiments are returned
        """
        return cls._load_resource_csv("goals_by_unit.csv", exp_id)

    @classmethod
    def load_evaluations_checks(cls, exp_id: str = None) -> pd.DataFrame:
        """
        Load checks (SRM) evaluation results. This data can be used to assert against
        after running evaluation on [pre-aggregated][epstats.toolkit.testing.test_data.TestData.load_goals_agg]
        or [by-unit][epstats.toolkit.testing.test_data.TestData.load_goals_by_unit] test data.

        Arguments:
            exp_id: experiment id; when `None`, results of all experiments are returned
        """
        return cls._load_resource_csv("evaluations_checks.csv", exp_id)

    @classmethod
    def load_evaluations_exposures(cls, exp_id: str = None) -> pd.DataFrame:
        """
        Load exposures evaluation results. This data can be used to assert against
        after running evaluation on [pre-aggregated][epstats.toolkit.testing.test_data.TestData.load_goals_agg]
        or [by-unit][epstats.toolkit.testing.test_data.TestData.load_goals_by_unit] test data.

        Arguments:
            exp_id: experiment id; when `None`, results of all experiments are returned
        """
        return cls._load_resource_csv("evaluations_exposures.csv", exp_id)

    @classmethod
    def load_evaluations_metrics(cls, exp_id: str = None) -> pd.DataFrame:
        """
        Load metric evaluation results. This data can be used to assert against
        after running evaluation on [pre-aggregated][epstats.toolkit.testing.test_data.TestData.load_goals_agg]
        or [by-unit][epstats.toolkit.testing.test_data.TestData.load_goals_by_unit] test data.

        Arguments:
            exp_id: experiment id; when `None`, results of all experiments are returned
        """
        return cls._load_resource_csv("evaluations_metrics.csv", exp_id)

load_evaluations_checks(exp_id=None) classmethod

Load checks (SRM) evaluation results. This data can be used to assert against after running an evaluation on pre-aggregated or by-unit test data.

Parameters:

Name Type Description Default
exp_id str

experiment id

None
Source code in src/epstats/toolkit/testing/test_data.py
58
59
60
61
62
63
64
65
66
67
68
69
70
@classmethod
def load_evaluations_checks(cls, exp_id: str = None) -> pd.DataFrame:
    """
    Read `evaluations_checks.csv` with checks (SRM) evaluation results. The returned frame
    can serve as the expected value in asserts after running evaluation on
    [pre-aggregated][epstats.toolkit.testing.test_data.TestData.load_goals_agg]
    or [by-unit][epstats.toolkit.testing.test_data.TestData.load_goals_by_unit] test data.

    Arguments:
        exp_id: experiment id; when `None`, rows of all experiments are returned
    """
    checks_df = pd.read_csv(files(resources).joinpath("evaluations_checks.csv"))
    if exp_id is None:
        return checks_df
    # Keep only rows belonging to the requested experiment.
    return checks_df[checks_df.exp_id == exp_id]

load_evaluations_exposures(exp_id=None) classmethod

Load exposures evaluation results. This data can be used to assert against after running an evaluation on pre-aggregated or by-unit test data.

Parameters:

Name Type Description Default
exp_id str

experiment id

None
Source code in src/epstats/toolkit/testing/test_data.py
72
73
74
75
76
77
78
79
80
81
82
83
84
@classmethod
def load_evaluations_exposures(cls, exp_id: str = None) -> pd.DataFrame:
    """
    Read `evaluations_exposures.csv` with exposures evaluation results. The returned frame
    can serve as the expected value in asserts after running evaluation on
    [pre-aggregated][epstats.toolkit.testing.test_data.TestData.load_goals_agg]
    or [by-unit][epstats.toolkit.testing.test_data.TestData.load_goals_by_unit] test data.

    Arguments:
        exp_id: experiment id; when `None`, rows of all experiments are returned
    """
    source = files(resources).joinpath("evaluations_exposures.csv")
    all_exposures = pd.read_csv(source)
    if exp_id is None:
        return all_exposures
    # Keep only rows belonging to the requested experiment.
    return all_exposures[all_exposures.exp_id == exp_id]

load_evaluations_metrics(exp_id=None) classmethod

Load metric evaluation results. This data can be used to assert against after running an evaluation on pre-aggregated or by-unit test data.

Parameters:

Name Type Description Default
exp_id str

experiment id

None
Source code in src/epstats/toolkit/testing/test_data.py
86
87
88
89
90
91
92
93
94
95
96
97
98
@classmethod
def load_evaluations_metrics(cls, exp_id: str = None) -> pd.DataFrame:
    """
    Read `evaluations_metrics.csv` with metric evaluation results. The returned frame
    can serve as the expected value in asserts after running evaluation on
    [pre-aggregated][epstats.toolkit.testing.test_data.TestData.load_goals_agg]
    or [by-unit][epstats.toolkit.testing.test_data.TestData.load_goals_by_unit] test data.

    Arguments:
        exp_id: experiment id; when `None`, rows of all experiments are returned
    """
    metrics_df = pd.read_csv(files(resources).joinpath("evaluations_metrics.csv"))
    if exp_id is None:
        return metrics_df
    # Keep only rows belonging to the requested experiment.
    return metrics_df[metrics_df.exp_id == exp_id]

load_goals_agg(exp_id=None) classmethod

Load sample of aggregated test data to evaluate metrics. We use this dataset in unit testing and we are making it available here for other possible use-cases too.

See load_evaluations set of functions to load corresponding evaluation results.

Parameters:

Name Type Description Default
exp_id str

experiment id

None
Source code in src/epstats/toolkit/testing/test_data.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
@classmethod
def load_goals_agg(cls, exp_id: str = None) -> pd.DataFrame:
    """
    Read `goals_agg.csv`, a sample of aggregated test data used to evaluate metrics.
    The dataset backs our unit tests and is exposed here for other possible use-cases too.

    See `load_evaluations` set of functions to load corresponding evaluation results.

    Arguments:
        exp_id: experiment id; when `None`, rows of all experiments are returned
    """
    source = files(resources).joinpath("goals_agg.csv")
    all_goals = pd.read_csv(source)
    if exp_id is None:
        return all_goals
    # Keep only rows belonging to the requested experiment.
    return all_goals[all_goals.exp_id == exp_id]

load_goals_by_unit(exp_id=None) classmethod

Load sample of test data by unit to evaluate metrics. We use this dataset in unit testing and we are making it available here for other possible use-cases too.

See load_evaluations set of functions to load corresponding evaluation results.

Parameters:

Name Type Description Default
exp_id str

experiment id

None
Source code in src/epstats/toolkit/testing/test_data.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
@classmethod
def load_goals_by_unit(cls, exp_id: str = None) -> pd.DataFrame:
    """
    Read `goals_by_unit.csv`, a sample of by-unit test data used to evaluate metrics.
    The dataset backs our unit tests and is exposed here for other possible use-cases too.

    See `load_evaluations` set of functions to load corresponding evaluation results.

    Arguments:
        exp_id: experiment id; when `None`, rows of all experiments are returned
    """
    source = files(resources).joinpath("goals_by_unit.csv")
    unit_goals = pd.read_csv(source)
    if exp_id is None:
        return unit_goals
    # Keep only rows belonging to the requested experiment.
    return unit_goals[unit_goals.exp_id == exp_id]

load_goals_simple_agg() classmethod

Load sample of aggregated test data in simple wide format. File goals_simple_agg.csv contains only one experiment, so it is sufficient to just open it.

We use this dataset in unit testing and we are making it available here for other possible use-cases too.

See load_evaluations set of functions to load corresponding evaluation results.

Source code in src/epstats/toolkit/testing/test_data.py
29
30
31
32
33
34
35
36
37
38
39
40
41
@classmethod
def load_goals_simple_agg(cls) -> pd.DataFrame:
    """
    Read `goals_simple_agg.csv`, a sample of aggregated test data in simple wide format.
    The file contains only one experiment, so it is returned as-is without any filtering.

    The dataset backs our unit tests and is exposed here for other possible use-cases too.

    See `load_evaluations` set of functions to load corresponding evaluation results.
    """
    return pd.read_csv(files(resources).joinpath("goals_simple_agg.csv"))