Skip to content

bgc_data_processing.core.loaders.csv_loaders

CSV Loaders.

CSVLoader(provider_name, category, exclude, variables, read_params=None)

Bases: BaseLoader

Loader class to use with csv files.

Parameters:

Name Type Description Default
provider_name str

Data provider name.

required
category str

Category provider belongs to.

required
exclude list[str]

Filenames to exclude from loading.

required
variables SourceVariableSet

Storer object containing all variables to consider for this data, both the ones present in the data file and the ones not represented in the file.

required
read_params dict | None

Additional parameters to pass to pandas.read_csv, by default None.

None
Source code in src/bgc_data_processing/core/loaders/csv_loaders.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def __init__(
    self,
    provider_name: str,
    category: str,
    exclude: list[str],
    variables: "SourceVariableSet",
    read_params: dict | None = None,
) -> None:
    """Set up the csv loader.

    Parameters
    ----------
    provider_name : str
        Data provider name.
    category : str
        Category provider belongs to.
    exclude : list[str]
        Filenames to exclude from loading.
    variables : SourceVariableSet
        Storer object containing all variables to consider for this data.
    read_params : dict | None, optional
        Additional parameters to pass to pandas.read_csv, by default None.
    """
    # Only a missing value is replaced: a dict supplied by the caller
    # (even an empty one) is stored as-is, without copying.
    self._read_params = {} if read_params is None else read_params
    # Everything else is handled by the parent initializer.
    super().__init__(
        provider_name=provider_name,
        category=category,
        exclude=exclude,
        variables=variables,
    )

load(filepath, constraints=None)

Load a csv file from filepath.

Parameters:

Name Type Description Default
filepath Path | str

Path to the file to load.

required
constraints Constraints | None

Constraints slicer, by default None.

None

Returns:

Type Description
DataFrame

DataFrame corresponding to the file.

Source code in src/bgc_data_processing/core/loaders/csv_loaders.py
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
def load(
    self,
    filepath: Path | str,
    constraints: Constraints | None = None,
) -> pd.DataFrame:
    """Load a csv file from filepath.

    The file is passed, in order, through ``_read``, ``_format``,
    ``_convert_types`` and ``_correct``; the constraints are then applied
    and the result is handed to ``remove_nan_rows``.

    Parameters
    ----------
    filepath : Path | str
        Path to the file to load.
    constraints : Constraints | None, optional
        Constraints slicer, by default None.
        When None, a default-constructed ``Constraints()`` is used.

    Returns
    -------
    pd.DataFrame
        DataFrame corresponding to the file.
    """
    # Fall back to a fresh Constraints object when none was supplied.
    slicer = Constraints() if constraints is None else constraints
    # The path is always normalized to a Path before reading.
    raw = self._read(Path(filepath))
    formatted = self._format(raw)
    typed = self._convert_types(formatted)
    corrected = self._correct(typed)
    sliced = slicer.apply_constraints_to_dataframe(corrected)
    return self.remove_nan_rows(sliced)