`bgc_data_processing.core.io.readers`¶

Read generated files.

`Reader(filepath, providers_column_label='PROVIDER', expocode_column_label='EXPOCODE', date_column_label='DATE', year_column_label='YEAR', month_column_label='MONTH', day_column_label='DAY', hour_column_label='HOUR', latitude_column_label='LATITUDE', longitude_column_label='LONGITUDE', depth_column_label='DEPH', variables_reference=None, category='in_situ', unit_row_index=1, delim_whitespace=True)` ¶

Reading routine to parse csv files.

Parameters:

Name	Type	Description	Default
`filepath`	`Path \| str`	Path to the file to read.	required
`providers_column_label`	`str`	Provider column in the dataframe., by default "PROVIDER"	`'PROVIDER'`
`expocode_column_label`	`str`	Expocode column in the dataframe., by default "EXPOCODE"	`'EXPOCODE'`
`date_column_label`	`str`	Date column in the dataframe., by default "DATE"	`'DATE'`
`year_column_label`	`str`	Year column in the dataframe., by default "YEAR"	`'YEAR'`
`month_column_label`	`str`	Month column in the dataframe., by default "MONTH"	`'MONTH'`
`day_column_label`	`str`	Day column in the dataframe., by default "DAY"	`'DAY'`
`hour_column_label`	`str`	Hour column in the dataframe., by default "HOUR"	`'HOUR'`
`latitude_column_label`	`str`	Latitude column in the dataframe., by default "LATITUDE"	`'LATITUDE'`
`longitude_column_label`	`str`	Longitude column in the dataframe., by default "LONGITUDE"	`'LONGITUDE'`
`depth_column_label`	`str`	Depth column in the dataframe., by default "DEPH"	`'DEPH'`
`variables_reference`	`list[BaseVar] \| None`	List of variables to use as reference. If a variable label is a column name, this variable will be used for the output storer., by default None	`None`
`category`	`str`	Category of the loaded file., by default "in_situ"	`'in_situ'`
`unit_row_index`	`int`	Index of the row with the units, None if there's no unit row., by default 1	`1`
`delim_whitespace`	`bool`	Whether to use whitespace as delimiters., by default True	`True`

Examples:

Loading from a file:

>>> filepath = "path/to/file"
>>> reader = Reader(filepath, providers_column_label="providers_column_name")

Getting the storer:

>>> storer = reader.get_storer()

Source code in src/bgc_data_processing/core/io/readers.py

def __init__(
    self,
    filepath: Path | str,
    providers_column_label: str = "PROVIDER",
    expocode_column_label: str = "EXPOCODE",
    date_column_label: str = "DATE",
    year_column_label: str = "YEAR",
    month_column_label: str = "MONTH",
    day_column_label: str = "DAY",
    hour_column_label: str = "HOUR",
    latitude_column_label: str = "LATITUDE",
    longitude_column_label: str = "LONGITUDE",
    depth_column_label: str = "DEPH",
    variables_reference: list[BaseVar] | None = None,
    category: str = "in_situ",
    unit_row_index: int = 1,
    delim_whitespace: bool = True,
):
    """Read the file and set up the data, providers and variables.

    Parameters
    ----------
    filepath : Path | str
        Path to the file to read.
    providers_column_label : str, optional
        Provider column in the dataframe, by default "PROVIDER"
    expocode_column_label : str, optional
        Expocode column in the dataframe, by default "EXPOCODE"
    date_column_label : str, optional
        Date column in the dataframe, by default "DATE"
    year_column_label : str, optional
        Year column in the dataframe, by default "YEAR"
    month_column_label : str, optional
        Month column in the dataframe, by default "MONTH"
    day_column_label : str, optional
        Day column in the dataframe, by default "DAY"
    hour_column_label : str, optional
        Hour column in the dataframe, by default "HOUR"
    latitude_column_label : str, optional
        Latitude column in the dataframe, by default "LATITUDE"
    longitude_column_label : str, optional
        Longitude column in the dataframe, by default "LONGITUDE"
    depth_column_label : str, optional
        Depth column in the dataframe, by default "DEPH"
    variables_reference : list[BaseVar] | None, optional
        Variables to use as reference, indexed internally by their label,
        by default None
    category : str, optional
        Category of the loaded file, by default "in_situ"
    unit_row_index : int, optional
        Index of the row with the units, by default 1
    delim_whitespace : bool, optional
        Whether to use whitespace as delimiters, by default True
    """
    # Define the lookup on every path: previously the attribute was only set
    # when references were supplied, leaving it missing for the default None.
    reference_list = [] if variables_reference is None else variables_reference
    self._reference_vars = {var.label: var for var in reference_list}

    raw_df, unit_row = self._read(
        filepath=Path(filepath),
        unit_row_index=unit_row_index,
        delim_whitespace=delim_whitespace,
    )
    # Maps each column label found in the file to the role it plays.
    mandatory_vars = {
        providers_column_label: "provider",
        expocode_column_label: "expocode",
        date_column_label: "date",
        year_column_label: "year",
        month_column_label: "month",
        day_column_label: "day",
        hour_column_label: "hour",
        latitude_column_label: "latitude",
        longitude_column_label: "longitude",
        depth_column_label: "depth",
    }
    self._category = category
    if providers_column_label is not None:
        self._providers = raw_df[providers_column_label].unique().tolist()
    else:
        # No provider column to read from: fall back to a placeholder name.
        self._providers = ["????"]
    self._data = self._add_date_columns(
        raw_df,
        year_column_label,
        month_column_label,
        day_column_label,
        date_column_label,
    )
    self._variables = self._get_variables(raw_df, unit_row, mandatory_vars)

`get_storer()` ¶

Return the Storer storing the data loaded.

Returns:

Type	Description
`Storer`	Contains the data from the csv.

Source code in src/bgc_data_processing/core/io/readers.py

def get_storer(self) -> "Storer":
    """Build a Storer from the data loaded by this reader.

    Returns
    -------
    Storer
        Contains the data from the csv.
    """
    # Attributes are read in the same order as the keyword arguments below.
    loaded_data = self._data
    loaded_category = self._category
    loaded_providers = self._providers
    stored_variables = self._variables.storing_variables
    return Storer(
        data=loaded_data,
        category=loaded_category,
        providers=loaded_providers,
        variables=stored_variables,
    )

`read_files(filepath, providers_column_label='PROVIDER', expocode_column_label='EXPOCODE', date_column_label='DATE', year_column_label='YEAR', month_column_label='MONTH', day_column_label='DAY', hour_column_label='HOUR', latitude_column_label='LATITUDE', longitude_column_label='LONGITUDE', depth_column_label='DEPH', variables_reference=None, category='in_situ', unit_row_index=1, delim_whitespace=True)` ¶

Build Storer reading data from csv or txt files.

Parameters:

Name	Type	Description	Default
`filepath`	`Path \| str \| list[Path] \| list[str]`	Path to the file to read, or list of paths to read and aggregate.	required
`providers_column_label`	`str`	Provider column in the dataframe., by default "PROVIDER"	`'PROVIDER'`
`expocode_column_label`	`str`	Expocode column in the dataframe., by default "EXPOCODE"	`'EXPOCODE'`
`date_column_label`	`str`	Date column in the dataframe., by default "DATE"	`'DATE'`
`year_column_label`	`str`	Year column in the dataframe., by default "YEAR"	`'YEAR'`
`month_column_label`	`str`	Month column in the dataframe., by default "MONTH"	`'MONTH'`
`day_column_label`	`str`	Day column in the dataframe., by default "DAY"	`'DAY'`
`hour_column_label`	`str`	Hour column in the dataframe., by default "HOUR"	`'HOUR'`
`latitude_column_label`	`str`	Latitude column in the dataframe., by default "LATITUDE"	`'LATITUDE'`
`longitude_column_label`	`str`	Longitude column in the dataframe., by default "LONGITUDE"	`'LONGITUDE'`
`depth_column_label`	`str`	Depth column in the dataframe., by default "DEPH"	`'DEPH'`
`variables_reference`	`list[BaseVar] \| None`	List of variables to use as reference. If a variable label is a column name, this variable will be used for the output storer., by default None	`None`
`category`	`str`	Category of the loaded file., by default "in_situ"	`'in_situ'`
`unit_row_index`	`int`	Index of the row with the units, None if there's no unit row., by default 1	`1`
`delim_whitespace`	`bool`	Whether to use whitespace as delimiters., by default True	`True`

Returns:

Type	Description
`Storer`	Storer aggregating the data from all the files

Raises:

Type	Description
`TypeError`	If the filepath argument is not a Path, a string, or a list of those.

Examples:

Loading from a single file:

>>> filepath = "path/to/file"
>>> storer = read_files(filepath, providers_column_label="providers_column_name")

Loading from multiple files:

>>> filepaths = [
...     "path/to/file1",
...     "path/to/file2",
... ]
>>> storer = read_files(
...     filepaths,
... )

Source code in src/bgc_data_processing/core/io/readers.py

def read_files(
    filepath: Path | str | list[Path] | list[str],
    providers_column_label: str = "PROVIDER",
    expocode_column_label: str = "EXPOCODE",
    date_column_label: str = "DATE",
    year_column_label: str = "YEAR",
    month_column_label: str = "MONTH",
    day_column_label: str = "DAY",
    hour_column_label: str = "HOUR",
    latitude_column_label: str = "LATITUDE",
    longitude_column_label: str = "LONGITUDE",
    depth_column_label: str = "DEPH",
    variables_reference: list[BaseVar] | None = None,
    category: str = "in_situ",
    unit_row_index: int = 1,
    delim_whitespace: bool = True,
) -> "Storer":
    """Build a Storer by reading one or several csv or txt files.

    Parameters
    ----------
    filepath : Path | str | list[Path] | list[str]
        Path to the file to read, or list of paths to read and aggregate.
    providers_column_label : str, optional
        Provider column in the dataframe, by default "PROVIDER"
    expocode_column_label : str, optional
        Expocode column in the dataframe, by default "EXPOCODE"
    date_column_label : str, optional
        Date column in the dataframe, by default "DATE"
    year_column_label : str, optional
        Year column in the dataframe, by default "YEAR"
    month_column_label : str, optional
        Month column in the dataframe, by default "MONTH"
    day_column_label : str, optional
        Day column in the dataframe, by default "DAY"
    hour_column_label : str, optional
        Hour column in the dataframe, by default "HOUR"
    latitude_column_label : str, optional
        Latitude column in the dataframe, by default "LATITUDE"
    longitude_column_label : str, optional
        Longitude column in the dataframe, by default "LONGITUDE"
    depth_column_label : str, optional
        Depth column in the dataframe, by default "DEPH"
    variables_reference : list[BaseVar] | None, optional
        List of variables to use as reference. If a variable label is a
        column name, this variable will be used for the output storer,
        by default None
    category : str, optional
        Category of the loaded file, by default "in_situ"
    unit_row_index : int, optional
        Index of the row with the units, None if there's no unit row,
        by default 1
    delim_whitespace : bool, optional
        Whether to use whitespace as delimiters, by default True

    Returns
    -------
    Storer
        Storer aggregating the data from all the files

    Raises
    ------
    TypeError
        If filepath (or an item of the list) is neither a Path nor a string.

    Examples
    --------
    Loading from a single file:
    >>> filepath = "path/to/file"
    >>> storer = read_files(
    ...     filepath,
    ...     providers_column_label="providers_column_name",
    ... )

    Loading from multiple files:
    >>> filepaths = [
    ...     "path/to/file1",
    ...     "path/to/file2",
    ... ]
    >>> storer = read_files(
    ...     filepaths,
    ... )
    """
    # Options forwarded unchanged, either to the recursive call made for each
    # list item or to the Reader built for a single file.
    shared_options = {
        "providers_column_label": providers_column_label,
        "expocode_column_label": expocode_column_label,
        "date_column_label": date_column_label,
        "year_column_label": year_column_label,
        "month_column_label": month_column_label,
        "day_column_label": day_column_label,
        "hour_column_label": hour_column_label,
        "latitude_column_label": latitude_column_label,
        "longitude_column_label": longitude_column_label,
        "depth_column_label": depth_column_label,
        "variables_reference": variables_reference,
        "category": category,
        "unit_row_index": unit_row_index,
        "delim_whitespace": delim_whitespace,
    }

    if isinstance(filepath, list):
        # One storer per file, then aggregated with the builtin sum.
        # NOTE(review): sum starts from 0, so this presumably relies on Storer
        # supporting addition with that start value -- confirm in Storer.
        per_file_storers = [
            read_files(filepath=item, **shared_options) for item in filepath
        ]
        return sum(per_file_storers)

    if not isinstance(filepath, (Path, str)):
        error_msg = (
            f"Can't read filepaths from {filepath}. Accepted types are Path or str."
        )
        raise TypeError(error_msg)

    # Path instances are passed through untouched; strings are converted.
    path = filepath if isinstance(filepath, Path) else Path(filepath)
    return Reader(filepath=path, **shared_options).get_storer()

bgc_data_processing.core.io.readers¶

get_storer() ¶

`bgc_data_processing.core.io.readers`¶

`get_storer()` ¶