Skip to content

weather_data

Weather data module implementing functionality for working with binary weather files (.bin.json).

DataFrameInfo

The object containing info about dataframe columns and content. Used to pass dataframe info between methods working with weather dataframes.

Source code in emodpy_malaria/weather/weather_data.py
class DataFrameInfo:
    """
    Describes the column layout and content of a weather dataframe.

    Instances carry the node, step and value column names plus the ``only_unique_series`` flag and are used
    to pass that information between methods working with weather dataframes.
    """
    # Lower-cased weather variable values are accepted as candidate names for the value column.
    _variable_values = [str(v.value).lower() for v in WeatherVariable.list()]
    # Candidate names per column type. The first entry of each list doubles as the default column name.
    _default_column_candidates = {
        "node": ["nodes", "node", "node_id", "node_ids", "nodeid", "id", "ids"],
        "step": ["steps", "step", "time"],
        "value": ["values", "value", "series", "data"] + _variable_values}

    def __init__(self,
                 node_column: str = None,
                 step_column: str = None,
                 value_column: str = None,
                 only_unique_series: bool = False):
        """
        Initializes dataframe info object. If no info is provided the defaults are used.

        Args:
            node_column: (Optional) Node column name. The default is "nodes".
            step_column: (Optional) Step column name. The default is "steps".
            value_column: (Optional) Value column name. The default is "values".
            only_unique_series: (Optional) Flag indicating whether only distinct weather time series are needed.
        """
        # NOTE: __str__ prints attributes in assignment order, so keep this order stable.
        self._node_column: str = node_column
        self._step_column: str = step_column
        self._value_column: str = value_column
        self.only_unique_series = only_unique_series
        self._set_defaults()

    def __str__(self) -> str:
        """String representation used to print or debug DataFrameInfo objects."""
        return str(self.__dict__.values())

    def __eq__(self, other: DataFrameInfo):
        """
        Equality operator for DataFrameInfo objects.

        Two objects are equal when all three column names and the ``only_unique_series`` flag match.
        Comparing with None gives False; comparing with any other foreign type returns NotImplemented
        so Python can fall back to its default comparison instead of raising AttributeError.
        """
        if other is None:
            return False
        if not isinstance(other, DataFrameInfo):
            return NotImplemented

        return (self._node_column == other.node_column
                and self._step_column == other.step_column
                and self._value_column == other.value_column
                and self.only_unique_series == other.only_unique_series)

    @property
    def node_column(self):
        """Name of the dataframe column holding node ids."""
        return self._node_column

    @property
    def step_column(self):
        """Name of the dataframe column holding time steps."""
        return self._step_column

    @property
    def value_column(self):
        """Name of the dataframe column holding weather values."""
        return self._value_column

    def _set_defaults(self) -> DataFrameInfo:
        """
        Fill in every column name that was not provided (None or empty) with its default.

        The default is the first candidate name listed for that column type.

        Returns:
            This object, to allow call chaining.
        """
        self._node_column = self.node_column or self._default_column_candidates["node"][0]
        self._step_column = self.step_column or self._default_column_candidates["step"][0]
        self._value_column = self.value_column or self._default_column_candidates["value"][0]
        return self

    @classmethod
    def detect_columns(cls, df: pd.DataFrame, column_candidates: Dict[str, List[str]] = None) -> DataFrameInfo:
        """
        Auto-detect required column names (nodes, time-steps and weather time series) for the DataFrameInfo object.

        Args:
            df: The dataframe containing nodes, time-steps and weather time series.
            column_candidates: (Optional) Dictionary of candidate column names to be used instead of defaults.
                               Expected keys are "node", "step" and "value".

        Returns:
            DataFrameInfo object with detected column names.

        Raises:
            AssertionError: If a column cannot be detected (raised by the per-column detection).
            NameError: If a column is reported as missing (None), e.g. when assertions are disabled.
        """
        column_candidates = column_candidates or cls._default_column_candidates

        # Detect columns
        column_types = ["node", "step", "value"]
        columns = [cls._detect_column(df, column_candidates[name]) for name in column_types]
        # Compare against None explicitly: a falsy but valid column label (e.g. 0) is not "missing".
        not_found = [name for name, col in zip(column_types, columns) if col is None]
        if not_found:
            raise NameError(f"Unable to detect columns {not_found}.")

        info = DataFrameInfo(*columns)
        return info

    @staticmethod
    def _detect_column(df, column_candidates):
        """
        Detect which of the candidate column names is used in the given dataframe.

        Dataframe column names are compared after stripping whitespace and lower-casing them. When more
        than one column matches, the first one in dataframe column order is used.

        Args:
            df: The dataframe containing nodes, time-steps and weather time series.
            column_candidates: Collection of candidate (lower-case) names for a single column type.

        Returns:
            The detected column name, exactly as it appears in the dataframe.

        Raises:
            AssertionError: If none of the dataframe columns match the candidates.
        """
        matches = [c for c in df.columns.values if str(c).strip().lower() in column_candidates]
        found_col = matches[0] if len(matches) > 0 else None
        assert found_col is not None, f"Unable to detect a column matching any of {list(column_candidates)}."
        return found_col

__eq__(other)

Equality operator for DataFrameInfo objects.

Source code in emodpy_malaria/weather/weather_data.py
def __eq__(self, other: DataFrameInfo):
    """
    Equality operator for DataFrameInfo objects.

    Two objects are equal when all three column names and the ``only_unique_series`` flag match.
    Comparing with None gives False; comparing with any other foreign type returns NotImplemented
    so Python can fall back to its default comparison instead of raising AttributeError.
    """
    if other is None:
        return False
    if not isinstance(other, DataFrameInfo):
        return NotImplemented

    return (self._node_column == other.node_column
            and self._step_column == other.step_column
            and self._value_column == other.value_column
            and self.only_unique_series == other.only_unique_series)

__init__(node_column=None, step_column=None, value_column=None, only_unique_series=False)

Initializes dataframe info object. If no info is provided the defaults are used.

Parameters:

Name Type Description Default
node_column str

(Optional) Node column name. The default is "nodes".

None
step_column str

(Optional) Step column name.

None
value_column str

(Optional) Value column name.

None
only_unique_series bool

(Optional) Flag indicating whether only distinct weather time series are needed.

False
Source code in emodpy_malaria/weather/weather_data.py
def __init__(self,
             node_column: str = None,
             step_column: str = None,
             value_column: str = None,
             only_unique_series: bool = False):
    """
    Set up the dataframe info object, falling back to default column names where none are given.

    Args:
        node_column: (Optional) Name of the node column. The default is "nodes".
        step_column: (Optional) Name of the step column.
        value_column: (Optional) Name of the value column.
        only_unique_series: (Optional) Flag indicating whether only distinct weather time series are needed.
    """
    # Attributes are created in this exact order because __str__ prints them in insertion order.
    self._node_column, self._step_column, self._value_column = node_column, step_column, value_column
    self.only_unique_series = only_unique_series
    # Replace any column name left unset with its default.
    self._set_defaults()

__str__()

String representation used to print or debug DataFrameInfo objects.

Source code in emodpy_malaria/weather/weather_data.py
def __str__(self) -> str:
    """Render the instance attribute values as text, for printing or debugging DataFrameInfo objects."""
    attribute_values = vars(self).values()
    return str(attribute_values)

detect_columns(df, column_candidates=None) classmethod

Auto-detect required column names (nodes, time-steps and weather time series) for the DataFrameInfo object.

Parameters:

Name Type Description Default
df DataFrame

The dataframe containing nodes, time-steps and weather time series.

required
column_candidates Dict[str, List[str]]

(Optional) Dictionary of candidate column names to be used instead of defaults.

None

Returns:

Type Description
DataFrameInfo

DataFrameInfo object with detected column names.

Source code in emodpy_malaria/weather/weather_data.py
@classmethod
def detect_columns(cls, df: pd.DataFrame, column_candidates: Dict[str, List[str]] = None) -> DataFrameInfo:
    """
    Auto-detect required column names (nodes, time-steps and weather time series) for the DataFrameInfo object.

    Args:
        df: The dataframe containing nodes, time-steps and weather time series.
        column_candidates: (Optional) Dictionary of candidate column names to be used instead of defaults.
                           Expected keys are "node", "step" and "value".

    Returns:
        DataFrameInfo object with detected column names.

    Raises:
        NameError: If any of the required columns is reported as missing (None).
    """
    column_candidates = column_candidates or cls._default_column_candidates

    # Detect columns
    column_types = ["node", "step", "value"]
    columns = [cls._detect_column(df, column_candidates[name]) for name in column_types]
    # Compare against None explicitly: "col in None" raises TypeError and a falsy but valid
    # column label (e.g. 0) must not be treated as missing.
    not_found = [name for name, col in zip(column_types, columns) if col is None]
    if not_found:
        raise NameError(f"Unable to detect columns {not_found}.")

    info = DataFrameInfo(*columns)
    return info

WeatherData

Functionality for working with binary weather files (.bin.json).

Source code in emodpy_malaria/weather/weather_data.py
class WeatherData:
    """
    Functionality for working with binary weather files (.bin.json).

    Holds the weather time series as a 2d float32 NumPy array (one row per unique series) together with
    the WeatherMetadata object describing them.
    """
    def __init__(self, data: np.ndarray, metadata: WeatherMetadata = None):
        """
        Instantiate a weather object from data numpy array and a weather metadata object.

        Args:
            data: Numpy array of unique weather time series, in the order they appear in a .bin file.
                  Shape can either be a single dimension array, or a 2d array having series stored as rows.
                  This means that the number of rows corresponds to the number of unique series and number of
                  columns corresponds to a series length (e.g. 365).
                  A single dimension array is only supported when metadata is provided.
            metadata: (Optional) WeatherMetadata object containing metadata from .bin.json.
                      If omitted, metadata is generated with node ids 1..N, one per data row.
        """
        self._data: np.ndarray = self._ensure_data_type(data)

        if metadata is None:
            # If metadata object is not provided data must already be in the correct (2d) shape.
            row_count, series_len = self._data.shape[0], self._data.shape[1]
            self._metadata = WeatherMetadata(node_ids=list(range(1, row_count + 1)), series_len=series_len)
        else:
            # If metadata is provided ensure data shape matches metadata info
            self._metadata = metadata
            expected_shape = self._expected_shape()
            if self._data.shape != expected_shape:
                self._data = self._data.reshape(expected_shape)

        self.validate()

    def __eq__(self, other: WeatherData):
        """
        Equality operator for WeatherData objects.

        Objects are equal when both their metadata and their data arrays are equal. Comparing with an
        object of another type returns NotImplemented, so Python falls back to its default comparison
        instead of raising AttributeError.
        """
        if not isinstance(other, WeatherData):
            return NotImplemented

        meta_eq = self.metadata == other.metadata
        data_eq = np.array_equal(self.data, other.data)
        return meta_eq and data_eq

    def _expected_shape(self) -> Tuple[int, int]:
        """Returns the expected shape of data numpy array based on series count and len. """
        return self.metadata.series_unique_count, self.metadata.series_len

    def validate(self):
        """Validate data and metadata relationship (data shape must match metadata counts)."""
        expected_shape = self._expected_shape()
        assert self._data.shape == expected_shape, "Data numpy array shape is not matching metadata counts."

    @property
    def metadata(self) -> WeatherMetadata:
        """Metadata property, exposing weather metadata object."""
        return self._metadata

    @property
    def data(self) -> np.ndarray:
        """Raw data, reshaped in one row per node weather time series."""
        return self._data

    # Import/Export members

    @classmethod
    def from_dict(cls,
                  node_series: Dict[int, Union[np.ndarray[np.float32], List[float]]],
                  same_nodes: Dict[int, List[int]] = None,
                  attributes: WeatherAttributes = None) -> WeatherData:
        """
        Creates a WeatherData object from a dictionary mapping nodes and node weather time series.
        The method identifies unique node weather time series and produces a corresponding node-offset dictionary.

        Args:
            node_series: Dictionary with node ids as keys and weather time series as values (don't have to be unique).
            same_nodes: (Optional) Dictionary, mapping nodes from 'node_series' dictionary to additional nodes
                                   whose series are the same. Keys are node ids, values are lists of node ids.
            attributes: (Optional) Attributes used to initiate weather metadata. If not provided, defaults are used.

        Returns:
            WeatherData object.

        Raises:
            TypeError: If 'node_series' is not a dictionary.
            ValueError: If 'node_series' is empty, or the series contain non-numeric, 'inf' or 'NaN' values,
                        or the series are not all of the same length, or node ids contain 'NaN'.
        """
        # Initialize
        if not isinstance(node_series, Dict) or len(node_series) == 0:
            exception = TypeError if not isinstance(node_series, Dict) else ValueError
            raise exception("The node time series argument must be a non-empty dictionary of node ids and time series.")

        # Check weather time series by converting to an array and validate shape
        try:
            series_values = np.array(list(node_series.values()), dtype=np.float32)
        except ValueError as err:
            raise ValueError("Time series contains values which are not numbers.") from err

        if np.any(np.isinf(np.abs(series_values))):
            raise ValueError("Time series contains 'inf' values which indicates failed conversion into np.float32.")

        if len(series_values.shape) != 2:
            raise ValueError("Time series must be a non-empty lists or array of float or integer values. "
                             "All time series must be of the same length.")

        # Check there are no NaN values in node ids
        if any(np.isnan(list(node_series))):
            raise ValueError("Node id list contains 'NaN' values.")

        # Check there are no NaN values in weather time series
        if np.isnan(series_values).any():
            raise ValueError("Time series contains 'NaN' values.")

        same_nodes = same_nodes or {}

        # Identify unique node weather time series, make sure node ids are int.
        node_series_hashes = {int(n): hash_series(s) for n, s in node_series.items()}       # Create node->hash dict
        unique_nodes = {h: nn[0] for h, nn in invert_dict(node_series_hashes).items()}      # Invert into hash->nodes
        unique_series = [node_series[n] for n in unique_nodes.values()]                     # List unique time series

        # Calculate offset increment per node as time series length x number of bytes per value
        offset_increment = series_values.shape[1] * SERIES_BYTE_VALUE_SIZE
        # Create node->offset dict, for nodes with unique weather time series
        node_offsets = {n: (i * offset_increment) for i, n in enumerate(unique_nodes.values())}
        # Update node->offset dict, add nodes sharing same offsets
        node_offsets.update({n: node_offsets[unique_nodes[h]] for n, h in node_series_hashes.items()})

        # Add other nodes, if specified
        # Invert dict from "unique node"->"list of nodes with that same offset" to "...same..."->"unique node"
        same_nodes = invert_dict(same_nodes, single_value=True)
        node_offsets.update({same: node_offsets[unique] for same, unique in same_nodes.items()})
        # Sort by node, offset
        node_offsets = dict(sorted(node_offsets.items()))

        # Convert the list of weather timeseries into a NumPy array and init WeatherMetadata and WeatherData objects
        data = np.array(unique_series, dtype=np.float32)
        wm = WeatherMetadata(node_ids=node_offsets, series_len=data.shape[1], attributes=attributes)
        wd = WeatherData(data=data, metadata=wm)

        return wd

    def to_dict(self, only_unique_series: bool = False, copy_data: bool = True) -> Dict[int, np.ndarray[np.float32]]:
        """
        Create a node-to-series dictionary from the current object. This method can be used to edit weather data.

        Args:
            only_unique_series: (Optional) A flag controlling whether the output dictionary will contain only
                                unique series, keyed by the first node of each group (if set to true), or
                                series for all nodes (the default).
            copy_data: (Optional) Flag indicating whether to copy data numpy array to prevent unintentional changes.
        Returns:
            A dictionary with node ids as keys and node weather time series as values, sorted by node id.
        """
        data_dict = {}
        node_groups = self.metadata.offset_nodes.values()
        series_list = np.copy(self._data) if copy_data else self._data
        # Pairs each node group with a data row; presumably offset_nodes is ordered like the rows - TODO confirm.
        for ng, s in zip(node_groups, series_list):
            ng = ng[:1] if only_unique_series else ng
            # All nodes of a group reference the same series array object.
            data_dict.update(dict(zip(ng, [s] * len(ng))))

        data_dict = dict(sorted(data_dict.items()))

        return data_dict

    @classmethod
    def from_csv(cls, file_path: Union[str, Path], info: DataFrameInfo = None, attributes: WeatherAttributes = None) -> WeatherData:
        """
        Creates a WeatherData object from a csv file. Used for creating or editing weather files.
        The method identifies unique node weather time series and produces a corresponding node-offset dictionary.

        Args:
            file_path: The csv file path from which weather data is loaded (expected columns: node, step, value).
            info: (Optional) Dataframe info object describing dataframe columns and content.
            attributes: (Optional) Attributes used to initiate weather metadata. If not provided, defaults are used.

        Returns:
            WeatherData object.
        """
        assert Path(file_path).is_file(), f"Weather file not found: {file_path}."
        df = pd.read_csv(file_path)
        wd = cls.from_dataframe(df, info=info, attributes=attributes)
        return wd

    def to_csv(self, file_path: Union[str, Path], info: DataFrameInfo = None) -> pd.DataFrame:
        """
        Creates a csv file and stores node ids, time steps and weather node weather time series as separate columns.

        Args:
            file_path: The csv file path into which weather data will be stored.
            info: (Optional) Dataframe info object describing dataframe columns and content.

        Returns:
            Dataframe created as an intermediate object used to save data to a csv file.
        """
        make_path(Path(file_path).parent)
        df = self.to_dataframe(info=info)
        df.to_csv(file_path, index=False)
        return df

    @classmethod
    def from_dataframe(cls,
                       df: pd.DataFrame,
                       info: DataFrameInfo = None,
                       attributes: WeatherAttributes = None) -> WeatherData:
        """
        Creates WeatherData object from the Pandas dataframe. The dataframe is expected to contain
        node ids, time steps and weather node weather time series as separate columns.

        Args:
            df: Dataframe containing nodes and weather time series (expected columns: node, step, value).
            info: (Optional) Dataframe info object describing dataframe columns and content.
                  If not provided, column names are auto-detected.
            attributes: (Optional) Attributes used to initiate weather metadata. If not provided, defaults are used.

        Returns:
            WeatherData object.

        Raises:
            TypeError: If 'df' is not a pandas DataFrame.
            ValueError: If 'df' is empty or any of the node, step or value columns contains 'NaN' values.
        """
        if not isinstance(df, pd.DataFrame) or len(df) == 0:
            exception = TypeError if not isinstance(df, pd.DataFrame) else ValueError
            raise exception("df argument must be a non-empty pandas DataFrame")

        info = info or DataFrameInfo.detect_columns(df=df)
        nc, sc, vc = [info.node_column, info.step_column, info.value_column]

        # Test for "nan" values in target columns.
        for c in [nc, sc, vc]:
            if df[c].hasnans:
                raise ValueError(f"Column {c} contains 'NaN' values.")

        # Order rows by node and step, then collect each node's values into a list (node -> series).
        df = df[[nc, sc, vc]].sort_values(by=[nc, sc])
        node_series = df.groupby(nc)[vc].apply(list).to_dict()

        wd = cls.from_dict(node_series=node_series, attributes=attributes)
        return wd

    def to_dataframe(self, info: DataFrameInfo = None) -> pd.DataFrame:
        """
        Creates a dataframe containing node ids, time steps and weather time series as separate columns.

        Time steps are numbered from 1 to the series length for every node.

        Args:
            info: (Optional) Dataframe info object describing dataframe columns and content.

        Returns:
            Dataframe containing node ids and weather time series, sorted by node and step.
        """
        info = info or DataFrameInfo()
        data_dict = self.to_dict(only_unique_series=info.only_unique_series)

        actual_nodes = list(data_dict.keys())
        series_len = self.metadata.series_len
        nodes = np.repeat(actual_nodes, series_len)
        steps = list(range(1, series_len + 1)) * len(actual_nodes)
        values = np.array(list(data_dict.values())).reshape(len(data_dict) * series_len)

        assert len(nodes) == len(steps) == len(values), "Dataframe series lengths don't match"
        if len(data_dict) > 1:  # Skip validation in case of a single node.
            assert steps[:series_len] == steps[series_len:series_len * 2], "Steps series is not valid."

        column_series_dict = {info.node_column: nodes, info.step_column: steps, info.value_column: values}
        df = pd.DataFrame(column_series_dict)
        # Set data types
        df[info.node_column] = df[info.node_column].astype(int)
        df[info.step_column] = df[info.step_column].astype(int)
        df[info.value_column] = df[info.value_column].astype(np.float32)
        # sort_values returns a new dataframe, so the result must be assigned to take effect.
        df = df.sort_values(by=[info.node_column, info.step_column])
        return df

    @classmethod
    def from_file(cls, file_path: Union[str, Path]) -> WeatherData:
        """
        Create WeatherData object by reading weather data from binary (.bin) and metadata (.bin.json) files.

        Args:
            file_path: The weather binary (.bin) file path. The metadata file path is constructed by appending ".json".

        Returns:
            WeatherData object.
        """
        file_path = str(file_path)
        wm: WeatherMetadata = WeatherMetadata.from_file(f"{file_path}.json")
        assert Path(file_path).is_file(), f"Data file not found: {file_path}."
        data = np.fromfile(file_path, dtype=np.float32)
        data_len = len(data)
        msg = f"Data length {data_len} doesn't match metadata"
        msg += f" ({wm.series_count} * {wm.series_len} = {wm.total_value_count})"
        assert wm.total_value_count == data_len, msg
        wd = WeatherData(data=data, metadata=wm)
        return wd

    def to_file(self, file_path: Union[str, Path]) -> None:
        """
        Create weather binary (.bin) and metadata (.json) files, containing weather data and metadata.

        Args:
            file_path: The weather binary (.bin) file path. The metadata file path is constructed by adding ".json".

        Returns:
            None.
        """
        file_path = str(file_path)
        self.validate()
        make_path(Path(file_path).parent)
        # Write the float32-converted array so the bytes on disk always match the binary file format.
        data = self._ensure_data_type(self._data)
        with open(file_path, "wb") as bf:
            data.reshape(self.metadata.total_value_count).tofile(bf)

        self._metadata.to_file(f"{file_path}.json")

    @classmethod
    def _ensure_data_type(cls, data: Iterable) -> np.ndarray[np.float32]:
        """
        Ensures node weather time series is of the type compatible with weather binary file format.
        The method validates the data object is a non-empty iterable and converts it to a new NumPy float32 array.

        Args:
            data: Iterable object containing node weather time series. Usually a list or array of floats values.

        Returns:
            Node weather time series as a NumPy float32 array (always a copy of the input).

        Raises:
            AssertionError: If data is None, not iterable or has no items.
        """
        is_iterable = data is not None and isinstance(data, Iterable)
        if is_iterable and hasattr(data, "__len__"):
            # len() avoids copying a potentially large array into a Python list just to count items.
            item_count = len(data)
        elif is_iterable:
            item_count = len(list(data))
        else:
            item_count = 0
        assert item_count > 0, "Data must have at least one item"
        data = np.array(data, dtype=np.float32)
        return data

data property

Raw data, reshaped in one row per node weather time series.

metadata property

Metadata property, exposing weather metadata object.

__eq__(other)

Equality operator for WeatherData objects.

Source code in emodpy_malaria/weather/weather_data.py
def __eq__(self, other: WeatherData):
    """
    Equality operator for WeatherData objects.

    Objects are equal when both their metadata and their data arrays are equal. Comparing with an
    object of another type returns NotImplemented, so Python falls back to its default comparison
    instead of raising AttributeError.
    """
    if not isinstance(other, WeatherData):
        return NotImplemented

    meta_eq = self.metadata == other.metadata
    data_eq = np.array_equal(self.data, other.data)
    return meta_eq and data_eq

__init__(data, metadata=None)

Instantiate a weather object from data numpy array and a weather metadata object.

Parameters:

Name Type Description Default
data ndarray

Numpy array of unique weather time series, in the order they appear in a .bin file. Shape can either be a single dimension array, or a 2d array having series stored as rows. This means that the number of rows corresponds to the number of unique series and number of columns corresponds to a series length (e.g. 365).

required
metadata WeatherMetadata

(Optional) WeatherMetadata object containing metadata from .bin.json.

None
Source code in emodpy_malaria/weather/weather_data.py
def __init__(self, data: np.ndarray, metadata: WeatherMetadata = None):
    """
    Build a weather object from an array of weather time series and, optionally, its metadata.

    Args:
        data: Numpy array of unique weather time series, in the order they appear in a .bin file.
              Either a single dimension array or a 2d array with one series per row, i.e. the row count
              is the number of unique series and the column count is the series length (e.g. 365).
        metadata: (Optional) WeatherMetadata object containing metadata from .bin.json.
    """
    self._data: np.ndarray = self._ensure_data_type(data)

    if metadata is None:
        # No metadata given: data must already be 2d; node ids 1..N are generated, one per row.
        row_count, series_len = self._data.shape[0], self._data.shape[1]
        self._metadata = WeatherMetadata(node_ids=list(range(1, row_count + 1)), series_len=series_len)
    else:
        # Metadata given: bring the data into the shape the metadata describes.
        self._metadata = metadata
        target_shape = self._expected_shape()
        if self._data.shape != target_shape:
            self._data = self._data.reshape(target_shape)

    self.validate()

from_csv(file_path, info=None, attributes=None) classmethod

Creates a WeatherData object from a csv file. Used for creating or editing weather files. The method identifies unique node weather time series and produces a corresponding node-offset dictionary.

Parameters:

Name Type Description Default
file_path Union[str, Path]

The csv file path from which weather data is loaded (expected columns: node, step, value).

required
info DataFrameInfo

(Optional) Dataframe info object describing dataframe columns and content.

None
attributes WeatherAttributes

(Optional) Attributes used to initiate weather metadata. If not provided, defaults are used.

None

Returns:

Type Description
WeatherData

WeatherData object.

Source code in emodpy_malaria/weather/weather_data.py
@classmethod
def from_csv(cls, file_path: Union[str, Path], info: DataFrameInfo = None, attributes: WeatherAttributes = None) -> WeatherData:
    """
    Load a csv file and build a WeatherData object from it. Used for creating or editing weather files.
    Unique node weather time series are identified and a matching node-offset dictionary is produced.

    Args:
        file_path: Path of the csv file holding weather data (expected columns: node, step, value).
        info: (Optional) Dataframe info object describing dataframe columns and content.
        attributes: (Optional) Attributes used to initiate weather metadata. If not provided, defaults are used.

    Returns:
        WeatherData object.
    """
    csv_exists = Path(file_path).is_file()
    assert csv_exists, f"Weather file not found: {file_path}."
    # Parse the csv and delegate the conversion to the dataframe-based constructor.
    return cls.from_dataframe(pd.read_csv(file_path), info=info, attributes=attributes)

from_dataframe(df, info=None, attributes=None) classmethod

Creates WeatherData object from the Pandas dataframe. The dataframe is expected to contain node ids, time steps and weather node weather time series as separate columns.

Parameters:

Name Type Description Default
df DataFrame

Dataframe containing nodes and weather time series (expected columns: node, step, value).

required
info DataFrameInfo

(Optional) Dataframe info object describing dataframe columns and content.

None
attributes WeatherAttributes

(Optional) Attributes used to initiate weather metadata. If not provided, defaults are used.

None

Returns:

Type Description
WeatherData

WeatherData object.

Source code in emodpy_malaria/weather/weather_data.py
@classmethod
def from_dataframe(cls,
                   df: pd.DataFrame,
                   info: DataFrameInfo = None,
                   attributes: WeatherAttributes = None) -> WeatherData:
    """
    Creates WeatherData object from the Pandas dataframe. The dataframe is expected to contain
    node ids, time steps and weather node weather time series as separate columns.

    Args:
        df: Dataframe containing nodes and weather time series (expected columns: node, step, value).
        info: (Optional) Dataframe info object describing dataframe columns and content.
              If not provided, column names are auto-detected.
        attributes: (Optional) Attributes used to initiate weather metadata. If not provided, defaults are used.

    Returns:
        WeatherData object.

    Raises:
        TypeError: If 'df' is not a pandas DataFrame.
        ValueError: If 'df' is empty or any of the node, step or value columns contains 'NaN' values.
    """
    if not isinstance(df, pd.DataFrame) or len(df) == 0:
        exception = TypeError if not isinstance(df, pd.DataFrame) else ValueError
        raise exception("df argument must be a non-empty pandas DataFrame")

    info = info or DataFrameInfo.detect_columns(df=df)
    nc, sc, vc = [info.node_column, info.step_column, info.value_column]

    # Test for "nan" values in target columns.
    for c in [nc, sc, vc]:
        if df[c].hasnans:
            raise ValueError(f"Column {c} contains 'NaN' values.")

    # Order rows by node and step, then collect each node's values into a list (node -> series).
    df = df[[nc, sc, vc]].sort_values(by=[nc, sc])
    node_series = df.groupby(nc)[vc].apply(list).to_dict()

    wd = cls.from_dict(node_series=node_series, attributes=attributes)
    return wd

from_dict(node_series, same_nodes=None, attributes=None) classmethod

Creates a WeatherData object from a dictionary mapping nodes and node weather time series. The method identifies unique node weather time series and produces a corresponding node-offset dictionary.

Parameters:

Name Type Description Default
node_series Dict[int, Union[ndarray[float32], List[float]]]

Dictionary with node ids as keys and weather time series as values (don't have to be unique).

required
same_nodes Dict[int, List[int]]

(Optional) Dictionary mapping nodes from the 'node_series' dictionary to additional nodes whose series are the same. Keys are node ids, values are lists of node ids.

None
attributes WeatherAttributes

(Optional) Attributes used to initiate weather metadata. If not provided, defaults are used.

None

Returns:

Type Description
WeatherData

WeatherData object.

Source code in emodpy_malaria/weather/weather_data.py
@classmethod
def from_dict(cls,
              node_series: Dict[int, Union[np.ndarray[np.float32], List[float]]],
              same_nodes: Dict[int, List[int]] = None,
              attributes: WeatherAttributes = None) -> WeatherData:
    """
    Creates a WeatherData object from a dictionary mapping nodes to node weather time series.
    The method identifies unique node weather time series and produces a corresponding node-offset dictionary.

    Args:
        node_series: Dictionary with node ids as keys and weather time series as values (don't have to be unique).
        same_nodes: (Optional) Dictionary, mapping nodes from 'node_series' dictionary to additional nodes
                               whose series are the same. Keys are node ids, values are lists of node ids.
        attributes: (Optional) Attributes used to initiate weather metadata. If not provided, defaults are used.

    Returns:
        WeatherData object.

    Raises:
        TypeError: If 'node_series' is not a dictionary.
        ValueError: If 'node_series' is empty, if the time series are empty, of different lengths or
                    not numeric, or if node ids or time series contain 'NaN' or 'inf' values.
    """
    # Validate the container first: wrong type -> TypeError, empty dictionary -> ValueError.
    type_msg = "The node time series argument must be a non-empty dictionary of node ids and time series."
    if not isinstance(node_series, dict):
        raise TypeError(type_msg)
    if len(node_series) == 0:
        raise ValueError(type_msg)

    shape_msg = ("Time series must be a non-empty lists or array of float or integer values. "
                 "All time series must be of the same length.")

    # Check weather time series by converting to an array and validate shape
    try:
        series_values = np.array(list(node_series.values()), dtype=np.float32)
    except ValueError as err:
        # NumPy raises ValueError for ragged input as well as for non-numeric values;
        # report series of different lengths with the message that actually describes the problem.
        lengths = {len(s) for s in node_series.values() if hasattr(s, "__len__")}
        if len(lengths) > 1:
            raise ValueError(shape_msg) from err
        raise ValueError("Time series contains values which are not numbers.") from err

    if np.isinf(series_values).any():
        raise ValueError("Time series contains 'inf' values which indicates failed conversion into np.float32.")

    # A valid input is a 2D array (nodes x time steps) with at least one time step.
    if series_values.ndim != 2 or series_values.shape[1] == 0:
        raise ValueError(shape_msg)

    # Check there are no NaN values in node ids
    if np.isnan(list(node_series)).any():
        raise ValueError("Node id list contains 'NaN' values.")

    # Check there are no NaN values in weather time series
    if np.isnan(series_values).any():
        raise ValueError("Time series contains 'NaN' values.")

    same_nodes = same_nodes or {}

    # Identify unique node weather time series, make sure node ids are int.
    node_series_hashes = {int(n): hash_series(s) for n, s in node_series.items()}       # Create node->hash dict
    unique_nodes = {h: nn[0] for h, nn in invert_dict(node_series_hashes).items()}      # Invert into hash->nodes
    unique_series = [node_series[n] for n in unique_nodes.values()]                     # List unique time series

    # Calculate offset increment per node as time series length x number of bytes per value
    offset_increment = series_values.shape[1] * SERIES_BYTE_VALUE_SIZE
    # Create node->offset dict, for nodes with unique weather time series
    node_offsets = {n: (i * offset_increment) for i, n in enumerate(unique_nodes.values())}
    # Update node->offset dict, add nodes sharing same offsets
    node_offsets.update({n: node_offsets[unique_nodes[h]] for n, h in node_series_hashes.items()})

    # Add other nodes, if specified
    # Invert dict from "unique node"->"list of nodes with that same offset" to "...same..."->"unique node"
    same_nodes = invert_dict(same_nodes, single_value=True)
    node_offsets.update({same: node_offsets[unique] for same, unique in same_nodes.items()})
    # Sort by node, offset
    node_offsets = dict(sorted(node_offsets.items()))

    # Convert the list of weather timeseries into a NumPy array and init WeatherMetadata and WeatherData objects
    data = np.array(unique_series, dtype=np.float32)
    wm = WeatherMetadata(node_ids=node_offsets, series_len=data.shape[1], attributes=attributes)
    wd = WeatherData(data=data, metadata=wm)

    return wd

from_file(file_path) classmethod

Create WeatherData object by reading weather data from binary (.bin) and metadata (.bin.json) files.

Parameters:

Name Type Description Default
file_path Union[str, Path]

The weather binary (.bin) file path. The metadata file path is constructed by appending ".json".

required

Returns:

Type Description
WeatherData

WeatherData object.

Source code in emodpy_malaria/weather/weather_data.py
@classmethod
def from_file(cls, file_path: Union[str, Path]) -> WeatherData:
    """
    Build a WeatherData object from a weather binary (.bin) file and its metadata (.bin.json) file.

    Args:
        file_path: Path of the weather binary (.bin) file. The metadata file is expected at the same
                   path with ".json" appended.

    Returns:
        WeatherData object.

    Raises:
        AssertionError: If the binary file doesn't exist or the number of values read from it
                        differs from the total value count reported by the metadata.
    """
    bin_path = str(file_path)
    # Metadata is loaded first; it supplies the expected number of values in the binary file.
    metadata: WeatherMetadata = WeatherMetadata.from_file(f"{bin_path}.json")
    assert Path(bin_path).is_file(), f"Data file not found: {bin_path}."

    values = np.fromfile(bin_path, dtype=np.float32)
    value_count = len(values)
    mismatch = (f"Data length {value_count} doesn't match metadata"
                f" ({metadata.series_count} * {metadata.series_len} = {metadata.total_value_count})")
    assert metadata.total_value_count == value_count, mismatch

    return WeatherData(data=values, metadata=metadata)

to_csv(file_path, info=None)

Creates a csv file and stores node ids, time steps and node weather time series as separate columns.

Parameters:

Name Type Description Default
file_path Union[str, Path]

The csv file path into which weather data will be stored.

required
info DataFrameInfo

(Optional) Dataframe info object describing dataframe columns and content.

None

Returns:

Type Description
DataFrame

Dataframe created as an intermediate object used to save data to a csv file.

Source code in emodpy_malaria/weather/weather_data.py
def to_csv(self, file_path: Union[str, Path], info: DataFrameInfo = None) -> pd.DataFrame:
    """
    Save weather data to a csv file with node ids, time steps and weather values as separate columns.

    Args:
        file_path: Path of the csv file to write.
        info: (Optional) Dataframe info object describing dataframe columns and content.

    Returns:
        The intermediate dataframe that was written to the csv file.
    """
    # Make sure the target directory is in place before writing.
    parent_dir = Path(file_path).parent
    make_path(parent_dir)

    frame = self.to_dataframe(info=info)
    frame.to_csv(file_path, index=False)
    return frame

to_dataframe(info=None)

Creates a dataframe containing node ids, time steps and weather time series as separate columns.

Parameters:

Name Type Description Default
info DataFrameInfo

(Optional) Dataframe info object describing dataframe columns and content.

None

Returns:

Type Description
DataFrame

Dataframe containing node ids and weather time series.

Source code in emodpy_malaria/weather/weather_data.py
def to_dataframe(self, info: DataFrameInfo = None) -> pd.DataFrame:
    """
    Creates a dataframe containing node ids, time steps and weather time series as separate columns.

    Time steps are numbered from 1 to the series length for every node. The resulting dataframe is
    sorted by node id and then by time step.

    Args:
        info: (Optional) Dataframe info object describing dataframe columns and content.
              If not provided, a default DataFrameInfo is used.

    Returns:
        Dataframe containing node ids, time steps and weather time series.
    """
    info = info or DataFrameInfo()
    data_dict = self.to_dict(only_unique_series=info.only_unique_series)

    actual_nodes = list(data_dict)
    series_len = self.metadata.series_len
    # Build three flat, equally long series: each node id repeated once per step,
    # the 1-based step sequence repeated once per node, and the flattened series values.
    nodes = np.repeat(actual_nodes, series_len)
    steps = list(range(1, series_len + 1)) * len(actual_nodes)
    values = np.array(list(data_dict.values())).reshape(len(actual_nodes) * series_len)

    assert len(nodes) == len(steps) == len(values), "Dataframe series lengths don't match"
    if len(data_dict) > 1:  # Skip validation in case of a single node.
        assert steps[:series_len] == steps[series_len:series_len * 2], "Steps series is not valid."

    column_series_dict = {info.node_column: nodes, info.step_column: steps, info.value_column: values}
    df = pd.DataFrame(column_series_dict)
    # Set data types
    df[info.node_column] = df[info.node_column].astype(int)
    df[info.step_column] = df[info.step_column].astype(int)
    df[info.value_column] = df[info.value_column].astype(np.float32)
    # sort_values returns a new dataframe, so the result must be kept; the index is rebuilt
    # so that row labels follow the sorted order.
    df = df.sort_values(by=[info.node_column, info.step_column]).reset_index(drop=True)
    return df

to_dict(only_unique_series=False, copy_data=True)

Create a node-to-series dictionary from the current object. This method can be used to edit weather data.

Parameters:

Name Type Description Default
only_unique_series bool

(Optional) A flag controlling whether the output dictionary will contain only unique series (if set to true) or series for all nodes (if set to false, the default).

False
copy_data bool

(Optional) Flag indicating whether to copy data numpy array to prevent unintentional changes.

True

Returns: A dictionary with node ids as keys and node weather time series as values.

Source code in emodpy_malaria/weather/weather_data.py
def to_dict(self, only_unique_series: bool = False, copy_data: bool = True) -> Dict[int, np.ndarray[np.float32]]:
    """
    Build a node-to-series dictionary from the current object. This method can be used to edit weather data.

    Node groups from the metadata 'offset_nodes' mapping are paired, in order, with the series stored
    in the data array. Nodes belonging to the same group are mapped to the same series object.

    Args:
        only_unique_series: (Optional) If true, only the first node of every group is included, so each
                            unique series appears once. If false (default), all nodes are included.
        copy_data: (Optional) Flag indicating whether to copy data numpy array to prevent unintentional changes.
    Returns:
        A dictionary with node ids as keys and node weather time series as values, sorted by node id.
    """
    series_rows = self._data
    if copy_data:
        # Work on a copy so edits to the returned series don't reach the stored data.
        series_rows = np.copy(series_rows)

    node_to_series = {}
    for group, row in zip(self.metadata.offset_nodes.values(), series_rows):
        members = group[:1] if only_unique_series else group
        for node in members:
            node_to_series[node] = row

    return dict(sorted(node_to_series.items()))

to_file(file_path)

Create weather binary (.bin) and metadata (.json) files, containing weather data and metadata.

Parameters:

Name Type Description Default
file_path Union[str, Path]

The weather binary (.bin) file path. The metadata file path is constructed by adding ".json".

required

Returns:

Type Description
NoReturn

None.

Source code in emodpy_malaria/weather/weather_data.py
def to_file(self, file_path: Union[str, Path]) -> None:
    """
    Create weather binary (.bin) and metadata (.json) files, containing weather data and metadata.

    The object is validated before anything is written. The data array is flattened and written
    to the binary file; the metadata is written to the same path with ".json" appended.

    Args:
        file_path: The weather binary (.bin) file path. The metadata file path is constructed by adding ".json".

    Returns:
        None.
    """
    file_path = str(file_path)
    self.validate()
    make_path(Path(file_path).parent)
    # NOTE(review): the return value is ignored here; presumably this call only checks
    # the array data type -- confirm against _ensure_data_type.
    self._ensure_data_type(self._data)
    with open(file_path, "wb") as bf:
        # Flatten to a 1D sequence of total_value_count values before writing.
        self._data.reshape(self.metadata.total_value_count).tofile(bf)

    self._metadata.to_file(f"{file_path}.json")

validate()

Validate data and metadata relationship.

Source code in emodpy_malaria/weather/weather_data.py
def validate(self):
    """Validate data and metadata relationship."""
    expected_shape = self._expected_shape()
    assert self._data.shape == expected_shape, "Data numpy array shape is not matching metadata counts."