Skip to content

weather_set

The weather set module implements functionality for working with sets of weather files.

WeatherSet

Representation of a set of weather files required by EMOD, for all or a subset of weather variables. Automates tasks for working with multiple weather files using WeatherData and WeatherMetadata objects. WeatherSet contains a dictionary mapping weather variables to WeatherData objects (each carrying its WeatherMetadata). Supports: (1) conversion from/to csv and dataframe (from_csv, to_csv, from_dataframe, to_dataframe); (2) conversion from/to EMOD weather files, .bin and .bin.json (from_files, to_files).

Source code in emodpy_malaria/weather/weather_set.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
class WeatherSet:
    """
    Representation of a set of weather files required by EMOD, for all or a subset of weather variables.
    Automate tasks for working with multiple weather files using WeatherData and WeatherMetadata objects.
    WeatherSet contains a dictionary of weather variables to WeatherData and WeatherMetadata objects.
    Supports:
    1. Conversion from/to csv, dataframe (from_csv, to_csv, from_dataframe, to_dataframe)
    2. Conversion from/to EMOD weather files, .bin and .bin.json (from_file, to_file)
    """

    def __init__(self,
                 dir_path: Union[str, Path] = None,
                 file_names: Dict[WeatherVariable, str] = None,
                 weather_columns: Dict[WeatherVariable, str] = None):
        """
        Initializes a WeatherSet object.

        Args:
            dir_path: (Optional) Path to the directory containing weather files.
            file_names: (Optional) Dictionary of weather variables (keys) and file names (values).
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                             Defaults are WeatherVariables values are used: "airtemp", "humidity", "rainfall", "landtemp".

        """
        self._dir_path: Union[str, Path] = dir_path
        self._file_names: Dict[WeatherVariable, str] = file_names or {}
        self._weather_columns: Dict[WeatherVariable, str] = weather_columns or {}
        self._weather_dict: Dict[WeatherVariable, WeatherData] = {}

    # Dictionary methods

    def __getitem__(self, weather_variable: WeatherVariable):
        """Getter method for the weather dictionary, to return WeatherData object for the given weather variable."""
        return self._weather_dict[weather_variable]

    def __setitem__(self, weather_variable: WeatherVariable, weather_object: WeatherData):
        """Setter method for the weather dictionary, to set WeatherData object for the given weather variable."""
        self._weather_dict[weather_variable] = weather_object

    def __len__(self):
        """Method to return the number of items in the weather dictionary."""
        return len(self._weather_dict)

    def __str__(self):
        """String representation used to print or debug WeatherSet objects."""
        return str(self.weather_variables)

    def __eq__(self, other: WeatherSet):
        """Equality operator for WeatherSet objects"""
        if self.weather_variables != other.weather_variables:
            return False
        data_eq = [self[v] == other[v] for v in self.weather_variables]
        return all(data_eq)

    def keys(self):
        """Returns the list of WeatherVariables."""
        return self._weather_dict.keys()

    def values(self) -> List[WeatherData]:
        """Returns the list of WeatherData objects."""
        return list(self._weather_dict.values())

    def items(self) -> Dict[WeatherVariable, WeatherData].items:
        """Returns an iterator for weather dictionary items."""
        return self._weather_dict.items()

    # Properties

    @property
    def dir_path(self) -> str:
        """Directory path containing weather files."""
        return str(self._dir_path)

    @property
    def file_names(self) -> Dict[WeatherVariable, str]:
        """Dictionary of weather variables (keys) and weather file names (values)."""
        return self._file_names

    @property
    def attributes(self) -> WeatherAttributes:
        if len(self.weather_variables) > 0:  # if any extract WeatherAttributes (common to all)
            wa = self.values()[0].metadata.attributes
        else:
            wa = None

        return wa

    @property
    def weather_variables(self) -> List[WeatherVariable]:
        """The list of weather variables the weather set covers."""
        return list(self._weather_dict)

    @property
    def weather_columns(self) -> Dict[WeatherVariable, str]:
        """The list of weather columns."""
        return self._weather_columns

    # Export/import

    @classmethod
    def from_dataframe(cls,
                       df: pd.DateFrame,
                       node_column: str = None,
                       step_column: str = None,
                       weather_columns: Dict[WeatherVariable, str] = None,
                       attributes: WeatherAttributes = None) -> WeatherSet:
        """
        Initializes WeatherSet object from a dataframe containing weather time series.
        The dataframe must have node ids, step and weather columns.

        Args:
            df: Dataframe containing weather data.
            node_column: (Optional) Column containing node ids. The default is "nodes".
            step_column: (Optional) Column containing node index for weather time series values. The default is "steps".
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                Defaults are WeatherVariables values are used: "airtemp", "humidity", "rainfall", "landtemp".
            attributes: (Optional) Weather attribute object containing metadata for WeatherMetadata object.

        Returns:
            WeatherSet object.
        """
        assert isinstance(df, pd.DataFrame), f"Unsupported dataframe argument type {type(df)}."
        args = {k: v for k, v in locals().items() if k not in ["cls", "df"]}
        args["data_csv"] = df
        return cls._from_csv_data(**args)

    @classmethod
    def from_csv(cls,
                 file_path: Union[str, Path],
                 node_column: str = None,
                 step_column: str = None,
                 weather_columns: Dict[WeatherVariable, str] = None,
                 attributes: WeatherAttributes = None) -> WeatherSet:
        """
        Initializes WeatherSet object from a dataframe containing weather time series.
        The csv file must have node ids, step and weather columns.

        Args:
            file_path: The csv file path.
            node_column: (Optional) Column containing node ids. The default is "nodes".
            step_column: (Optional) Column containing node index for weather time series values. The default is "steps".
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                Defaults are WeatherVariables values are used: "airtemp", "humidity", "rainfall", "landtemp".
            attributes: (Optional) The weather attribute object containing metadata for WeatherMetadata object.

        Returns:
            WeatherSet object.
        """
        assert Path(file_path).is_file(), f"The csv file not found: {str(file_path)}."
        args = {k: v for k, v in locals().items() if k not in ["cls", "file_path"]}
        args["data_csv"] = str(file_path)
        return cls._from_csv_data(**args)

    @classmethod
    def _from_csv_data(cls,
                       data_csv: Union[str, pd.DataFrame],
                       node_column: str = None,
                       step_column: str = None,
                       weather_columns: Dict[WeatherVariable, str] = None,
                       attributes: WeatherAttributes = None) -> WeatherSet:
        """
        Creates WeatherSet from a csv file or dataframe by instantiating WeatherData object for each weather variable.
        Column arguments are used to interpret input file/dataframe. Weather attribute argument is used for
        instantiating weather metadata objects.

        Args:
            data_csv: Dataframe or a csv file path (string) containing weather time series.
            node_column: (Optional) Column containing node ids. The default is "nodes".
            step_column: (Optional) Column containing the time step index of the weather time series values.
                The default is "steps".
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                By default, the WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".
            attributes: (Optional) The weather attribute object containing metadata for WeatherMetadata object.

        Returns:
            WeatherSet object (an instance of the class the method was called on).

        Raises:
            TypeError: If data_csv is neither a string nor a dataframe.
        """
        # Obtain dataframe info objects (to name dataframe columns) together with the final weather column
        # dictionary (relevant if weather_columns was None or contained None column names).
        infos, weather_columns = cls._init_dataframe_info_dict(node_column, step_column, weather_columns)
        attributes = attributes or WeatherAttributes()
        # Instantiate via cls so that subclasses get instances of their own type.
        ws = cls(weather_columns=weather_columns)

        # The input kind is the same for every variable, so it is resolved once, before the loop.
        if isinstance(data_csv, str):
            from_csv_file = True
        elif isinstance(data_csv, pd.DataFrame):
            from_csv_file = False
        else:
            raise TypeError(f"Unsupported argument type {type(data_csv)}. Only string or dataframe are expected.")

        for v, info in infos.items():
            if from_csv_file:
                ws[v] = WeatherData.from_csv(file_path=data_csv, info=info, attributes=attributes)
            else:
                ws[v] = WeatherData.from_dataframe(df=data_csv, info=info, attributes=attributes)

        ws.validate()
        return ws

    def to_dataframe(self,
                     node_column: str = None,
                     step_column: str = None,
                     weather_columns: Dict[WeatherVariable, str] = None) -> pd.DataFrame:
        """
        Creates a dataframe containing node ids, time steps and weather columns.

        Args:
            node_column: (Optional) Column containing node ids. The default is "nodes".
            step_column: (Optional) Column containing node index for weather time series values. The default is "steps".
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                Defaults are WeatherVariables values are used: "airtemp", "humidity", "rainfall", "landtemp".
        Returns:
            Dataframe containing node ids and weather time series.
        """
        # If no columns, init keys to filter variables
        weather_columns = weather_columns or {v: None for v in self.weather_variables}
        not_available = [v for v in weather_columns if v.value not in [w.value for w in self.weather_variables]]

        if len(not_available) > 0:
            raise ValueError(f"weather_columns contain unavailable weather variables: {not_available}")

        # Obtain dataframe info objects, to name dataframe columns
        infos, weather_columns = self._init_dataframe_info_dict(node_column, step_column, weather_columns)
        self._weather_columns = weather_columns
        df = None                                   # used to collect all weather columns in a single df
        for v in infos:                             # for each dataframe info (weather variable)
            df2 = self[v].to_dataframe(infos[v])    # get dataframe for current weather variable
            if df is None:                          # if first iteration
                df = df2                            # init outer dataframe
            else:                                   # if 2nd or higher iteration
                col = infos[v].value_column         # take column name
                df[col] = df2[col]                  # add weather column to the outer dataframe

        return df

    def to_csv(self,
               file_path: Union[str, Path],
               node_column: str = None,
               step_column: str = None,
               weather_columns: Dict[WeatherVariable, str] = None) -> pd.DataFrame:
        """
        Creates a csv file containing node ids, time steps and weather columns.

        Args:
            file_path: The path of a csv file to be generated.
            node_column: (Optional) Column containing node ids. The default is "nodes".
            step_column: (Optional) Column containing node index for weather time series values. The default is "steps".
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                Defaults are WeatherVariables values are used: "airtemp", "humidity", "rainfall", "landtemp".

        Returns:
            Dataframe containing node ids and weather time series, used to create the csv file.
        """
        df = self.to_dataframe(node_column, step_column, weather_columns)
        df.to_csv(file_path, index=False)
        return df

    # Save/load DTK files

    def _load(self) -> WeatherSet:
        """Loads weather files based on weather set attributes."""
        assert self.dir_path and Path(self.dir_path).is_dir(), "A valid dir is a required argument."
        assert isinstance(self.file_names, Dict) and len(self.file_names) > 0, "File names dictionary is required."
        for v, n in self.file_names.items():
            bin_path = self._weather_file_path(n)
            self[v] = WeatherData.from_file(bin_path)

        self.validate()

        return self

    def _save(self) -> NoReturn:
        """Saves weather data and metadata into weather files based on weather set attributes."""
        assert self._dir_path, "Directory is a required argument."
        assert self._file_names and len(self._file_names) > 0, "File names are required."

        make_path(self._dir_path)
        for v, wd in self._weather_dict.items():
            bin_path = self._weather_file_path(self._file_names[v])
            wd.to_file(bin_path)
            wd.metadata.to_file(f"{bin_path}.json")

    @classmethod
    def from_files(cls,
                   dir_path: Union[str, Path],
                   prefix: str = "",
                   file_names: Dict[WeatherVariable, str] = None) -> WeatherSet:
        """
        Instantiates WeatherSet from weather files whose paths are determined based on given arguments.

        Args:
            dir_path: Directory path containing weather files.
            prefix: Weather files prefix, e.g. "dtk_15arcmin". Only used when file_names is not given,
                to select the weather files from the directory.
            file_names: Dictionary of weather variables (keys) and weather .bin file names (values).

        Returns:
            WeatherSet object (an instance of the class the method was called on).
        """
        WeatherVariable.validate_types(file_names, [str, Path])
        # Without explicit names, pick the files up from the directory by name pattern.
        file_names = file_names or cls.select_weather_files(dir_path=dir_path, prefix=prefix)
        # Instantiate via cls so that subclasses get instances of their own type.
        ws = cls(dir_path=dir_path, file_names=file_names)
        ws._load()

        return ws

    def to_files(self,
                 dir_path: Union[str, Path],
                 file_names: Dict[WeatherVariable, str] = None) -> NoReturn:
        """Saves WeatherSet to weather files which paths are determined based on given arguments."""
        file_names = file_names or self.make_file_paths()
        self._dir_path = Path(dir_path)
        self._file_names = file_names
        self._save()

    # Helpers

    @classmethod
    def _init_weather_columns(cls, weather_columns: Dict[WeatherVariable, Union[str, None]] = None
                              ) -> Dict[WeatherVariable, str]:
        """
        Builds a fully populated weather_columns dictionary from defaults or a partially populated one.
        Supported cases, in respect to the weather_columns argument:
        - all column names are specified: an equal dictionary is returned.
        - some column names are None: those column names are set to the WeatherVariable values.
        - weather_columns is None (or empty): all weather variables are used, with WeatherVariable values as names.

        Args:
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                By default, the WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".
        Returns:
            Dictionary of weather variables (keys) to weather column names (values).
        """
        WeatherVariable.validate_types(weather_columns, [str, None])

        if weather_columns:
            # Restrict the result to the requested variables.
            requested = weather_columns
            variables = list(weather_columns)
        else:
            # Nothing requested: cover all weather variables, every name falling back to its default.
            requested = {}
            variables = WeatherVariable.list()

        # Missing (None or empty) column names are replaced by the weather variable value.
        return {v: requested.get(v, None) or v.value for v in variables}

    @classmethod
    def _init_dataframe_info_dict(cls,
                                  node_column: str = None,
                                  step_column: str = None,
                                  weather_columns: Dict[WeatherVariable, str] = None
                                  ) -> Tuple[Dict[WeatherVariable, DataFrameInfo], Dict[WeatherVariable, str]]:
        """
        Creates one dataframe info object (holding the column names) per weather variable.

        Args:
            node_column: (Optional) Column containing node ids. The default is "nodes".
            step_column: (Optional) Column containing the time step index of the weather time series values.
                The default is "steps".
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                By default, the WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".
        Returns:
            Tuple of two dictionaries mapping weather variables to dataframe info and weather columns.
        """
        # Fill in defaults first, so every variable has a concrete value column name.
        resolved_columns = cls._init_weather_columns(weather_columns)
        infos = {
            variable: DataFrameInfo(node_column=node_column,
                                    step_column=step_column,
                                    value_column=column)
            for variable, column in resolved_columns.items()
        }

        return infos, resolved_columns

    @classmethod
    def _make_file_templates(cls,
                             prefix: str = "*",
                             suffix: str = "*{}*.bin",
                             weather_variables: List[WeatherVariable] = None,
                             weather_names: Dict[WeatherVariable, str] = None) -> Dict[WeatherVariable, str]:
        """
        Construct file name templates using weather file name prefix/suffix and weather variable names.
        The logic of this method is the same as of "Path.glob" method, with two adjustments, added to make its use more
        convenient for working with weather files:
        - if prefix/suffix are not specified, defaults are used (see method arguments).
        - if suffix doesn't end with ".bin" or "*", "*.bin" is added (since, otherwise, no matches can be found).

        Used for two scenarios:
        1. Get expected weather file patsh.
        2. Select weather files from a dir, when exact names are not known, e.g. Path.glob("dtk_*{tag}*.bin").

        Args:
            prefix: (Optional) Weather file name prefix, usually a fixed string like "dtk_15arcmin".
            suffix: (Optional) Weather file name suffix, usually containing a weather variable name parameter like "*{tag}*.bin").
            weather_names: (Optional) Dictionary of weather variables (keys) and custom weather variable names (values).
            weather_variables: (Optional) Weather variables to be used in case custom weather names are not specified.
                In this case lowercase weather variable names are used, for example: AIR_TEMPERATURE -> air_temperature.

        Returns:
            Dictionary of weather variables (keys) and weather file name templates.
                For example, air temperature could be represented as:
                - exact name:   WeatherVariable.AIR_TEMPERATURE: "dtk_15arcmin_air_temperature_daily.bin" or
                - name pattern: WeatherVariable.AIR_TEMPERATURE: "dtk_air_temperature.bin"
        """
        # Validate arguments
        if prefix is None:
            raise ValueError("Prefix cannot be None.")

        if suffix is None:
            raise ValueError("fFile pattern cannot be None.")

        WeatherVariable.validate_types(weather_names, [str])

        is_ok = weather_variables is None or isinstance(weather_variables, List) and len(weather_variables) > 0
        assert is_ok, "If specified weather variables must be a nonempty list."

        # Append *.bin if missing
        if not suffix.endswith(".bin") and not suffix.endswith("*"):
            suffix += "*.bin"

        template = prefix + suffix
        template = template.replace("**", "*")

        # Init default weather name dictionary, if not provided.
        weather_variables = weather_variables or WeatherVariable.list()
        weather_names = weather_names or {v: v.name.lower() for v in weather_variables}

        names = {}
        # Create dictionary of weather variable and file name templates
        for v, t in weather_names.items():
            names[v] = template.format(weather_names[v])

        return names

    @classmethod
    def make_file_paths(cls,
                        dir_path: Union[str, Path] = None,
                        prefix: str = "dtk_15arcmin_",
                        suffix: str = "{}_daily.bin",
                        weather_variables: List[WeatherVariable] = None,
                        weather_names: Dict[WeatherVariable, str] = None) -> Dict[WeatherVariable, str]:
        """
        Construct file paths using the weather directory path, file name prefix/suffix and weather variable names.
        The logic of this method is the same as of "Path.glob" method, with two adjustments, added to make its use more
        convenient for working with weather files:
        - if prefix/suffix are not specified, defaults are used (see method arguments).
        - if suffix doesn't end with ".bin" or "\\*", "\\*.bin" is added (since, otherwise, no matches can be found).

        Args:
            dir_path: (Optional) Directory path containing weather files.
            prefix: (Optional) Weather file name prefix, usually a fixed string like "dtk_15arcmin".
            suffix: (Optional) Weather file name suffix, usually containing a weather variable name parameter like "\\*{tag}\\*.bin").
            weather_names: (Optional) Dictionary of weather variables (keys) and custom weather variable names (values).
            weather_variables: (Optional) Weather variables to be used in case custom weather names are not specified.
                In this case lowercase weather variable names are used, for example: AIR_TEMPERATURE -> air_temperature.

        Returns:
            Dictionary of weather variables (keys) and weather file paths.
                For example, air temperature could be represented as:
                WeatherVariable.AIR_TEMPERATURE: "dtk_15arcmin_air_temperature_daily.bin"
        """
        names = cls._make_file_templates(prefix=prefix,
                                         suffix=suffix,
                                         weather_names=weather_names,
                                         weather_variables=weather_variables)

        if dir_path is not None:
            names = {v: str(Path(dir_path).joinpath(n)) for v, n in names.items()}

        return names

    @classmethod
    def select_weather_files(cls,
                             dir_path: Union[str, Path],
                             prefix: str = "*",
                             suffix: str = "*{}*.bin",
                             weather_variables: List[WeatherVariable] = None,
                             weather_names: Dict[WeatherVariable, str] = None) -> Dict[WeatherVariable, str]:
        """
        Select a set of weather files using the weather directory path, file name prefix/suffix and weather variable names.
        The logic of this method is the same as of "Path.glob" method, with two adjustments, added to make its use more
        convenient for working with weather files:
        - if prefix/suffix are not specified, defaults are used (see method arguments).
        - if suffix doesn't end with ".bin" or "\\*", "\\*.bin" is added (since, otherwise, no matches can be found).

        Args:
            dir_path: (Optional) Directory path containing weather files.
            prefix: (Optional) Weather file name prefix, usually a fixed string like "dtk_15arcmin".
            suffix: (Optional) Weather file name suffix, usually containing a weather variable name parameter like "\\*{tag}\\*.bin").
            weather_names: (Optional) Dictionary of weather variables (keys) and custom weather variable names (values).
            weather_variables: (Optional) Weather variables to be used in case custom weather names are not specified.
                In this case lowercase weather variable names are used, for example: AIR_TEMPERATURE -> air_temperature.

         Returns:
             Dictionary of weather variables (keys) and weather file names.
             For example, WeatherVariable.AIR_TEMPERATURE: "dtk_15arcmin_air_temperature_daily.bin"
         """
        assert dir_path is not None, "Directory path cannot be None."
        templates = cls._make_file_templates(prefix=prefix,
                                             suffix=suffix,
                                             weather_names=weather_names,
                                             weather_variables=weather_variables)
        names = {}
        # Use name patterns to pick up files via Path.glob().
        for v, pattern in templates.items():
            files = list(Path(dir_path).glob(pattern))
            assert len(files) < 2, f"More than one weather file matches name pattern {pattern}"
            if len(files) == 1:
                names[v] = files[0].name

        return names

    def _weather_file_path(self, file_name: Union[str, Path]) -> Path:
        """Construct a weather file path."""
        return Path(self.dir_path).joinpath(str(file_name))

    def validate(self) -> NoReturn:
        """Validate WeatherSet object."""

        series_len0: Union[int, None] = None
        node_count0: Union[int, None] = None
        if_reference0: Union[str, None] = None
        resolution0: Union[str, None] = None
        years0: Union[str, None] = None

        for v, wd in self._weather_dict.items():
            wm = wd.metadata
            # Validate each weather data and metadata object
            wd.validate()
            wd.metadata.validate()

            # Validate weather objects consistency
            series_len = wm.series_len
            node_count = wm.node_count
            if_reference = wm.id_reference
            resolution = wm.spatial_resolution
            years = wm.data_years
            # total_values = wm.total_value_count

            series_len0 = series_len0 or series_len
            node_count0 = node_count0 or node_count
            if_reference0 = if_reference0 or if_reference
            resolution0 = resolution0 or resolution
            years0 = years0 or years
            # total_values0 = total_values0 or total_values

            file_name = f": {self.file_names[v]}(.json)" if v in self.file_names else ""
            msg = "WeatherSet {} mismatch for " + str(v) + file_name
            assert series_len0 == series_len, msg.format("series_len")
            assert node_count0 == node_count, msg.format("node_count")
            assert if_reference0 == if_reference, msg.format("if_reference")
            assert resolution0 == resolution, msg.format("resolution")
            assert years0 == years, msg.format("data years")
            # assert total_values0 == total_values, msg.format("total_values")

        # Validate that if weather columns are specified they match weather set variables.
        if len(self._weather_columns) > 0:
            for v in WeatherVariable.list():
                both_has = v in self._weather_dict and v in self._weather_columns
                none_has = v not in self._weather_dict and v not in self._weather_columns
                assert both_has or none_has, ""

dir_path property

Directory path containing weather files.

file_names property

Dictionary of weather variables (keys) and weather file names (values).

weather_columns property

Dictionary of weather variables (keys) and weather column names (values).

weather_variables property

The list of weather variables the weather set covers.

__eq__(other)

Equality operator for WeatherSet objects

Source code in emodpy_malaria/weather/weather_set.py
def __eq__(self, other: WeatherSet):
    """
    Equality operator for WeatherSet objects.

    Two weather sets are equal when they cover the same weather variables (regardless of insertion order)
    and the WeatherData objects stored for each variable compare equal.

    Returns:
        True/False for WeatherSet operands; NotImplemented for any other type, so that Python can fall back
        to its default comparison instead of failing with AttributeError.
    """
    if not isinstance(other, WeatherSet):
        return NotImplemented

    # Compare the covered variables as sets: the order in which they were added is irrelevant.
    if set(self.weather_variables) != set(other.weather_variables):
        return False

    # Generator form stops at the first differing weather data object.
    return all(self[v] == other[v] for v in self.weather_variables)

__getitem__(weather_variable)

Getter method for the weather dictionary, to return WeatherData object for the given weather variable.

Source code in emodpy_malaria/weather/weather_set.py
def __getitem__(self, weather_variable: WeatherVariable):
    """Look up the WeatherData object stored in the weather dictionary for the given weather variable."""
    weather_data = self._weather_dict[weather_variable]
    return weather_data

__init__(dir_path=None, file_names=None, weather_columns=None)

Initializes a WeatherSet object.

Parameters:

Name Type Description Default
dir_path Union[str, Path]

(Optional) Path to the directory containing weather files.

None
file_names Dict[WeatherVariable, str]

(Optional) Dictionary of weather variables (keys) and file names (values).

None
weather_columns Dict[WeatherVariable, str]

(Optional) Dictionary of weather variables (keys) and weather column names (values). By default, WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".

None
Source code in emodpy_malaria/weather/weather_set.py
def __init__(self,
             dir_path: Union[str, Path] = None,
             file_names: Dict[WeatherVariable, str] = None,
             weather_columns: Dict[WeatherVariable, str] = None):
    """
    Creates a WeatherSet object with an empty weather dictionary.

    Args:
        dir_path: (Optional) Path to the directory containing weather files.
        file_names: (Optional) Dictionary of weather variables (keys) and file names (values).
        weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                         By default, WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".

    """
    # No weather data is loaded by the constructor; the dictionary starts out empty.
    self._weather_dict: Dict[WeatherVariable, WeatherData] = {}
    # A missing (or empty) mapping is replaced with a fresh empty dictionary.
    self._weather_columns: Dict[WeatherVariable, str] = weather_columns if weather_columns else {}
    self._file_names: Dict[WeatherVariable, str] = file_names if file_names else {}
    self._dir_path: Union[str, Path] = dir_path

__len__()

Method to return the number of items in the weather dictionary.

Source code in emodpy_malaria/weather/weather_set.py
def __len__(self):
    """Report how many weather variables currently have an entry in the weather dictionary."""
    item_count = len(self._weather_dict)
    return item_count

__setitem__(weather_variable, weather_object)

Setter method for the weather dictionary, to set WeatherData object for the given weather variable.

Source code in emodpy_malaria/weather/weather_set.py
def __setitem__(self, weather_variable: WeatherVariable, weather_object: WeatherData):
    """Store the WeatherData object in the weather dictionary under the given weather variable."""
    weather_dict = self._weather_dict
    weather_dict[weather_variable] = weather_object

__str__()

String representation used to print or debug WeatherSet objects.

Source code in emodpy_malaria/weather/weather_set.py
def __str__(self):
    """Render the WeatherSet as the string form of its weather variables (for printing or debugging)."""
    variables = self.weather_variables
    return str(variables)

from_csv(file_path, node_column=None, step_column=None, weather_columns=None, attributes=None) classmethod

Initializes WeatherSet object from a csv file containing weather time series. The csv file must have node ids, step and weather columns.

Parameters:

Name Type Description Default
file_path Union[str, Path]

The csv file path.

required
node_column str

(Optional) Column containing node ids. The default is "nodes".

None
step_column str

(Optional) Column containing node index for weather time series values. The default is "steps".

None
weather_columns Dict[WeatherVariable, str]

(Optional) Dictionary of weather variables (keys) and weather column names (values). By default, WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".

None
attributes WeatherAttributes

(Optional) The weather attribute object containing metadata for WeatherMetadata object.

None

Returns:

Type Description
WeatherSet

WeatherSet object.

Source code in emodpy_malaria/weather/weather_set.py
@classmethod
def from_csv(cls,
             file_path: Union[str, Path],
             node_column: str = None,
             step_column: str = None,
             weather_columns: Dict[WeatherVariable, str] = None,
             attributes: WeatherAttributes = None) -> WeatherSet:
    """
    Initializes WeatherSet object from a csv file containing weather time series.
    The csv file must have node ids, step and weather columns.

    Args:
        file_path: The csv file path.
        node_column: (Optional) Column containing node ids. The default is "nodes".
        step_column: (Optional) Column containing time steps of weather time series values.
            The default is "steps".
        weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
            By default, WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".
        attributes: (Optional) The weather attribute object containing metadata for WeatherMetadata object.

    Returns:
        WeatherSet object.

    Raises:
        AssertionError: If the csv file doesn't exist.
    """
    assert Path(file_path).is_file(), f"The csv file not found: {str(file_path)}."
    # Forward the arguments by name. Collecting them via locals() silently picks up any local
    # variable added to this method later and is fragile when a tracer/debugger is active.
    return cls._from_csv_data(data_csv=str(file_path),
                              node_column=node_column,
                              step_column=step_column,
                              weather_columns=weather_columns,
                              attributes=attributes)

from_dataframe(df, node_column=None, step_column=None, weather_columns=None, attributes=None) classmethod

Initializes WeatherSet object from a dataframe containing weather time series. The dataframe must have node ids, step and weather columns.

Parameters:

Name Type Description Default
df DataFrame

Dataframe containing weather data.

required
node_column str

(Optional) Column containing node ids. The default is "nodes".

None
step_column str

(Optional) Column containing node index for weather time series values. The default is "steps".

None
weather_columns Dict[WeatherVariable, str]

(Optional) Dictionary of weather variables (keys) and weather column names (values). By default, WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".

None
attributes WeatherAttributes

(Optional) Weather attribute object containing metadata for WeatherMetadata object.

None

Returns:

Type Description
WeatherSet

WeatherSet object.

Source code in emodpy_malaria/weather/weather_set.py
@classmethod
def from_dataframe(cls,
                   df: pd.DataFrame,
                   node_column: str = None,
                   step_column: str = None,
                   weather_columns: Dict[WeatherVariable, str] = None,
                   attributes: WeatherAttributes = None) -> WeatherSet:
    """
    Initializes WeatherSet object from a dataframe containing weather time series.
    The dataframe must have node ids, step and weather columns.

    Args:
        df: Dataframe containing weather data.
        node_column: (Optional) Column containing node ids. The default is "nodes".
        step_column: (Optional) Column containing time steps of weather time series values.
            The default is "steps".
        weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
            By default, WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".
        attributes: (Optional) Weather attribute object containing metadata for WeatherMetadata object.

    Returns:
        WeatherSet object.

    Raises:
        AssertionError: If df is not a pandas DataFrame.
    """
    assert isinstance(df, pd.DataFrame), f"Unsupported dataframe argument type {type(df)}."
    # Forward the arguments by name. Collecting them via locals() silently picks up any local
    # variable added to this method later and is fragile when a tracer/debugger is active.
    return cls._from_csv_data(data_csv=df,
                              node_column=node_column,
                              step_column=step_column,
                              weather_columns=weather_columns,
                              attributes=attributes)

from_files(dir_path, prefix='', file_names=None) classmethod

Instantiates WeatherSet from weather files whose paths are determined based on the given arguments.

Parameters:

Name Type Description Default
dir_path Union[str, Path]

Directory path containing weather files.

required
prefix str

Weather files prefix, e.g. "dtk_15arcmin"

''
file_names Dict[WeatherVariable, str]

Dictionary of weather variables (keys) and weather .bin file names (values).

None

Returns:

Type Description
WeatherSet

WeatherSet object.

Source code in emodpy_malaria/weather/weather_set.py
@classmethod
def from_files(cls,
               dir_path: Union[str, Path],
               prefix: str = "",
               file_names: Dict[WeatherVariable, str] = None) -> WeatherSet:
    """
    Instantiates WeatherSet from weather files whose paths are determined based on the given arguments.

    Args:
        dir_path: Directory path containing weather files.
        prefix: Weather files prefix, e.g. "dtk_15arcmin"
        file_names: Dictionary of weather variables (keys) and weather .bin file names (values).
            If not specified (or empty), files are picked up from the directory via select_weather_files.

    Returns:
        WeatherSet object.
    """
    # The given names are type-checked first, before deciding whether files must be discovered instead.
    WeatherVariable.validate_types(file_names, [str, Path])
    if not file_names:
        file_names = cls.select_weather_files(dir_path=dir_path, prefix=prefix)

    weather_set = WeatherSet(dir_path=dir_path, file_names=file_names)
    weather_set._load()
    return weather_set

items()

Returns a view of the weather dictionary items, i.e. (weather variable, WeatherData) pairs.

Source code in emodpy_malaria/weather/weather_set.py
def items(self) -> Dict[WeatherVariable, WeatherData].items:
    """Returns the view of (weather variable, WeatherData) pairs held in the weather dictionary."""
    weather_dict = self._weather_dict
    return weather_dict.items()

keys()

Returns a view of the weather dictionary keys (WeatherVariables).

Source code in emodpy_malaria/weather/weather_set.py
def keys(self):
    """Returns the view of WeatherVariables used as keys of the weather dictionary."""
    weather_dict = self._weather_dict
    return weather_dict.keys()

make_file_paths(dir_path=None, prefix='dtk_15arcmin_', suffix='{}_daily.bin', weather_variables=None, weather_names=None) classmethod

Construct file paths using the weather directory path, file name prefix/suffix and weather variable names. The logic of this method is the same as of "Path.glob" method, with two adjustments, added to make its use more convenient for working with weather files: - if prefix/suffix are not specified, defaults are used (see method arguments). - if suffix doesn't end with ".bin" or "*", "*.bin" is added (since, otherwise, no matches can be found).

Parameters:

Name Type Description Default
dir_path Union[str, Path]

(Optional) Directory path containing weather files.

None
prefix str

(Optional) Weather file name prefix, usually a fixed string like "dtk_15arcmin".

'dtk_15arcmin_'
suffix str

(Optional) Weather file name suffix, usually containing a weather variable name parameter like "*{tag}*.bin").

'{}_daily.bin'
weather_names Dict[WeatherVariable, str]

(Optional) Dictionary of weather variables (keys) and custom weather variable names (values).

None
weather_variables List[WeatherVariable]

(Optional) Weather variables to be used in case custom weather names are not specified. In this case lowercase weather variable names are used, for example: AIR_TEMPERATURE -> air_temperature.

None

Returns:

Type Description
Dict[WeatherVariable, str]

Dictionary of weather variables (keys) and weather file paths. For example, air temperature could be represented as: WeatherVariable.AIR_TEMPERATURE: "dtk_15arcmin_air_temperature_daily.bin"

Source code in emodpy_malaria/weather/weather_set.py
@classmethod
def make_file_paths(cls,
                    dir_path: Union[str, Path] = None,
                    prefix: str = "dtk_15arcmin_",
                    suffix: str = "{}_daily.bin",
                    weather_variables: List[WeatherVariable] = None,
                    weather_names: Dict[WeatherVariable, str] = None) -> Dict[WeatherVariable, str]:
    """
    Construct file paths using the weather directory path, file name prefix/suffix and weather variable names.
    File names are produced by "_make_file_templates" from the prefix, suffix and weather names/variables.
    If a directory path is given, each file name is joined to it; otherwise bare file names are returned.

    Args:
        dir_path: (Optional) Directory path containing weather files.
        prefix: (Optional) Weather file name prefix, usually a fixed string like "dtk_15arcmin".
        suffix: (Optional) Weather file name suffix, usually containing a weather variable name parameter.
        weather_names: (Optional) Dictionary of weather variables (keys) and custom weather variable names (values).
        weather_variables: (Optional) Weather variables to be used in case custom weather names are not specified.
            In this case lowercase weather variable names are used, for example: AIR_TEMPERATURE -> air_temperature.

    Returns:
        Dictionary of weather variables (keys) and weather file paths.
            For example, air temperature could be represented as:
            WeatherVariable.AIR_TEMPERATURE: "dtk_15arcmin_air_temperature_daily.bin"
    """
    file_names = cls._make_file_templates(prefix=prefix,
                                          suffix=suffix,
                                          weather_names=weather_names,
                                          weather_variables=weather_variables)

    # Without a directory there is nothing to join: hand back the bare file names.
    if dir_path is None:
        return file_names

    file_paths = {}
    for variable, file_name in file_names.items():
        file_paths[variable] = str(Path(dir_path) / file_name)

    return file_paths

select_weather_files(dir_path, prefix='*', suffix='*{}*.bin', weather_variables=None, weather_names=None) classmethod

Select a set of weather files using the weather directory path, file name prefix/suffix and weather variable names. The logic of this method is the same as of "Path.glob" method, with two adjustments, added to make its use more convenient for working with weather files: - if prefix/suffix are not specified, defaults are used (see method arguments). - if suffix doesn't end with ".bin" or "*", "*.bin" is added (since, otherwise, no matches can be found).

Parameters:

Name Type Description Default
dir_path Union[str, Path]

(Optional) Directory path containing weather files.

required
prefix str

(Optional) Weather file name prefix, usually a fixed string like "dtk_15arcmin".

'*'
suffix str

(Optional) Weather file name suffix, usually containing a weather variable name parameter like "*{tag}*.bin").

'*{}*.bin'
weather_names Dict[WeatherVariable, str]

(Optional) Dictionary of weather variables (keys) and custom weather variable names (values).

None
weather_variables List[WeatherVariable]

(Optional) Weather variables to be used in case custom weather names are not specified. In this case lowercase weather variable names are used, for example: AIR_TEMPERATURE -> air_temperature.

None

Returns: Dictionary of weather variables (keys) and weather file names. For example, WeatherVariable.AIR_TEMPERATURE: "dtk_15arcmin_air_temperature_daily.bin"

Source code in emodpy_malaria/weather/weather_set.py
@classmethod
def select_weather_files(cls,
                         dir_path: Union[str, Path],
                         prefix: str = "*",
                         suffix: str = "*{}*.bin",
                         weather_variables: List[WeatherVariable] = None,
                         weather_names: Dict[WeatherVariable, str] = None) -> Dict[WeatherVariable, str]:
    """
    Select a set of weather files using the weather directory path, file name prefix/suffix and weather variable names.
    Name patterns are produced by "_make_file_templates" and matched against the directory with "Path.glob".
    A weather variable is included in the result only if exactly one file matches its pattern.

    Args:
        dir_path: Directory path containing weather files (must not be None).
        prefix: (Optional) Weather file name prefix, usually a fixed string like "dtk_15arcmin".
        suffix: (Optional) Weather file name suffix, usually containing a weather variable name parameter.
        weather_names: (Optional) Dictionary of weather variables (keys) and custom weather variable names (values).
        weather_variables: (Optional) Weather variables to be used in case custom weather names are not specified.
            In this case lowercase weather variable names are used, for example: AIR_TEMPERATURE -> air_temperature.

    Returns:
        Dictionary of weather variables (keys) and weather file names.
        For example, WeatherVariable.AIR_TEMPERATURE: "dtk_15arcmin_air_temperature_daily.bin"
    """
    assert dir_path is not None, "Directory path cannot be None."
    patterns = cls._make_file_templates(prefix=prefix,
                                        suffix=suffix,
                                        weather_names=weather_names,
                                        weather_variables=weather_variables)
    selected = {}
    for variable, pattern in patterns.items():
        # Let Path.glob() find the candidates for this weather variable.
        matches = [*Path(dir_path).glob(pattern)]
        match_count = len(matches)
        assert match_count < 2, f"More than one weather file matches name pattern {pattern}"
        if match_count == 1:
            # Only the file name is kept, not the full path.
            selected[variable] = matches[0].name

    return selected

to_csv(file_path, node_column=None, step_column=None, weather_columns=None)

Creates a csv file containing node ids, time steps and weather columns.

Parameters:

Name Type Description Default
file_path Union[str, Path]

The path of a csv file to be generated.

required
node_column str

(Optional) Column containing node ids. The default is "nodes".

None
step_column str

(Optional) Column containing node index for weather time series values. The default is "steps".

None
weather_columns Dict[WeatherVariable, str]

(Optional) Dictionary of weather variables (keys) and weather column names (values). By default, WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".

None

Returns:

Type Description
DataFrame

Dataframe containing node ids and weather time series, used to create the csv file.

Source code in emodpy_malaria/weather/weather_set.py
def to_csv(self,
           file_path: Union[str, Path],
           node_column: str = None,
           step_column: str = None,
           weather_columns: Dict[WeatherVariable, str] = None) -> pd.DataFrame:
    """
    Writes node ids, time steps and weather columns to a csv file (without the dataframe index).

    Args:
        file_path: The path of a csv file to be generated.
        node_column: (Optional) Column containing node ids. The default is "nodes".
        step_column: (Optional) Column containing time steps of weather time series values.
            The default is "steps".
        weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
            By default, WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".

    Returns:
        Dataframe containing node ids and weather time series, used to create the csv file.
    """
    # The dataframe is built by to_dataframe; this method only persists it.
    frame = self.to_dataframe(node_column=node_column,
                              step_column=step_column,
                              weather_columns=weather_columns)
    frame.to_csv(file_path, index=False)
    return frame

to_dataframe(node_column=None, step_column=None, weather_columns=None)

Creates a dataframe containing node ids, time steps and weather columns.

Parameters:

Name Type Description Default
node_column str

(Optional) Column containing node ids. The default is "nodes".

None
step_column str

(Optional) Column containing node index for weather time series values. The default is "steps".

None
weather_columns Dict[WeatherVariable, str]

(Optional) Dictionary of weather variables (keys) and weather column names (values). By default, WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".

None

Returns: Dataframe containing node ids and weather time series.

Source code in emodpy_malaria/weather/weather_set.py
def to_dataframe(self,
                 node_column: str = None,
                 step_column: str = None,
                 weather_columns: Dict[WeatherVariable, str] = None) -> pd.DataFrame:
    """
    Builds a single dataframe containing node ids, time steps and one column per weather variable.
    The weather columns used are also stored on the weather set.

    Args:
        node_column: (Optional) Column containing node ids. The default is "nodes".
        step_column: (Optional) Column containing time steps of weather time series values.
            The default is "steps".
        weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
            By default, WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".

    Returns:
        Dataframe containing node ids and weather time series.

    Raises:
        ValueError: If weather_columns contain weather variables the weather set doesn't have.
    """
    if not weather_columns:
        # Nothing requested explicitly: take every available variable, column names to be decided later.
        weather_columns = dict.fromkeys(self.weather_variables)

    # Variables are matched by their values.
    available_values = [w.value for w in self.weather_variables]
    not_available = [v for v in weather_columns if v.value not in available_values]
    if not_available:
        raise ValueError(f"weather_columns contain unavailable weather variables: {not_available}")

    # Dataframe info objects carry the column names for each weather variable.
    infos, weather_columns = self._init_dataframe_info_dict(node_column, step_column, weather_columns)
    self._weather_columns = weather_columns

    combined = None
    for variable in infos:
        info = infos[variable]
        variable_df = self[variable].to_dataframe(info)
        if combined is None:
            # The first variable's dataframe becomes the base all other columns are added to.
            combined = variable_df
            continue
        column = info.value_column
        combined[column] = variable_df[column]

    return combined

to_files(dir_path, file_names=None)

Saves WeatherSet to weather files whose paths are determined based on the given arguments.

Source code in emodpy_malaria/weather/weather_set.py
def to_files(self,
             dir_path: Union[str, Path],
             file_names: Dict[WeatherVariable, str] = None) -> None:
    """
    Saves WeatherSet to weather files whose paths are determined based on the given arguments.
    The directory path and file names are stored on the weather set before saving.

    Args:
        dir_path: Directory path, stored on the weather set as a Path before the files are saved.
        file_names: (Optional) Dictionary of weather variables (keys) and weather file names (values).
            If not specified (or empty), the names returned by make_file_paths() are used.
    """
    # The method returns normally, hence "None" (NoReturn is reserved for functions that never return).
    file_names = file_names or self.make_file_paths()
    self._dir_path = Path(dir_path)
    self._file_names = file_names
    self._save()

validate()

Validate WeatherSet object.

Source code in emodpy_malaria/weather/weather_set.py
def validate(self) -> None:
    """
    Validate WeatherSet object.

    Each WeatherData object and its metadata are validated individually. Then the metadata of every
    weather variable is compared with the metadata of the first weather variable in the set: series
    length, node count, id reference, spatial resolution and data years must all match. Finally, if
    weather columns are specified, they must cover exactly the weather variables present in the set.

    Raises:
        AssertionError: If metadata of weather variables is inconsistent, or if the specified
            weather columns don't match the weather variables in the set.
    """
    # (metadata attribute, label used in the error message) for each property that must match.
    # NOTE: total_value_count is not compared (that check is disabled in the original implementation).
    checks = (
        ("series_len", "series_len"),
        ("node_count", "node_count"),
        ("id_reference", "id_reference"),
        ("spatial_resolution", "resolution"),
        ("data_years", "data years"),
    )
    reference = None  # metadata values of the first weather variable; None until it is seen

    for v, wd in self._weather_dict.items():
        wm = wd.metadata
        # Validate each weather data and metadata object
        wd.validate()
        wd.metadata.validate()

        # Validate weather objects consistency
        current = tuple(getattr(wm, attribute) for attribute, _ in checks)
        # Take the reference from the first item only. Falling back with "or" would silently replace
        # a falsy reference value (0, "" or None) with a later one and hide the mismatch.
        if reference is None:
            reference = current

        file_name = f": {self.file_names[v]}(.json)" if v in self.file_names else ""
        for (_, label), expected, actual in zip(checks, reference, current):
            assert expected == actual, f"WeatherSet {label} mismatch for {v!s}{file_name}"

    # Validate that if weather columns are specified they match weather set variables.
    if len(self._weather_columns) > 0:
        for v in WeatherVariable.list():
            in_weather_set = v in self._weather_dict
            in_weather_columns = v in self._weather_columns
            # A variable must be present in both the weather set and the weather columns, or in neither.
            assert in_weather_set == in_weather_columns, \
                f"WeatherSet weather columns don't match weather variables for {v!s}"

values()

Returns the list of WeatherData objects.

Source code in emodpy_malaria/weather/weather_set.py
def values(self) -> List[WeatherData]:
    """Returns a new list with the WeatherData objects stored in the weather dictionary."""
    weather_objects = [*self._weather_dict.values()]
    return weather_objects