Commit b57db286 authored by Lukas Leufen

Merge branch 'develop' into 'release_v1.1.0'

include Develop

See merge request !192
parents 79658233 80dd9f4f
Pipeline #52393 passed with stages
in 13 minutes and 58 seconds
join_settings.py
\ No newline at end of file
join_settings.py
join_rest
\ No newline at end of file
__author__ = "Lukas Leufen"
__date__ = '2020-06-25'
from mlair.helpers.statistics import TransformationClass
DEFAULT_STATIONS = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087']
DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values',
@@ -13,8 +14,7 @@ DEFAULT_START = "1997-01-01"
DEFAULT_END = "2017-12-31"
DEFAULT_WINDOW_HISTORY_SIZE = 13
DEFAULT_OVERWRITE_LOCAL_DATA = False
# DEFAULT_TRANSFORMATION = {"scope": "data", "method": "standardise", "mean": "estimate"}
DEFAULT_TRANSFORMATION = {"scope": "data", "method": "standardise"}
DEFAULT_TRANSFORMATION = TransformationClass(inputs_method="standardise", targets_method="standardise")
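# A minimal sketch (illustration only, not part of this diff) of overriding the new default via
# the TransformationClass constructor shown above; "centre" is another supported method (see the
# transformation docstring further below):
CUSTOM_TRANSFORMATION = TransformationClass(inputs_method="standardise", targets_method="centre")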
DEFAULT_HPC_LOGIN_LIST = ["ju", "hdfmll"]  # first part of login node names for Juwels (ju[wels]) and hdfml (hdfmll[ogin])
DEFAULT_HPC_HOST_LIST = ["jw", "hdfmlc"]  # first part of compute node names for Juwels (jw[comp]) and hdfml (hdfmlc[ompute])
DEFAULT_CREATE_NEW_MODEL = True
@@ -46,13 +46,13 @@ DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS = True
DEFAULT_EVALUATE_BOOTSTRAPS = True
DEFAULT_CREATE_NEW_BOOTSTRAPS = False
DEFAULT_NUMBER_OF_BOOTSTRAPS = 20
#DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
# "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
# "PlotAvailability"]
DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore",
DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
"PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
"PlotAvailability"]
"PlotAvailability", "PlotSeparationOfScales"]
DEFAULT_SAMPLING = "daily"
DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA", "press": "REA", "relhum": "REA",
"temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "",
"pm10": "", "so2": ""}
def get_defaults():
......
@@ -20,7 +20,7 @@ def prepare_host(create_new=True, data_path=None, sampling="daily") -> str:
:param create_new: Create new path if enabled
:param data_path: Pass your custom path (and thereby ignore the preset paths for known hosts)
:param sampling: sampling rate to separate data physically by temporal resolution
:param sampling: sampling rate to separate data physically by temporal resolution (deprecated)
:return: full path to data
"""
@@ -32,17 +32,14 @@ def prepare_host(create_new=True, data_path=None, sampling="daily") -> str:
data_path = f"/home/{user}/Data/toar_{sampling}/"
elif hostname == "zam347":
data_path = f"/home/{user}/Data/toar_{sampling}/"
elif hostname == "linux-aa9b":
data_path = f"/home/{user}/mlair/data/toar_{sampling}/"
elif (len(hostname) > 2) and (hostname[:2] == "jr"):
data_path = f"/p/project/cjjsc42/{user}/DATA/toar_{sampling}/"
elif (len(hostname) > 2) and (hostname[:2] in ['jw', 'ju'] or hostname[:5] in ['hdfml']):
data_path = f"/p/project/deepacf/intelliaq/{user}/DATA/toar_{sampling}/"
data_path = f"/p/project/deepacf/intelliaq/{user}/DATA/MLAIR/"
elif runner_regex.match(hostname) is not None:
data_path = f"/home/{user}/mlair/data/toar_{sampling}/"
data_path = f"/home/{user}/mlair/data/"
else:
data_path = os.path.join(os.getcwd(), "data", sampling)
# raise OSError(f"unknown host '{hostname}'")
data_path = os.path.join(os.getcwd(), "data")
if not os.path.exists(data_path):
try:
@@ -97,7 +94,7 @@ def set_experiment_name(name: str = None, sampling: str = None) -> str:
return experiment_name
def set_bootstrap_path(bootstrap_path: str, data_path: str, sampling: str) -> str:
def set_bootstrap_path(bootstrap_path: str, data_path: str) -> str:
"""
Set path for bootstrap input data.
@@ -105,12 +102,11 @@ def set_bootstrap_path(bootstrap_path: str, data_path: str, sampling: str) -> str:
:param bootstrap_path: custom path to store bootstrap data
:param data_path: path of data for default bootstrap path
:param sampling: sampling rate to add, if path is set to default
:return: full bootstrap path
"""
if bootstrap_path is None:
bootstrap_path = os.path.join(data_path, "..", f"bootstrap_{sampling}")
bootstrap_path = os.path.join(data_path, "bootstrap")
check_path_and_create(bootstrap_path)
return os.path.abspath(bootstrap_path)
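# Illustration with a hypothetical path: the new default places the bootstrap folder inside the
# data path instead of next to it with a sampling suffix:
import os
_data_path = "/home/user/mlair/data"
print(os.path.abspath(os.path.join(_data_path, "bootstrap")))              # new: /home/user/mlair/data/bootstrap
print(os.path.abspath(os.path.join(_data_path, "..", "bootstrap_daily")))  # old: /home/user/mlair/bootstrap_daily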
......
@@ -13,4 +13,4 @@ from .bootstraps import BootStraps
from .iterator import KerasIterator, DataCollection
from .default_data_handler import DefaultDataHandler
from .abstract_data_handler import AbstractDataHandler
from .data_preparation_neighbors import DataHandlerNeighbors
from .data_handler_neighbors import DataHandlerNeighbors
@@ -27,7 +27,10 @@ class AbstractDataHandler:
@classmethod
def own_args(cls, *args):
return remove_items(inspect.getfullargspec(cls).args, ["self"] + list(args))
"""Return all arguments (including kwonlyargs)."""
arg_spec = inspect.getfullargspec(cls)
list_of_args = arg_spec.args + arg_spec.kwonlyargs
return remove_items(list_of_args, ["self"] + list(args))
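# Why kwonlyargs matter here (a minimal, self-contained sketch): subclasses declare their extra
# parameters as keyword-only (e.g. "*args, kz_filter_length, ..."), which inspect.getfullargspec
# reports separately from positional args:
import inspect

def _example(self, station, *args, kz_filter_length, kz_filter_iter=4):
    pass

_spec = inspect.getfullargspec(_example)
print(_spec.args)        # ['self', 'station']
print(_spec.kwonlyargs)  # ['kz_filter_length', 'kz_filter_iter']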
@classmethod
def transformation(cls, *args, **kwargs):
......
@@ -10,15 +10,18 @@ import datetime as dt
from mlair.data_handler import AbstractDataHandler
from typing import Union, List
from typing import Union, List, Tuple, Dict
import logging
from functools import reduce
from mlair.helpers.join import EmptyQueryResult
from mlair.helpers import TimeTracking
number = Union[float, int]
num_or_list = Union[number, List[number]]
def run_data_prep():
from .data_preparation_neighbors import DataHandlerNeighbors
from .data_handler_neighbors import DataHandlerNeighbors
data = DummyDataHandler("main_class")
data.get_X()
data.get_Y()
@@ -33,8 +36,7 @@ def run_data_prep():
def create_data_prep():
from .data_preparation_neighbors import DataHandlerNeighbors
from .data_handler_neighbors import DataHandlerNeighbors
path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata")
station_type = None
network = 'UBA'
@@ -98,7 +100,7 @@ class DummyDataHandler(AbstractDataHandler):
if __name__ == "__main__":
from mlair.data_handler.station_preparation import DataHandlerSingleStation
from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
from mlair.data_handler.iterator import KerasIterator, DataCollection
data_prep = create_data_prep()
data_collection = DataCollection(data_prep)
......
"""Data Handler using kz-filtered data."""
__author__ = 'Lukas Leufen'
__date__ = '2020-08-26'
import inspect
import numpy as np
import pandas as pd
import xarray as xr
from typing import List, Union
from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
from mlair.data_handler import DefaultDataHandler
from mlair.helpers import remove_items, to_list, TimeTrackingWrapper
from mlair.helpers.statistics import KolmogorovZurbenkoFilterMovingWindow as KZFilter
# define a more general date type for type hinting
str_or_list = Union[str, List[str]]
class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
"""Data handler for a single station to be used by a superior data handler. Inputs are kz filtered."""
_requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
def __init__(self, *args, kz_filter_length, kz_filter_iter, **kwargs):
self._check_sampling(**kwargs)
# self.original_data = None # ToDo: implement here something to store unfiltered data
self.kz_filter_length = to_list(kz_filter_length)
self.kz_filter_iter = to_list(kz_filter_iter)
self.cutoff_period = None
self.cutoff_period_days = None
super().__init__(*args, **kwargs)
def _check_sampling(self, **kwargs):
assert kwargs.get("sampling") == "hourly" # This data handler requires hourly data resolution
def setup_samples(self):
"""
Set up samples. This method prepares and creates the samples X and labels Y.
"""
data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
self.station_type, self.network, self.store_data_locally, self.data_origin)
self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
limit=self.interpolation_limit)
self.set_inputs_and_targets()
self.apply_kz_filter()
# this is just a code snippet to check the results of the kz filter
# import matplotlib
# matplotlib.use("TkAgg")
# import matplotlib.pyplot as plt
# self.input_data.data.sel(filter="74d", variables="temp", Stations="DEBW107").plot()
# self.input_data.data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter")
if self.do_transformation is True:
self.call_transform()
self.make_samples()
@TimeTrackingWrapper
def apply_kz_filter(self):
"""Apply kolmogorov zurbenko filter only on inputs."""
kz = KZFilter(self.input_data.data, wl=self.kz_filter_length, itr=self.kz_filter_iter, filter_dim="datetime")
filtered_data: List[xr.DataArray] = kz.run()
self.cutoff_period = kz.period_null()
self.cutoff_period_days = kz.period_null_days()
self.input_data.data = xr.concat(filtered_data, pd.Index(self.create_filter_index(), name="filter"))
def create_filter_index(self) -> pd.Index:
"""
Round cut-off periods in days and append 'res' for the residuum index.

Periods smaller than 10 days are rounded to a single decimal, larger ones to an integer. The rounded
values are transformed to a list of strings with a 'd' suffix, and 'res' is appended for the residuum index.
"""
index = np.round(self.cutoff_period_days, 1)
f = lambda x: int(np.round(x)) if x >= 10 else np.round(x, 1)
index = list(map(f, index.tolist()))
index = list(map(lambda x: str(x) + "d", index)) + ["res"]
return pd.Index(index, name="filter")
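# Worked example of the rounding rule (hypothetical cut-off periods):
#   [2.34, 10.7, 74.2] days  ->  Index(['2.3d', '11d', '74d', 'res'], name='filter')
import numpy as np
import pandas as pd
_periods = np.round(np.array([2.34, 10.7, 74.2]), 1)
_f = lambda x: int(np.round(x)) if x >= 10 else np.round(x, 1)
_index = [str(_f(x)) + "d" for x in _periods.tolist()] + ["res"]
print(pd.Index(_index, name="filter"))  # ['2.3d', '11d', '74d', 'res']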
def get_transposed_history(self) -> xr.DataArray:
"""Return history.
:return: history with dimensions datetime, window, Stations, variables, filter.
"""
return self.history.transpose("datetime", "window", "Stations", "variables", "filter").copy()
class DataHandlerKzFilter(DefaultDataHandler):
"""Data handler using kz filtered data."""
data_handler = DataHandlerKzFilterSingleStation
data_handler_transformation = DataHandlerKzFilterSingleStation
_requirements = data_handler.requirements()
__author__ = 'Lukas Leufen'
__date__ = '2020-11-05'
from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
from mlair.data_handler.data_handler_kz_filter import DataHandlerKzFilterSingleStation
from mlair.data_handler import DefaultDataHandler
from mlair import helpers
from mlair.helpers import remove_items
from mlair.configuration.defaults import DEFAULT_SAMPLING
import inspect
from typing import Callable, Union
import datetime as dt
import numpy as np
import pandas as pd
import xarray as xr
class DataHandlerMixedSamplingSingleStation(DataHandlerSingleStation):
_requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
def __init__(self, *args, sampling_inputs, **kwargs):
sampling = (sampling_inputs, kwargs.get("sampling", DEFAULT_SAMPLING))
kwargs.update({"sampling": sampling})
super().__init__(*args, **kwargs)
def setup_samples(self):
"""
Set up samples. This method prepares and creates the samples X and labels Y.
"""
self._data = list(map(self.load_and_interpolate, [0, 1])) # load input (0) and target (1) data
self.set_inputs_and_targets()
if self.do_transformation is True:
self.call_transform()
self.make_samples()
def load_and_interpolate(self, ind) -> Union[xr.DataArray, pd.DataFrame]:
data, self.meta = self.load_data(self.path[ind], self.station, self.statistics_per_var, self.sampling[ind],
self.station_type, self.network, self.store_data_locally, self.data_origin,
self.start, self.end)
data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
limit=self.interpolation_limit)
return data
def set_inputs_and_targets(self):
inputs = self._data[0].sel({self.target_dim: helpers.to_list(self.variables)})
targets = self._data[1].sel({self.target_dim: self.target_var})
self.input_data.data = inputs
self.target_data.data = targets
def setup_data_path(self, data_path, sampling):
"""Sets two paths instead of single path. Expects sampling arg to be a list with two entries"""
assert len(sampling) == 2
return list(map(lambda x: super(__class__, self).setup_data_path(data_path, x), sampling))
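# Why the explicit super(__class__, self) above: zero-argument super() relies on the enclosing
# function's first argument being the instance, which does not hold inside a lambda. A minimal
# sketch with stand-in classes:
class _Base:
    def setup_data_path(self, data_path, sampling):
        return f"{data_path}/toar_{sampling}"

class _Mixed(_Base):
    def setup_data_path(self, data_path, sampling):
        assert len(sampling) == 2
        return list(map(lambda x: super(_Mixed, self).setup_data_path(data_path, x), sampling))

print(_Mixed().setup_data_path("/data", ("hourly", "daily")))  # ['/data/toar_hourly', '/data/toar_daily']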
class DataHandlerMixedSampling(DefaultDataHandler):
"""Data handler using mixed sampling for input and target."""
data_handler = DataHandlerMixedSamplingSingleStation
data_handler_transformation = DataHandlerMixedSamplingSingleStation
_requirements = data_handler.requirements()
class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSingleStation,
DataHandlerKzFilterSingleStation):
_requirements1 = DataHandlerKzFilterSingleStation.requirements()
_requirements2 = DataHandlerMixedSamplingSingleStation.requirements()
_requirements = list(set(_requirements1 + _requirements2))
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def _check_sampling(self, **kwargs):
assert kwargs.get("sampling") == ("hourly", "daily")
def setup_samples(self):
"""
Set up samples. This method prepares and creates the samples X and labels Y.

A KZ filter is applied to the input data, which has hourly resolution. Labels Y are provided as aggregated values
with daily resolution.
"""
self._data = list(map(self.load_and_interpolate, [0, 1])) # load input (0) and target (1) data
self.set_inputs_and_targets()
self.apply_kz_filter()
if self.do_transformation is True:
self.call_transform()
self.make_samples()
def estimate_filter_width(self):
"""
Estimate the filter width from the stop frequency f = 0.5 / (len * sqrt(itr)), i.e. T = 1 / f = 2 * len * sqrt(itr).

:return: estimated filter width as integer
"""
return int(self.kz_filter_length[0] * np.sqrt(self.kz_filter_iter[0]) * 2)
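# Worked example with hypothetical values kz_filter_length=13 and kz_filter_iter=4:
# int(13 * sqrt(4) * 2) = int(52.0) = 52 time steps.
import numpy as np
print(int(13 * np.sqrt(4) * 2))  # 52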
@staticmethod
def _add_time_delta(date, delta):
new_date = dt.datetime.strptime(date, "%Y-%m-%d") + dt.timedelta(hours=delta)
return new_date.strftime("%Y-%m-%d")
def load_and_interpolate(self, ind) -> Union[xr.DataArray, pd.DataFrame]:
if ind == 0: # for inputs
estimated_filter_width = self.estimate_filter_width()
start = self._add_time_delta(self.start, -estimated_filter_width)
end = self._add_time_delta(self.end, estimated_filter_width)
else: # target
start, end = self.start, self.end
data, self.meta = self.load_data(self.path[ind], self.station, self.statistics_per_var, self.sampling[ind],
self.station_type, self.network, self.store_data_locally, self.data_origin,
start, end)
data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
limit=self.interpolation_limit)
return data
class DataHandlerMixedSamplingWithFilter(DefaultDataHandler):
"""Data handler using mixed sampling for input and target. Inputs are temporal filtered."""
data_handler = DataHandlerMixedSamplingWithFilterSingleStation
data_handler_transformation = DataHandlerMixedSamplingWithFilterSingleStation
_requirements = data_handler.requirements()
class DataHandlerMixedSamplingSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithFilterSingleStation):
"""
Data handler using mixed sampling for input and target. Inputs are temporally filtered and, depending on the
separation frequency of a filtered time series, the time step delta for the input data is adjusted (see image below).
.. image:: ../../../../../_source/_plots/separation_of_scales.png
:width: 400
"""
_requirements = DataHandlerMixedSamplingWithFilterSingleStation.requirements()
def __init__(self, *args, time_delta=np.sqrt, **kwargs):
assert isinstance(time_delta, Callable)
self.time_delta = time_delta
super().__init__(*args, **kwargs)
def make_history_window(self, dim_name_of_inputs: str, window: int, dim_name_of_shift: str) -> None:
"""
Create an xr.DataArray containing history data.

Shift the data window+1 times and return an xarray with a new dimension 'window' that contains the shifted
data. This is used to represent history in the data. Results are stored in the history attribute.
:param dim_name_of_inputs: Name of dimension which contains the input variables
:param window: number of time steps to look back in history
Note: window will be treated as a negative value. This is in agreement with looking back on
a time line. Nonetheless, positive values are allowed, but they are converted to their negative
expression.
:param dim_name_of_shift: Dimension along shift will be applied
"""
window = -abs(window)
data = self.input_data.data
self.history = self.stride(data, dim_name_of_shift, window)
def stride(self, data: xr.DataArray, dim: str, window: int) -> xr.DataArray:
# this is just a code snippet to check the results of the kz filter
# import matplotlib
# matplotlib.use("TkAgg")
# import matplotlib.pyplot as plt
# xr.concat(res, dim="filter").sel({"variables":"temp", "Stations":"DEBW107", "datetime":"2010-01-01T00:00:00"}).plot.line(hue="filter")
time_deltas = np.round(self.time_delta(self.cutoff_period)).astype(int)
start, end = window, 1
res = []
window_array = self.create_index_array('window', range(start, end), squeeze_dim=self.target_dim)
for delta, filter_name in zip(np.append(time_deltas, 1), data.coords["filter"]):
res_filter = []
data_filter = data.sel({"filter": filter_name})
for w in range(start, end):
res_filter.append(data_filter.shift({dim: -w * delta}))
res_filter = xr.concat(res_filter, dim=window_array).chunk()
res.append(res_filter)
res = xr.concat(res, dim="filter")
return res
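# Sketch of how the shift deltas are derived (hypothetical cut-off periods, in time steps): with
# the default time_delta=np.sqrt, slowly varying filter components are sampled more coarsely.
import numpy as np
_cutoff_period = np.array([74.0, 9.0, 2.0])
print(np.round(np.sqrt(_cutoff_period)).astype(int))  # [9 3 1] -> one step size per filter component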
def estimate_filter_width(self):
"""
Attention: this method returns the maximum of
* the estimated filter width T = 1 / f with f = 0.5 / (len * sqrt(itr)), or
* the time delta method applied to the estimated filter width, multiplied by window_history_size,
to provide a sufficiently wide filter width.
"""
est = self.kz_filter_length[0] * np.sqrt(self.kz_filter_iter[0]) * 2
return int(max([self.time_delta(est) * self.window_history_size, est]))
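# Worked example with hypothetical values: kz_filter_length=13, kz_filter_iter=4 gives est = 52;
# with time_delta=np.sqrt and window_history_size=13 this returns int(max(sqrt(52) * 13, 52)) = 93.
import numpy as np
print(int(max(np.sqrt(52) * 13, 52)))  # 93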
class DataHandlerMixedSamplingSeparationOfScales(DefaultDataHandler):
"""Data handler using mixed sampling for input and target. Inputs are temporal filtered and different time step
sizes are applied in relation to frequencies."""
data_handler = DataHandlerMixedSamplingSeparationOfScalesSingleStation
data_handler_transformation = DataHandlerMixedSamplingSeparationOfScalesSingleStation
_requirements = data_handler.requirements()
@@ -4,9 +4,9 @@ __date__ = '2020-07-17'
from mlair.helpers import to_list
from mlair.data_handler.station_preparation import DataHandlerSingleStation
from mlair.data_handler import DefaultDataHandler
import os
import copy
from typing import Union, List
@@ -15,6 +15,7 @@ num_or_list = Union[number, List[number]]
class DataHandlerNeighbors(DefaultDataHandler):
"""Data handler including neighboring stations."""
def __init__(self, id_class, data_path, neighbors=None, min_length=0,
extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False):
@@ -24,14 +25,14 @@ class DataHandlerNeighbors(DefaultDataHandler):
@classmethod
def build(cls, station, **kwargs):
sp_keys = {k: kwargs[k] for k in cls._requirements if k in kwargs}
sp = DataHandlerSingleStation(station, **sp_keys)
sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls._requirements if k in kwargs}
sp = cls.data_handler(station, **sp_keys)
n_list = []
for neighbor in kwargs.get("neighbors", []):
n_list.append(DataHandlerSingleStation(neighbor, **sp_keys))
n_list.append(cls.data_handler(neighbor, **sp_keys))
else:
kwargs["neighbors"] = n_list if len(n_list) > 0 else None
dp_args = {k: kwargs[k] for k in cls.own_args("id_class") if k in kwargs}
dp_args = {k: copy.deepcopy(kwargs[k]) for k in cls.own_args("id_class") if k in kwargs}
return cls(sp, **dp_args)
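# Why kwargs are deep-copied here (a minimal sketch): kwargs may hold mutable objects shared
# across all stations; copying ensures one handler cannot mutate the settings of another.
import copy
_kwargs = {"statistics_per_var": {"o3": "dma8eu"}}
_sp_keys = {k: copy.deepcopy(v) for k, v in _kwargs.items()}
_sp_keys["statistics_per_var"]["o3"] = "maximum"  # mutate the copy only
print(_kwargs["statistics_per_var"]["o3"])        # still 'dma8eu'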
def _create_collection(self):
......
@@ -4,6 +4,7 @@ __date__ = '2020-09-21'
import copy
import inspect
import gc
import logging
import os
import pickle
@@ -15,7 +16,6 @@ import numpy as np
import xarray as xr
from mlair.data_handler.abstract_data_handler import AbstractDataHandler
from mlair.data_handler.station_preparation import DataHandlerSingleStation
from mlair.helpers import remove_items, to_list
from mlair.helpers.join import EmptyQueryResult
@@ -25,11 +25,14 @@ num_or_list = Union[number, List[number]]
class DefaultDataHandler(AbstractDataHandler):
from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation as data_handler
from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation as data_handler_transformation
_requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
_requirements = remove_items(inspect.getfullargspec(data_handler).args, ["self", "station"])
def __init__(self, id_class: DataHandlerSingleStation, data_path: str, min_length: int = 0,
extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False, name_affix=None):
def __init__(self, id_class: data_handler, experiment_path: str, min_length: int = 0,
extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False, name_affix=None,
store_processed_data=True):
super().__init__()
self.id_class = id_class
self.interpolation_dim = "datetime"
@@ -39,16 +42,16 @@ class DefaultDataHandler(AbstractDataHandler):
self._X_extreme = None
self._Y_extreme = None
_name_affix = str(f"{str(self.id_class)}_{name_affix}" if name_affix is not None else id(self))
self._save_file = os.path.join(data_path, f"data_preparation_{_name_affix}.pickle")
self._save_file = os.path.join(experiment_path, "data", f"{_name_affix}.pickle")
self._collection = self._create_collection()
self.harmonise_X()
self.multiply_extremes(extreme_values, extremes_on_right_tail_only, dim=self.interpolation_dim)
self._store(fresh_store=True)
self._store(fresh_store=True, store_processed_data=store_processed_data)
@classmethod
def build(cls, station: str, **kwargs):
sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls._requirements if k in kwargs}
sp = DataHandlerSingleStation(station, **sp_keys)
sp = cls.data_handler(station, **sp_keys)
dp_args = {k: copy.deepcopy(kwargs[k]) for k in cls.own_args("id_class") if k in kwargs}
return cls(sp, **dp_args)
@@ -61,6 +64,7 @@ class DefaultDataHandler(AbstractDataHandler):
def _reset_data(self):
self._X, self._Y, self._X_extreme, self._Y_extreme = None, None, None, None
gc.collect()
def _cleanup(self):
directory = os.path.dirname(self._save_file)
@@ -69,13 +73,14 @@ class DefaultDataHandler(AbstractDataHandler):
if os.path.exists(self._save_file):
shutil.rmtree(self._save_file, ignore_errors=True)
def _store(self, fresh_store=False):
self._cleanup() if fresh_store is True else None
data = {"X": self._X, "Y": self._Y, "X_extreme": self._X_extreme, "Y_extreme": self._Y_extreme}
with open(self._save_file, "wb") as f:
pickle.dump(data, f)
logging.debug(f"save pickle data to {self._save_file}")
self._reset_data()
def _store(self, fresh_store=False, store_processed_data=True):
if store_processed_data is True:
self._cleanup() if fresh_store is True else None
data = {"X": self._X, "Y": self._Y, "X_extreme": self._X_extreme, "Y_extreme": self._Y_extreme}
with open(self._save_file, "wb") as f:
pickle.dump(data, f)
logging.debug(f"save pickle data to {self._save_file}")
self._reset_data()
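# The store/load pattern in brief: processed X/Y are pickled to disk and the in-memory copies are
# released afterwards, so many stations can be prepared without exhausting RAM. A stripped-down
# sketch (hypothetical file name):
import gc
import pickle

def _store_sketch(data, save_file, store_processed_data=True):
    if store_processed_data:
        with open(save_file, "wb") as f:
            pickle.dump(data, f)  # persist processed samples
    data.clear()  # drop the in-memory reference ...
    gc.collect()  # ... and reclaim the memory

_store_sketch({"X": [1, 2, 3], "Y": [0, 1, 0]}, "data_prep_DEBW107.pickle")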
def _load(self):
try:
@@ -140,7 +145,7 @@ class DefaultDataHandler(AbstractDataHandler):
return self.id_class.observation.copy().squeeze()
def get_transformation_Y(self):
return self.id_class.get_transformation_information()
return self.id_class.get_transformation_targets()
def multiply_extremes(self, extreme_values: num_or_list = 1., extremes_on_right_tail_only: bool = False,
timedelta: Tuple[int, str] = (1, 'm'), dim="datetime"):
@@ -212,27 +217,55 @@ class DefaultDataHandler(AbstractDataHandler):
@classmethod
def transformation(cls, set_stations, **kwargs):
"""
### supported transformation methods
Currently supported methods are:
* standardise (default, if method is not given)
* centre
### mean and std estimation
Mean and std (depending on method) are estimated. For each station, mean and std are calculated and afterwards