Commit 7f4ee48c authored by lukas leufen

Merge branch 'lukas_issue151_refac_rename-interpolation-dim-with-time-dim'...

Merge branch 'lukas_issue151_refac_rename-interpolation-dim-with-time-dim' into 'lukas_issue144_feat_workflow-with-advanced-data-handling'

Resolve "rename interpolation dimension by time dimension"

See merge request !131
parents dbc30525 29dc24b0
Pipeline #41989 passed with stages
in 5 minutes and 57 seconds
......@@ -29,7 +29,7 @@ DEFAULT_TARGET_VAR = "o3"
DEFAULT_TARGET_DIM = "variables"
DEFAULT_WINDOW_LEAD_TIME = 3
DEFAULT_DIMENSIONS = {"new_index": ["datetime", "Stations"]}
DEFAULT_INTERPOLATION_DIM = "datetime"
DEFAULT_TIME_DIM = "datetime"
DEFAULT_INTERPOLATION_METHOD = "linear"
DEFAULT_LIMIT_NAN_FILL = 1
DEFAULT_TRAIN_START = "1997-01-01"
......
......@@ -53,7 +53,7 @@ if __name__ == "__main__":
"sampling": 'daily',
"target_dim": 'variables',
"target_var": 'o3',
"interpolation_dim": 'datetime',
"time_dim": 'datetime',
"window_history_size": 7,
"window_lead_time": 3,
"neighbors": ["DEBW034"],
......
......@@ -39,7 +39,7 @@ class AbstractStationPrep(object):
class StationPrep(AbstractStationPrep):
def __init__(self, station, data_path, statistics_per_var, station_type, network, sampling,
target_dim, target_var, interpolation_dim, window_history_size, window_lead_time,
target_dim, target_var, time_dim, window_history_size, window_lead_time,
overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True,
min_length: int = 0, start=None, end=None, **kwargs):
super().__init__() # path, station, statistics_per_var, transformation, **kwargs)
......@@ -53,7 +53,7 @@ class StationPrep(AbstractStationPrep):
self.sampling = sampling
self.target_dim = target_dim
self.target_var = target_var
self.interpolation_dim = interpolation_dim
self.time_dim = time_dim
self.window_history_size = window_history_size
self.window_lead_time = window_lead_time
self.overwrite_local_data = overwrite_local_data
......@@ -99,7 +99,7 @@ class StationPrep(AbstractStationPrep):
f"statistics_per_var={self.statistics_per_var}, " \
f"station_type='{self.station_type}', network='{self.network}', " \
f"sampling='{self.sampling}', target_dim='{self.target_dim}', target_var='{self.target_var}', " \
f"interpolate_dim='{self.interpolation_dim}', window_history_size={self.window_history_size}, " \
f"time_dim='{self.time_dim}', window_history_size={self.window_history_size}, " \
f"window_lead_time={self.window_lead_time}, overwrite_local_data={self.overwrite_local_data}, " \
f"transformation={self._print_transformation_as_string}, **{self.kwargs})"
......@@ -144,7 +144,7 @@ class StationPrep(AbstractStationPrep):
return coords.rename(index={"station_lon": "lon", "station_lat": "lat"}).to_dict()[str(self)]
def call_transform(self, inverse=False):
self.transform(dim=self.interpolation_dim, method=self.transformation["method"],
self.transform(dim=self.time_dim, method=self.transformation["method"],
mean=self.transformation['mean'], std=self.transformation["std"],
min_val=self.transformation["min"], max_val=self.transformation["max"],
inverse=inverse
......@@ -164,10 +164,10 @@ class StationPrep(AbstractStationPrep):
self.make_samples()
def make_samples(self):
self.make_history_window(self.target_dim, self.window_history_size, self.interpolation_dim)
self.make_labels(self.target_dim, self.target_var, self.interpolation_dim, self.window_lead_time)
self.make_observation(self.target_dim, self.target_var, self.interpolation_dim)
self.remove_nan(self.interpolation_dim)
self.make_history_window(self.target_dim, self.window_history_size, self.time_dim)
self.make_labels(self.target_dim, self.target_var, self.time_dim, self.window_lead_time)
self.make_observation(self.target_dim, self.target_var, self.time_dim)
self.remove_nan(self.time_dim)
def read_data_from_disk(self, source_name=""):
"""
......@@ -658,13 +658,13 @@ if __name__ == "__main__":
sp = StationPrep(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
statistics_per_var=statistics_per_var, station_type='background',
network='UBA', sampling='daily', target_dim='variables', target_var='o3',
interpolation_dim='datetime', window_history_size=7, window_lead_time=3,
time_dim='datetime', window_history_size=7, window_lead_time=3,
) # transformation={'method': 'standardise'})
# sp.set_transformation({'method': 'standardise', 'mean': sp.mean+2, 'std': sp.std+1})
sp2 = StationPrep(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
statistics_per_var=statistics_per_var, station_type='background',
network='UBA', sampling='daily', target_dim='variables', target_var='o3',
interpolation_dim='datetime', window_history_size=7, window_lead_time=3,
time_dim='datetime', window_history_size=7, window_lead_time=3,
transformation={'method': 'standardise'})
sp2.transform(inverse=True)
sp.get_X()
......
......@@ -13,7 +13,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAINABLE, \
DEFAULT_FRACTION_OF_TRAINING, DEFAULT_EXTREME_VALUES, DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, DEFAULT_PERMUTE_DATA, \
DEFAULT_BATCH_SIZE, DEFAULT_EPOCHS, DEFAULT_TARGET_VAR, DEFAULT_TARGET_DIM, DEFAULT_WINDOW_LEAD_TIME, \
DEFAULT_DIMENSIONS, DEFAULT_INTERPOLATION_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_LIMIT_NAN_FILL, \
DEFAULT_DIMENSIONS, DEFAULT_TIME_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_LIMIT_NAN_FILL, \
DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \
DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
......@@ -66,7 +66,7 @@ class ExperimentSetup(RunEnvironment):
# interpolation
self._set_param("dimensions", dimensions, default={'new_index': ['datetime', 'Stations']})
self._set_param("interpolation_dim", interpolation_dim, default='datetime')
self._set_param("time_dim", time_dim, default='datetime')
self._set_param("interpolation_method", interpolation_method, default='linear')
self._set_param("limit_nan_fill", limit_nan_fill, default=1)
......@@ -140,7 +140,7 @@ class ExperimentSetup(RunEnvironment):
:param window_lead_time: number of time steps to predict by model (default 3). Time steps `t_0+1` to `t_0+w` are
predicted.
:param dimensions:
:param interpolation_dim:
:param time_dim:
:param interpolation_method:
:param limit_nan_fill:
:param train_start:
......@@ -220,7 +220,7 @@ class ExperimentSetup(RunEnvironment):
target_dim=None,
window_lead_time: int = None,
dimensions=None,
interpolation_dim=None,
time_dim=None,
interpolation_method=None,
limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None,
test_end=None, use_all_stations_on_all_data_sets=None, trainable: bool = None, fraction_of_train: float = None,
......@@ -309,7 +309,7 @@ class ExperimentSetup(RunEnvironment):
# interpolation
self._set_param("dimensions", dimensions, default=DEFAULT_DIMENSIONS)
self._set_param("interpolation_dim", interpolation_dim, default=DEFAULT_INTERPOLATION_DIM)
self._set_param("time_dim", time_dim, default=DEFAULT_TIME_DIM)
self._set_param("interpolation_method", interpolation_method, default=DEFAULT_INTERPOLATION_METHOD)
self._set_param("limit_nan_fill", limit_nan_fill, default=DEFAULT_LIMIT_NAN_FILL)
......
......@@ -264,7 +264,7 @@ class PostProcessing(RunEnvironment):
path = self.data_store.get("forecast_path")
plot_list = self.data_store.get("plot_list", "postprocessing")
time_dimension = self.data_store.get("interpolation_dim")
time_dimension = self.data_store.get("time_dim")
if self.bootstrap_skill_scores is not None and "PlotBootstrapSkillScore" in plot_list:
PlotBootstrapSkillScore(self.bootstrap_skill_scores, plot_folder=self.plot_path, model_setup="CNN")
......@@ -317,7 +317,7 @@ class PostProcessing(RunEnvironment):
be found inside `forecast_path`.
"""
logging.debug("start make_prediction")
time_dimension = self.data_store.get("interpolation_dim")
time_dimension = self.data_store.get("time_dim")
for i, data in enumerate(self.test_data):
input_data = data.get_X()
target_data = data.get_Y(as_numpy=False)
......
......@@ -16,7 +16,7 @@ from mlair.configuration import path_config
from mlair.helpers.join import EmptyQueryResult
from mlair.run_modules.run_environment import RunEnvironment
DEFAULT_ARGS_LIST = ["data_path", "stations", "variables", "interpolation_dim", "target_dim", "target_var"]
DEFAULT_ARGS_LIST = ["data_path", "stations", "variables", "time_dim", "target_dim", "target_var"]
DEFAULT_KWARGS_LIST = ["limit_nan_fill", "window_history_size", "window_lead_time", "statistics_per_var", "min_length",
"station_type", "overwrite_local_data", "start", "end", "sampling", "transformation",
"extreme_values", "extremes_on_right_tail_only", "network", "data_preparation"]
......@@ -203,7 +203,7 @@ class PreProcessing(RunEnvironment):
loading time are logged in debug mode.
:param args: Dictionary with required parameters for DataGenerator class (`data_path`, `network`, `stations`,
`variables`, `interpolation_dim`, `target_dim`, `target_var`).
`variables`, `time_dim`, `target_dim`, `target_var`).
:param kwargs: optional keyword parameters for the DataGenerator class (e.g. `start`, `interpolation_method`,
`window_lead_time`).
:param all_stations: All stations to check.
......
......@@ -24,7 +24,7 @@ class DefaultWorkflow(Workflow):
target_var=None, target_dim=None,
window_lead_time=None,
dimensions=None,
interpolate_method=None, interpolate_dim=None, limit_nan_fill=None,
interpolate_method=None, time_dim=None, limit_nan_fill=None,
train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None,
use_all_stations_on_all_data_sets=None, fraction_of_train=None,
experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None,
......@@ -69,7 +69,7 @@ class DefaultWorkflowHPC(Workflow):
target_var=None, target_dim=None,
window_lead_time=None,
dimensions=None,
interpolate_method=None, interpolate_dim=None, limit_nan_fill=None,
interpolate_method=None, time_dim=None, limit_nan_fill=None,
train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None,
use_all_stations_on_all_data_sets=None, fraction_of_train=None,
experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None,
......
......@@ -79,7 +79,7 @@ class TestDataGenerator:
assert gen.stations == ['DEBW107']
assert gen.variables == ['o3', 'temp']
assert gen.station_type is None
assert gen.interpolation_dim == 'datetime'
assert gen.time_dim == 'datetime'
assert gen.target_dim == 'variables'
assert gen.target_var == 'o3'
assert gen.interpolation_method == "linear"
......
......@@ -64,7 +64,7 @@ class TestExperimentSetup:
assert data_store.get("window_lead_time", "general") == 3
# interpolation
assert data_store.get("dimensions", "general") == {'new_index': ['datetime', 'Stations']}
assert data_store.get("interpolation_dim", "general") == "datetime"
assert data_store.get("time_dim", "general") == "datetime"
assert data_store.get("interpolation_method", "general") == "linear"
assert data_store.get("limit_nan_fill", "general") == 1
# train parameters
......@@ -93,7 +93,7 @@ class TestExperimentSetup:
stations=['DEBY053', 'DEBW059', 'DEBW027'], network="INTERNET", station_type="background",
variables=["o3", "temp"], start="1999-01-01", end="2001-01-01", window_history_size=4,
target_var="relhum", target_dim="target", window_lead_time=10, dimensions="dim1",
interpolation_dim="int_dim", interpolation_method="cubic", limit_nan_fill=5, train_start="2000-01-01",
time_dim="int_dim", interpolation_method="cubic", limit_nan_fill=5, train_start="2000-01-01",
train_end="2000-01-02", val_start="2000-01-03", val_end="2000-01-04", test_start="2000-01-05",
test_end="2000-01-06", use_all_stations_on_all_data_sets=False, trainable=False,
fraction_of_train=0.5, experiment_path=experiment_path, create_new_model=True, val_min_length=20)
......@@ -125,7 +125,7 @@ class TestExperimentSetup:
assert data_store.get("window_lead_time", "general") == 10
# interpolation
assert data_store.get("dimensions", "general") == "dim1"
assert data_store.get("interpolation_dim", "general") == "int_dim"
assert data_store.get("time_dim", "general") == "int_dim"
assert data_store.get("interpolation_method", "general") == "cubic"
assert data_store.get("limit_nan_fill", "general") == 5
# train parameters
......
......@@ -128,7 +128,7 @@ class TestTraining:
data_prep = DefaultDataPreparation.build(['DEBW107'], data_path=os.path.join(os.path.dirname(__file__), 'data'),
statistics_per_var=statistics_per_var, station_type="background",
network="AIRBASE", sampling="daily", target_dim="variables",
target_var="o3", interpolation_dim="datetime",
target_var="o3", time_dim="datetime",
window_history_size=window_history_size,
window_lead_time=window_lead_time, name_affix="train")
return DataCollection([data_prep])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment