Commit f17fbfdb authored by lukas leufen

fixed remaining interpolate... misspellings, limit nan fill is now called...

Fixed the remaining interpolate_* misspellings (now interpolation_*); limit_nan_fill is now called interpolation_limit; minor fixes to pass the experiment setup tests.
parent a9a2a681
Pipeline #43668 failed in 2 minutes and 13 seconds.
......@@ -31,7 +31,7 @@ DEFAULT_WINDOW_LEAD_TIME = 3
DEFAULT_DIMENSIONS = {"new_index": ["datetime", "Stations"]}
DEFAULT_TIME_DIM = "datetime"
DEFAULT_INTERPOLATION_METHOD = "linear"
DEFAULT_LIMIT_NAN_FILL = 1
DEFAULT_INTERPOLATION_LIMIT = 1
DEFAULT_TRAIN_START = "1997-01-01"
DEFAULT_TRAIN_END = "2007-12-31"
DEFAULT_TRAIN_MIN_LENGTH = 90
......
......@@ -95,7 +95,7 @@ class DefaultDataPreparation(AbstractDataPreparation):
extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False, name_affix=None):
super().__init__()
self.id_class = id_class
self.interpolate_dim = "datetime"
self.interpolation_dim = "datetime"
self.min_length = min_length
self._X = None
self._Y = None
......@@ -105,7 +105,7 @@ class DefaultDataPreparation(AbstractDataPreparation):
self._save_file = os.path.join(data_path, f"data_preparation_{_name_affix}.pickle")
self._collection = self._create_collection()
self.harmonise_X()
self.multiply_extremes(extreme_values, extremes_on_right_tail_only, dim=self.interpolate_dim)
self.multiply_extremes(extreme_values, extremes_on_right_tail_only, dim=self.interpolation_dim)
self._store(fresh_store=True)
@classmethod
......@@ -190,7 +190,7 @@ class DefaultDataPreparation(AbstractDataPreparation):
def harmonise_X(self):
X_original, Y_original = self.get_X_original(), self.get_Y_original()
dim = self.interpolate_dim
dim = self.interpolation_dim
intersect = reduce(np.intersect1d, map(lambda x: x.coords[dim].values, X_original))
if len(intersect) < max(self.min_length, 1):
X, Y = None, None
......@@ -326,15 +326,15 @@ def create_data_prep():
sampling = 'daily'
target_dim = 'variables'
target_var = 'o3'
interpolate_dim = 'datetime'
interpolation_dim = 'datetime'
window_history_size = 7
window_lead_time = 3
central_station = StationPrep("DEBW011", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {},station_type, network, sampling, target_dim,
target_var, interpolate_dim, window_history_size, window_lead_time)
target_var, interpolation_dim, window_history_size, window_lead_time)
neighbor1 = StationPrep("DEBW013", path, {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}, {},station_type, network, sampling, target_dim,
target_var, interpolate_dim, window_history_size, window_lead_time)
target_var, interpolation_dim, window_history_size, window_lead_time)
neighbor2 = StationPrep("DEBW034", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim,
target_var, interpolate_dim, window_history_size, window_lead_time)
target_var, interpolation_dim, window_history_size, window_lead_time)
data_prep = []
data_prep.append(DataPreparationNeighbors(central_station, path, neighbors=[neighbor1, neighbor2]))
......
......@@ -13,7 +13,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAINABLE, \
DEFAULT_FRACTION_OF_TRAINING, DEFAULT_EXTREME_VALUES, DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, DEFAULT_PERMUTE_DATA, \
DEFAULT_BATCH_SIZE, DEFAULT_EPOCHS, DEFAULT_TARGET_VAR, DEFAULT_TARGET_DIM, DEFAULT_WINDOW_LEAD_TIME, \
DEFAULT_DIMENSIONS, DEFAULT_TIME_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_LIMIT_NAN_FILL, \
DEFAULT_DIMENSIONS, DEFAULT_TIME_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_INTERPOLATION_LIMIT, \
DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \
DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
......@@ -214,7 +214,7 @@ class ExperimentSetup(RunEnvironment):
dimensions=None,
time_dim=None,
interpolation_method=None,
limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None,
interpolation_limit=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None,
test_end=None, use_all_stations_on_all_data_sets=None, trainable: bool = None, fraction_of_train: float = None,
experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data = None, sampling: str = "daily",
create_new_model = None, bootstrap_path=None, permute_data_on_training = None, transformation=None,
......@@ -280,8 +280,6 @@ class ExperimentSetup(RunEnvironment):
# setup for data
self._set_param("stations", stations, default=DEFAULT_STATIONS)
# self._set_param("network", network, default=DEFAULT_NETWORK)
# self._set_param("station_type", station_type, default=DEFAULT_STATION_TYPE)
self._set_param("statistics_per_var", statistics_per_var, default=DEFAULT_VAR_ALL_DICT)
self._set_param("variables", variables, default=list(self.data_store.get("statistics_per_var").keys()))
self._set_param("start", start, default=DEFAULT_START)
......@@ -303,7 +301,7 @@ class ExperimentSetup(RunEnvironment):
self._set_param("dimensions", dimensions, default=DEFAULT_DIMENSIONS)
self._set_param("time_dim", time_dim, default=DEFAULT_TIME_DIM)
self._set_param("interpolation_method", interpolation_method, default=DEFAULT_INTERPOLATION_METHOD)
self._set_param("interpolation_limit", limit_nan_fill, default=DEFAULT_LIMIT_NAN_FILL)
self._set_param("interpolation_limit", interpolation_limit, default=DEFAULT_INTERPOLATION_LIMIT)
# train set parameters
self._set_param("start", train_start, default=DEFAULT_TRAIN_START, scope="train")
......@@ -352,7 +350,7 @@ class ExperimentSetup(RunEnvironment):
if len(kwargs) > 0:
for k, v in kwargs.items():
if len(self.data_store.search_name(k)) == 0:
self._set_param("k", v)
self._set_param(k, v)
else:
raise KeyError(f"Given argument {k} with value {v} cannot be set for this experiment due to a "
f"conflict with an existing entry with same naming: {k}={self.data_store.get(k)}")
......
......@@ -5,7 +5,7 @@ __date__ = '2019-11-25'
import logging
import os
from typing import Tuple, Dict, List
from typing import Tuple
import numpy as np
import pandas as pd
......@@ -16,11 +16,6 @@ from mlair.configuration import path_config
from mlair.helpers.join import EmptyQueryResult
from mlair.run_modules.run_environment import RunEnvironment
DEFAULT_ARGS_LIST = ["data_path", "stations", "variables", "time_dim", "target_dim", "target_var"]
DEFAULT_KWARGS_LIST = ["limit_nan_fill", "window_history_size", "window_lead_time", "statistics_per_var", "min_length",
"station_type", "overwrite_local_data", "start", "end", "sampling", "transformation",
"extreme_values", "extremes_on_right_tail_only", "network", "data_preparation"]
class PreProcessing(RunEnvironment):
"""
......
......@@ -14,7 +14,7 @@ def run(stations=None,
target_var=None, target_dim=None,
window_lead_time=None,
dimensions=None,
interpolate_method=None, interpolate_dim=None, limit_nan_fill=None,
interpolation_method=None, interpolation_dim=None, interpolation_limit=None,
train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None,
use_all_stations_on_all_data_sets=None, fraction_of_train=None,
experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None,
......
......@@ -22,7 +22,7 @@ class DefaultWorkflow(Workflow):
target_var=None, target_dim=None,
window_lead_time=None,
dimensions=None,
interpolate_method=None, time_dim=None, limit_nan_fill=None,
interpolation_method=None, time_dim=None, limit_nan_fill=None,
train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None,
use_all_stations_on_all_data_sets=None, fraction_of_train=None,
experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None,
......@@ -66,7 +66,7 @@ class DefaultWorkflowHPC(Workflow):
target_var=None, target_dim=None,
window_lead_time=None,
dimensions=None,
interpolate_method=None, time_dim=None, limit_nan_fill=None,
interpolation_method=None, time_dim=None, limit_nan_fill=None,
train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None,
use_all_stations_on_all_data_sets=None, fraction_of_train=None,
experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None,
......
......@@ -51,8 +51,6 @@ class TestExperimentSetup:
# setup for data
default_stations = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087']
assert data_store.get("stations", "general") == default_stations
assert data_store.get("network", "general") == "AIRBASE"
assert data_store.get("station_type", "general") == "background"
assert data_store.get("variables", "general") == list(default_statistics_per_var.keys())
assert data_store.get("statistics_per_var", "general") == default_statistics_per_var
assert data_store.get("start", "general") == "1997-01-01"
......@@ -66,7 +64,7 @@ class TestExperimentSetup:
assert data_store.get("dimensions", "general") == {'new_index': ['datetime', 'Stations']}
assert data_store.get("time_dim", "general") == "datetime"
assert data_store.get("interpolation_method", "general") == "linear"
assert data_store.get("limit_nan_fill", "general") == 1
assert data_store.get("interpolation_limit", "general") == 1
# train parameters
assert data_store.get("start", "general.train") == "1997-01-01"
assert data_store.get("end", "general.train") == "2007-12-31"
......@@ -93,7 +91,7 @@ class TestExperimentSetup:
stations=['DEBY053', 'DEBW059', 'DEBW027'], network="INTERNET", station_type="background",
variables=["o3", "temp"], start="1999-01-01", end="2001-01-01", window_history_size=4,
target_var="relhum", target_dim="target", window_lead_time=10, dimensions="dim1",
time_dim="int_dim", interpolation_method="cubic", limit_nan_fill=5, train_start="2000-01-01",
time_dim="int_dim", interpolation_method="cubic", interpolation_limit=5, train_start="2000-01-01",
train_end="2000-01-02", val_start="2000-01-03", val_end="2000-01-04", test_start="2000-01-05",
test_end="2000-01-06", use_all_stations_on_all_data_sets=False, trainable=False,
fraction_of_train=0.5, experiment_path=experiment_path, create_new_model=True, val_min_length=20)
......@@ -127,7 +125,7 @@ class TestExperimentSetup:
assert data_store.get("dimensions", "general") == "dim1"
assert data_store.get("time_dim", "general") == "int_dim"
assert data_store.get("interpolation_method", "general") == "cubic"
assert data_store.get("limit_nan_fill", "general") == 5
assert data_store.get("interpolation_limit", "general") == 5
# train parameters
assert data_store.get("start", "general.train") == "2000-01-01"
assert data_store.get("end", "general.train") == "2000-01-02"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment