Commit 3b54d872 authored by Lukas Leufen

Experiment Setup finished

Closes #11, #12, and #22

See merge request toar/machinelearningtools!12
parents 7e9cd7eb ea26f48b
Pipeline #25981 passed with stages
in 1 minute and 43 seconds
......@@ -56,3 +56,4 @@ htmlcov/
.pytest_cache
/test/data/
report.html
/TestExperiment/
......@@ -9,3 +9,4 @@ pytest-lazy-fixture==0.6.1
pytest-cov
pytest-html
pydot
mock
__author__ = "Lukas Leufen"
__date__ = '2019-11-14'
import logging
from src.helpers import TimeTracking
from src import helpers
import argparse
import time
# Log record layout: timestamp, level and message, followed by the emitting file, function and line number.
formatter = "%(asctime)s - %(levelname)s: %(message)s [%(filename)s:%(funcName)s:%(lineno)s]"
# Configure the root logger at import time so that INFO messages are emitted with the layout above.
logging.basicConfig(level=logging.INFO, format=formatter)
class run(object):
    """
    Basic run class to measure execution time.

    Time tracking starts when the instance is created. The measurement ends, and the duration is logged, either
    when the instance is used as a context manager and the 'with' block is left, or when the instance is deleted,
    whichever happens first. The result is logged exactly once.
    """

    def __init__(self):
        self.time = TimeTracking()
        # Set once the duration has been logged, so that __exit__ followed by __del__ does not stop the timer twice
        # (TimeTracking.stop raises an AssertionError on a second call).
        self._finished = False
        logging.info(f"{self.__class__.__name__} started")

    def __del__(self):
        self._finish()

    def __enter__(self):
        # Return the instance so that 'with run() as r:' binds something usable.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Stop deterministically at the end of the 'with' block instead of relying on garbage collection.
        # Nothing is returned, so an exception raised inside the block still propagates.
        self._finish()

    def _finish(self):
        """Stop the time tracking and log the duration; does nothing if this already happened."""
        # Default to True: if __init__ failed before the flag was set there is nothing to stop.
        if getattr(self, "_finished", True):
            return
        self._finished = True
        self.time.stop()
        logging.info(f"{self.__class__.__name__} finished after {self.time}")

    def do_stuff(self):
        """Sleep for two seconds (placeholder workload)."""
        time.sleep(2)
class ExperimentSetup:
    """
    Collect all parameters of an experiment as instance attributes.

    Every parameter listed below can be overwritten by passing it as keyword argument; otherwise the default given
    in setup_experiment is used. Each assignment is logged.

    params:
    trainable: Train new model if true, otherwise try to load existing model
    experiment_date: optional date string used to build the experiment name. If not passed, the value parsed from
        the command line (module level 'args') is used when available, else None.
    fraction_of_train, use_all_stations_on_all_data_sets, network, var_all_dict, all_stations, variables,
    dimensions, dim, target_dim, target_var: see defaults in setup_experiment.
    """

    def __init__(self, **kwargs):
        self.data_path = None
        self.experiment_path = None
        self.experiment_name = None
        self.trainable = None
        self.fraction_of_train = None
        self.use_all_stations_on_all_data_sets = None
        self.network = None
        self.var_all_dict = None
        self.all_stations = None
        self.variables = None
        self.dimensions = None
        self.dim = None
        self.target_dim = None
        self.target_var = None
        self.setup_experiment(**kwargs)

    def _set_param(self, param, value, default=None):
        """
        Set attribute `param` on this instance and log the assignment.

        If `default` is given (not None), `value` is treated as a mapping and the attribute is set to
        value.get(param, default). Without a default, `value` itself is stored.
        """
        if default is not None:
            value = value.get(param, default)
        setattr(self, param, value)
        logging.info(f"set experiment attribute: {param}={value}")

    @staticmethod
    def _experiment_date_from(kwargs):
        """
        Return the experiment date from `kwargs`, falling back to the parsed command line arguments.

        The module level name 'args' only exists if the module is executed as script. Looking it up through
        globals() avoids a NameError when the class is used from an import. A single element list (as produced by
        argparse with nargs=1) is unwrapped so that the date does not end up as "['...']" in the experiment name.
        """
        if "experiment_date" in kwargs:
            exp_date = kwargs["experiment_date"]
        else:
            exp_date = getattr(globals().get("args"), "experiment_date", None)
        if isinstance(exp_date, (list, tuple)):
            exp_date = exp_date[0] if exp_date else None
        return exp_date

    def setup_experiment(self, **kwargs):
        """Set all experiment attributes from `kwargs` or their defaults and create the experiment directory."""

        # set data path of this experiment
        self._set_param("data_path", helpers.prepare_host())

        # set experiment name
        exp_date = self._experiment_date_from(kwargs)
        exp_name, exp_path = helpers.set_experiment_name(experiment_date=exp_date)
        self._set_param("experiment_name", exp_name)
        self._set_param("experiment_path", exp_path)
        helpers.check_path_and_create(self.experiment_path)

        # set if model is trainable
        self._set_param("trainable", kwargs, default=True)

        # set fraction of train
        self._set_param("fraction_of_train", kwargs, default=0.8)

        # use all stations on all data sets (train, val, test)
        self._set_param("use_all_stations_on_all_data_sets", kwargs, default=True)
        self._set_param("network", kwargs, default="AIRBASE")
        self._set_param("var_all_dict", kwargs, default={'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum',
                                                         'u': 'average_values', 'v': 'average_values', 'no': 'dma8eu',
                                                         'no2': 'dma8eu', 'cloudcover': 'average_values',
                                                         'pblheight': 'maximum'})
        self._set_param("all_stations", kwargs, default=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087',
                                                         'DEBY052', 'DEBY032', 'DEBW022', 'DEBY004', 'DEBY020',
                                                         'DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW073',
                                                         'DEBY039', 'DEBW038', 'DEBW081', 'DEBY075', 'DEBW040',
                                                         'DEBY053', 'DEBW059', 'DEBW027', 'DEBY072', 'DEBW042',
                                                         'DEBW039', 'DEBY001', 'DEBY113', 'DEBY089', 'DEBW024',
                                                         'DEBW004', 'DEBY037', 'DEBW056', 'DEBW029', 'DEBY068',
                                                         'DEBW010', 'DEBW026', 'DEBY002', 'DEBY079', 'DEBW084',
                                                         'DEBY049', 'DEBY031', 'DEBW019', 'DEBW001', 'DEBY063',
                                                         'DEBY005', 'DEBW046', 'DEBW103', 'DEBW052', 'DEBW034',
                                                         'DEBY088', ])
        # by default use every variable that has an entry in var_all_dict (which was set just above)
        self._set_param("variables", kwargs, default=list(self.var_all_dict))
        self._set_param("dimensions", kwargs, default={'new_index': ['datetime', 'Stations']})
        self._set_param("dim", kwargs, default='datetime')
        self._set_param("target_dim", kwargs, default='variables')
        self._set_param("target_var", kwargs, default="o3")
class PreProcessing(run):
    """Timed run step that keeps a reference to the experiment setup it was created with."""

    def __init__(self, setup):
        # start the time tracking of the base class before storing the setup
        super(PreProcessing, self).__init__()
        self.setup = setup
class Training(run):
    """Timed run step that keeps a reference to the experiment setup it was created with."""

    def __init__(self, setup):
        # start the time tracking of the base class before storing the setup
        super(Training, self).__init__()
        self.setup = setup
class PostProcessing(run):
    """Timed run step that keeps a reference to the experiment setup it was created with."""

    def __init__(self, setup):
        # start the time tracking of the base class before storing the setup
        super(PostProcessing, self).__init__()
        self.setup = setup
if __name__ == "__main__":

    # Script entry point: parse the command line, then run all stages inside one timed 'run' context.
    parser = argparse.ArgumentParser()
    # No 'nargs' here: with nargs=1 argparse stores a one-element list, which would turn the experiment name into
    # "['<date>']_network/". Without it the option value is stored as a plain string.
    parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default=None,
                        help="set experiment date as string")
    # 'args' must stay a module level name, ExperimentSetup reads the experiment date from it.
    args = parser.parse_args()

    with run():
        exp_setup = ExperimentSetup(trainable=True)

        PreProcessing(exp_setup)

        Training(exp_setup)

        PostProcessing(exp_setup)
......@@ -9,6 +9,9 @@ import math
from typing import Union
import numpy as np
import os
import time
import socket
import sys
def to_list(arg):
......@@ -84,3 +87,85 @@ class LearningRateDecay(keras.callbacks.History):
self.lr['lr'].append(current_lr)
logging.info(f"Set learning rate to {current_lr}")
return K.get_value(self.model.optimizer.lr)
class TimeTracking(object):
    """
    Track time to measure execution time. Time tracking automatically starts on initialisation and ends by calling stop
    method. Duration can always be shown by printing the time tracking object or calling get_current_duration.

    :param start: start the measurement directly on initialisation (default True). If False, call run() to start.
    """

    def __init__(self, start=True):
        self.start = None
        self.end = None
        if start:
            self._start()

    def _start(self):
        """Store the current time as start and reset a previously stored end."""
        self.start = time.time()
        self.end = None

    def _end(self):
        """Store the current time as end."""
        self.end = time.time()

    def _duration(self):
        """Return the seconds between start and end, or between start and now if not stopped yet."""
        # Compare against None explicitly (as stop() does): a truthiness check would treat an end timestamp of 0
        # as 'still running'.
        if self.end is not None:
            return self.end - self.start
        return time.time() - self.start

    def __repr__(self):
        return f"{round(self._duration(), 2)}s"

    def run(self):
        """(Re-)start the time measurement."""
        self._start()

    def stop(self, get_duration=False):
        """
        Stop the time measurement.

        :param get_duration: return the measured duration if True, otherwise return None
        :raises AssertionError: if the measurement was already stopped
        """
        if self.end is None:
            self._end()
        else:
            msg = f"Time was already stopped {time.time() - self.end}s ago."
            logging.error(msg)
            raise AssertionError(msg)
        if get_duration:
            return self.duration()

    def duration(self):
        """Return the measured duration in seconds (up to now if the measurement is still running)."""
        return self._duration()
def prepare_host():
    """
    Return the data path that belongs to the host this code is running on.

    The path is selected by host name and contains the name of the current user.

    :return: data path as string
    :raises OSError: if the host name is not known
    :raises NotADirectoryError: if the selected path does not exist on this host
    """
    import getpass  # local import: only needed as fallback for the user lookup below

    hostname = socket.gethostname()
    if hostname in ('ZAM144', 'zam347'):
        template = '/home/{user}/Data/toar_daily/'
    elif hostname == 'linux-gzsx':
        template = '/home/{user}/machinelearningtools'
    elif (len(hostname) > 2) and hostname.startswith('jr'):
        template = '/p/project/cjjsc42/{user}/DATA/toar_daily/'
    elif (len(hostname) > 2) and hostname.startswith('jw'):
        template = '/p/home/jusers/{user}/juwels/intelliaq/DATA/toar_daily/'
    else:
        logging.error(f"unknown host '{hostname}'")
        raise OSError(f"unknown host '{hostname}'")

    try:
        user = os.getlogin()
    except OSError:
        # os.getlogin needs a controlling terminal and fails e.g. in batch jobs or CI runners
        user = getpass.getuser()
    path = template.format(user=user)

    if not os.path.exists(path):
        logging.error(f"path '{path}' does not exist for host '{hostname}'.")
        raise NotADirectoryError(f"path '{path}' does not exist for host '{hostname}'.")
    logging.info(f"set path to: {path}")
    return path
def set_experiment_name(experiment_date=None, experiment_path=None):
    """
    Build the name and the absolute path of an experiment.

    :param experiment_date: if given, the name is "<experiment_date>_network/", otherwise "TestExperiment"
    :param experiment_path: if given, its absolute form is returned as path; otherwise the path is the experiment
        name placed one directory above the directory of this file
    :return: tuple of experiment name and absolute experiment path
    """
    experiment_name = "TestExperiment" if experiment_date is None else f"{experiment_date}_network/"

    if experiment_path is not None:
        return experiment_name, os.path.abspath(experiment_path)

    # no explicit path: fall back to the parent directory of this module's directory
    module_dir = os.path.dirname(__file__)
    default_location = os.path.join(module_dir, "..", experiment_name)
    return experiment_name, os.path.abspath(default_location)
import pytest
from src.helpers import to_list, check_path_and_create, l_p_loss, LearningRateDecay
from src.helpers import *
import logging
import os
import keras
import numpy as np
import mock
class TestToList:
......@@ -78,3 +79,111 @@ class TestLearningRateDecay:
model.compile(optimizer=keras.optimizers.Adam(), loss=l_p_loss(2))
model.fit(np.array([1, 0, 2, 0.5]), np.array([1, 1, 0, 0.5]), epochs=5, callbacks=[lr_decay])
assert lr_decay.lr['lr'] == [0.02, 0.02, 0.02*0.95, 0.02*0.95, 0.02*0.95*0.95]
class TestTimeTracking:
    """
    Tests for TimeTracking.

    Comparisons between consecutive time stamps are non-strict (<=, >=): two successive time.time() calls can return
    the same value on systems with a coarse clock, which would make strict comparisons fail sporadically.
    """

    def test_init(self):
        t = TimeTracking()
        assert t.start is not None
        assert t.start <= time.time()
        assert t.end is None
        t2 = TimeTracking(start=False)
        assert t2.start is None

    def test__start(self):
        t = TimeTracking(start=False)
        t._start()
        assert t.start <= time.time()

    def test__end(self):
        t = TimeTracking()
        t._end()
        assert t.end >= t.start

    def test__duration(self):
        t = TimeTracking()
        d1 = t._duration()
        assert d1 >= 0
        d2 = t._duration()
        assert d2 >= d1
        t._end()
        d3 = t._duration()
        assert d3 >= d2
        # once stopped, the duration must not change anymore
        assert d3 == t._duration()

    def test_repr(self):
        t = TimeTracking()
        t._end()
        duration = t._duration()
        assert t.__repr__().rstrip() == f"{round(duration, 2)}s".rstrip()

    def test_run(self):
        t = TimeTracking(start=False)
        assert t.start is None
        t.run()
        assert t.start is not None

    def test_stop(self):
        t = TimeTracking()
        assert t.end is None
        duration = t.stop(get_duration=True)
        assert duration == t._duration()
        # a second stop without restart is an error
        with pytest.raises(AssertionError) as e:
            t.stop()
        assert "Time was already stopped" in e.value.args[0]
        # restarting resets the end and allows stopping again
        t.run()
        assert t.end is None
        assert t.stop() is None
        assert t.end is not None

    def test_duration(self):
        t = TimeTracking()
        # call the public method while the tracking is still running
        duration = t.duration()
        assert duration is not None
        assert duration >= 0
        duration = t.stop(get_duration=True)
        assert duration == t.duration()
class TestPrepareHost:
    """
    Tests for prepare_host with patched host name, user name and file system check.

    Note on argument order: stacked mock.patch decorators are applied bottom-up, so the mock of the decorator
    closest to the function is passed first.
    """

    @mock.patch("socket.gethostname", side_effect=["linux-gzsx", "ZAM144", "zam347", "jrtest", "jwtest"])
    @mock.patch("os.getlogin", return_value="testUser")
    @mock.patch("os.path.exists", return_value=True)
    def test_prepare_host(self, mock_path, mock_user, mock_host):
        # each call consumes the next host name of the side_effect list
        path = prepare_host()
        assert path == "/home/testUser/machinelearningtools"
        path = prepare_host()
        assert path == "/home/testUser/Data/toar_daily/"
        path = prepare_host()
        assert path == "/home/testUser/Data/toar_daily/"
        path = prepare_host()
        assert path == "/p/project/cjjsc42/testUser/DATA/toar_daily/"
        path = prepare_host()
        assert path == "/p/home/jusers/testUser/juwels/intelliaq/DATA/toar_daily/"

    @mock.patch("socket.gethostname", return_value="NotExistingHostName")
    @mock.patch("os.getlogin", return_value="zombie21")
    def test_error_handling(self, mock_user, mock_host):
        with pytest.raises(OSError) as e:
            prepare_host()
        assert "unknown host 'NotExistingHostName'" in e.value.args[0]
        mock_host.return_value = "linux-gzsx"
        # patch the existence check so the result does not depend on the file system of the test machine
        with mock.patch("os.path.exists", return_value=False):
            with pytest.raises(NotADirectoryError) as e:
                prepare_host()
        assert "path '/home/zombie21/machinelearningtools' does not exist for host 'linux-gzsx'" in e.value.args[0]
class TestSetExperimentName:
    """Tests for set_experiment_name."""

    def test_set_experiment(self):
        # without arguments the default name "TestExperiment" is used and placed one level above the test directory
        exp_name, exp_path = set_experiment_name()
        assert exp_name == "TestExperiment"
        assert exp_path == os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "TestExperiment"))
        # an explicit path is only made absolute; compare against abspath so the test does not depend on the
        # directory pytest is started from
        exp_name, exp_path = set_experiment_name(experiment_date="2019-11-14", experiment_path="./test2")
        assert exp_name == "2019-11-14_network/"
        assert exp_path == os.path.abspath("./test2")

    def test_set_experiment_from_sys(self):
        exp_name, _ = set_experiment_name(experiment_date="2019-11-14")
        assert exp_name == "2019-11-14_network/"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment