Commit c265313e authored by lukas leufen's avatar lukas leufen

updated inception model and data prep class

Closes #7, #2 and #1

/close #2

See merge request toar/machinelearningtools!6
parents ff90f62c 430cc664
Pipeline #25553 passed with stages
in 1 minute and 41 seconds
# .coveragerc to control coverage.py
[run]
branch = True
[report]
# Regexes for lines to exclude from consideration
exclude_lines =
# Have to re-enable the standard pragma
pragma: no cover
# Don't complain about missing debug-only code:
def __repr__
if self\.debug
# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError
# Don't complain if non-runnable code isn't run:
if 0:
if __name__ == .__main__.:
# Don't complain about import statements
import
......@@ -40,10 +40,18 @@ Thumbs.db
.idea/
/venv/
# check plot folder #
#####################
# don't check data and plot folder #
####################################
/data/
/plots/
# tmp folder #
##############
/tmp/
# test related data #
#####################
.coverage
htmlcov/
.pytest_cache
/test/data/
# GitLab CI pipeline: build a version badge, run tests and coverage, publish badges
# and coverage reports via GitLab Pages.
stages:
  - init
  - test
  - pages
### Static Badges ###
# renders a badge for the most recent git tag; only runs on master and tag pipelines
version:
  stage: init
  tags:
    - leap
    - zam347
    - base
  only:
    - master
    - tags
  script:
    - chmod +x ./CI/update_badge.sh
    - chmod +x ./CI/create_version_badge.sh
    - ./CI/create_version_badge.sh
  artifacts:
    name: pages
    when: always
    paths:
      - badges/
### Tests ###
tests:
  tags:
    - leap
    - zam347
    - base
    - django
  stage: test
  variables:
    # run_pytest.sh fails the job if less than this percentage of tests pass
    FAILURE_THRESHOLD: 90
  before_script:
    # first badge update marks the job as "running"
    - chmod +x ./CI/update_badge.sh
    - ./CI/update_badge.sh > /dev/null
  script:
    - pip install -r requirements.txt
    - chmod +x ./CI/run_pytest.sh
    - ./CI/run_pytest.sh
  after_script:
    # second badge update picks up status.txt written by run_pytest.sh
    - ./CI/update_badge.sh > /dev/null
  artifacts:
    name: pages
    when: always
    paths:
      - badges/
coverage:
  tags:
    - leap
    - zam347
    - base
    - django
  stage: test
  variables:
    # job fails below FAILURE_THRESHOLD; badge turns yellow below COVERAGE_PASS_THRESHOLD
    FAILURE_THRESHOLD: 50
    COVERAGE_PASS_THRESHOLD: 80
  before_script:
    - chmod +x ./CI/update_badge.sh
    - ./CI/update_badge.sh > /dev/null
  script:
    - pip install -r requirements.txt
    - chmod +x ./CI/run_pytest_coverage.sh
    - ./CI/run_pytest_coverage.sh
  after_script:
    - ./CI/update_badge.sh > /dev/null
  artifacts:
    name: pages
    when: always
    paths:
      - badges/
      - coverage/
#### Pages ####
# collects badges and coverage html from the test stage and publishes them
pages:
  stage: pages
  tags:
    - leap
    - zam347
    - base
  script:
    - mkdir -p public/badges/
    - cp -af badges/badge_*.svg public/badges/
    - ls public/badges/
    - mkdir -p public/coverage
    - cp -af coverage/. public/coverage
    - ls public/coverage
    - ls public
  when: always
  artifacts:
    name: pages
    when: always
    paths:
      - public
      - badges/
      - coverage/
  # cache keeps previously published badges/coverage across pipelines
  cache:
    key: old-pages
    paths:
      - public/badges/
      - public/coverage/
#!/bin/bash
# Create the static "version" badge from the most recent git tag by delegating
# to the shared badge script with explicit name/colour/subject options.
VERSION="$(git describe --tags $(git rev-list --tags --max-count=1))"
COLOR="blue"
BADGE_NAME="version"
# NOTE(review): looks like leftover debugging output — confirm it is still needed
ls .
./CI/update_badge.sh -b ${BADGE_NAME} -c ${COLOR} -s ${VERSION}
exit 0
#!/bin/bash
# Run pytest for all modules and translate the outcome into the status.txt /
# incomplete.txt files consumed by update_badge.sh; fail the CI job when the
# pass ratio drops below FAILURE_THRESHOLD (exported by .gitlab-ci.yml).

# run pytest for all modules
python3 -m pytest test/ | tee test_results.out
# BUGFIX: "$?" after a pipeline is the exit status of the LAST command (tee),
# which is almost always 0 — PIPESTATUS[0] holds pytest's real exit code.
IS_FAILED=${PIPESTATUS[0]}

# exit 0 if no tests implemented
RUN_NO_TESTS="$(grep -c 'no tests ran' test_results.out)"
# use an arithmetic comparison; ">" inside [[ ]] compares strings lexicographically
if [[ ${RUN_NO_TESTS} -gt 0 ]]; then
    echo "no test available"
    echo "incomplete" > status.txt
    echo "no tests avail" > incomplete.txt
    exit 0
fi

# extract if tests passed or not
TEST_FAILED="$(grep -oP '(\d+\s{1}failed)' test_results.out)"
TEST_FAILED="$(echo ${TEST_FAILED} | (grep -oP '\d*'))"
TEST_PASSED="$(grep -oP '\d+\s{1}passed' test_results.out)"
TEST_PASSED="$(echo ${TEST_PASSED} | (grep -oP '\d*'))"
if [[ -z "$TEST_FAILED" ]]; then
    TEST_FAILED=0
fi
# NOTE(review): pytest reports passed and failed counts separately; subtracting
# failed from passed skews the ratio — confirm against the pytest summary format in use.
let "TEST_PASSED=${TEST_PASSED}-${TEST_FAILED}"

# calculate metrics
let "SUM=${TEST_FAILED}+${TEST_PASSED}"
let "TEST_PASSED_RATIO=${TEST_PASSED}*100/${SUM}"

# report: write badge inputs and decide the job exit status
if [[ ${IS_FAILED} == 0 ]]; then
    if [[ ${TEST_PASSED_RATIO} -lt 100 ]]; then
        echo "only ${TEST_PASSED_RATIO}% passed"
        echo "incomplete" > status.txt
        # %25 is a url-encoded '%' for the shields.io badge subject
        echo "${TEST_PASSED_RATIO}%25 passed" > incomplete.txt
        if [[ ${TEST_PASSED_RATIO} -lt ${FAILURE_THRESHOLD} ]]; then
            echo -e "\033[1;31monly ${TEST_PASSED_RATIO}% passed!!\033[0m"
            exit 1
        fi
    else
        echo "passed"
        echo "success" > status.txt
    fi
    exit 0
else
    echo "not passed"
    exit 1
fi
#!/usr/bin/env bash
# run coverage twice, 1) for html deploy 2) for success evaluation, then write
# the status.txt / incomplete.txt / success.txt files consumed by update_badge.sh.
python3 -m pytest --cov=src --cov-report html test/
python3 -m pytest --cov=src --cov-report term test/ | tee coverage_results.out
# BUGFIX: "$?" after a pipeline is tee's exit status (almost always 0);
# PIPESTATUS[0] carries pytest's real exit code.
IS_FAILED=${PIPESTATUS[0]}

# move html coverage report; -p keeps re-runs from failing on existing folders
mkdir -p coverage/
BRANCH_NAME=$( echo -e "${CI_COMMIT_REF_NAME////_}")
mkdir -p "coverage/${BRANCH_NAME}"
mkdir -p coverage/recent
cp -r htmlcov/* "coverage/${BRANCH_NAME}/."
cp -r htmlcov/* coverage/recent/.
# master additionally publishes to the coverage root
if [[ "${CI_COMMIT_REF_NAME}" = "master" ]]; then
    cp -r htmlcov/* coverage/.
fi

# extract coverage information (last percentage of the terminal report is the TOTAL line)
COVERAGE_RATIO="$(grep -oP '\d+\%' coverage_results.out | tail -1)"
COVERAGE_RATIO="$(echo ${COVERAGE_RATIO} | (grep -oP '\d*'))"

# report: write badge inputs and decide the job exit status
if [[ ${IS_FAILED} == 0 ]]; then
    if [[ ${COVERAGE_RATIO} -lt ${COVERAGE_PASS_THRESHOLD} ]]; then
        echo "only ${COVERAGE_RATIO}% covered"
        echo "incomplete" > status.txt
        # %25 is a url-encoded '%' for the shields.io badge subject
        echo "${COVERAGE_RATIO}%25" > incomplete.txt
        if [[ ${COVERAGE_RATIO} -lt ${FAILURE_THRESHOLD} ]]; then
            echo -e "\033[1;31monly ${COVERAGE_RATIO}% covered!!\033[0m"
            exit 1
        fi
    else
        echo "passed"
        echo "success" > status.txt
        echo "${COVERAGE_RATIO}%25" > success.txt
    fi
    exit 0
else
    echo "not passed"
    exit 1
fi
#!/bin/bash
# Fetch a shields.io status badge for the current CI job (or explicit -b/-c/-s
# overrides) and place it under ./badges/, plus a "recent" copy per job name.
# Reads status.txt / incomplete.txt / success.txt written by the test scripts.

# 'running', 'success' or 'failure' is in this file
if [[ -e status.txt ]]; then
    EXIT_STATUS=`cat status.txt`
else
    # no status file yet -> this is the before_script call, mark job as running
    EXIT_STATUS="running"
fi
printf "%s\n" ${EXIT_STATUS}

# fetch badge_status
BADGE_STATUS="${CI_COMMIT_REF_NAME}:${CI_JOB_NAME}"
# replace - with -- (shields.io uses single '-' as its field separator)
BADGE_STATUS=$( echo -e "${BADGE_STATUS//\-/--}")

# Set values for shields.io fields based on STATUS
if [[ ${EXIT_STATUS} = "running" ]]; then
    BADGE_SUBJECT="running"
    BADGE_COLOR="lightgrey"
elif [[ ${EXIT_STATUS} = "failure" ]]; then
    BADGE_SUBJECT="failed"
    BADGE_COLOR="red"
elif [[ ${EXIT_STATUS} = "success" ]]; then
    BADGE_SUBJECT="passed"
    BADGE_COLOR="brightgreen"
    # success.txt optionally carries a more specific subject (e.g. coverage ratio)
    if [[ -e success.txt ]]; then
        SUCCESS_MESSAGE=`cat success.txt`
        BADGE_SUBJECT="${SUCCESS_MESSAGE}"
    fi
elif [[ ${EXIT_STATUS} = "incomplete" ]]; then
    EXIT_STATUS_MESSAGE=`cat incomplete.txt`
    BADGE_SUBJECT="${EXIT_STATUS_MESSAGE}"
    # first number in the message is the achieved ratio; colour depends on threshold
    EXIT_STATUS_RATIO="$(echo ${EXIT_STATUS_MESSAGE} | (grep -oP '\d*') | head -1)"
    printf "%s\n" ${EXIT_STATUS_RATIO}
    if [[ "${EXIT_STATUS_RATIO}" -lt "${FAILURE_THRESHOLD}" ]]; then
        BADGE_COLOR="red"
    else
        BADGE_COLOR="yellow"
    fi
else
    # unknown status word -> abort
    exit 1
fi

# load additional options (override the derived fields, used by create_version_badge.sh)
while getopts b:c:s: option
do
    case ${option} in
        b) BADGE_STATUS=$( echo -e "${OPTARG//\-/--}");;
        c) BADGE_COLOR=$( echo -e "${OPTARG//\-/--}");;
        s) BADGE_SUBJECT=$( echo -e "${OPTARG//\-/--}");;
    esac
done

# Set filename for the badge (i.e. 'ci-test-branch-job.svg')
CI_COMMIT_REF_NAME_NO_SLASH="$( echo -e "${CI_COMMIT_REF_NAME}" | tr '/' '_' )"
if [[ ${BADGE_STATUS} = "version" ]]; then
    BADGE_FILENAME="badge_version.svg"
else
    BADGE_FILENAME="badge_${CI_COMMIT_REF_NAME_NO_SLASH}-${CI_JOB_NAME}.svg"
fi
RECENT_BADGE_FILENAME="badge_recent-${CI_JOB_NAME}.svg"

# Get the badge from shields.io
SHIELDS_IO_NAME=${BADGE_STATUS}-${BADGE_SUBJECT}-${BADGE_COLOR}.svg
printf "%s\n" "INFO: Fetching badge ${SHIELDS_IO_NAME} from shields.io to ${BADGE_FILENAME}."
printf "%s\n" "${SHIELDS_IO_NAME//\_/__}"
printf "%s\n" "${SHIELDS_IO_NAME//\#/%23}"
# escape literal underscores and '#' characters for the shields.io URL scheme
SHIELDS_IO_NAME="$( echo -e "${SHIELDS_IO_NAME//\_/__}" )"
SHIELDS_IO_NAME="$( echo -e "${SHIELDS_IO_NAME//\#/%23}")"
curl "https://img.shields.io/badge/${SHIELDS_IO_NAME}" > ${BADGE_FILENAME}
echo "https://img.shields.io/badge/${SHIELDS_IO_NAME}"
SHIELDS_IO_NAME_RECENT="RECENT:${SHIELDS_IO_NAME}"
curl "https://img.shields.io/badge/${SHIELDS_IO_NAME_RECENT}" > ${RECENT_BADGE_FILENAME}
echo "${SHIELDS_IO_NAME_RECENT}" > testRecentName.txt
#
if [[ ! -d ./badges ]]; then
    # Control will enter here if $DIRECTORY doesn't exist.
    mkdir badges/
fi
mv ${BADGE_FILENAME} ./badges/.
# replace outdated recent badge by new badge
mv ${RECENT_BADGE_FILENAME} ./badges/${RECENT_BADGE_FILENAME}
# set status to failed, this will be overwritten if job ended with exitcode 0
echo "failed" > status.txt
exit 0
# MachineLearningTools
This is a collection of all relevant functions used for ML stuff in the ESDE group
\ No newline at end of file
This is a collection of all relevant functions used for ML stuff in the ESDE group
## Inception Model
See a description [here](https://towardsdatascience.com/a-simple-guide-to-the-versions-of-the-inception-network-7fc52b863202)
or take a look at the papers [Going Deeper with Convolutions (Szegedy et al., 2014)](https://arxiv.org/abs/1409.4842)
and [Network In Network (Lin et al., 2014)](https://arxiv.org/abs/1312.4400).
\ No newline at end of file
This diff is collapsed.
__author__ = 'Lukas Leufen'
__date__ = '2019-10-21'
def to_list(arg):
    """Return *arg* unchanged when it already is a list, otherwise wrap it in one.

    :param arg: any object or list
    :return: a list containing arg, or arg itself if it is a list
    """
    return arg if isinstance(arg, list) else [arg]
This diff is collapsed.
__author__ = 'Felix Kleinert, Lukas Leufen'
__date__ = '2019-10-16'
import requests
import json
import logging
import pandas as pd
import datetime as dt
from typing import Iterator, Union, List
join_url_base = 'https://join.fz-juelich.de/services/rest/surfacedata/'
logging.basicConfig(level=logging.INFO)
def download_join(station_name: Union[str, List[str]], statvar: dict) -> [pd.DataFrame, pd.DataFrame]:
    """
    read data from JOIN/TOAR
    :param station_name: Station name e.g. DEBY122
    :param statvar: key as variable like 'O3', values as statistics on keys like 'mean'
    :returns:
        - df - pandas df with all variables and statistics
        - meta - pandas df with all meta information
    """
    # make sure station_name parameter is a list
    if not isinstance(station_name, list):
        station_name = [station_name]
    # load series information
    # NOTE(review): only station_name[0] is queried — confirm whether multi-station download is intended
    opts = {'base': join_url_base, 'service': 'series', 'station_id': station_name[0]}
    url = create_url(**opts)
    response = requests.get(url)
    station_vars = response.json()
    # map lowered variable name -> series id; presumably item[3] is the variable name
    # and item[0] the series id — verify against the JOIN REST response format
    vars_dict = {item[3].lower(): item[0] for item in station_vars}
    # download all variables with given statistic
    data = None
    df = None
    for var in _lower_list(sorted(vars_dict.keys())):
        if var in statvar.keys():
            logging.info('load: {}'.format(var))
            # create data link
            opts = {'base': join_url_base, 'service': 'stats', 'id': vars_dict[var], 'statistics': statvar[var],
                    'sampling': 'daily', 'capture': 0, 'min_data_length': 1460}
            url = create_url(**opts)
            # load data
            response = requests.get(url)
            data = response.json()
            # correct namespace of statistics
            stat = _correct_stat_name(statvar[var])
            # store data in pandas dataframe, indexed by the parsed timestamps
            index = map(lambda s: dt.datetime.strptime(s, "%Y-%m-%d %H:%M"), data['datetime'])
            if df is None:
                df = pd.DataFrame(data[stat], index=index, columns=[var])
            else:
                # append the new variable as an additional column
                df = pd.concat([df, pd.DataFrame(data[stat], index=index, columns=[var])], axis=1)
            logging.debug('finished: {}'.format(var))
    # meta information is taken from the response of the last downloaded variable
    if data:
        meta = pd.DataFrame.from_dict(data['metadata'], orient='index')
        meta.columns = station_name
        return df, meta
    else:
        raise ValueError("No data found in JOIN.")
def _correct_stat_name(stat: str) -> str:
"""
Map given statistic name to new namespace defined by mapping dict. Return given name stat if not element of mapping
namespace.
:param stat: namespace from JOIN server
:return: stat mapped to local namespace
"""
mapping = {'average_values': 'mean', 'maximum': 'max', 'minimum': 'min'}
return mapping.get(stat, stat)
def _lower_list(args: List[str]) -> Iterator[str]:
"""
lower all elements of given list
:param args: list with string entries to lower
:return: iterator that lowers all list entries
"""
for string in args:
yield string.lower()
def create_url(base: str, service: str, **kwargs: Union[str, int, float]) -> str:
    """
    Build a request url from a base url, a service type and arbitrarily many
    additional keyword arguments appended as a query string.
    :param base: basic url of the rest service
    :param service: service type, e.g. series, stats
    :param kwargs: keyword pairs for optional request specifications, e.g. 'statistics=maximum'
    :return: combined url as string
    """
    query = '&'.join('{}={}'.format(key, value) for key, value in kwargs.items())
    return '{}{}/?{}'.format(base, service, query)
if __name__ == "__main__":
var_all_dic = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values',
'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values',
'pblheight': 'maximum'}
station = 'DEBW107'
download_join(station, var_all_dic)
__author__ = 'Lukas Leufen'
__date__ = '2019-10-23'
import xarray as xr
import pandas as pd
from typing import Union, Tuple
Data = Union[xr.DataArray, pd.DataFrame]
def standardise(data: Data, dim: Union[str, int]) -> Tuple[Data, Data, Data]:
    """
    This function standardises a xarray.DataArray (along dim) or pandas.DataFrame (along axis) with mean=0 and std=1
    :param data: data to standardise
    :param dim:
        | for xarray.DataArray as string: name of dimension which should be standardised
        | for pandas.DataFrame as int: axis of dimension which should be standardised
    :return: xarray.DataArrays or pandas.DataFrames:
        #. mean: Mean of data
        #. std: Standard deviation of data
        #. data: Standardised data
    """
    # compute each statistic once instead of twice (original evaluated
    # data.mean(dim) and data.std(dim) both for the return values and inline)
    mean = data.mean(dim)
    std = data.std(dim)
    return mean, std, (data - mean) / std
def standardise_inverse(data: Data, mean: Data, std: Data) -> Data:
    """
    Inverse of `standardise`: rescale the standardised data by *std* and shift
    it back by *mean*.
    :param data: standardised data
    :param mean: mean used for standardising
    :param std: standard deviation used for standardising
    :return: data in the original scale
    """
    rescaled = data * std
    return rescaled + mean
def centre(data: Data, dim: Union[str, int]) -> Tuple[Data, None, Data]:
    """
    This function centres a xarray.DataArray (along dim) or pandas.DataFrame (along axis) to mean=0
    :param data: data to centre
    :param dim:
        | for xarray.DataArray as string: name of dimension which should be centred
        | for pandas.DataFrame as int: axis of dimension which should be centred
    :return: xarray.DataArrays or pandas.DataFrames:
        #. mean: Mean of data
        #. None: placeholder so the signature stays parallel to `standardise`
        #. data: Centred data
    """
    # compute the mean once instead of twice; the docstring previously claimed a
    # std return value although this slot is always None
    mean = data.mean(dim)
    return mean, None, data - mean
def centre_inverse(data: Data, mean: Data) -> Data:
    """
    Inverse of `centre`: shift the centred data back by adding the mean.
    :param data: centred data
    :param mean: mean used for centring
    :return: data in the original scale
    """
    shifted = data + mean
    return shifted
This diff is collapsed.
import pytest
from src.inception_model import InceptionModelBase
import keras
import tensorflow as tf
class TestInceptionModelBase:
    """Unit tests for InceptionModelBase: naming counters and conv-tower layer wiring."""

    @pytest.fixture
    def base(self):
        # fresh model-base instance per test so the block/activation counters start at zero
        return InceptionModelBase()

    @pytest.fixture
    def input_x(self):
        # dummy 32x32 RGB input tensor
        return keras.Input(shape=(32, 32, 3))

    @staticmethod
    def step_in(element, depth=1):
        # walk `depth` layers backwards through the keras layer graph via _keras_history
        for _ in range(depth):
            element = element.input._keras_history[0]
        return element

    def test_init(self, base):
        # counters start at zero; ord_base 96 is the char code directly before 'a'
        assert base.number_of_blocks == 0
        assert base.part_of_block == 0
        assert base.ord_base == 96
        assert base.act_number == 0

    def test_block_part_name(self, base):
        # part_of_block is added to ord_base, so 0 -> chr(96) and 1 -> 'a'
        assert base.block_part_name() == chr(96)
        base.part_of_block += 1
        assert base.block_part_name() == 'a'

    def test_create_conv_tower_3x3(self, base, input_x):
        opts = {'input_x': input_x, 'reduction_filter': 64, 'tower_filter': 32, 'tower_kernel': (3, 3)}
        tower = base.create_conv_tower(**opts)
        # check last element of tower (activation)
        assert base.part_of_block == 1
        assert tower.name == 'Block_0a_act_2/Relu:0'
        act_layer = tower._keras_history[0]
        assert isinstance(act_layer, keras.layers.advanced_activations.ReLU)
        assert act_layer.name == "Block_0a_act_2"
        # check previous element of tower (conv2D with the requested 3x3 kernel)
        conv_layer = self.step_in(act_layer)
        assert isinstance(conv_layer, keras.layers.Conv2D)
        assert conv_layer.filters == 32
        assert conv_layer.padding == 'same'
        assert conv_layer.kernel_size == (3, 3)
        assert conv_layer.strides == (1, 1)
        assert conv_layer.name == "Block_0a_3x3"
        # check previous element of tower (activation of the 1x1 reduction)
        act_layer2 = self.step_in(conv_layer)
        assert isinstance(act_layer2, keras.layers.advanced_activations.ReLU)
        assert act_layer2.name == "Block_0a_act_1"
        # check previous element of tower (1x1 reduction conv2D attached to the raw input)
        conv_layer2 = self.step_in(act_layer2)
        assert isinstance(conv_layer2, keras.layers.Conv2D)
        assert conv_layer2.filters == 64
        assert conv_layer2.kernel_size == (1, 1)
        assert conv_layer2.padding == 'same'
        assert conv_layer2.name == 'Block_0a_1x1'
        assert conv_layer2.input._keras_shape == (None, 32, 32, 3)

    def test_create_conv_tower_3x3_activation(self, base, input_x):
        opts = {'input_x': input_x, 'reduction_filter': 64, 'tower_filter': 32, 'tower_kernel': (3, 3)}
        # create tower with standard activation function (string -> core.Activation layer)
        tower = base.create_conv_tower(activation='tanh', **opts)
        assert tower.name == 'Block_0a_act_2_tanh/Tanh:0'
        act_layer = tower._keras_history[0]
        assert isinstance(act_layer, keras.layers.core.Activation)
        assert act_layer.name == "Block_0a_act_2_tanh"
        # create tower with activation function class (instantiated as an advanced activation)
        tower = base.create_conv_tower(activation=keras.layers.LeakyReLU, **opts)
        assert tower.name == 'Block_0b_act_2/LeakyRelu:0'
        act_layer = tower._keras_history[0]
        assert isinstance(act_layer, keras.layers.advanced_activations.LeakyReLU)
        assert act_layer.name == "Block_0b_act_2"
def test_create_conv_tower_1x1(self, base, input_x):
opts = {'input_x': input_x, 'reduction_filter': 64, 'tower_filter': 32, 'tower_kernel': (1, 1)}
tower = base.create_conv_tower(**opts)