Commit 4e1252c2 authored by felix kleinert

Merge branch 'felix_issue211-inclue-new-intellio3-plots-in-mlair' into 'develop'

Resolve "Include new IntelliO3 plots in MLAir"

See merge request !194

closes #211
parents 0e8e07f6 1dba57c6
Pipeline #53931 passed with stages
in 9 minutes and 53 seconds
......@@ -48,7 +48,7 @@ DEFAULT_CREATE_NEW_BOOTSTRAPS = False
DEFAULT_NUMBER_OF_BOOTSTRAPS = 20
DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
"PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
"PlotAvailability", "PlotSeparationOfScales"]
"PlotAvailability", "PlotAvailabilityHistogram", "PlotSeparationOfScales"]
DEFAULT_SAMPLING = "daily"
DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA", "press": "REA", "relhum": "REA",
"temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "",
......
This diff is collapsed.
......@@ -19,8 +19,8 @@ from mlair.helpers import TimeTracking, statistics, extract_value
from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel
from mlair.model_modules.model_class import AbstractModelClass
from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotStationMap, PlotClimatologicalSkillScore, \
PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotAvailability, PlotConditionalQuantiles, \
PlotSeparationOfScales
PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotAvailability, PlotAvailabilityHistogram, \
PlotConditionalQuantiles, PlotSeparationOfScales
from mlair.run_modules.run_environment import RunEnvironment
......@@ -293,6 +293,10 @@ class PostProcessing(RunEnvironment):
if "PlotAvailability" in plot_list:
avail_data = {"train": self.train_data, "val": self.val_data, "test": self.test_data}
PlotAvailability(avail_data, plot_folder=self.plot_path, time_dimension=time_dimension)
if "PlotAvailabilityHistogram" in plot_list:
avail_data = {"train": self.train_data, "val": self.val_data, "test": self.test_data}
PlotAvailabilityHistogram(avail_data, plot_folder=self.plot_path, )# time_dimension=time_dimension)
def calculate_test_score(self):
"""Evaluate test score of model and save locally."""
......
......@@ -115,9 +115,47 @@ class PreProcessing(RunEnvironment):
precision = 4
path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
path_config.check_path_and_create(path)
set_names = ["train", "val", "test"]
df = pd.DataFrame(columns=meta_data + set_names)
for set_name in set_names:
names_of_set = ["train", "val", "test"]
df = self.create_info_df(meta_data, meta_round, names_of_set, precision)
column_format = self.create_column_format_for_tex(df)
self.save_to_tex(path=path, filename="station_sample_size.tex", column_format=column_format, df=df)
self.save_to_md(path=path, filename="station_sample_size.md", df=df)
df_nometa = df.drop(meta_data, axis=1)
self.save_to_tex(path=path, filename="station_sample_size_short.tex", column_format=column_format, df=df_nometa)
self.save_to_md(path=path, filename="station_sample_size_short.md", df=df_nometa)
# df_nometa.to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
# column_format=column_format)
df_descr = self.create_describe_df(df_nometa)
column_format = self.create_column_format_for_tex(df_descr)
self.save_to_tex(path=path, filename="station_describe_short.tex", column_format=column_format, df=df_descr)
self.save_to_md(path=path, filename="station_describe_short.md", df=df_descr)
# df_descr.to_latex(os.path.join(path, "station_describe_short.tex"), na_rep='---', column_format=column_format)
@staticmethod
def create_describe_df(df, percentiles=None, ignore_last_lines: int = 2):
    """
    Summarise each column of a sample-size table with descriptive statistics.

    The trailing `ignore_last_lines` rows of `df` are left out before `DataFrame.describe` is applied
    (presumably the summary rows '# Stations' and '# Samples' at the end of the table - verify against the
    caller). The '# Samples' row of `df` is placed next to the statistics and the result is transposed, so
    that every column of `df` becomes one row of the returned table.

    :param df: table to summarise; must contain a row labelled '# Samples'
    :param percentiles: percentiles passed on to `describe`; defaults to 5, 10, 25, 50, 75, 90 and 95 percent
    :param ignore_last_lines: number of trailing rows excluded from the statistics, 0 keeps all rows
    :return: transposed table starting with the columns 'no. stations' and 'no. samples', followed by the
        remaining statistics of `describe` (cast to int32 before the '# Samples' row is joined)
    :raises ValueError: if `ignore_last_lines` is negative
    """
    if ignore_last_lines < 0:
        raise ValueError(f"ignore_last_lines must not be negative, got {ignore_last_lines}")
    if percentiles is None:
        percentiles = [.05, .1, .25, .5, .75, .9, .95]
    # `df.iloc[:-n]` returns an empty frame for n == 0, therefore slice with an explicit end position
    n_rows = max(len(df) - ignore_last_lines, 0)
    stats = df.iloc[:n_rows].astype('float32').describe(percentiles=percentiles).astype('int32')
    df_descr = pd.concat([df.loc[['# Samples']], stats]).T
    df_descr = df_descr.rename(columns={"# Samples": "no. samples", "count": "no. stations"})
    # swap the first two columns so that the number of stations precedes the number of samples
    column_order = list(df_descr.columns)
    column_order[0], column_order[1] = column_order[1], column_order[0]
    return df_descr[column_order]
@staticmethod
def save_to_tex(path, filename, column_format, df, na_rep='---'):
    """
    Export a DataFrame as latex table.

    :param path: directory the file is written to
    :param filename: name of the latex file inside `path`
    :param column_format: latex column format handed over to `DataFrame.to_latex`
    :param df: DataFrame to export
    :param na_rep: replacement string for missing values
    """
    tex_file = os.path.join(path, filename)
    df.to_latex(tex_file, na_rep=na_rep, column_format=column_format)
@staticmethod
def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"):
    """
    Export a DataFrame as markdown table.

    :param path: directory the file is written to
    :param filename: name of the markdown file inside `path`
    :param df: DataFrame to export
    :param mode: mode used to open the file
    :param encoding: encoding used to open the file
    :param tablefmt: table format handed over to `DataFrame.to_markdown`
    """
    # the context manager closes (and thereby flushes) the file even if the export raises
    with open(os.path.join(path, filename), mode=mode, encoding=encoding) as md_file:
        df.to_markdown(md_file, tablefmt=tablefmt)
def create_info_df(self, meta_data, meta_round, names_of_set, precision):
df = pd.DataFrame(columns=meta_data + names_of_set)
for set_name in names_of_set:
data = self.data_store.get("data_collection", set_name)
for station in data:
station_name = str(station.id_class)
......@@ -130,15 +168,21 @@ class PreProcessing(RunEnvironment):
df.sort_index(inplace=True)
df = df.reindex(df.index.drop(["# Stations", "# Samples"]).to_list() + ["# Stations", "# Samples"], )
df.index.name = 'stat. ID'
return df
@staticmethod
def create_column_format_for_tex(df: pd.DataFrame) -> str:
    """
    Create the column format for a latex table based on the shape of a given DataFrame.

    The format has one position more than `df` has columns (presumably for the index column - verify
    against the latex export). All positions use 'c', the first is set to 'l' and the last to 'r'.

    :param df: DataFrame the column format is created for
    :return: column format string, e.g. 'lccr' for a DataFrame with three columns
    """
    column_format = ['c'] * (df.shape[1] + 1)
    column_format[0] = 'l'
    # assigned after 'l' on purpose: for a frame without columns the single position ends up as 'r'
    column_format[-1] = 'r'
    return ''.join(column_format)
def split_train_val_test(self) -> None:
"""
......
......@@ -70,4 +70,5 @@ class TestAllDefaults:
assert DEFAULT_NUMBER_OF_BOOTSTRAPS == 20
assert DEFAULT_PLOT_LIST == ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore",
"PlotTimeSeries", "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore",
"PlotConditionalQuantiles", "PlotAvailability", "PlotSeparationOfScales"]
"PlotConditionalQuantiles", "PlotAvailability", "PlotAvailabilityHistogram",
"PlotSeparationOfScales"]
......@@ -9,6 +9,8 @@ from mlair.helpers import PyTestRegex
from mlair.run_modules.experiment_setup import ExperimentSetup
from mlair.run_modules.pre_processing import PreProcessing
from mlair.run_modules.run_environment import RunEnvironment
import pandas as pd
import numpy as np
class TestPreProcessing:
......@@ -117,3 +119,38 @@ class TestPreProcessing:
assert pre.transformation(data_preparation, stations) is None
class data_preparation_no_trans: pass
assert pre.transformation(data_preparation_no_trans, stations) is None
@pytest.fixture
def dummy_df(self):
    """
    Provide a small sample-size table as DataFrame.

    The table has one row per station followed by the rows '# Stations' and '# Samples'. The meta data
    columns hold NaN in these two trailing rows.
    """
    row_labels = ['DEBW013', 'DEBW076', 'DEBW087', 'DEBW107', 'DEBY081', '# Stations', '# Samples']
    column_values = {
        'station_name': ['Stuttgart Bad Cannstatt', 'Baden-Baden', 'Schwäbische_Alb', 'Tübingen',
                         'Garmisch-Partenkirchen/Kreuzeckbahnstraße', np.nan, np.nan],
        'station_lon': [9.2297, 8.2202, 9.2076, 9.0512, 11.0631, np.nan, np.nan],
        'station_lat': [48.8088, 48.7731, 48.3458, 48.5077, 47.4764, np.nan, np.nan],
        'station_alt': [235.0, 148.0, 798.0, 325.0, 735.0, np.nan, np.nan],
        'train': [1413, 3002, 3016, 1782, 2837, 6, 12050],
        'val': [698, 715, 700, 701, 456, 6, 3270],
        'test': [1066, 696, 1080, 1080, 700, 6, 4622],
    }
    # rebuild the nested {column: {row label: value}} mapping expected by `from_dict`
    data_dict = {column: dict(zip(row_labels, values)) for column, values in column_values.items()}
    return pd.DataFrame.from_dict(data_dict)
def test_create_column_format_for_tex(self):
    """Check the latex column format for DataFrames with one, two and three columns."""
    # number of DataFrame columns -> expected format (always one position more than columns)
    expected_formats = {1: 'lr', 2: 'lcr', 3: 'lccr'}
    for n_columns, expected in expected_formats.items():
        frame = pd.DataFrame(np.ones((2, n_columns)))
        column_format = PreProcessing.create_column_format_for_tex(frame)
        assert column_format == expected
        assert len(column_format) == n_columns + 1
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment