Commit dda93fe3 authored by lukas leufen

added table reporting to latex and markdown

parent 26d51f3d
Pipeline #33908 failed with stages
in 4 minutes and 52 seconds
......@@ -53,6 +53,7 @@ seaborn==0.10.0
--no-binary shapely Shapely==1.7.0
six==1.11.0
statsmodels==0.11.1
tabulate
tensorboard==1.13.1
tensorflow-estimator==1.13.0
tensorflow==1.13.1
......
......@@ -53,6 +53,7 @@ seaborn==0.10.0
--no-binary shapely Shapely==1.7.0
six==1.11.0
statsmodels==0.11.1
tabulate
tensorboard==1.13.1
tensorflow-estimator==1.13.0
tensorflow-gpu==1.13.1
......
......@@ -3,10 +3,14 @@ __date__ = '2019-11-25'
import logging
import os
from typing import Tuple, Dict, List
import numpy as np
import pandas as pd
from src.data_handling.data_generator import DataGenerator
from src.helpers import TimeTracking
from src.helpers import TimeTracking, check_path_and_create
from src.join import EmptyQueryResult
from src.run_modules.run_environment import RunEnvironment
......@@ -54,6 +58,52 @@ class PreProcessing(RunEnvironment):
logging.debug(f"Number of test stations: {n_test}")
logging.debug(f"TEST SHAPE OF GENERATOR CALL: {self.data_store.get('generator', 'test')[0][0].shape}"
f"{self.data_store.get('generator', 'test')[0][1].shape}")
self.create_latex_report()
def create_latex_report(self):
    r"""
    Write a per-station summary table as LaTeX and Markdown files.

    The table is indexed by station ID and holds the selected meta data columns plus the number of label samples
    per station for each subset (train, val, test). Two summary rows, "# Stations" and "# Samples", are appended
    at the bottom. Three files are written into the folder <experiment_path>/latex_report:

    * test.tex: full table (meta data and subset counts)
    * test.md: the same table as GitHub flavoured markdown
    * test_short.tex: subset counts only, without meta data

    The full LaTeX table could look like this

    \begin{tabular}{llrrrlll}
    \toprule
    {} & station\_name & station\_lon & station\_lat & station\_alt & train & val & test \\
    \midrule
    DENW094 & Aachen-Burtscheid & 6.0939 & 50.7547 & 205.0 & 1875 & 584 & 1032 \\
    DEBW029 & Aalen & 10.0963 & 48.8479 & 424.0 & 2958 & 715 & 1080 \\
    DENI052 & Allertal & 9.6230 & 52.8294 & 38.0 & 2790 & 497 & 1080 \\
    """
    meta_data = ['station_name', 'station_lon', 'station_lat', 'station_alt']
    meta_round = ["station_lon", "station_lat", "station_alt"]
    precision = 4
    subsets = ["train", "val", "test"]
    summary_rows = ["# Stations", "# Samples"]

    def column_format(table):
        # One entry for the index (left aligned) plus one per data column; the last column is right aligned and
        # everything in between is centred. Derived from the table itself so it always matches its width.
        return "l" + "c" * (table.shape[1] - 1) + "r"

    path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
    check_path_and_create(path)

    df = pd.DataFrame(columns=meta_data + subsets)
    for k in subsets:
        data: DataGenerator = self.data_store.get('generator', k)
        for station in data.stations:
            # fetch the station's generator once and reuse it for both the sample count and the meta data
            station_data = data.get_data_generator(station)
            df.loc[station, k] = station_data.get_transposed_label().shape[0]
            # meta data is only filled in the first time a station shows up in any subset
            if df.loc[station, meta_data].isnull().any():
                df.loc[station, meta_data] = station_data.meta.loc[meta_data].values.flatten()
        # Evaluate both aggregates before writing either summary row: otherwise the freshly written "# Samples"
        # entry would be counted as an additional station.
        n_samples = df.loc[:, k].sum()
        n_stations = df.loc[:, k].count()
        df.loc["# Samples", k] = n_samples
        df.loc["# Stations", k] = n_stations

    df[meta_round] = df[meta_round].astype(float).round(precision)
    df.sort_index(inplace=True)
    # sorting puts the summary rows first ('#' sorts before letters), so move them to the end of the table
    df = df.reindex(df.index.drop(summary_rows).to_list() + summary_rows)
    df.index.name = 'stat. ID'

    df.to_latex(os.path.join(path, "test.tex"), na_rep='---', column_format=column_format(df))
    # close the markdown file deterministically; utf-8 matches the default encoding used by to_latex
    with open(os.path.join(path, "test.md"), "w", encoding="utf-8") as md_file:
        df.to_markdown(md_file, tablefmt="github")
    df_short = df.drop(meta_data, axis=1)
    df_short.to_latex(os.path.join(path, "test_short.tex"), na_rep='---', column_format=column_format(df_short))
def split_train_val_test(self) -> None:
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment