BUG: NaN error during PostProcessing
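PostProcessing fails with `ValueError: array must not contain infs or NaNs` when AIRBASE data is used. The error is raised while the climatological skill scores are calculated: `skill_score_mu_case_4` passes data containing NaN or infinite values to `scipy.stats.pearsonr` (see the traceback below). Run the following script to reproduce the error: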
__author__ = "Lukas Leufen"
__date__ = '2019-11-14'
import argparse
from mlair.workflows import DefaultWorkflow
from mlair.data_handler.data_handler_mixed_sampling import DataHandlerMixedSampling
def load_stations():
    """Read the station list used for this experiment from its JSON file.

    :return: the decoded JSON content of the AIRBASE station list, or ``None``
        if the file does not exist.
    """
    import json

    # The UBA variant of this list is
    # 'supplement/station_list_north_german_plain_rural_UBA.json'.
    station_file = 'supplement/station_list_north_german_plain_rural_AIRBASE.json'
    try:
        handle = open(station_file, 'r')
    except FileNotFoundError:
        # A missing list is not an error here; the caller receives None.
        return None
    with handle:
        return json.load(handle)
# Statistic requested per variable, grouped by the statistic name. The mapping
# is handed to the workflow as ``statistics_per_var``.
stats = {
    **dict.fromkeys(('o3', 'no', 'no2'), 'dma8eu'),
    **dict.fromkeys(('relhum', 'u', 'v', 'cloudcover'), 'average_values'),
    **dict.fromkeys(('pblheight', 'temp'), 'maximum'),
}
# Data origin per variable, grouped by origin. The mapping is handed to the
# workflow as ``data_origin``.
# NOTE(review): 'REA' presumably selects reanalysis data and '' the default
# origin - confirm against the data handler.
data_origin = {
    **dict.fromkeys(('o3', 'no', 'no2'), ''),
    **dict.fromkeys(('relhum', 'u', 'v', 'cloudcover', 'pblheight', 'temp'), 'REA'),
}
def main(parser_args):
    """Set up a ``DefaultWorkflow`` on the AIRBASE network and run it.

    :param parser_args: parsed command line arguments; every attribute of this
        object is forwarded to ``DefaultWorkflow`` as an extra keyword argument.
    """
    station_ids = load_stations()  # None when the station list file is missing
    cli_options = parser_args.__dict__
    # Explicit keywords are kept (rather than merged into one dict) so that a
    # command line option duplicating one of them still raises a TypeError.
    experiment = DefaultWorkflow(
        stations=station_ids,
        network="AIRBASE",
        evaluate_bootstraps=False,
        plot_list=["PlotAvailability", "PlotStationMap"],
        data_origin=data_origin,
        data_handler=DataHandlerMixedSampling,
        interpolation_limit=1,
        overwrite_local_data=True,
        sampling="daily",
        sampling_inputs="hourly",
        statistics_per_var=stats,
        create_new_model=True,
        train_model=True,
        epochs=2,
        window_history_size=48,
        **cli_options
    )
    experiment.run()
if __name__ == "__main__":
    # Script entry point: parse the single optional command line argument (an
    # experiment date given as a string) and pass the parsed namespace to main().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--experiment_date',
        metavar='--exp_date',
        type=str,
        default=None,
        help="set experiment date as string",
    )
    args = parser.parse_args()
    main(args)
Error message:
Traceback (most recent call last):
File "run_T1B_experiments.py", line 49, in <module>
main(args)
File "run_T1B_experiments.py", line 41, in main
workflow.run()
File "/home/l.leufen/mlair/mlair/workflows/abstract_workflow.py", line 32, in run
stage(**self._registry_kwargs[pos])
File "/home/l.leufen/mlair/mlair/run_modules/post_processing.py", line 81, in __init__
self._run()
File "/home/l.leufen/mlair/mlair/run_modules/post_processing.py", line 101, in _run
self.skill_scores = self.calculate_skill_scores()
File "/home/l.leufen/mlair/mlair/run_modules/post_processing.py", line 563, in calculate_skill_scores
self.window_lead_time)
File "/home/l.leufen/mlair/mlair/helpers/statistics.py", line 239, in climatological_skill_scores
external_data=external_data).values.flatten())
File "/home/l.leufen/mlair/mlair/helpers/statistics.py", line 246, in _climatological_skill_score
return self.__getattribute__(f"skill_score_mu_case_{mu_type}")(data, observation_name, forecast_name, **kwargs)
File "/home/l.leufen/mlair/mlair/helpers/statistics.py", line 341, in skill_score_mu_case_4
r_mu, p_mu = stats.pearsonr(data.loc[..., observation_name], data.loc[..., observation_name + "X"])
File "/home/l.leufen/mlair/venv/lib/python3.6/site-packages/scipy/stats/stats.py", line 3531, in pearsonr
normym = linalg.norm(ym)
File "/home/l.leufen/mlair/venv/lib/python3.6/site-packages/scipy/linalg/misc.py", line 142, in norm
a = np.asarray_chkfinite(a)
File "/home/l.leufen/mlair/venv/lib/python3.6/site-packages/numpy/lib/function_base.py", line 499, in asarray_chkfinite
"array must not contain infs or NaNs")
ValueError: array must not contain infs or NaNs