This is a quick demonstration of how to do point verification with harp using ensembles from IFSENS and an experimental AROME-Arctic ensemble.
The data for daily forecasts initiated at 00 UTC 8 - 31 March 2018 have already been interpolated to points at WMO stations and can be found under /lustre/storeB/project/nwp/alertness/wp4/harp/FCTABLE and the observations can be found under /lustre/storeB/project/nwp/alertness/wp4/harp/OBSTABLE.
To begin we attach the harp and tidyverse packages and set our data directory. If you do not have tidyverse, it is sufficient to just attach dplyr.
library(harp)
library(tidyverse)
data_dir <- "/lustre/storeB/project/nwp/alertness/wp4/harp/"
Now let’s read in the ensemble forecast data (AAEPS and IFSENS) for 2m temperature using read_point_forecast()
t2m_fcst
## ● AAEPS
## # A tibble: 105,672 x 18
## fcdate validdate leadtime SID parameter
## <dttm> <dttm> <int> <int> <chr>
## 1 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 2 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 T2m
## 3 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1003 T2m
## 4 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1004 T2m
## 5 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1006 T2m
## 6 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1007 T2m
## 7 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1008 T2m
## 8 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1009 T2m
## 9 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 10 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1011 T2m
## # … with 105,662 more rows, and 13 more variables: reference_runs_mbr000 <dbl>,
## # reference_runs_mbr001 <dbl>, reference_runs_mbr002 <dbl>,
## # reference_runs_mbr003 <dbl>, reference_runs_mbr004 <dbl>,
## # reference_runs_mbr005 <dbl>, reference_runs_mbr006 <dbl>,
## # reference_runs_mbr007 <dbl>, reference_runs_mbr008 <dbl>,
## # reference_runs_mbr009 <dbl>, reference_runs_mbr010 <dbl>, fcst_cycle <chr>,
## # units <chr>
##
## ● IFSENS
## # A tibble: 99,144 x 57
## fcdate validdate leadtime SID ECEPS50_mbr000
## <dttm> <dttm> <int> <int> <dbl>
## 1 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 271.
## 2 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 263.
## 3 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1003 260.
## 4 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1004 259.
## 5 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1006 257.
## 6 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1007 259.
## 7 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1008 257.
## 8 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1009 262.
## 9 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 267.
## 10 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1011 258.
## # … with 99,134 more rows, and 52 more variables: ECEPS50_mbr001 <dbl>,
## # ECEPS50_mbr002 <dbl>, ECEPS50_mbr003 <dbl>, ECEPS50_mbr004 <dbl>,
## # ECEPS50_mbr005 <dbl>, ECEPS50_mbr006 <dbl>, ECEPS50_mbr007 <dbl>,
## # ECEPS50_mbr008 <dbl>, ECEPS50_mbr009 <dbl>, ECEPS50_mbr010 <dbl>,
## # ECEPS50_mbr011 <dbl>, ECEPS50_mbr012 <dbl>, ECEPS50_mbr013 <dbl>,
## # ECEPS50_mbr014 <dbl>, ECEPS50_mbr015 <dbl>, ECEPS50_mbr016 <dbl>,
## # ECEPS50_mbr017 <dbl>, ECEPS50_mbr018 <dbl>, ECEPS50_mbr019 <dbl>,
## # ECEPS50_mbr020 <dbl>, ECEPS50_mbr021 <dbl>, ECEPS50_mbr022 <dbl>,
## # ECEPS50_mbr023 <dbl>, ECEPS50_mbr024 <dbl>, ECEPS50_mbr025 <dbl>,
## # ECEPS50_mbr026 <dbl>, ECEPS50_mbr027 <dbl>, ECEPS50_mbr028 <dbl>,
## # ECEPS50_mbr029 <dbl>, ECEPS50_mbr030 <dbl>, ECEPS50_mbr031 <dbl>,
## # ECEPS50_mbr032 <dbl>, ECEPS50_mbr033 <dbl>, ECEPS50_mbr034 <dbl>,
## # ECEPS50_mbr035 <dbl>, ECEPS50_mbr036 <dbl>, ECEPS50_mbr037 <dbl>,
## # ECEPS50_mbr038 <dbl>, ECEPS50_mbr039 <dbl>, ECEPS50_mbr040 <dbl>,
## # ECEPS50_mbr041 <dbl>, ECEPS50_mbr042 <dbl>, ECEPS50_mbr043 <dbl>,
## # ECEPS50_mbr044 <dbl>, ECEPS50_mbr045 <dbl>, ECEPS50_mbr046 <dbl>,
## # ECEPS50_mbr047 <dbl>, ECEPS50_mbr048 <dbl>, ECEPS50_mbr049 <dbl>,
## # ECEPS50_mbr050 <dbl>, fcst_cycle <chr>, units <chr>
Now read the 2m temperature observations using read_point_obs()
t2m_obs
## # A tibble: 1,998,955 x 7
## validdate SID lon lat elev T2m units
## <dttm> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2018-03-08 00:00:00 1001 -8.67 70.9 9 272. K
## 2 2018-03-08 00:00:00 1002 16.2 80.1 7 257. K
## 3 2018-03-08 00:00:00 1003 15.5 77.0 11 258. K
## 4 2018-03-08 00:00:00 1006 22.8 78.3 13 257. K
## 5 2018-03-08 00:00:00 1007 11.9 78.9 8 259. K
## 6 2018-03-08 00:00:00 1008 15.5 78.2 27 257. K
## 7 2018-03-08 00:00:00 1009 25.0 80.7 6 262. K
## 8 2018-03-08 00:00:00 1010 16.1 69.3 13 266. K
## 9 2018-03-08 00:00:00 1015 17.8 69.6 14 264. K
## 10 2018-03-08 00:00:00 1016 28.9 78.9 13 256. K
## # … with 1,998,945 more rows
Maybe we want both forecast and observations in \(^\circ C\) instead of K. We can do this by using a scale function:
t2m_fcst <- scale_point_forecast(t2m_fcst, -273.15, new_units = "degC")
t2m_obs <- scale_point_obs(t2m_obs, T2m, -273.15, new_units = "degC")
t2m_fcst
## ● AAEPS
## # A tibble: 105,672 x 18
## fcdate validdate leadtime SID parameter fcst_cycle
## <dttm> <dttm> <int> <int> <chr> <chr>
## 1 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m 00
## 2 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 T2m 00
## 3 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1003 T2m 00
## 4 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1004 T2m 00
## 5 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1006 T2m 00
## 6 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1007 T2m 00
## 7 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1008 T2m 00
## 8 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1009 T2m 00
## 9 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m 00
## 10 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1011 T2m 00
## # … with 105,662 more rows, and 12 more variables: units <chr>,
## # reference_runs_mbr000 <dbl>, reference_runs_mbr001 <dbl>,
## # reference_runs_mbr002 <dbl>, reference_runs_mbr003 <dbl>,
## # reference_runs_mbr004 <dbl>, reference_runs_mbr005 <dbl>,
## # reference_runs_mbr006 <dbl>, reference_runs_mbr007 <dbl>,
## # reference_runs_mbr008 <dbl>, reference_runs_mbr009 <dbl>,
## # reference_runs_mbr010 <dbl>
##
## ● IFSENS
## # A tibble: 99,144 x 57
## fcdate validdate leadtime SID fcst_cycle units
## <dttm> <dttm> <int> <int> <chr> <chr>
## 1 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 00 degC
## 2 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 00 degC
## 3 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1003 00 degC
## 4 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1004 00 degC
## 5 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1006 00 degC
## 6 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1007 00 degC
## 7 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1008 00 degC
## 8 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1009 00 degC
## 9 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 00 degC
## 10 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1011 00 degC
## # … with 99,134 more rows, and 51 more variables: ECEPS50_mbr000 <dbl>,
## # ECEPS50_mbr001 <dbl>, ECEPS50_mbr002 <dbl>, ECEPS50_mbr003 <dbl>,
## # ECEPS50_mbr004 <dbl>, ECEPS50_mbr005 <dbl>, ECEPS50_mbr006 <dbl>,
## # ECEPS50_mbr007 <dbl>, ECEPS50_mbr008 <dbl>, ECEPS50_mbr009 <dbl>,
## # ECEPS50_mbr010 <dbl>, ECEPS50_mbr011 <dbl>, ECEPS50_mbr012 <dbl>,
## # ECEPS50_mbr013 <dbl>, ECEPS50_mbr014 <dbl>, ECEPS50_mbr015 <dbl>,
## # ECEPS50_mbr016 <dbl>, ECEPS50_mbr017 <dbl>, ECEPS50_mbr018 <dbl>,
## # ECEPS50_mbr019 <dbl>, ECEPS50_mbr020 <dbl>, ECEPS50_mbr021 <dbl>,
## # ECEPS50_mbr022 <dbl>, ECEPS50_mbr023 <dbl>, ECEPS50_mbr024 <dbl>,
## # ECEPS50_mbr025 <dbl>, ECEPS50_mbr026 <dbl>, ECEPS50_mbr027 <dbl>,
## # ECEPS50_mbr028 <dbl>, ECEPS50_mbr029 <dbl>, ECEPS50_mbr030 <dbl>,
## # ECEPS50_mbr031 <dbl>, ECEPS50_mbr032 <dbl>, ECEPS50_mbr033 <dbl>,
## # ECEPS50_mbr034 <dbl>, ECEPS50_mbr035 <dbl>, ECEPS50_mbr036 <dbl>,
## # ECEPS50_mbr037 <dbl>, ECEPS50_mbr038 <dbl>, ECEPS50_mbr039 <dbl>,
## # ECEPS50_mbr040 <dbl>, ECEPS50_mbr041 <dbl>, ECEPS50_mbr042 <dbl>,
## # ECEPS50_mbr043 <dbl>, ECEPS50_mbr044 <dbl>, ECEPS50_mbr045 <dbl>,
## # ECEPS50_mbr046 <dbl>, ECEPS50_mbr047 <dbl>, ECEPS50_mbr048 <dbl>,
## # ECEPS50_mbr049 <dbl>, ECEPS50_mbr050 <dbl>
t2m_obs
## # A tibble: 1,998,955 x 7
## validdate SID lon lat units elev T2m
## <dttm> <int> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 2018-03-08 00:00:00 1001 -8.67 70.9 degC 9 -0.950
## 2 2018-03-08 00:00:00 1002 16.2 80.1 degC 7 -16.2
## 3 2018-03-08 00:00:00 1003 15.5 77.0 degC 11 -15.2
## 4 2018-03-08 00:00:00 1006 22.8 78.3 degC 13 -16.4
## 5 2018-03-08 00:00:00 1007 11.9 78.9 degC 8 -14.5
## 6 2018-03-08 00:00:00 1008 15.5 78.2 degC 27 -16.5
## 7 2018-03-08 00:00:00 1009 25.0 80.7 degC 6 -11.4
## 8 2018-03-08 00:00:00 1010 16.1 69.3 degC 13 -7.35
## 9 2018-03-08 00:00:00 1015 17.8 69.6 degC 14 -9.45
## 10 2018-03-08 00:00:00 1016 28.9 78.9 degC 13 -16.8
## # … with 1,998,945 more rows
Now we join the forecasts and observations so that each row has a forecast and an observation. Rows where there are no observations are discarded.
t2m_fcst <- join_to_fcst(t2m_fcst, t2m_obs)
t2m_fcst
## ● AAEPS
## # A tibble: 80,262 x 22
## fcdate validdate leadtime SID parameter fcst_cycle
## <dttm> <dttm> <int> <int> <chr> <chr>
## 1 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m 00
## 2 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 T2m 00
## 3 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1003 T2m 00
## 4 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1006 T2m 00
## 5 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1007 T2m 00
## 6 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1008 T2m 00
## 7 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1009 T2m 00
## 8 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m 00
## 9 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1015 T2m 00
## 10 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1016 T2m 00
## # … with 80,252 more rows, and 16 more variables: units <chr>,
## # reference_runs_mbr000 <dbl>, reference_runs_mbr001 <dbl>,
## # reference_runs_mbr002 <dbl>, reference_runs_mbr003 <dbl>,
## # reference_runs_mbr004 <dbl>, reference_runs_mbr005 <dbl>,
## # reference_runs_mbr006 <dbl>, reference_runs_mbr007 <dbl>,
## # reference_runs_mbr008 <dbl>, reference_runs_mbr009 <dbl>,
## # reference_runs_mbr010 <dbl>, lon <dbl>, lat <dbl>, elev <dbl>, T2m <dbl>
##
## ● IFSENS
## # A tibble: 73,914 x 61
## fcdate validdate leadtime SID fcst_cycle units
## <dttm> <dttm> <int> <int> <chr> <chr>
## 1 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 00 degC
## 2 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 00 degC
## 3 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1003 00 degC
## 4 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1006 00 degC
## 5 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1007 00 degC
## 6 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1008 00 degC
## 7 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1009 00 degC
## 8 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 00 degC
## 9 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1015 00 degC
## 10 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1016 00 degC
## # … with 73,904 more rows, and 55 more variables: ECEPS50_mbr000 <dbl>,
## # ECEPS50_mbr001 <dbl>, ECEPS50_mbr002 <dbl>, ECEPS50_mbr003 <dbl>,
## # ECEPS50_mbr004 <dbl>, ECEPS50_mbr005 <dbl>, ECEPS50_mbr006 <dbl>,
## # ECEPS50_mbr007 <dbl>, ECEPS50_mbr008 <dbl>, ECEPS50_mbr009 <dbl>,
## # ECEPS50_mbr010 <dbl>, ECEPS50_mbr011 <dbl>, ECEPS50_mbr012 <dbl>,
## # ECEPS50_mbr013 <dbl>, ECEPS50_mbr014 <dbl>, ECEPS50_mbr015 <dbl>,
## # ECEPS50_mbr016 <dbl>, ECEPS50_mbr017 <dbl>, ECEPS50_mbr018 <dbl>,
## # ECEPS50_mbr019 <dbl>, ECEPS50_mbr020 <dbl>, ECEPS50_mbr021 <dbl>,
## # ECEPS50_mbr022 <dbl>, ECEPS50_mbr023 <dbl>, ECEPS50_mbr024 <dbl>,
## # ECEPS50_mbr025 <dbl>, ECEPS50_mbr026 <dbl>, ECEPS50_mbr027 <dbl>,
## # ECEPS50_mbr028 <dbl>, ECEPS50_mbr029 <dbl>, ECEPS50_mbr030 <dbl>,
## # ECEPS50_mbr031 <dbl>, ECEPS50_mbr032 <dbl>, ECEPS50_mbr033 <dbl>,
## # ECEPS50_mbr034 <dbl>, ECEPS50_mbr035 <dbl>, ECEPS50_mbr036 <dbl>,
## # ECEPS50_mbr037 <dbl>, ECEPS50_mbr038 <dbl>, ECEPS50_mbr039 <dbl>,
## # ECEPS50_mbr040 <dbl>, ECEPS50_mbr041 <dbl>, ECEPS50_mbr042 <dbl>,
## # ECEPS50_mbr043 <dbl>, ECEPS50_mbr044 <dbl>, ECEPS50_mbr045 <dbl>,
## # ECEPS50_mbr046 <dbl>, ECEPS50_mbr047 <dbl>, ECEPS50_mbr048 <dbl>,
## # ECEPS50_mbr049 <dbl>, ECEPS50_mbr050 <dbl>, lon <dbl>, lat <dbl>,
## # elev <dbl>, T2m <dbl>
We are now ready to verify with ens_verify()
t2m_verif <- ens_verify(t2m_fcst, T2m)
t2m_verif
## $ens_summary_scores
## # A tibble: 34 x 12
## mname leadtime num_cases mean_bias stde rmse spread spread_skill_ratio
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAEPS 0 4604 1.11 3.18 3.36 1.51 0.450
## 2 AAEPS 3 4444 1.33 3.43 3.68 1.34 0.365
## 3 AAEPS 6 4892 1.08 3.32 3.49 1.32 0.379
## 4 AAEPS 9 4710 0.680 2.27 2.37 1.10 0.465
## 5 AAEPS 12 4883 0.489 1.74 1.80 0.978 0.542
## 6 AAEPS 15 4722 0.445 1.69 1.74 0.978 0.561
## 7 AAEPS 18 4898 0.494 2.55 2.60 1.16 0.446
## 8 AAEPS 21 4670 0.851 3.30 3.41 1.29 0.378
## 9 AAEPS 24 4604 1.24 3.78 3.98 1.37 0.344
## 10 AAEPS 27 4445 1.54 4.09 4.37 1.39 0.319
## # … with 24 more rows, and 4 more variables: rank_histogram <list>, crps <dbl>,
## # crps_potential <dbl>, crps_reliability <dbl>
##
## $ens_threshold_scores
## # A tibble: 0 x 0
##
## $det_summary_scores
## # A tibble: 1,054 x 9
## mname member leadtime sub_model num_cases bias rmse mae stde
## <chr> <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 AAEPS mbr000 0 reference_runs 4604 0.650 3.21 2.25 3.14
## 2 AAEPS mbr000 3 reference_runs 4444 0.877 3.46 2.40 3.35
## 3 AAEPS mbr000 6 reference_runs 4892 0.659 3.33 2.32 3.26
## 4 AAEPS mbr000 9 reference_runs 4710 0.198 2.32 1.59 2.31
## 5 AAEPS mbr000 12 reference_runs 4883 -0.00730 1.77 1.27 1.77
## 6 AAEPS mbr000 15 reference_runs 4722 -0.0814 1.76 1.28 1.76
## 7 AAEPS mbr000 18 reference_runs 4898 -0.0433 2.65 1.97 2.65
## 8 AAEPS mbr000 21 reference_runs 4670 0.297 3.42 2.49 3.41
## 9 AAEPS mbr000 24 reference_runs 4604 0.679 3.92 2.83 3.86
## 10 AAEPS mbr000 27 reference_runs 4445 0.990 4.27 3.04 4.15
## # … with 1,044 more rows
##
## attr(,"parameter")
## [1] "T2m"
## attr(,"start_date")
## [1] "2018030800"
## attr(,"end_date")
## [1] "2018033100"
## attr(,"num_stations")
## [1] 208
We can now plot scores using plot_point_verif()
plot_point_verif(t2m_verif, crps)
Here it is clear that a different number of cases have been used for AAEPS than IFSENS. This often happens when comparing forecast models and for a fair comparison the same cases should be used for both models. harp can fix this by using the common_cases() function:
t2m_verif <- ens_verify(common_cases(t2m_fcst), T2m)
plot_point_verif(t2m_verif, crps)
plot_point_verif(t2m_verif, spread_skill)
plot_point_verif(t2m_verif, rank_histogram)
Rank histograms for ensembles with a different number of members are lopsided - so we can fix that with the rank_is_relative switch, and can also normalize so that the ideal column height is 1.
plot_point_verif(
t2m_verif, normalized_rank_histogram, rank_is_relative = TRUE
)
To verify for categorical scores, just add some thresholds
t2m_verif <- ens_verify(common_cases(t2m_fcst), T2m, thresholds = seq(-10, 10, 5))
t2m_verif
## $ens_summary_scores
## # A tibble: 34 x 12
## mname leadtime num_cases mean_bias stde rmse spread spread_skill_ratio
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAEPS 0 4240 1.09 3.13 3.31 1.49 0.450
## 2 AAEPS 3 4084 1.32 3.38 3.63 1.32 0.362
## 3 AAEPS 6 4512 1.10 3.29 3.47 1.30 0.376
## 4 AAEPS 9 4333 0.710 2.25 2.36 1.09 0.460
## 5 AAEPS 12 4506 0.507 1.74 1.81 0.966 0.534
## 6 AAEPS 15 4343 0.464 1.69 1.75 0.963 0.549
## 7 AAEPS 18 4518 0.515 2.53 2.59 1.14 0.441
## 8 AAEPS 21 4290 0.857 3.25 3.37 1.27 0.377
## 9 AAEPS 24 4241 1.22 3.72 3.91 1.34 0.343
## 10 AAEPS 27 4086 1.53 4.03 4.31 1.36 0.316
## # … with 24 more rows, and 4 more variables: rank_histogram <list>, crps <dbl>,
## # crps_potential <dbl>, crps_reliability <dbl>
##
## $ens_threshold_scores
## # A tibble: 170 x 18
## mname leadtime threshold fair_brier_score sample_climatolo… bss_ref_climatol…
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 AAEPS 0 -10 0.0983 0.564 0.564
## 2 AAEPS 3 -10 0.0964 0.533 0.533
## 3 AAEPS 6 -10 0.102 0.554 0.554
## 4 AAEPS 9 -10 0.0827 0.762 0.762
## 5 AAEPS 12 -10 0.0462 0.869 0.869
## 6 AAEPS 15 -10 0.0459 0.862 0.862
## 7 AAEPS 18 -10 0.0931 0.728 0.728
## 8 AAEPS 21 -10 0.121 0.630 0.630
## 9 AAEPS 24 -10 0.130 0.576 0.576
## 10 AAEPS 27 -10 0.130 0.542 0.542
## # … with 160 more rows, and 12 more variables: num_cases_total <int>,
## # num_cases_observed <int>, num_cases_forecast <int>, brier_score <dbl>,
## # brier_skill_score <dbl>, brier_score_reliability <dbl>,
## # brier_score_resolution <dbl>, brier_score_uncertainty <dbl>,
## # reliability <list>, roc <list>, roc_area <dbl>, economic_value <list>
##
## $det_summary_scores
## # A tibble: 1,054 x 9
## mname member leadtime sub_model num_cases bias rmse mae stde
## <chr> <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 AAEPS mbr000 0 reference_runs 4240 0.648 3.16 2.22 3.09
## 2 AAEPS mbr000 3 reference_runs 4084 0.882 3.43 2.38 3.31
## 3 AAEPS mbr000 6 reference_runs 4512 0.682 3.30 2.30 3.23
## 4 AAEPS mbr000 9 reference_runs 4333 0.228 2.31 1.58 2.29
## 5 AAEPS mbr000 12 reference_runs 4506 0.00957 1.77 1.26 1.77
## 6 AAEPS mbr000 15 reference_runs 4343 -0.0625 1.75 1.27 1.75
## 7 AAEPS mbr000 18 reference_runs 4518 -0.0137 2.61 1.94 2.61
## 8 AAEPS mbr000 21 reference_runs 4290 0.315 3.34 2.44 3.33
## 9 AAEPS mbr000 24 reference_runs 4241 0.663 3.82 2.76 3.76
## 10 AAEPS mbr000 27 reference_runs 4086 0.982 4.17 2.98 4.06
## # … with 1,044 more rows
##
## attr(,"parameter")
## [1] "T2m"
## attr(,"start_date")
## [1] "2018030800"
## attr(,"end_date")
## [1] "2018033100"
## attr(,"num_stations")
## [1] 192
You can also save your verification data so you can explore plots in an app in your internet browser. Here we will save into tempdir(), which is a temporary directory that will be erased when you close your R session. Usually you would save to somewhere more permanent…
save_point_verif(t2m_verif, tempdir())
## Saving point verification scores to:
## /tmp/RtmptzdezR/harpPointVerif.harp.T2m.harp.2018030800-2018033100.harp.AAEPS.model.IFSENS.rds
You’ll see that the file name is a little convoluted, but this helps the app get information from the file name.
You start the app with shiny_plot_point_verif(<dir>)
shiny_plot_point_verif(tempdir())
Typically model precipitation is accumulated from the start of the model run. This is still the case when we interpolate the data. However, when we read in the interpolated data we can accumulate to different periods using AccPcp*h, where you replace the * with the number of hours of accumulation time.
pcp_6h <- read_point_forecast(
2018030800,
2018033100,
c("AAEPS", "IFSENS"),
"EPS",
"AccPcp6h",
by = "1d",
file_path = file.path(data_dir, "FCTABLE"),
file_template = list(
AAEPS = aaeps_template,
IFSENS = ifsens_template
)
) %>%
common_cases() %>%
join_to_fcst(
read_point_obs(
first_validdate(.),
last_validdate(.),
"AccPcp6h",
obs_path = file.path(data_dir, "OBSTABLE")
)
)
pcp_6h_verif <- ens_verify(
pcp_6h, AccPcp6h, thresholds = c(0.125, 0.25, 0.5, 1, 2, 4, 8)
)
save_point_verif(pcp_6h_verif, tempdir())
## Saving point verification scores to:
## /tmp/RtmptzdezR/harpPointVerif.harp.AccPcp6h.harp.2018030800-2018033000.harp.AAEPS.model.IFSENS.rds
Do the same for 12h precipitation
read_point_forecast(
2018030800,
2018033100,
c("AAEPS", "IFSENS"),
"EPS",
"AccPcp12h",
by = "1d",
file_path = file.path(data_dir, "FCTABLE"),
file_template = list(
AAEPS = aaeps_template,
IFSENS = ifsens_template
)
) %>%
common_cases() %>%
join_to_fcst(
read_point_obs(
first_validdate(.),
last_validdate(.),
"AccPcp12h",
obs_path = file.path(data_dir, "OBSTABLE")
)
) %>%
ens_verify(
AccPcp12h, thresholds = c(0.125, 0.25, 0.5, 1, 2, 4, 8)
) %>%
save_point_verif(tempdir())
The verification can also be split into groups using the groupings argument. By default, the data are already by grouped by lead time, but we could for example group by location. harp comes with a data frame of stations grouped by region / geographic characteristic taken from the “monitor”. So we simply need to join this data frame to our forecast / observations data. We need to “force” join as join_to_fcst() is designed to join observations to forecasts, so the units are checked - here we don’t want to do this.
t2m_grouped <- join_to_fcst(
common_cases(t2m_fcst),
station_groups,
force_join = TRUE
)
t2m_grouped
## ● AAEPS
## # A tibble: 313,782 x 23
## fcdate validdate leadtime SID parameter fcst_cycle
## <dttm> <dttm> <int> <int> <chr> <chr>
## 1 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m 00
## 2 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m 00
## 3 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m 00
## 4 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m 00
## 5 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m 00
## 6 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 T2m 00
## 7 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 T2m 00
## 8 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 T2m 00
## 9 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 T2m 00
## 10 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 T2m 00
## # … with 313,772 more rows, and 17 more variables: units <chr>,
## # reference_runs_mbr000 <dbl>, reference_runs_mbr001 <dbl>,
## # reference_runs_mbr002 <dbl>, reference_runs_mbr003 <dbl>,
## # reference_runs_mbr004 <dbl>, reference_runs_mbr005 <dbl>,
## # reference_runs_mbr006 <dbl>, reference_runs_mbr007 <dbl>,
## # reference_runs_mbr008 <dbl>, reference_runs_mbr009 <dbl>,
## # reference_runs_mbr010 <dbl>, lon <dbl>, lat <dbl>, elev <dbl>, T2m <dbl>,
## # group <chr>
##
## ● IFSENS
## # A tibble: 313,782 x 62
## fcdate validdate leadtime SID fcst_cycle units
## <dttm> <dttm> <int> <int> <chr> <chr>
## 1 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 00 degC
## 2 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 00 degC
## 3 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 00 degC
## 4 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 00 degC
## 5 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 00 degC
## 6 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 00 degC
## 7 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 00 degC
## 8 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 00 degC
## 9 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 00 degC
## 10 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1002 00 degC
## # … with 313,772 more rows, and 56 more variables: ECEPS50_mbr000 <dbl>,
## # ECEPS50_mbr001 <dbl>, ECEPS50_mbr002 <dbl>, ECEPS50_mbr003 <dbl>,
## # ECEPS50_mbr004 <dbl>, ECEPS50_mbr005 <dbl>, ECEPS50_mbr006 <dbl>,
## # ECEPS50_mbr007 <dbl>, ECEPS50_mbr008 <dbl>, ECEPS50_mbr009 <dbl>,
## # ECEPS50_mbr010 <dbl>, ECEPS50_mbr011 <dbl>, ECEPS50_mbr012 <dbl>,
## # ECEPS50_mbr013 <dbl>, ECEPS50_mbr014 <dbl>, ECEPS50_mbr015 <dbl>,
## # ECEPS50_mbr016 <dbl>, ECEPS50_mbr017 <dbl>, ECEPS50_mbr018 <dbl>,
## # ECEPS50_mbr019 <dbl>, ECEPS50_mbr020 <dbl>, ECEPS50_mbr021 <dbl>,
## # ECEPS50_mbr022 <dbl>, ECEPS50_mbr023 <dbl>, ECEPS50_mbr024 <dbl>,
## # ECEPS50_mbr025 <dbl>, ECEPS50_mbr026 <dbl>, ECEPS50_mbr027 <dbl>,
## # ECEPS50_mbr028 <dbl>, ECEPS50_mbr029 <dbl>, ECEPS50_mbr030 <dbl>,
## # ECEPS50_mbr031 <dbl>, ECEPS50_mbr032 <dbl>, ECEPS50_mbr033 <dbl>,
## # ECEPS50_mbr034 <dbl>, ECEPS50_mbr035 <dbl>, ECEPS50_mbr036 <dbl>,
## # ECEPS50_mbr037 <dbl>, ECEPS50_mbr038 <dbl>, ECEPS50_mbr039 <dbl>,
## # ECEPS50_mbr040 <dbl>, ECEPS50_mbr041 <dbl>, ECEPS50_mbr042 <dbl>,
## # ECEPS50_mbr043 <dbl>, ECEPS50_mbr044 <dbl>, ECEPS50_mbr045 <dbl>,
## # ECEPS50_mbr046 <dbl>, ECEPS50_mbr047 <dbl>, ECEPS50_mbr048 <dbl>,
## # ECEPS50_mbr049 <dbl>, ECEPS50_mbr050 <dbl>, lon <dbl>, lat <dbl>,
## # elev <dbl>, T2m <dbl>, group <chr>
As you can see, the group column has been added. Now we can verify with groupings = c("leadtime", "group")
t2m_grouped_verif <- ens_verify(
t2m_grouped, T2m, groupings = c("leadtime", "group"),
thresholds = seq(-10, 5, 5)
)
Now when we plot, something weird happens…
plot_point_verif(t2m_grouped_verif, crps)
This is because all of the groups are being plotted on top of each other, so we need to separate them out using facet_by and / or filter_by
plot_point_verif(t2m_grouped_verif, crps, facet_by = vars(group))
## Warning: plot_num_cases = TRUE cannot be used with facet_by. plot_num_cases set
## to FALSE.
plot_point_verif(t2m_grouped_verif, crps, filter_by = vars(group == "Norway"))
plot_point_verif(
t2m_grouped_verif, crps, facet_by = vars(group),
filter_by = vars(group %in% c("Norway", "Sweden", "Finland"))
)
## Warning: plot_num_cases = TRUE cannot be used with facet_by. plot_num_cases set
## to FALSE.
save_point_verif(t2m_grouped_verif, tempdir())
## Saving point verification scores to:
## /tmp/RtmptzdezR/harpPointVerif.harp.T2m.harp.2018030800-2018033100.harp.AAEPS.model.IFSENS.rds
Deterministic verification works in exactly the same way as the EPS verification - you follow the exact same workflow: READ -> READ -> JOIN -> VERIFY
fcst_t2m_det <- read_point_forecast(
2018030800,
2018033100,
c("arome_arctic", "MEPS_det"),
"det",
"T2m",
by = "1d",
file_path = file.path(data_dir, "FCTABLE")
) %>%
common_cases() %>%
scale_point_forecast(-273.15, new_units = "degC")
obs_t2m <- read_point_obs(
first_validdate(fcst_t2m_det),
last_validdate(fcst_t2m_det),
"T2m",
stations = pull_stations(fcst_t2m_det),
obs_path = file.path(data_dir, "OBSTABLE")
) %>%
scale_point_obs(T2m, -273.15, new_units = "degC")
fcst_t2m_det <- join_to_fcst(fcst_t2m_det, obs_t2m)
verif_t2m_det <- det_verify(
fcst_t2m_det,
T2m,
thresholds = seq(-15, 5, 2.5)
)
verif_t2m_det
## $det_summary_scores
## # A tibble: 34 x 7
## mname leadtime num_cases bias rmse mae stde
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 arome_arctic 0 4275 0.651 2.96 2.07 2.89
## 2 arome_arctic 3 4110 0.808 3.16 2.20 3.05
## 3 arome_arctic 6 4539 0.564 3.00 2.08 2.94
## 4 arome_arctic 9 4362 0.0486 2.04 1.42 2.04
## 5 arome_arctic 12 4526 -0.140 1.58 1.15 1.57
## 6 arome_arctic 15 4364 -0.222 1.56 1.15 1.55
## 7 arome_arctic 18 4544 -0.171 2.47 1.84 2.47
## 8 arome_arctic 21 4315 0.220 3.23 2.36 3.22
## 9 arome_arctic 24 4275 0.603 3.74 2.71 3.69
## 10 arome_arctic 27 4110 0.935 4.14 2.94 4.03
## # … with 24 more rows
##
## $det_threshold_scores
## # A tibble: 306 x 40
## mname leadtime threshold num_cases_for_th… num_cases_for_t… num_cases_for_t…
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 arome… 0 -15 3759 3445 3617
## 2 arome… 3 -15 3506 3172 3355
## 3 arome… 6 -15 3988 3645 3816
## 4 arome… 9 -15 4254 4156 4192
## 5 arome… 12 -15 4476 4456 4451
## 6 arome… 15 -15 4312 4297 4275
## 7 arome… 18 -15 4424 4226 4324
## 8 arome… 21 -15 4092 3745 3950
## 9 arome… 24 -15 3941 3488 3765
## 10 arome… 27 -15 3730 3193 3556
## # … with 296 more rows, and 34 more variables: cont_tab <list>,
## # threat_score <dbl>, hit_rate <dbl>, miss_rate <dbl>,
## # false_alarm_rate <dbl>, false_alarm_ratio <dbl>, heidke_skill_score <dbl>,
## # pierce_skill_score <dbl>, kuiper_skill_score <dbl>, percent_correct <dbl>,
## # frequency_bias <dbl>, equitable_threat_score <dbl>, odds_ratio <dbl>,
## # log_odds_ratio <dbl>, odds_ratio_skill_score <dbl>,
## # extreme_dependency_score <dbl>, symmetric_eds <dbl>,
## # extreme_dependency_index <dbl>, symmetric_edi <dbl>,
## # threat_score_std_error <dbl>, hit_rate_std_error <dbl>,
## # false_alarm_rate_std_error <dbl>, false_alarm_ratio_std_error <dbl>,
## # heidke_skill_score_std_error <dbl>, pierce_skill_score_std_error <dbl>,
## # percent_correct_std_error <dbl>, equitable_threat_score_std_error <dbl>,
## # log_odds_ratio_std_error <dbl>, log_odds_ratio_degrees_of_freedom <dbl>,
## # odds_ratio_skill_score_std_error <dbl>,
## # extreme_dependency_score_std_error <dbl>, symmetric_eds_std_error <dbl>,
## # extreme_dependency_index_std_error <dbl>, symmetric_edi_std_error <dbl>
##
## attr(,"parameter")
## [1] "T2m"
## attr(,"start_date")
## [1] "2018030800"
## attr(,"end_date")
## [1] "2018033100"
## attr(,"num_stations")
## [1] 193
save_point_verif(verif_t2m_det, tempdir())
Again, this works exactly the same, except that you need to add vertical_coordinate = "pressure" when running read_point_forecast() and read_point_obs(), and make sure to group the verification by “p”. Plotting is done with plot_profile_verif() and is not available yet in the app.
As upper air data were interpolated to all stations and not just upper air stations, we use the first day to find out which stations we need.
fcst_t_det <- read_point_forecast(
2018030800,
2018030800,
c("arome_arctic", "MEPS_det"),
"det",
"T",
by = "1d",
vertical_coordinate = "pressure",
file_path = file.path(data_dir, "FCTABLE")
) %>%
common_cases()
obs_t <- read_point_obs(
first_validdate(fcst_t_det),
last_validdate(fcst_t_det),
"T",
vertical_coordinate = "pressure",
stations = pull_stations(fcst_t_det),
obs_path = file.path(data_dir, "OBSTABLE")
)
stations <- pull_stations(join_to_fcst(fcst_t_det, obs_t))
fcst_t_det <- read_point_forecast(
2018030800,
2018033100,
c("arome_arctic", "MEPS_det"),
"det",
"T",
by = "1d",
stations = stations,
vertical_coordinate = "pressure",
file_path = file.path(data_dir, "FCTABLE")
) %>%
common_cases() %>%
scale_point_forecast(-273.15, new_units = "degC")
obs_t <- read_point_obs(
first_validdate(fcst_t_det),
last_validdate(fcst_t_det),
"T",
vertical_coordinate = "pressure",
stations = pull_stations(fcst_t_det),
obs_path = file.path(data_dir, "OBSTABLE")
) %>%
scale_point_obs(T, -273.15, new_units = "degC")
fcst_t_det <- join_to_fcst(fcst_t_det, obs_t)
verif_t_det <- det_verify(
fcst_t_det,
T,
groupings = c("leadtime", "p")
)
verif_t_det
## $det_summary_scores
## # A tibble: 312 x 8
## mname leadtime p num_cases bias rmse mae stde
## <chr> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 arome_arctic 0 50 56 -0.405 0.806 0.608 0.702
## 2 arome_arctic 0 100 89 -0.0651 0.622 0.487 0.622
## 3 arome_arctic 0 150 86 0.0651 0.640 0.479 0.641
## 4 arome_arctic 0 200 86 0.165 0.558 0.397 0.536
## 5 arome_arctic 0 250 86 0.0137 0.497 0.396 0.499
## 6 arome_arctic 0 300 86 -0.105 0.436 0.324 0.425
## 7 arome_arctic 0 400 86 -0.0697 0.317 0.228 0.311
## 8 arome_arctic 0 500 86 -0.0538 0.264 0.183 0.260
## 9 arome_arctic 0 700 87 -0.101 0.360 0.256 0.348
## 10 arome_arctic 0 850 86 -0.0108 0.252 0.184 0.253
## # … with 302 more rows
##
## $det_threshold_scores
## # A tibble: 0 x 0
##
## attr(,"parameter")
## [1] "T"
## attr(,"start_date")
## [1] "2018030800"
## attr(,"end_date")
## [1] "2018033100"
## attr(,"num_stations")
## [1] 4
plot_profile_verif(verif_t_det, bias, facet_by = vars(leadtime))
plot_profile_verif(verif_t_det, mae, facet_by = vars(leadtime))
plot_profile_verif(verif_t_det, stde, facet_by = vars(leadtime))