SPEED bias-corrected suitability distribution: histograms
Load packages, read data and source custom scripts
rm(list = ls())
library(tidyr)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggplot2)
library(purrr)
path_proj <- day2day::git_path()
path_data <- file.path(path_proj, "data")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")
source(file.path(path_proj, "src", "63-tidy-names.R"))
source(file.path(path_proj, "src", "64-read-sce.R"))
prefix <- c("binom_maskout_simple", "binom_maskout_interact")
prefix_regex <- paste0("(", paste0(prefix, collapse = "|"), ")")
Prepare dataset for histograms
suitability_files <- paste0("uk_1km_suitability_(", prefix_regex, ")_20yr-mean-annual") %>%
list.files(path_modelled, ., full.names = TRUE)
data_plot <- tibble::tibble(file = basename(suitability_files)) %>%
tidyr::extract(file, c("model", "period"), remove = FALSE,
"uk_1km_suitability_(.+)_20yr-mean-annual_?(.*)\\.fst") %>%
dplyr::mutate(period = sub("([0-9]{4}).*-([0-9]{4}).*", "\\1-\\2", period)) %>%
within(period[period == ""] <- "1991-2011 (predicted)") %>%
dplyr::mutate(data = map2(file, model, ~ read_sce1(file.path(path_modelled, .x), .y, TRUE, FALSE)))
data_observed <- fst::read_fst(file.path(path_processed, "uk_1km_dataframe_train_full.fst")) %>%
dplyr::mutate_at(vars(matches("^count_[0-9]")), ~ . / count_no0) %>%
dplyr::select(id, matches("^count_[1-5]")) %>%
dplyr::rename_with(~ sub("^count_", "", .))
data_observed <- tibble::tibble(
file = "uk_1km_dataframe_train_full.fst", model = prefix,
period = "1991-2011 (observed)", data = rep(list(data_observed), length(prefix))
)
data_plot <- bind_rows(data_observed, data_plot) %>%
dplyr::mutate(model = gsub("_", " ", stringr::str_to_sentence(model))) %>%
tidyr::unnest(data) %>%
dplyr::rename_with(~ sub("^", "class_", .), matches("^[0-9]"))
land_classes <- grep("^class_", names(data_plot), value = TRUE)
Make function and visualize histogram
# custom function
hist_trend <- function (data, var, binwidth = 0.05) {
gg <- ggplot(data) +
geom_histogram(aes_string(x = var, y = paste0("..density..*", binwidth)),
binwidth = binwidth, boundary = TRUE,
fill = "lightblue", col = "black", size = rel(0.15)) +
facet_grid(period ~ model) +
labs(y = NULL, x = "Proportion") +
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(limits = c(0, 1)) +
theme_bw()
cat("\n\n")
cat(paste0("## ", tidy_make_classes(get_land_class(var), 2), "\n\n"))
print(gg)
}
# make histogram
walk(land_classes, ~ hist_trend(data_plot, .))
(1) Arable
#> Warning: Removed 324612 rows containing non-finite values (stat_bin).
(2) Wetland
#> Warning: Removed 324612 rows containing non-finite values (stat_bin).
(3) Improved grassland
#> Warning: Removed 299898 rows containing non-finite values (stat_bin).
(4) Forest
#> Warning: Removed 324612 rows containing non-finite values (stat_bin).
(5) Semi natural grassland
#> Warning: Removed 299898 rows containing non-finite values (stat_bin).
Time to execute the task
Only useful when executed with Rscript
.
proc.time()
#> user system elapsed
#> 151.573 7.292 159.306