UKCP18 SPEED bias-corrected suitability distribution: histograms


Load packages, read data and source custom scripts

rm(list = ls())
library(tidyr)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
library(purrr)

path_proj <- day2day::git_path()
path_data <- file.path(path_proj, "data")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")
path_uk_1km_suitability <- file.path(path_modelled, "uk-1km-suitability")

source(file.path(path_proj, "src", "63-tidy-names.R"))
source(file.path(path_proj, "src", "64-read-sce.R"))

speed_tag <- "bias_corrected_01"
prefix <- c("binom_maskout_simple", "binom_maskout_interact")
prefix_regex <- paste0("(", paste0(prefix, collapse = "|"), ")")
sce_type_regex <- paste0("(", paste("ukcp18-speed_rcp\\d+", speed_tag, sep = "_"),
                          "|chess)")
regex <- paste("^uk_1km_suitability", prefix_regex, sce_type_regex,
               "20yr-mean-annual", "(.+)\\.fst$", sep = "_")

Prepare dataset for histograms

suitability_files <- list.files(path_uk_1km_suitability, regex, full.names = TRUE)

data_plot <- tibble::tibble(file = basename(suitability_files)) %>%
    tidyr::extract(file, c("model", "sce-type", "period"), remove = FALSE, regex) %>%
    tidyr::extract(file, c("rcp"), remove = FALSE, "ukcp18-speed_(rcp\\d+)") %>%
    dplyr::mutate(period = sub("(\\d{4})\\d*-(\\d{4})\\d*", "\\1-\\2", period)) %>%
    within(period[period == "baseline"] <- "1991-2011 (predicted)") %>%
    dplyr::mutate(
        data = map2(file, model,
                    ~ read_sce1(file.path(path_uk_1km_suitability, .x), .y, TRUE, FALSE))
    )
rcps <- unique(data_plot$rcp[!is.na(data_plot$rcp)])


data_observed <- fst::read_fst(file.path(path_processed, "uk_1km_dataframe_train_full.fst")) %>%
    dplyr::mutate_at(vars(matches("^count_[0-9]")), ~ . / count_no0) %>%
    dplyr::select(id, matches("^count_[1-5]")) %>%
    dplyr::rename_with(~ sub("^count_", "", .))

model_unique <- unique(data_plot$model)
data_observed <- tibble::tibble(
    file = "uk_1km_dataframe_train_full.fst", rcp = NA, model = model_unique,
    `sce-type` = NA, period = "1991-2011 (observed)",
    data = rep(list(data_observed), length(model_unique))
)

data_plot <- bind_rows(data_observed, data_plot) %>%
    dplyr::mutate(model = gsub("_", " ", stringr::str_to_sentence(model))) %>%
    tidyr::unnest(data) %>%
    dplyr::rename_with(~ sub("^", "class_", .), matches("^[0-9]"))

land_classes <- grep("^class_", names(data_plot), value = TRUE)

Custom function to make histograms

hist_trend <- function (data, var, binwidth = 0.05) {
    gg <- ggplot(data) +
        geom_histogram(aes_string(x = var, y = paste0("..density..*", binwidth)),
                       binwidth = binwidth, boundary = TRUE,
                       fill = "lightblue", col = "black", size = rel(0.15)) +
        facet_grid(period ~ model) +
        labs(y = NULL, x = "Proportion") +
        scale_y_continuous(labels = scales::percent) +
        scale_x_continuous(limits = c(0, 1)) +
        theme_bw()
    print(gg)
}

Visualize suitability histograms across scenatios

for (land_class in land_classes) {
    cat("\n\n")
    cat(paste0("## ", tidy_make_classes(get_land_class(land_class), 2), "\n\n"))
    for (rcp in rcps) {
        cat("\n\n")
        cat(paste0("### ", toupper(rcp), "\n\n"))
        data_hist <- data_plot[data_plot$rcp == rcp | is.na(data_plot$rcp), ]
        hist_trend(data_hist, land_class)
    }
}

(1) Arable

RCP26

#> Warning: Removed 324612 rows containing non-finite values (stat_bin).

RCP45

#> Warning: Removed 324612 rows containing non-finite values (stat_bin).

RCP60

#> Warning: Removed 324612 rows containing non-finite values (stat_bin).

RCP85

#> Warning: Removed 324612 rows containing non-finite values (stat_bin).

(2) Wetland

RCP26

#> Warning: Removed 324612 rows containing non-finite values (stat_bin).

RCP45

#> Warning: Removed 324612 rows containing non-finite values (stat_bin).

RCP60

#> Warning: Removed 324612 rows containing non-finite values (stat_bin).

RCP85

#> Warning: Removed 324612 rows containing non-finite values (stat_bin).

(3) Improved grassland

RCP26

#> Warning: Removed 299898 rows containing non-finite values (stat_bin).

RCP45

#> Warning: Removed 299898 rows containing non-finite values (stat_bin).

RCP60

#> Warning: Removed 299898 rows containing non-finite values (stat_bin).

RCP85

#> Warning: Removed 299898 rows containing non-finite values (stat_bin).

(4) Forest

RCP26

#> Warning: Removed 324612 rows containing non-finite values (stat_bin).

RCP45

#> Warning: Removed 324612 rows containing non-finite values (stat_bin).

RCP60

#> Warning: Removed 324612 rows containing non-finite values (stat_bin).

RCP85

#> Warning: Removed 324612 rows containing non-finite values (stat_bin).

(5) Semi natural grassland

RCP26

#> Warning: Removed 299898 rows containing non-finite values (stat_bin).

RCP45

#> Warning: Removed 299898 rows containing non-finite values (stat_bin).

RCP60

#> Warning: Removed 299898 rows containing non-finite values (stat_bin).

RCP85

#> Warning: Removed 299898 rows containing non-finite values (stat_bin).

Time to execute the task

Only useful when executed with Rscript.

proc.time()
#>     user   system  elapsed 
#> 1463.775   76.073 1542.072