Visualize marginal effects for binomial models


Exploring association between covariates used for land suitability modelling.

Load packages, read data and source custom scripts

rm(list = ls())
library(tidyr)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
library(purrr)

path_proj <- day2day::git_path()
path_data <- file.path(path_proj, "data")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")
path_uk_1km_suitability <- file.path(path_modelled, "uk-1km-suitability")

source(file.path(path_proj, "src", "42-vis-assoc.R"))

land_cover <- fst::read_fst(file.path(path_processed, "uk_1km_dataframe_train_full.fst"))

Read and join datasets

prefixes <- c("binom_full_simple", "binom_full_interact",
              "binom_maskout_simple", "binom_maskout_interact")
files <- paste("uk_1km_suitability", prefixes, "chess", "20yr-mean-annual_baseline.fst",
               sep = "_") %>%
    file.path(path_uk_1km_suitability, .)
data_type <- stringr::str_to_sentence(gsub("_", " ", prefixes))

land_cover <- map2(files, data_type,
                   ~ mutate(fst::read_fst(.x), data_type = .y)) %>%
    map(~ setNames(., sub(paste0(prefixes, "_", collapse = "|"), "pred", names(.)))) %>%
    map(~ dplyr::left_join(land_cover, .)) %>%
    dplyr::bind_rows()
#> Joining, by = "id"
#> Joining, by = "id"
#> Joining, by = "id"
#> Joining, by = "id"
land_cover <- na.omit(land_cover)

Association with one-dimensional variables

covs <- c("gdd", "max_tas", "max_tasmax", "maxmonth_tas", "min_tas", "min_tasmin",
          "smd", "sms", "elev", "slope_nb8")
responses <- grep("^pred[1-5]", names(land_cover), value = TRUE)
classes <- stringr::str_to_sentence(gsub("_", " ", sub("^pred[0-9]_", "" ,responses)))
map2(responses, classes, ~ assoc_1d(.x, covs, land_cover, .y))
#> [[1]]

#> 
#> [[2]]

#> 
#> [[3]]

#> 
#> [[4]]

#> 
#> [[5]]

Maskout simple: association with two-dimensional variables

covs_2d <- c(tas_x = "min_tas", tas_y = "max_tas", sm_x = "smd", sm_y = "sms")
map2(responses, classes, ~ assoc_2d(.x, covs_2d, land_cover, .y))
#> [[1]]

#> 
#> [[2]]

#> 
#> [[3]]

#> 
#> [[4]]

#> 
#> [[5]]

Time to execute the task

Only useful when executed with Rscript.

proc.time()
#>    user  system elapsed 
#> 693.749  60.781 755.683