Binomial models: full data without interaction
Running binomial models for count data, for each land class.
Load packages, read data and source custom scripts
rm(list = ls())
library(mgcv)
#> Loading required package: nlme
#> This is mgcv 1.8-31. For overview type 'help("mgcv-package")'.
library(purrr)
library(tibble)
path_proj <- day2day::git_path()
path_data <- file.path(path_proj, "data")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")
land_cover <- file.path(path_processed, "uk_1km_dataframe_train_full.fst") %>%
fst::read_fst()
prefix <- "binom_full_simple"
output_name <- paste0(sub("binom", "model_binomials", prefix), ".rds")
run_model <- TRUE
Running binomial models
Defining formulas for 6 classes of land cover.
form <- ~ s(gdd) + s(min_tas) + s(max_tas) + s(smd) + s(sms)
form1 <- update(
form,
cbind(count_1_arable, count_total - count_1_arable) ~ . + s(elev) + s(slope_nb8)
)
form2 <- update(
form,
cbind(count_2_wetland, count_total - count_2_wetland) ~ . + s(elev) + s(slope_nb8)
)
form3 <- update(
form,
cbind(count_3_improved_grassland, count_total - count_3_improved_grassland) ~ .
)
form4 <- update(
form,
cbind(count_4_forest, count_total - count_4_forest) ~ . + s(elev) + s(slope_nb8)
)
form5 <- update(
form,
cbind(count_5_semi_natural_grassland, count_total - count_5_semi_natural_grassland) ~ .
)
# form6 <- update(
# form,
# cbind(count_6_urban, count_total - count_6_urban) ~ s(dist) + s(pop) + s(gdhi)
# )
Define model data frame.
form_names <- grep("form[[:digit:]]", ls(), value = TRUE)
make_model_name <- function(x, prefix) sub("count", prefix, all.vars(get(x))[1])
model_names <- sapply(form_names, make_model_name, prefix = prefix)
model_df <- tibble::tibble(
name = model_names,
form = map(form_names, get)
)
Run models.
if (run_model) {
ini <- Sys.time()
model_df$model <- map(model_df$form, ~ gam(., family = binomial(), data = land_cover))
print(Sys.time() - ini)
} else {
model_df <- readRDS(file.path(path_modelled, output_name))
}
#> Time difference of 1.903525 hours
Save model
Using compress = FALSE
for faster readRDS
and saveRDS.
model_df
#> # A tibble: 5 x 3
#> name form model
#> <chr> <list> <list>
#> 1 binom_full_simple_1_arable <formula> <gam>
#> 2 binom_full_simple_2_wetland <formula> <gam>
#> 3 binom_full_simple_3_improved_grassland <formula> <gam>
#> 4 binom_full_simple_4_forest <formula> <gam>
#> 5 binom_full_simple_5_semi_natural_grassland <formula> <gam>
saveRDS(model_df, file = file.path(path_modelled, output_name), compress = FALSE)
Time to execute the task
Only useful when executed with Rscript
.
proc.time()
#> user system elapsed
#> 5602.881 778.108 6862.902