SPIFA Urban Ipixuna: summarise results
Visualise and summarise the resulst of CIFA models.
Load required libraries and data
rm(list = ls())
library(day2day)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(mirt)
#> Loading required package: stats4
#> Loading required package: lattice
library(spifa)
library(tidyr)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(purrr)
library(ggplot2)
path_main <- git_path()
path_data <- file.path(path_main, "data")
path_raw <- file.path(path_data, "raw")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")
fidata <- file.path(path_processed, "fi-items-ipixuna-urban.gpkg") |>
st_read(as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source
#> `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg'
#> using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS: WGS 84
samples1 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-1gp.rds"))
samples2 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-2gp.rds"))
samples3 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-3gp.rds"))
iter <- nrow(samples1[["theta"]])
thin <- 10
MCMC convergence
Difficulty parameters
list(samples1, samples2, samples3) |>
map(~ gather.spifa(as_tibble(.x, 0, 1, select = "c"))) |>
bind_rows(.id = "Model") |>
ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
geom_path(alpha = 0.6, linewidth = rel(0.1)) +
facet_wrap(~ Model, ncol = 1)
Discrimination parameters
list(samples1, samples2, samples3) |>
map(~ gather.spifa(as_tibble(.x, 0, 1, select = "a"))) |>
bind_rows(.id = "Model") |>
ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
geom_path(alpha = 0.6, linewidth = rel(0.1)) +
facet_wrap(~ Model, ncol = 1) +
theme(legend.position = "none")
Latent abilities
list(samples1, samples2, samples3) |>
map(~ gather.spifa(select(as_tibble(.x, 0, 1, select = "theta"), 1:10))) |>
bind_rows(.id = "Model") |>
ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
geom_path(alpha = 0.6, linewidth = rel(0.1)) +
facet_wrap(~ Model, ncol = 1)
Correlation parameters
list(samples1, samples2, samples3) |>
map(~ gather.spifa(as_tibble(.x, 0, 1, select = "corr"))) |>
bind_rows(.id = "Model") |>
ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
geom_path(alpha = 0.6, linewidth = rel(0.1)) +
facet_wrap(~ Model, ncol = 1)
Gaussian processes standard deviations
list(samples1, samples2, samples3) |>
map(~ gather.spifa(as_tibble(.x, 0, 1, select = "mgp_sd"))) |>
bind_rows(.id = "Model") |>
ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
geom_path(alpha = 0.6, linewidth = rel(0.1)) +
facet_wrap(~ Model, ncol = 1)
Gaussian processes scale parameters
list(samples1, samples2, samples3) |>
map(~ gather.spifa(as_tibble(.x, 0, 1, select = "mgp_phi"))) |>
bind_rows(.id = "Model") |>
ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
geom_path(alpha = 0.6, linewidth = rel(0.1)) +
facet_wrap(~ Model, ncol = 1)
Credible intervals
Discrimination parameters
list(samples1, samples2, samples3) |>
map(~ summary(as_tibble(.x, iter/2, select = "a"))) |>
bind_rows(.id = "Model") |>
gg_errorbarh(sorted = FALSE) +
facet_wrap(~ Model)
Difficulty parameters
list(samples1, samples2, samples3) |>
map(~ summary(as_tibble(.x, iter/2, select = "c"))) |>
bind_rows(.id = "Model") |>
gg_errorbarh(sorted = FALSE) +
facet_wrap(~ Model)
Analysis of latent abilities
Compute summaries for the latent abilities.
theta_summary <- list(samples1, samples2, samples3) |>
map(~ summary(as_tibble(.x, iter/2, select = "theta"))) |>
bind_rows(.id = "Model") |>
tidyr::extract(Parameters, c("subject", "factor"),
"\\[([[:digit:]]+),([[:digit:]]+)\\]", convert = TRUE)
theta_median <- theta_summary |>
dplyr::select(Model, subject, factor, `50%`) |>
pivot_wider(names_from = c(Model, factor), values_from = `50%`, names_prefix = "M") |>
dplyr::select(-subject)
factorsdata <- fidata |>
bind_cols(theta_median) |>
dplyr::select(registro, any_children, matches("^M[1-3]")) |>
tidyr::pivot_longer(matches("^M[1-3]"), names_to = c("Model", "factor_label"),
names_pattern = "M([1-3]+)_([1-3]+)$", values_to = "factor_value") |>
dplyr::mutate(Model = paste0("M", Model)) |>
dplyr::mutate(factor_label = paste0("F", factor_label))
Empirical distribution of latent factors medians
factorsdata |>
ggplot(aes(x = factor_value, y = after_stat(density), fill)) +
geom_histogram(binwidth = 0.2) +
facet_grid(Model ~ factor_label) +
labs(x = "latent factor")
factorsdata |>
ggplot(aes(x = factor_value, y = after_stat(density))) +
geom_histogram(aes(fill = any_children), position = "dodge", bins = 10) +
facet_grid(Model ~ factor_label, scales = "free") +
theme(legend.position = "bottom") +
labs(x = "latent factor", fill = "")
Spatial Distribution of each factor
factorsdata |>
st_jitter(factor = 0.02) |>
ggplot() +
geom_sf(aes(col = factor_value), size = 1) +
facet_grid(Model ~ factor_label) +
scale_color_distiller(palette = "RdBu", direction = -1) +
theme_bw(base_size = 10) +
theme(panel.background = element_rect(fill = 'gray10'),
panel.grid.major = element_line(color = 'gray30'),
panel.grid.minor = element_line(color = 'green', linewidth = 2),
legend.position = "bottom",
axis.text.x = element_text(size = rel(0.8)),
axis.text.y = element_text(size = rel(0.8))) +
labs(col = "")
Variogram of latent factors
Computing extent of coordinates.
extent <- fidata |>
st_distance() |>
max() |>
as.numeric()
Computing variograms for each factor.
factorsdata[c("longitude_3857", "latitude_3857")] <-
st_coordinates(st_transform(factorsdata, 3857))
gg_variog <- factorsdata |>
st_set_geometry(NULL) |>
group_by(Model, factor_label) |>
tidyr::nest() |>
mutate(variogs = map(data,
~ gstat::variogram(factor_value ~ 1, ~ longitude_3857 + latitude_3857, .,
cutoff = extent * 0.7, width = extent / 200)
)
) |>
dplyr::select(-data) |>
tidyr::unnest(col = variogs)
Visualizing variograms per factor.
gg_variog |>
ggplot(aes(dist, gamma)) +
# geom_point(aes(size = np)) +
geom_point(aes(alpha = np)) +
geom_smooth() +
# expand_limits(y = 0, x = 0) +
scale_x_continuous(limits = c(0, extent * 0.5)) +
facet_grid(Model ~ factor_label, scales = "free_y") +
labs(x = "distance", y = "semi-variogram") +
theme(legend.position = "bottom")
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> Warning: Removed 360 rows containing non-finite outside the scale range (`stat_smooth()`).
#> Warning: Removed 360 rows containing missing values or values outside the scale range
#> (`geom_point()`).
Time to execute the task
Only useful when executed with Rscript
.
proc.time()
#> user system elapsed
#> 85.115 4.558 118.591