SPIFA Urban Ipixuna Season: summarise results
Visualise and summarise the results of SPIFA models.
Load required libraries and data
rm(list = ls())
library(day2day)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(mirt)
#> Loading required package: stats4
#> Loading required package: lattice
library(spifa)
library(tidyr)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(purrr)
library(ggplot2)
path_main <- git_path()
path_data <- file.path(path_main, "data")
path_raw <- file.path(path_data, "raw")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")
fidata <- file.path(path_processed, "fi-items-ipixuna-urban.gpkg") |>
st_read(as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source
#> `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg'
#> using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS: WGS 84
samples1 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-dry-1gp.rds"))
samples2 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-wet-1gp.rds"))
samples3 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-dry-2gp.rds"))
samples4 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-wet-2gp.rds"))
samples5 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-dry-3gp.rds"))
samples6 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-wet-3gp-restricted.rds"))
iter <- nrow(samples1[["theta"]])
thin <- 10
MCMC convergence
Difficulty parameters
Top plots are dry season and bottom plots are wet season.
list(samples1, samples3, samples5, samples2, samples4, samples6) |>
map(~ gather.spifa(as_tibble(.x, 0, 1, select = "c"))) |>
bind_rows(.id = "Model") |>
ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
geom_path(alpha = 0.6, linewidth = rel(0.1)) +
facet_wrap(~ Model)
Discrimination parameters
list(samples1, samples3, samples5, samples2, samples4, samples6) |>
map(~ gather.spifa(as_tibble(.x, 0, 1, select = "a"))) |>
bind_rows(.id = "Model") |>
ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
geom_path(alpha = 0.6, linewidth = rel(0.1)) +
facet_wrap(~ Model) +
theme(legend.position = "none")
Latent abilities
list(samples1, samples3, samples5, samples2, samples4, samples6) |>
map(~ gather.spifa(select(as_tibble(.x, 0, 1, select = "theta"), 1:10))) |>
bind_rows(.id = "Model") |>
ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
geom_path(alpha = 0.6, linewidth = rel(0.1)) +
facet_wrap(~ Model)
Correlation parameters
list(samples1, samples3, samples5, samples2, samples4, samples6) |>
map(~ gather.spifa(as_tibble(.x, 0, 1, select = "corr"))) |>
bind_rows(.id = "Model") |>
ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
geom_path(alpha = 0.6, linewidth = rel(0.1)) +
facet_wrap(~ Model)
Gaussian processes standard deviations
list(samples1, samples3, samples5, samples2, samples4, samples6) |>
map(~ gather.spifa(as_tibble(.x, 0, 1, select = "mgp_sd"))) |>
bind_rows(.id = "Model") |>
ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
geom_path(alpha = 0.6, linewidth = rel(0.1)) +
facet_wrap(~ Model)
Gaussian processes scale parameters
list(samples1, samples3, samples5, samples2, samples4, samples6) |>
map(~ gather.spifa(as_tibble(.x, 0, 1, select = "mgp_phi"))) |>
bind_rows(.id = "Model") |>
ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
geom_path(alpha = 0.6, linewidth = rel(0.1)) +
facet_wrap(~ Model)
Credible intervals
Discrimination parameters
list(samples1, samples3, samples5, samples2, samples4, samples6) |>
map(~ summary(as_tibble(.x, iter/2, select = "a"))) |>
bind_rows(.id = "Model") |>
gg_errorbarh(sorted = FALSE) +
facet_wrap(~ Model)
Difficulty parameters
list(samples1, samples3, samples5, samples2, samples4, samples6) |>
map(~ summary(as_tibble(.x, iter/2, select = "c"))) |>
bind_rows(.id = "Model") |>
gg_errorbarh(sorted = FALSE) +
geom_vline(xintercept = 0, linetype = "dashed") +
facet_wrap(~ Model) +
theme_bw() +
theme(legend.position = "bottom")
Analysis of latent abilities
Compute summaries for the latent abilities (dry)
theta_summary_dry <- as_tibble(samples5, iter/thin/2, select = "theta") |>
summary() |>
tidyr::extract(Parameters, c("subject", "factor"),
"\\[([[:digit:]]+),([[:digit:]]+)\\]", convert = TRUE)
theta_median_dry <- theta_summary_dry |>
dplyr::select(subject, factor, `50%`) |>
pivot_wider(names_from = factor, values_from = `50%`, names_prefix = "F") |>
dplyr::select(-subject)
factorsdata_dry <- filter(fidata, season == "dry") |>
bind_cols(theta_median_dry) |>
dplyr::select(registro, any_children, matches("^F[1-3]$")) |>
tidyr::pivot_longer(matches("^F[1-3]$"), names_to = "factor_label",
values_to = "factor_value")
Compute summaries for the latent abilities (wet)
theta_summary_wet <- as_tibble(samples6, iter/thin/2, select = "theta") |>
summary() |>
tidyr::extract(Parameters, c("subject", "factor"),
"\\[([[:digit:]]+),([[:digit:]]+)\\]", convert = TRUE)
theta_median_wet <- theta_summary_wet |>
dplyr::select(subject, factor, `50%`) |>
pivot_wider(names_from = factor, values_from = `50%`, names_prefix = "F") |>
dplyr::select(-subject)
factorsdata_wet <- filter(fidata, season == "wet") |>
bind_cols(theta_median_wet) |>
dplyr::select(registro, any_children, matches("^F[1-3]$")) |>
tidyr::pivot_longer(matches("^F[1-3]$"), names_to = "factor_label",
values_to = "factor_value")
Spatial Distribution of each factor (dry)
factorsdata_dry |>
st_jitter(factor = 0.02) |>
ggplot() +
geom_sf(aes(col = factor_value), size = 1) +
facet_wrap(~ factor_label) +
scale_color_distiller(palette = "RdBu", direction = -1) +
theme_bw(base_size = 10) +
theme(panel.background = element_rect(fill = 'gray10'),
panel.grid.major = element_line(color = 'gray30'),
panel.grid.minor = element_line(color = 'green', linewidth = 2),
legend.position = "bottom",
axis.text.x = element_text(size = rel(0.8)),
axis.text.y = element_text(size = rel(0.8))) +
labs(col = "")
Spatial Distribution of each factor (wet)
factorsdata_wet |>
st_jitter(factor = 0.02) |>
ggplot() +
geom_sf(aes(col = factor_value), size = 1) +
facet_wrap(~ factor_label) +
scale_color_distiller(palette = "RdBu", direction = -1) +
theme_bw(base_size = 10) +
theme(panel.background = element_rect(fill = 'gray10'),
panel.grid.major = element_line(color = 'gray30'),
panel.grid.minor = element_line(color = 'green', linewidth = 2),
legend.position = "bottom",
axis.text.x = element_text(size = rel(0.8)),
axis.text.y = element_text(size = rel(0.8))) +
labs(col = "")
Variogram of latent factors
Computing extent of coordinates.
extent <- fidata |>
st_distance() |>
max() |>
as.numeric()
Computing variograms for each factor.
factorsdata_dry[c("longitude_3857", "latitude_3857")] <-
st_coordinates(st_transform(factorsdata_dry, 3857))
factorsdata_wet[c("longitude_3857", "latitude_3857")] <-
st_coordinates(st_transform(factorsdata_wet, 3857))
gg_variog_dry <- factorsdata_dry |>
st_set_geometry(NULL) |>
group_by(factor_label) |>
tidyr::nest() |>
mutate(variogs = map(data,
~ gstat::variogram(factor_value ~ 1, ~ longitude_3857 + latitude_3857, .,
cutoff = extent * 0.7, width = extent / 200)
)
) |>
dplyr::select(-data) |>
tidyr::unnest(col = variogs)
gg_variog_wet <- factorsdata_wet |>
st_set_geometry(NULL) |>
group_by(factor_label) |>
tidyr::nest() |>
mutate(variogs = map(data,
~ gstat::variogram(factor_value ~ 1, ~ longitude_3857 + latitude_3857, .,
cutoff = extent * 0.7, width = extent / 200)
)
) |>
dplyr::select(-data) |>
tidyr::unnest(col = variogs)
Visualizing variograms per factor.
gg_variog_dry |>
ggplot(aes(dist, gamma)) +
# geom_point(aes(size = np)) +
geom_point(aes(alpha = np)) +
geom_smooth() +
expand_limits(y = 0, x = 0) +
scale_x_continuous(limits = c(0, extent * 0.5)) +
facet_wrap(~factor_label, ncol = 3, scales = "free_y") +
labs(x = "distance", y = "semi-variogram") +
theme(legend.position = "bottom")
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> Warning: Removed 120 rows containing non-finite outside the scale range (`stat_smooth()`).
#> Warning: Removed 120 rows containing missing values or values outside the scale range
#> (`geom_point()`).
gg_variog_wet |>
ggplot(aes(dist, gamma)) +
# geom_point(aes(size = np)) +
geom_point(aes(alpha = np)) +
geom_smooth() +
expand_limits(y = 0, x = 0) +
scale_x_continuous(limits = c(0, extent * 0.5)) +
facet_wrap(~factor_label, ncol = 3, scales = "free_y") +
labs(x = "distance", y = "semi-variogram") +
theme(legend.position = "bottom")
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> Warning: Removed 120 rows containing non-finite outside the scale range (`stat_smooth()`).
#> Warning: Removed 120 rows containing missing values or values outside the scale range
#> (`geom_point()`).
Time to execute the task
Only useful when executed with Rscript
.
proc.time()
#> user system elapsed
#> 106.881 4.588 137.104