SPIFA Urban Ipixuna: summarise results


Visualise and summarise the resulst of CIFA models.

Load required libraries and data

rm(list = ls())
library(day2day)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(mirt)
#> Loading required package: stats4
#> Loading required package: lattice
library(spifa)
library(tidyr)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(purrr)
library(ggplot2)

path_main <- git_path()
path_data <- file.path(path_main, "data")
path_raw <- file.path(path_data, "raw")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")

fidata <- file.path(path_processed, "fi-items-ipixuna-urban.gpkg") |>
    st_read(as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source 
#>   `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg' 
#>   using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS:  WGS 84
samples1 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-1gp.rds"))
samples2 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-2gp.rds"))
samples3 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-3gp.rds"))

iter <- nrow(samples1[["theta"]])
thin <- 10

MCMC convergence

Difficulty parameters

list(samples1, samples2, samples3) |>
    map(~ gather.spifa(as_tibble(.x, 0, 1, select = "c"))) |>
    bind_rows(.id = "Model") |>
    ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
        geom_path(alpha = 0.6, linewidth = rel(0.1)) +
        facet_wrap(~ Model, ncol = 1)

Discrimination parameters

list(samples1, samples2, samples3) |>
    map(~ gather.spifa(as_tibble(.x, 0, 1, select = "a"))) |>
    bind_rows(.id = "Model") |>
    ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
        geom_path(alpha = 0.6, linewidth = rel(0.1)) +
        facet_wrap(~ Model, ncol = 1) +
        theme(legend.position = "none")

Latent abilities

list(samples1, samples2, samples3) |>
    map(~ gather.spifa(select(as_tibble(.x, 0, 1, select = "theta"), 1:10))) |>
    bind_rows(.id = "Model") |>
    ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
        geom_path(alpha = 0.6, linewidth = rel(0.1)) +
        facet_wrap(~ Model, ncol = 1)

Correlation parameters

list(samples1, samples2, samples3) |>
    map(~ gather.spifa(as_tibble(.x, 0, 1, select = "corr"))) |>
    bind_rows(.id = "Model") |>
    ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
        geom_path(alpha = 0.6, linewidth = rel(0.1)) +
        facet_wrap(~ Model, ncol = 1)

Gaussian processes standard deviations

list(samples1, samples2, samples3) |>
    map(~ gather.spifa(as_tibble(.x, 0, 1, select = "mgp_sd"))) |>
    bind_rows(.id = "Model") |>
    ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
        geom_path(alpha = 0.6, linewidth = rel(0.1)) +
        facet_wrap(~ Model, ncol = 1)

Gaussian processes scale parameters

list(samples1, samples2, samples3) |>
    map(~ gather.spifa(as_tibble(.x, 0, 1, select = "mgp_phi"))) |>
    bind_rows(.id = "Model") |>
    ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
        geom_path(alpha = 0.6, linewidth = rel(0.1)) +
        facet_wrap(~ Model, ncol = 1)

Credible intervals

Discrimination parameters

list(samples1, samples2, samples3) |>
    map(~ summary(as_tibble(.x, iter/2, select = "a"))) |>
    bind_rows(.id = "Model") |>
    gg_errorbarh(sorted = FALSE) +
    facet_wrap(~ Model)

Difficulty parameters

list(samples1, samples2, samples3) |>
    map(~ summary(as_tibble(.x, iter/2, select = "c"))) |>
    bind_rows(.id = "Model") |>
    gg_errorbarh(sorted = FALSE) +
    facet_wrap(~ Model)

Analysis of latent abilities

Compute summaries for the latent abilities.

theta_summary <- list(samples1, samples2, samples3) |>
    map(~ summary(as_tibble(.x, iter/2, select = "theta"))) |>
    bind_rows(.id = "Model") |>
  tidyr::extract(Parameters, c("subject", "factor"),
                 "\\[([[:digit:]]+),([[:digit:]]+)\\]", convert = TRUE)
theta_median <- theta_summary |>
    dplyr::select(Model, subject, factor, `50%`) |>
    pivot_wider(names_from = c(Model, factor), values_from = `50%`, names_prefix = "M") |>
    dplyr::select(-subject)

factorsdata <- fidata |>
    bind_cols(theta_median) |>
    dplyr::select(registro, any_children, matches("^M[1-3]")) |>
    tidyr::pivot_longer(matches("^M[1-3]"), names_to = c("Model", "factor_label"),
        names_pattern = "M([1-3]+)_([1-3]+)$", values_to = "factor_value") |>
    dplyr::mutate(Model = paste0("M", Model)) |>
    dplyr::mutate(factor_label = paste0("F", factor_label))

Empirical distribution of latent factors medians

factorsdata |>
    ggplot(aes(x = factor_value, y = after_stat(density), fill)) +
        geom_histogram(binwidth = 0.2) +
        facet_grid(Model ~ factor_label) +
        labs(x = "latent factor")

factorsdata |>
    ggplot(aes(x = factor_value, y = after_stat(density))) +
        geom_histogram(aes(fill = any_children), position = "dodge", bins = 10) +
        facet_grid(Model ~ factor_label, scales = "free") +
        theme(legend.position = "bottom") +
        labs(x = "latent factor", fill = "")

Spatial Distribution of each factor

factorsdata |>
    st_jitter(factor = 0.02) |>
    ggplot() +
        geom_sf(aes(col = factor_value), size = 1) +
        facet_grid(Model ~ factor_label) +
        scale_color_distiller(palette = "RdBu", direction = -1) +
        theme_bw(base_size = 10) +
        theme(panel.background = element_rect(fill = 'gray10'),
              panel.grid.major = element_line(color = 'gray30'),
              panel.grid.minor = element_line(color = 'green', linewidth = 2),
              legend.position = "bottom",
              axis.text.x = element_text(size = rel(0.8)),
              axis.text.y = element_text(size = rel(0.8))) +
        labs(col = "")

Variogram of latent factors

Computing extent of coordinates.

extent <- fidata |>
  st_distance() |>
  max() |>
  as.numeric()

Computing variograms for each factor.

factorsdata[c("longitude_3857", "latitude_3857")] <-
    st_coordinates(st_transform(factorsdata, 3857))

gg_variog <- factorsdata |>
  st_set_geometry(NULL) |>
  group_by(Model, factor_label) |>
  tidyr::nest() |>
  mutate(variogs = map(data,
    ~ gstat::variogram(factor_value ~ 1, ~ longitude_3857 + latitude_3857, .,
                       cutoff = extent * 0.7, width = extent / 200)
    )
  ) |>
  dplyr::select(-data) |>
  tidyr::unnest(col = variogs)

Visualizing variograms per factor.

gg_variog |>
  ggplot(aes(dist, gamma)) +
    # geom_point(aes(size = np)) +
    geom_point(aes(alpha = np)) +
    geom_smooth() +
    # expand_limits(y = 0, x = 0) +
    scale_x_continuous(limits = c(0, extent * 0.5)) +
    facet_grid(Model ~ factor_label, scales = "free_y") +
    labs(x = "distance", y = "semi-variogram") +
    theme(legend.position = "bottom")
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> Warning: Removed 360 rows containing non-finite outside the scale range (`stat_smooth()`).
#> Warning: Removed 360 rows containing missing values or values outside the scale range
#> (`geom_point()`).

Time to execute the task

Only useful when executed with Rscript.

proc.time()
#>    user  system elapsed 
#>  85.115   4.558 118.591