SPIFA Urban Ipixuna Season: summarise results


Visualise and summarise the results of SPIFA models.

Load required libraries and data

rm(list = ls())
library(day2day)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(mirt)
#> Loading required package: stats4
#> Loading required package: lattice
library(spifa)
library(tidyr)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(purrr)
library(ggplot2)

path_main <- git_path()
path_data <- file.path(path_main, "data")
path_raw <- file.path(path_data, "raw")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")

fidata <- file.path(path_processed, "fi-items-ipixuna-urban.gpkg") |>
    st_read(as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source 
#>   `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg' 
#>   using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS:  WGS 84
samples1 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-dry-1gp.rds"))
samples2 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-wet-1gp.rds"))
samples3 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-dry-2gp.rds"))
samples4 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-wet-2gp.rds"))
samples5 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-dry-3gp.rds"))
samples6 <- readRDS(file.path(path_modelled, "spifa-ipixuna-urban-wet-3gp-restricted.rds"))

iter <- nrow(samples1[["theta"]])
thin <- 10

MCMC convergence

Difficulty parameters

Top plots are dry season and bottom plots are wet season.

list(samples1, samples3, samples5, samples2, samples4, samples6) |>
    map(~ gather.spifa(as_tibble(.x, 0, 1, select = "c"))) |>
    bind_rows(.id = "Model") |>
    ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
        geom_path(alpha = 0.6, linewidth = rel(0.1)) +
        facet_wrap(~ Model)

Discrimination parameters

list(samples1, samples3, samples5, samples2, samples4, samples6) |>
    map(~ gather.spifa(as_tibble(.x, 0, 1, select = "a"))) |>
    bind_rows(.id = "Model") |>
    ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
        geom_path(alpha = 0.6, linewidth = rel(0.1)) +
        facet_wrap(~ Model) +
        theme(legend.position = "none")

Latent abilities

list(samples1, samples3, samples5, samples2, samples4, samples6) |>
    map(~ gather.spifa(select(as_tibble(.x, 0, 1, select = "theta"), 1:10))) |>
    bind_rows(.id = "Model") |>
    ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
        geom_path(alpha = 0.6, linewidth = rel(0.1)) +
        facet_wrap(~ Model)

Correlation parameters

list(samples1, samples3, samples5, samples2, samples4, samples6) |>
    map(~ gather.spifa(as_tibble(.x, 0, 1, select = "corr"))) |>
    bind_rows(.id = "Model") |>
    ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
        geom_path(alpha = 0.6, linewidth = rel(0.1)) +
        facet_wrap(~ Model)

Gaussian processes standard deviations

list(samples1, samples3, samples5, samples2, samples4, samples6) |>
    map(~ gather.spifa(as_tibble(.x, 0, 1, select = "mgp_sd"))) |>
    bind_rows(.id = "Model") |>
    ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
        geom_path(alpha = 0.6, linewidth = rel(0.1)) +
        facet_wrap(~ Model)

Gaussian processes scale parameters

list(samples1, samples3, samples5, samples2, samples4, samples6) |>
    map(~ gather.spifa(as_tibble(.x, 0, 1, select = "mgp_phi"))) |>
    bind_rows(.id = "Model") |>
    ggplot(aes(iteration, Value, group = Parameters, col = Parameters)) +
        geom_path(alpha = 0.6, linewidth = rel(0.1)) +
        facet_wrap(~ Model)

Credible intervals

Discrimination parameters

list(samples1, samples3, samples5, samples2, samples4, samples6) |>
    map(~ summary(as_tibble(.x, iter/2, select = "a"))) |>
    bind_rows(.id = "Model") |>
    gg_errorbarh(sorted = FALSE) +
    facet_wrap(~ Model)

Difficulty parameters

list(samples1, samples3, samples5, samples2, samples4, samples6) |>
    map(~ summary(as_tibble(.x, iter/2, select = "c"))) |>
    bind_rows(.id = "Model") |>
    gg_errorbarh(sorted = FALSE) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    facet_wrap(~ Model) +
    theme_bw() +
    theme(legend.position = "bottom")

Analysis of latent abilities

Compute summaries for the latent abilities (dry)

theta_summary_dry <- as_tibble(samples5, iter/thin/2, select = "theta") |>
  summary() |>
  tidyr::extract(Parameters, c("subject", "factor"),
                 "\\[([[:digit:]]+),([[:digit:]]+)\\]", convert = TRUE)
theta_median_dry <- theta_summary_dry |>
    dplyr::select(subject, factor, `50%`) |>
    pivot_wider(names_from = factor, values_from = `50%`, names_prefix = "F") |>
    dplyr::select(-subject)

factorsdata_dry <- filter(fidata, season == "dry") |>
    bind_cols(theta_median_dry) |>
    dplyr::select(registro, any_children, matches("^F[1-3]$")) |>
    tidyr::pivot_longer(matches("^F[1-3]$"), names_to = "factor_label",
                        values_to = "factor_value")

Compute summaries for the latent abilities (wet)

theta_summary_wet <- as_tibble(samples6, iter/thin/2, select = "theta") |>
  summary() |>
  tidyr::extract(Parameters, c("subject", "factor"),
                 "\\[([[:digit:]]+),([[:digit:]]+)\\]", convert = TRUE)
theta_median_wet <- theta_summary_wet |>
    dplyr::select(subject, factor, `50%`) |>
    pivot_wider(names_from = factor, values_from = `50%`, names_prefix = "F") |>
    dplyr::select(-subject)

factorsdata_wet <- filter(fidata, season == "wet") |>
    bind_cols(theta_median_wet) |>
    dplyr::select(registro, any_children, matches("^F[1-3]$")) |>
    tidyr::pivot_longer(matches("^F[1-3]$"), names_to = "factor_label",
                        values_to = "factor_value")

Spatial Distribution of each factor (dry)

factorsdata_dry |>
    st_jitter(factor = 0.02) |>
    ggplot() +
        geom_sf(aes(col = factor_value), size = 1) +
        facet_wrap(~ factor_label) +
        scale_color_distiller(palette = "RdBu", direction = -1) +
        theme_bw(base_size = 10) +
        theme(panel.background = element_rect(fill = 'gray10'),
              panel.grid.major = element_line(color = 'gray30'),
              panel.grid.minor = element_line(color = 'green', linewidth = 2),
              legend.position = "bottom",
              axis.text.x = element_text(size = rel(0.8)),
              axis.text.y = element_text(size = rel(0.8))) +
        labs(col = "")

Spatial Distribution of each factor (wet)

factorsdata_wet |>
    st_jitter(factor = 0.02) |>
    ggplot() +
        geom_sf(aes(col = factor_value), size = 1) +
        facet_wrap(~ factor_label) +
        scale_color_distiller(palette = "RdBu", direction = -1) +
        theme_bw(base_size = 10) +
        theme(panel.background = element_rect(fill = 'gray10'),
              panel.grid.major = element_line(color = 'gray30'),
              panel.grid.minor = element_line(color = 'green', linewidth = 2),
              legend.position = "bottom",
              axis.text.x = element_text(size = rel(0.8)),
              axis.text.y = element_text(size = rel(0.8))) +
        labs(col = "")

Variogram of latent factors

Computing extent of coordinates.

extent <- fidata |>
  st_distance() |>
  max() |>
  as.numeric()

Computing variograms for each factor.

factorsdata_dry[c("longitude_3857", "latitude_3857")] <-
    st_coordinates(st_transform(factorsdata_dry, 3857))

factorsdata_wet[c("longitude_3857", "latitude_3857")] <-
    st_coordinates(st_transform(factorsdata_wet, 3857))

gg_variog_dry <- factorsdata_dry |>
  st_set_geometry(NULL) |>
  group_by(factor_label) |>
  tidyr::nest() |>
  mutate(variogs = map(data,
    ~ gstat::variogram(factor_value ~ 1, ~ longitude_3857 + latitude_3857, .,
                       cutoff = extent * 0.7, width = extent / 200)
    )
  ) |>
  dplyr::select(-data) |>
  tidyr::unnest(col = variogs)

gg_variog_wet <- factorsdata_wet |>
  st_set_geometry(NULL) |>
  group_by(factor_label) |>
  tidyr::nest() |>
  mutate(variogs = map(data,
    ~ gstat::variogram(factor_value ~ 1, ~ longitude_3857 + latitude_3857, .,
                       cutoff = extent * 0.7, width = extent / 200)
    )
  ) |>
  dplyr::select(-data) |>
  tidyr::unnest(col = variogs)

Visualizing variograms per factor.

gg_variog_dry |>
  ggplot(aes(dist, gamma)) +
    # geom_point(aes(size = np)) +
    geom_point(aes(alpha = np)) +
    geom_smooth() +
    expand_limits(y = 0, x = 0) +
    scale_x_continuous(limits = c(0, extent * 0.5)) +
    facet_wrap(~factor_label, ncol = 3, scales = "free_y") +
    labs(x = "distance", y = "semi-variogram") +
    theme(legend.position = "bottom")
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> Warning: Removed 120 rows containing non-finite outside the scale range (`stat_smooth()`).
#> Warning: Removed 120 rows containing missing values or values outside the scale range
#> (`geom_point()`).

gg_variog_wet |>
  ggplot(aes(dist, gamma)) +
    # geom_point(aes(size = np)) +
    geom_point(aes(alpha = np)) +
    geom_smooth() +
    expand_limits(y = 0, x = 0) +
    scale_x_continuous(limits = c(0, extent * 0.5)) +
    facet_wrap(~factor_label, ncol = 3, scales = "free_y") +
    labs(x = "distance", y = "semi-variogram") +
    theme(legend.position = "bottom")
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> Warning: Removed 120 rows containing non-finite outside the scale range (`stat_smooth()`).
#> Warning: Removed 120 rows containing missing values or values outside the scale range
#> (`geom_point()`).

Time to execute the task

Only useful when executed with Rscript.

proc.time()
#>    user  system elapsed 
#> 106.881   4.588 137.104