CIFA Urban Ipixuna: summarise results


Visualise and summarise the resulst of CIFA models.

Load required libraries and data

rm(list = ls())
library(day2day)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(mirt)
#> Loading required package: stats4
#> Loading required package: lattice
library(spifa)
library(tidyr)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(purrr)
library(ggplot2)

path_main <- git_path()
path_data <- file.path(path_main, "data")
path_raw <- file.path(path_data, "raw")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")

fidata <- file.path(path_processed, "fi-items-ipixuna-urban.gpkg") |>
    st_read(as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source 
#>   `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg' 
#>   using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS:  WGS 84
samples <- readRDS(file.path(path_modelled, "cifa-ipixuna-urban.rds"))
iter <- 1 * 10 ^ 5
thin <- 10
n <- nrow(fidata)

MCMC convergence

Difficulty parameters

as_tibble(samples, 0, 10, select = "c") |>
    gg_trace(alpha = 0.6) +
    theme(legend.position = "right")

Discrimination parameters

as_tibble(samples, 0, 10, select = "a") |>
    gg_trace(alpha = 0.6) +
    theme(legend.position = "none")

Latent abilities

as_tibble(samples, 0, 10, select = "theta") |>
    select(1:10, 301:310) |>
    gg_trace(alpha = 0.6) +
    theme(legend.position = "right")

Correlation parameters

as_tibble(samples, 0, 10, select = "corr") |>
    gg_trace(alpha = 0.6) +
    theme(legend.position = "right")

Posterior densities

Difficulty parameters

as_tibble(samples, iter/thin/2, 5, "c") |>
    gg_density(alpha = 0.5, ridges = TRUE, aes(fill = Parameters), scale = 3)
#> Picking joint bandwidth of 0.0581

Discrimination parameters

as_tibble(samples, iter/thin/2, 5, "a") |>
    gg_density(alpha = 0.5, ridges = TRUE, aes(fill = Parameters), scale = 6)
#> Picking joint bandwidth of 0.189

Correlation parameters

as_tibble(samples, iter/thin/2, 5, "corr") |>
    gg_density(alpha = 0.5, ridges = TRUE, aes(fill = Parameters), scale = 1)
#> Picking joint bandwidth of 0.0151

Latent abilities

as_tibble(samples, iter/thin/2, 5, "theta") |>
    dplyr::select(1:50) |>
    gg_density(aes(fill = median), alpha = 0.8, ridges = TRUE, scale = 4) +
    scale_fill_distiller(palette = "RdYlBu")
#> Picking joint bandwidth of 0.0941

Summarise

Quantiles of the posteriors

Provide quantiles for each parameter.

as_tibble(samples, iter/thin/2, select = c("c", "a")) |>
    summary() |>
    print(n = 1000)
#> # A tibble: 72 × 6
#>    Parameters  `2.5%`   `10%`   `50%`  `90%` `97.5%`
#>    <fct>        <dbl>   <dbl>   <dbl>  <dbl>   <dbl>
#>  1 c[1]        0.165   0.263   0.492   0.760  0.901 
#>  2 c[2]       -0.0574  0.0618  0.259   0.474  0.588 
#>  3 c[3]        0.597   0.717   0.954   1.23   1.37  
#>  4 c[4]       -1.66   -1.43   -1.07   -0.783 -0.617 
#>  5 c[5]       -0.900  -0.718  -0.390  -0.109  0.0449
#>  6 c[6]       -2.53   -2.25   -1.78   -1.36  -1.19  
#>  7 c[7]       -1.47   -1.30   -1.04   -0.815 -0.697 
#>  8 c[8]       -0.273  -0.167   0.0564  0.290  0.409 
#>  9 c[9]       -1.42   -1.24   -0.940  -0.671 -0.551 
#> 10 c[10]      -1.22   -1.01   -0.705  -0.423 -0.298 
#> 11 c[11]      -2.15   -1.96   -1.61   -1.28  -1.15  
#> 12 c[12]      -2.62   -2.36   -1.91   -1.53  -1.35  
#> 13 c[13]      -2.64   -2.41   -1.99   -1.63  -1.46  
#> 14 c[14]      -2.28   -2.04   -1.68   -1.38  -1.24  
#> 15 c[15]       0.416   0.493   0.646   0.809  0.911 
#> 16 c[16]      -2.46   -2.20   -1.79   -1.47  -1.33  
#> 17 c[17]      -1.66   -1.48   -1.23   -1.02  -0.924 
#> 18 c[18]       0.0631  0.154   0.338   0.537  0.646 
#> 19 A[1,1]      0       0       0       0      0     
#> 20 A[2,1]      0       0       0       0      0     
#> 21 A[3,1]      1.24    1.37    1.66    2.05   2.27  
#> 22 A[4,1]      0.684   0.940   1.47    2.03   2.33  
#> 23 A[5,1]      0.0183  0.344   0.877   1.40   1.71  
#> 24 A[6,1]      0.388   0.705   1.26    1.84   2.13  
#> 25 A[7,1]      1.12    1.27    1.58    1.93   2.14  
#> 26 A[8,1]      1.22    1.37    1.71    2.12   2.38  
#> 27 A[9,1]      1.34    1.51    1.89    2.34   2.66  
#> 28 A[10,1]     1.51    1.71    2.16    2.69   3.05  
#> 29 A[11,1]     1.50    1.67    2.00    2.37   2.57  
#> 30 A[12,1]     1.49    1.67    2.12    2.65   3.00  
#> 31 A[13,1]     1.42    1.59    1.93    2.31   2.53  
#> 32 A[14,1]    -0.273  -0.0528  0.352   0.733  0.925 
#> 33 A[15,1]     0       0       0       0      0     
#> 34 A[16,1]     0       0       0       0      0     
#> 35 A[17,1]     0       0       0       0      0     
#> 36 A[18,1]     0.929   1.04    1.29    1.59   1.78  
#> 37 A[1,2]      1.06    1.25    1.63    2.18   2.55  
#> 38 A[2,2]      0       0       0       0      0     
#> 39 A[3,2]      0       0       0       0      0     
#> 40 A[4,2]      0       0       0       0      0     
#> 41 A[5,2]      0       0       0       0      0     
#> 42 A[6,2]      0       0       0       0      0     
#> 43 A[7,2]      0       0       0       0      0     
#> 44 A[8,2]      0       0       0       0      0     
#> 45 A[9,2]      0       0       0       0      0     
#> 46 A[10,2]     0       0       0       0      0     
#> 47 A[11,2]     0       0       0       0      0     
#> 48 A[12,2]     0       0       0       0      0     
#> 49 A[13,2]     0       0       0       0      0     
#> 50 A[14,2]     0       0       0       0      0     
#> 51 A[15,2]     0.452   0.542   0.720   0.924  1.05  
#> 52 A[16,2]     0.952   1.11    1.43    1.80   2.04  
#> 53 A[17,2]     0.614   0.729   0.982   1.29   1.50  
#> 54 A[18,2]     0       0       0       0      0     
#> 55 A[1,3]      0       0       0       0      0     
#> 56 A[2,3]      1.06    1.19    1.52    1.94   2.24  
#> 57 A[3,3]      0       0       0       0      0     
#> 58 A[4,3]      0.288   0.543   1.02    1.60   2.00  
#> 59 A[5,3]      0.832   1.11    1.75    2.55   3.07  
#> 60 A[6,3]      0.668   0.928   1.51    2.18   2.67  
#> 61 A[7,3]      0       0       0       0      0     
#> 62 A[8,3]      0       0       0       0      0     
#> 63 A[9,3]      0       0       0       0      0     
#> 64 A[10,3]     0       0       0       0      0     
#> 65 A[11,3]     0       0       0       0      0     
#> 66 A[12,3]     0       0       0       0      0     
#> 67 A[13,3]     0       0       0       0      0     
#> 68 A[14,3]     0.638   0.837   1.25    1.69   1.94  
#> 69 A[15,3]     0       0       0       0      0     
#> 70 A[16,3]     0       0       0       0      0     
#> 71 A[17,3]     0       0       0       0      0     
#> 72 A[18,3]     0       0       0       0      0

Credible intervals

Discrimination parameters

as_tibble(samples, iter/thin/ 2, select = "a") |>
    summary() |>
    gg_errorbarh(sorted = FALSE)

Difficulty parameters

as_tibble(samples, iter/thin/2, select = "c") |>
  summary() |>
  gg_errorbar(sorted = FALSE)

Latent abilities

as_tibble(samples, iter/thin/2, select = "theta") |>
  summary() |>
  tidyr::extract(Parameters, c("subject", "factor"),
        "\\[([[:digit:]]+),([[:digit:]]+)\\]", convert = TRUE) |>
  gg_errorbar(sorted = TRUE) +
    facet_wrap(~ factor, ncol = 1)

Analysis of latent abilities

Compute summaries for the latent abilities.

theta_summary <- as_tibble(samples, iter/thin/2, select = "theta") |>
  summary() |>
  tidyr::extract(Parameters, c("subject", "factor"),
                 "\\[([[:digit:]]+),([[:digit:]]+)\\]", convert = TRUE)
theta_median <- theta_summary |>
    dplyr::select(subject, factor, `50%`) |>
    pivot_wider(names_from = factor, values_from = `50%`, names_prefix = "F") |>
    dplyr::select(-subject)

factorsdata <- fidata |>
    bind_cols(theta_median) |>
    dplyr::select(registro, any_children, matches("^F[1-3]$")) |>
    tidyr::pivot_longer(matches("^F[1-3]$"), names_to = "factor_label",
                        values_to = "factor_value")

Empirical distribution of latent factors medians

factorsdata |>
    ggplot(aes(x = factor_value, y = after_stat(density))) +
        geom_histogram(binwidth = 0.2) +
        facet_wrap(~ factor_label) +
        labs(x = "latent factor")

factorsdata |>
    ggplot(aes(x = factor_value, y = after_stat(density))) +
        geom_histogram(aes(fill = any_children)) +
        facet_grid(any_children ~ factor_label, scales = "free") +
        theme(legend.position = "none") +
        labs(x = "latent factor")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Spatial Distribution of each factor

factorsdata |>
    st_jitter(factor = 0.02) |>
    ggplot() +
        geom_sf(aes(col = factor_value), size = 1) +
        facet_wrap(~ factor_label) +
        scale_color_distiller(palette = "RdBu", direction = -1) +
        theme_bw(base_size = 10) +
        theme(panel.background = element_rect(fill = 'gray10'),
              panel.grid.major = element_line(color = 'gray30'),
              panel.grid.minor = element_line(color = 'green', linewidth = 2),
              legend.position = "bottom",
              axis.text.x = element_text(size = rel(0.8)),
              axis.text.y = element_text(size = rel(0.8))) +
        labs(col = "")

Variogram of latent factors

Computing extent of coordinates.

extent <- factorsdata |>
  st_distance() |>
  max() |>
  as.numeric()

Computing variograms for each factor.

factorsdata[c("longitude_3857", "latitude_3857")] <-
    st_coordinates(st_transform(factorsdata, 3857))

gg_variog <- factorsdata |>
  st_set_geometry(NULL) |>
  group_by(factor_label) |>
  tidyr::nest() |>
  mutate(variogs = map(data,
    ~ gstat::variogram(factor_value ~ 1, ~ longitude_3857 + latitude_3857, .,
                       cutoff = extent * 0.7, width = extent / 200)
    )
  ) |>
  dplyr::select(-data) |>
  tidyr::unnest(col = variogs)

Visualizing variograms per factor.

gg_variog |>
  ggplot(aes(dist, gamma)) +
    # geom_point(aes(size = np)) +
    geom_point(aes(alpha = np)) +
    geom_smooth() +
    expand_limits(y = 0, x = 0) +
    scale_x_continuous(limits = c(0, extent * 0.5)) +
    facet_wrap(~factor_label, ncol = 3, scales = "free_y") +
    labs(x = "distance", y = "semi-variogram") +
    theme(legend.position = "bottom")
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> Warning: Removed 120 rows containing non-finite outside the scale range (`stat_smooth()`).
#> Warning: Removed 120 rows containing missing values or values outside the scale range
#> (`geom_point()`).

Time to execute the task

Only useful when executed with Rscript.

proc.time()
#>    user  system elapsed 
#>  21.886   2.116  37.722