CIFA Urban Ipixuna: summarise results
Visualise and summarise the resulst of CIFA models.
Load required libraries and data
rm(list = ls())
library(day2day)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(mirt)
#> Loading required package: stats4
#> Loading required package: lattice
library(spifa)
library(tidyr)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(purrr)
library(ggplot2)
path_main <- git_path()
path_data <- file.path(path_main, "data")
path_raw <- file.path(path_data, "raw")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")
fidata <- file.path(path_processed, "fi-items-ipixuna-urban.gpkg") |>
st_read(as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source
#> `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg'
#> using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS: WGS 84
samples <- readRDS(file.path(path_modelled, "cifa-ipixuna-urban.rds"))
iter <- 1 * 10 ^ 5
thin <- 10
n <- nrow(fidata)
MCMC convergence
Difficulty parameters
as_tibble(samples, 0, 10, select = "c") |>
gg_trace(alpha = 0.6) +
theme(legend.position = "right")
Discrimination parameters
as_tibble(samples, 0, 10, select = "a") |>
gg_trace(alpha = 0.6) +
theme(legend.position = "none")
Latent abilities
as_tibble(samples, 0, 10, select = "theta") |>
select(1:10, 301:310) |>
gg_trace(alpha = 0.6) +
theme(legend.position = "right")
Correlation parameters
as_tibble(samples, 0, 10, select = "corr") |>
gg_trace(alpha = 0.6) +
theme(legend.position = "right")
Posterior densities
Difficulty parameters
as_tibble(samples, iter/thin/2, 5, "c") |>
gg_density(alpha = 0.5, ridges = TRUE, aes(fill = Parameters), scale = 3)
#> Picking joint bandwidth of 0.0581
Discrimination parameters
as_tibble(samples, iter/thin/2, 5, "a") |>
gg_density(alpha = 0.5, ridges = TRUE, aes(fill = Parameters), scale = 6)
#> Picking joint bandwidth of 0.189
Correlation parameters
as_tibble(samples, iter/thin/2, 5, "corr") |>
gg_density(alpha = 0.5, ridges = TRUE, aes(fill = Parameters), scale = 1)
#> Picking joint bandwidth of 0.0151
Latent abilities
as_tibble(samples, iter/thin/2, 5, "theta") |>
dplyr::select(1:50) |>
gg_density(aes(fill = median), alpha = 0.8, ridges = TRUE, scale = 4) +
scale_fill_distiller(palette = "RdYlBu")
#> Picking joint bandwidth of 0.0941
Summarise
Quantiles of the posteriors
Provide quantiles for each parameter.
as_tibble(samples, iter/thin/2, select = c("c", "a")) |>
summary() |>
print(n = 1000)
#> # A tibble: 72 × 6
#> Parameters `2.5%` `10%` `50%` `90%` `97.5%`
#> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 c[1] 0.165 0.263 0.492 0.760 0.901
#> 2 c[2] -0.0574 0.0618 0.259 0.474 0.588
#> 3 c[3] 0.597 0.717 0.954 1.23 1.37
#> 4 c[4] -1.66 -1.43 -1.07 -0.783 -0.617
#> 5 c[5] -0.900 -0.718 -0.390 -0.109 0.0449
#> 6 c[6] -2.53 -2.25 -1.78 -1.36 -1.19
#> 7 c[7] -1.47 -1.30 -1.04 -0.815 -0.697
#> 8 c[8] -0.273 -0.167 0.0564 0.290 0.409
#> 9 c[9] -1.42 -1.24 -0.940 -0.671 -0.551
#> 10 c[10] -1.22 -1.01 -0.705 -0.423 -0.298
#> 11 c[11] -2.15 -1.96 -1.61 -1.28 -1.15
#> 12 c[12] -2.62 -2.36 -1.91 -1.53 -1.35
#> 13 c[13] -2.64 -2.41 -1.99 -1.63 -1.46
#> 14 c[14] -2.28 -2.04 -1.68 -1.38 -1.24
#> 15 c[15] 0.416 0.493 0.646 0.809 0.911
#> 16 c[16] -2.46 -2.20 -1.79 -1.47 -1.33
#> 17 c[17] -1.66 -1.48 -1.23 -1.02 -0.924
#> 18 c[18] 0.0631 0.154 0.338 0.537 0.646
#> 19 A[1,1] 0 0 0 0 0
#> 20 A[2,1] 0 0 0 0 0
#> 21 A[3,1] 1.24 1.37 1.66 2.05 2.27
#> 22 A[4,1] 0.684 0.940 1.47 2.03 2.33
#> 23 A[5,1] 0.0183 0.344 0.877 1.40 1.71
#> 24 A[6,1] 0.388 0.705 1.26 1.84 2.13
#> 25 A[7,1] 1.12 1.27 1.58 1.93 2.14
#> 26 A[8,1] 1.22 1.37 1.71 2.12 2.38
#> 27 A[9,1] 1.34 1.51 1.89 2.34 2.66
#> 28 A[10,1] 1.51 1.71 2.16 2.69 3.05
#> 29 A[11,1] 1.50 1.67 2.00 2.37 2.57
#> 30 A[12,1] 1.49 1.67 2.12 2.65 3.00
#> 31 A[13,1] 1.42 1.59 1.93 2.31 2.53
#> 32 A[14,1] -0.273 -0.0528 0.352 0.733 0.925
#> 33 A[15,1] 0 0 0 0 0
#> 34 A[16,1] 0 0 0 0 0
#> 35 A[17,1] 0 0 0 0 0
#> 36 A[18,1] 0.929 1.04 1.29 1.59 1.78
#> 37 A[1,2] 1.06 1.25 1.63 2.18 2.55
#> 38 A[2,2] 0 0 0 0 0
#> 39 A[3,2] 0 0 0 0 0
#> 40 A[4,2] 0 0 0 0 0
#> 41 A[5,2] 0 0 0 0 0
#> 42 A[6,2] 0 0 0 0 0
#> 43 A[7,2] 0 0 0 0 0
#> 44 A[8,2] 0 0 0 0 0
#> 45 A[9,2] 0 0 0 0 0
#> 46 A[10,2] 0 0 0 0 0
#> 47 A[11,2] 0 0 0 0 0
#> 48 A[12,2] 0 0 0 0 0
#> 49 A[13,2] 0 0 0 0 0
#> 50 A[14,2] 0 0 0 0 0
#> 51 A[15,2] 0.452 0.542 0.720 0.924 1.05
#> 52 A[16,2] 0.952 1.11 1.43 1.80 2.04
#> 53 A[17,2] 0.614 0.729 0.982 1.29 1.50
#> 54 A[18,2] 0 0 0 0 0
#> 55 A[1,3] 0 0 0 0 0
#> 56 A[2,3] 1.06 1.19 1.52 1.94 2.24
#> 57 A[3,3] 0 0 0 0 0
#> 58 A[4,3] 0.288 0.543 1.02 1.60 2.00
#> 59 A[5,3] 0.832 1.11 1.75 2.55 3.07
#> 60 A[6,3] 0.668 0.928 1.51 2.18 2.67
#> 61 A[7,3] 0 0 0 0 0
#> 62 A[8,3] 0 0 0 0 0
#> 63 A[9,3] 0 0 0 0 0
#> 64 A[10,3] 0 0 0 0 0
#> 65 A[11,3] 0 0 0 0 0
#> 66 A[12,3] 0 0 0 0 0
#> 67 A[13,3] 0 0 0 0 0
#> 68 A[14,3] 0.638 0.837 1.25 1.69 1.94
#> 69 A[15,3] 0 0 0 0 0
#> 70 A[16,3] 0 0 0 0 0
#> 71 A[17,3] 0 0 0 0 0
#> 72 A[18,3] 0 0 0 0 0
Credible intervals
Discrimination parameters
as_tibble(samples, iter/thin/ 2, select = "a") |>
summary() |>
gg_errorbarh(sorted = FALSE)
Difficulty parameters
as_tibble(samples, iter/thin/2, select = "c") |>
summary() |>
gg_errorbar(sorted = FALSE)
Latent abilities
as_tibble(samples, iter/thin/2, select = "theta") |>
summary() |>
tidyr::extract(Parameters, c("subject", "factor"),
"\\[([[:digit:]]+),([[:digit:]]+)\\]", convert = TRUE) |>
gg_errorbar(sorted = TRUE) +
facet_wrap(~ factor, ncol = 1)
Analysis of latent abilities
Compute summaries for the latent abilities.
theta_summary <- as_tibble(samples, iter/thin/2, select = "theta") |>
summary() |>
tidyr::extract(Parameters, c("subject", "factor"),
"\\[([[:digit:]]+),([[:digit:]]+)\\]", convert = TRUE)
theta_median <- theta_summary |>
dplyr::select(subject, factor, `50%`) |>
pivot_wider(names_from = factor, values_from = `50%`, names_prefix = "F") |>
dplyr::select(-subject)
factorsdata <- fidata |>
bind_cols(theta_median) |>
dplyr::select(registro, any_children, matches("^F[1-3]$")) |>
tidyr::pivot_longer(matches("^F[1-3]$"), names_to = "factor_label",
values_to = "factor_value")
Empirical distribution of latent factors medians
factorsdata |>
ggplot(aes(x = factor_value, y = after_stat(density))) +
geom_histogram(binwidth = 0.2) +
facet_wrap(~ factor_label) +
labs(x = "latent factor")
factorsdata |>
ggplot(aes(x = factor_value, y = after_stat(density))) +
geom_histogram(aes(fill = any_children)) +
facet_grid(any_children ~ factor_label, scales = "free") +
theme(legend.position = "none") +
labs(x = "latent factor")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Spatial Distribution of each factor
factorsdata |>
st_jitter(factor = 0.02) |>
ggplot() +
geom_sf(aes(col = factor_value), size = 1) +
facet_wrap(~ factor_label) +
scale_color_distiller(palette = "RdBu", direction = -1) +
theme_bw(base_size = 10) +
theme(panel.background = element_rect(fill = 'gray10'),
panel.grid.major = element_line(color = 'gray30'),
panel.grid.minor = element_line(color = 'green', linewidth = 2),
legend.position = "bottom",
axis.text.x = element_text(size = rel(0.8)),
axis.text.y = element_text(size = rel(0.8))) +
labs(col = "")
Variogram of latent factors
Computing extent of coordinates.
extent <- factorsdata |>
st_distance() |>
max() |>
as.numeric()
Computing variograms for each factor.
factorsdata[c("longitude_3857", "latitude_3857")] <-
st_coordinates(st_transform(factorsdata, 3857))
gg_variog <- factorsdata |>
st_set_geometry(NULL) |>
group_by(factor_label) |>
tidyr::nest() |>
mutate(variogs = map(data,
~ gstat::variogram(factor_value ~ 1, ~ longitude_3857 + latitude_3857, .,
cutoff = extent * 0.7, width = extent / 200)
)
) |>
dplyr::select(-data) |>
tidyr::unnest(col = variogs)
Visualizing variograms per factor.
gg_variog |>
ggplot(aes(dist, gamma)) +
# geom_point(aes(size = np)) +
geom_point(aes(alpha = np)) +
geom_smooth() +
expand_limits(y = 0, x = 0) +
scale_x_continuous(limits = c(0, extent * 0.5)) +
facet_wrap(~factor_label, ncol = 3, scales = "free_y") +
labs(x = "distance", y = "semi-variogram") +
theme(legend.position = "bottom")
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> Warning: Removed 120 rows containing non-finite outside the scale range (`stat_smooth()`).
#> Warning: Removed 120 rows containing missing values or values outside the scale range
#> (`geom_point()`).
Time to execute the task
Only useful when executed with Rscript
.
proc.time()
#> user system elapsed
#> 21.886 2.116 37.722