Semi-variogram of the latent factors with CIFA


Visualise and summarise the resulst of CIFA models.

Load required libraries and data

rm(list = ls())
library(day2day)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(mirt)
#> Loading required package: stats4
#> Loading required package: lattice
library(spifa)
library(tidyr)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(purrr)
library(ggplot2)
library(gstat)

path_main <- git_path()
path_data <- file.path(path_main, "data")
path_raw <- file.path(path_data, "raw")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")
path_src <- file.path(path_main, "src")

source(file.path(path_src, "ggtheme-publication.R"))

fidata <- file.path(path_processed, "fi-items-ipixuna-urban.gpkg") |>
    st_read(as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source 
#>   `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg' 
#>   using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS:  WGS 84
samples <- readRDS(file.path(path_modelled, "cifa-ipixuna-urban.rds"))
iter <- 1 * 10 ^ 5
thin <- 10
n <- nrow(fidata)

Analysis of latent abilities

Compute summaries for the latent abilities.

theta_summary <- as_tibble(samples, iter/thin/2, select = "theta") |>
  summary() |>
  tidyr::extract(Parameters, c("subject", "factor"),
                 "\\[([[:digit:]]+),([[:digit:]]+)\\]", convert = TRUE)
theta_median <- theta_summary |>
    dplyr::select(subject, factor, `50%`) |>
    pivot_wider(names_from = factor, values_from = `50%`, names_prefix = "F") |>
    dplyr::select(-subject)

factorsdata <- fidata |>
    bind_cols(theta_median) |>
    dplyr::select(registro, any_children, matches("^F[1-3]$")) |>
    tidyr::pivot_longer(matches("^F[1-3]$"), names_to = "factor_label",
                        values_to = "factor_value")

Variogram of latent factors

Computing extent of coordinates.

extent <- factorsdata |>
  st_distance() |>
  max() |>
  as.numeric()

Computing variograms for each factor.

factorsdata[c("longitude_3857", "latitude_3857")] <-
    st_coordinates(st_transform(factorsdata, 3857))

# empirical variogram
gg_variogs <- factorsdata |>
  st_set_geometry(NULL) |>
  group_by(factor_label) |>
  tidyr::nest() |>
  mutate(variogs = map(data,
    ~ gstat::variogram(factor_value ~ 1, ~ longitude_3857 + latitude_3857, .,
                       # cutoff = extent * 0.7, width = extent / 200)
                       cutoff = extent * 0.5, width = extent / 40)
    )
  ) |>
  ungroup()

# prepare empirical variogram
gg_variog_emp <- dplyr::select(gg_variogs, factor_label, variogs) |>
  tidyr::unnest(col = variogs)

# variogram fitting
gg_variogs <- gg_variogs |>
  mutate(psill = 1, nugget = 0.2, range = c(50, 15, 60)) |>
  mutate(model = pmap(list(psill, range, nugget), function(...) vgm(model = "Exp", ...))) |>
  mutate(vgfit = map2(variogs, model, ~ fit.variogram(.x, fit.method = 1, model = .y)))

# prepare theoretical variogram
gg_variog_theo <- gg_variogs |>
    mutate(vgline = map(vgfit, ~ variogramLine(.x, maxdist = extent * 0.5, n = 1000))) |>
    dplyr::select(factor_label, vgline) |>
    tidyr::unnest(col = vgline)

Visualizing variograms per factor.

# Names for the strips
# factor_labels <- c("F1" = "(A)~First~factor",
#         "F2" = "(B)~Second~factor",
#         "F3" = "(C)~Third~factor")
factor_labels <- c(
    "F1" = "atop('(F1) Severe food insecurity', 'including children')",
    "F2" = "atop('(F2) Anxiety and relying on', 'social relations')",
    "F3" = "atop('(F3) Adults eating less and', 'going hungry')"
)
gg_variog_emp$factor_label <- factor_labels[gg_variog_emp$factor_label]
gg_variog_theo$factor_label <- factor_labels[gg_variog_theo$factor_label]
ggthemr::ggthemr_reset()
pal_aux <- ggthemr:::load_palette.character("fresh")
pal_aux$swatch[c(2, 3)] <- pal_aux$swatch[c(3, 2)]
ggthemr::ggthemr(pal_aux, layout = "clear")

gg <- gg_variog_emp |>
  ggplot(aes(dist, gamma)) +
    geom_point(aes(size = np, alpha = np)) +
    geom_line(aes(dist, gamma, color = "Exponential model"), data = gg_variog_theo) +
    expand_limits(y = 0, x = 0) +
    scale_x_continuous(limits = c(0, extent * 0.5)) +
    scale_y_continuous(limits = c(0.5, 1.2)) +
    scale_size_continuous(range = c(1, 2.5), breaks = seq(250, 1250, 250)) +
    facet_wrap(~factor_label, ncol = 3, labeller = label_parsed) +
    labs(y = expression(widetilde(gamma)(u):Semi-variogram),
    x = expression(u:Distance~"in"~meters), size = "Number of pairs:", alpha = "Number of pairs:", color = "") +
     guides(color= guide_legend(), size=guide_legend()) +
    # labs(x = "distance", y = "semi-variogram") +
    theme(legend.position = "bottom") +
    theme_Publication()
#> Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
#> ℹ Please use the `linewidth` argument instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
gg
#> Warning: The `scale_name` argument of `discrete_scale()` is deprecated as of ggplot2 3.5.0.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

Save graph of variogram.

ggsave(file.path(path_data, "figures", "cifa_variogs.pdf"), gg, width = 7, height = 3.7)

Compare theoretical variograms

We compare the fitting of theoretical variogram for the first factor.

vg = gg_variogs$variogs[[1]]
vgfit1 = fit.variogram(vg, fit.method = 1,
    model = vgm(psill = 1, model = "Exp", nugget = 0.2, range = 50))
vgfit2 = fit.variogram(vg, fit.method = 1,
    model = vgm(psill = 1, model = "Gau", nugget = 0.2, range = 50))
#> Warning in fit.variogram(vg, fit.method = 1, model = vgm(psill = 1, model = "Gau", : No convergence
#> after 200 iterations: try different initial values?
vgfit3 = fit.variogram(vg, fit.method = 1,
    model = vgm(psill = 1, model = "Sph", nugget = 0.2, range = 50))
#> Warning in fit.variogram(vg, fit.method = 1, model = vgm(psill = 1, model = "Sph", : singular model
#> in variogram fit
# squared error
sapply(list(vgfit1, vgfit2, vgfit3), function(x) attr(x, "SSErr"))
#> [1] 29.43300 29.50208 63.14785

Visualize model fitting:

plot(gamma ~ dist, data = vg, ylim = c(0.5, 1.1), pch = 19, cex = sqrt(vg$np / 1000),
    xlab = "distance", ylab = "semi-variogram")
lines(variogramLine(vgfit1, maxdist = extent * 0.5, n = 1000), col = 1, lwd = 3)
lines(variogramLine(vgfit2, maxdist = extent * 0.5, n = 1000), col = 2, lwd = 3)
lines(variogramLine(vgfit3, maxdist = extent * 0.5, n = 1000), col = 3, lwd = 3)
legend("bottomright", c("Exponential", "Gaussian", "Spherical"), col = 1:3, lwd = 3)

Time to execute the task

Only useful when executed with Rscript.

proc.time()
#>    user  system elapsed 
#>   4.328   0.447   4.462