Semi-variogram of the latent factors with CIFA
Visualise and summarise the resulst of CIFA models.
Load required libraries and data
rm(list = ls())
library(day2day)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(mirt)
#> Loading required package: stats4
#> Loading required package: lattice
library(spifa)
library(tidyr)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(purrr)
library(ggplot2)
library(gstat)
path_main <- git_path()
path_data <- file.path(path_main, "data")
path_raw <- file.path(path_data, "raw")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")
path_src <- file.path(path_main, "src")
source(file.path(path_src, "ggtheme-publication.R"))
fidata <- file.path(path_processed, "fi-items-ipixuna-urban.gpkg") |>
st_read(as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source
#> `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg'
#> using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS: WGS 84
samples <- readRDS(file.path(path_modelled, "cifa-ipixuna-urban.rds"))
iter <- 1 * 10 ^ 5
thin <- 10
n <- nrow(fidata)
Analysis of latent abilities
Compute summaries for the latent abilities.
theta_summary <- as_tibble(samples, iter/thin/2, select = "theta") |>
summary() |>
tidyr::extract(Parameters, c("subject", "factor"),
"\\[([[:digit:]]+),([[:digit:]]+)\\]", convert = TRUE)
theta_median <- theta_summary |>
dplyr::select(subject, factor, `50%`) |>
pivot_wider(names_from = factor, values_from = `50%`, names_prefix = "F") |>
dplyr::select(-subject)
factorsdata <- fidata |>
bind_cols(theta_median) |>
dplyr::select(registro, any_children, matches("^F[1-3]$")) |>
tidyr::pivot_longer(matches("^F[1-3]$"), names_to = "factor_label",
values_to = "factor_value")
Variogram of latent factors
Computing extent of coordinates.
extent <- factorsdata |>
st_distance() |>
max() |>
as.numeric()
Computing variograms for each factor.
factorsdata[c("longitude_3857", "latitude_3857")] <-
st_coordinates(st_transform(factorsdata, 3857))
# empirical variogram
gg_variogs <- factorsdata |>
st_set_geometry(NULL) |>
group_by(factor_label) |>
tidyr::nest() |>
mutate(variogs = map(data,
~ gstat::variogram(factor_value ~ 1, ~ longitude_3857 + latitude_3857, .,
# cutoff = extent * 0.7, width = extent / 200)
cutoff = extent * 0.5, width = extent / 40)
)
) |>
ungroup()
# prepare empirical variogram
gg_variog_emp <- dplyr::select(gg_variogs, factor_label, variogs) |>
tidyr::unnest(col = variogs)
# variogram fitting
gg_variogs <- gg_variogs |>
mutate(psill = 1, nugget = 0.2, range = c(50, 15, 60)) |>
mutate(model = pmap(list(psill, range, nugget), function(...) vgm(model = "Exp", ...))) |>
mutate(vgfit = map2(variogs, model, ~ fit.variogram(.x, fit.method = 1, model = .y)))
# prepare theoretical variogram
gg_variog_theo <- gg_variogs |>
mutate(vgline = map(vgfit, ~ variogramLine(.x, maxdist = extent * 0.5, n = 1000))) |>
dplyr::select(factor_label, vgline) |>
tidyr::unnest(col = vgline)
Visualizing variograms per factor.
# Names for the strips
# factor_labels <- c("F1" = "(A)~First~factor",
# "F2" = "(B)~Second~factor",
# "F3" = "(C)~Third~factor")
factor_labels <- c(
"F1" = "atop('(F1) Severe food insecurity', 'including children')",
"F2" = "atop('(F2) Anxiety and relying on', 'social relations')",
"F3" = "atop('(F3) Adults eating less and', 'going hungry')"
)
gg_variog_emp$factor_label <- factor_labels[gg_variog_emp$factor_label]
gg_variog_theo$factor_label <- factor_labels[gg_variog_theo$factor_label]
ggthemr::ggthemr_reset()
pal_aux <- ggthemr:::load_palette.character("fresh")
pal_aux$swatch[c(2, 3)] <- pal_aux$swatch[c(3, 2)]
ggthemr::ggthemr(pal_aux, layout = "clear")
gg <- gg_variog_emp |>
ggplot(aes(dist, gamma)) +
geom_point(aes(size = np, alpha = np)) +
geom_line(aes(dist, gamma, color = "Exponential model"), data = gg_variog_theo) +
expand_limits(y = 0, x = 0) +
scale_x_continuous(limits = c(0, extent * 0.5)) +
scale_y_continuous(limits = c(0.5, 1.2)) +
scale_size_continuous(range = c(1, 2.5), breaks = seq(250, 1250, 250)) +
facet_wrap(~factor_label, ncol = 3, labeller = label_parsed) +
labs(y = expression(widetilde(gamma)(u):Semi-variogram),
x = expression(u:Distance~"in"~meters), size = "Number of pairs:", alpha = "Number of pairs:", color = "") +
guides(color= guide_legend(), size=guide_legend()) +
# labs(x = "distance", y = "semi-variogram") +
theme(legend.position = "bottom") +
theme_Publication()
#> Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
#> ℹ Please use the `linewidth` argument instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
gg
#> Warning: The `scale_name` argument of `discrete_scale()` is deprecated as of ggplot2 3.5.0.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Save graph of variogram.
ggsave(file.path(path_data, "figures", "cifa_variogs.pdf"), gg, width = 7, height = 3.7)
Compare theoretical variograms
We compare the fitting of theoretical variogram for the first factor.
vg = gg_variogs$variogs[[1]]
vgfit1 = fit.variogram(vg, fit.method = 1,
model = vgm(psill = 1, model = "Exp", nugget = 0.2, range = 50))
vgfit2 = fit.variogram(vg, fit.method = 1,
model = vgm(psill = 1, model = "Gau", nugget = 0.2, range = 50))
#> Warning in fit.variogram(vg, fit.method = 1, model = vgm(psill = 1, model = "Gau", : No convergence
#> after 200 iterations: try different initial values?
vgfit3 = fit.variogram(vg, fit.method = 1,
model = vgm(psill = 1, model = "Sph", nugget = 0.2, range = 50))
#> Warning in fit.variogram(vg, fit.method = 1, model = vgm(psill = 1, model = "Sph", : singular model
#> in variogram fit
# squared error
sapply(list(vgfit1, vgfit2, vgfit3), function(x) attr(x, "SSErr"))
#> [1] 29.43300 29.50208 63.14785
Visualize model fitting:
plot(gamma ~ dist, data = vg, ylim = c(0.5, 1.1), pch = 19, cex = sqrt(vg$np / 1000),
xlab = "distance", ylab = "semi-variogram")
lines(variogramLine(vgfit1, maxdist = extent * 0.5, n = 1000), col = 1, lwd = 3)
lines(variogramLine(vgfit2, maxdist = extent * 0.5, n = 1000), col = 2, lwd = 3)
lines(variogramLine(vgfit3, maxdist = extent * 0.5, n = 1000), col = 3, lwd = 3)
legend("bottomright", c("Exponential", "Gaussian", "Spherical"), col = 1:3, lwd = 3)
Time to execute the task
Only useful when executed with Rscript
.
proc.time()
#> user system elapsed
#> 4.328 0.447 4.462