EIFA Urban Ipixuna: latent factors
Analise the latent factors obtained from the exploratory item factor analysis (EIFA) with 3 dimensions for different number of dimensions for the urban households of Ipixuna.
Load required libraries and data
rm(list = ls())
library(day2day)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(mirt)
#> Loading required package: stats4
#> Loading required package: lattice
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(purrr)
library(ggplot2)
path_main <- git_path()
path_data <- file.path(path_main, "data")
path_raw <- file.path(path_data, "raw")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")
fidata <- file.path(path_processed, "fi-items-ipixuna-urban.gpkg") |>
st_read(as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source
#> `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg'
#> using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS: WGS 84
eifa_data <- readRDS(file.path(path_modelled, "eifa-ipixuna-urban.rds"))
Summary of select model
We apply varimax transformation to obtain independent factors.
ndim_selected <- 3
rotation <- "varimax"
model_selected <- eifa_data$model[[which(eifa_data$ndim == ndim_selected)]]
summary(model_selected, rotate = rotation, suppress = 0.3)
#>
#> Rotation: varimax
#>
#> Rotated factor loadings:
#>
#> F1 F2 F3 h2
#> item_1_A_worried_that_food_ends -0.322 -0.639 -0.480 0.742
#> item_2_A_run_out_of_food -0.486 -0.416 -0.529 0.689
#> item_3_A_ate_few_food_types -0.761 -0.395 -0.309 0.831
#> item_4_B_skipped_a_meal -0.764 NA -0.506 0.906
#> item_5_B_ate_less_than_required -0.630 NA -0.653 0.910
#> item_6_B_hungry_but_did_not_eat -0.683 -0.353 -0.634 0.993
#> item_7_B_at_most_one_meal_per_day -0.736 NA -0.457 0.817
#> item_8_C_ate_few_food_types -0.836 NA NA 0.825
#> item_9_C_ate_less_than_required -0.858 NA NA 0.880
#> item_10_C_decreased_food_quantity -0.875 -0.322 NA 0.926
#> item_11_C_skipped_a_meal -0.935 NA NA 0.985
#> item_12_C_hungry_but_did_not_eat -0.881 NA -0.353 0.949
#> item_13_C_at_most_one_meal_per_day -0.946 NA NA 0.994
#> item_14_D_food_just_with_farinha -0.568 NA -0.640 0.797
#> item_15_D_credit_for_eating -0.312 -0.753 NA 0.668
#> item_16_D_borrowed_food NA -0.892 -0.358 0.990
#> item_17_D_had_meals_at_neighbors -0.375 -0.552 -0.369 0.581
#> item_18_D_reduced_meat_or_fish -0.541 -0.326 -0.485 0.634
#>
#> Rotated SS loadings: 8.571 3.31 3.235
#>
#> Factor correlations:
#>
#> F1 F2 F3
#> F1 1 0 0
#> F2 0 1 0
#> F3 0 0 1
Distribution of latent factors
Computing the household scores of the latent factors.
factorsdata <- fidata |>
bind_cols(as_tibble(fscores(model_selected, rotate = rotation))) |>
dplyr::select(registro, any_children, matches("^F[1-3]$")) |>
tidyr::pivot_longer(matches("^F[1-3]$"), names_to = "factor_label",
values_to = "factor_value")
Global distribution of factors
Normalised histogram per factor.
factorsdata |>
ggplot(aes(x = factor_value, y = after_stat(density))) +
geom_histogram(binwidth = 0.2) +
facet_wrap(~ factor_label) +
labs(x = "latent factor")
Distribution of factors by children-type households
Normalised histogram per factor by children-type household.
factorsdata |>
ggplot(aes(x = factor_value, y = after_stat(density))) +
geom_histogram(aes(fill = any_children)) +
# facet_wrap(~ any_children + factor_label, scales = "free") +
facet_grid(any_children ~ factor_label, scales = "free") +
theme(legend.position = "none") +
labs(x = "latent factor")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Spatial analysis of latent factors
Spatial distribution
factorsdata |>
st_jitter(factor = 0.02) |>
ggplot() +
geom_sf(aes(col = factor_value), size = 1) +
facet_wrap(~ factor_label) +
scale_color_distiller(palette = "RdBu", direction = 1) +
theme_bw(base_size = 7) +
theme(panel.background = element_rect(fill = 'gray10'),
panel.grid.major = element_line(color = 'gray30'),
panel.grid.minor = element_line(color = 'green', linewidth = 2),
legend.position = "bottom") +
labs(col = "")
Association of latent factors and coordinates
factorsdata[c("longitude_3857", "latitude_3857")] <-
st_coordinates(st_transform(factorsdata, 3857))
factorsdata |>
ggplot(aes(longitude_3857, factor_value)) +
geom_jitter() +
facet_wrap(~ factor_label) +
theme_gray(base_size = 7)
factorsdata |>
ggplot(aes(latitude_3857, factor_value)) +
geom_point() +
facet_wrap(~ factor_label) +
theme_gray(base_size = 7)
Variogram of latent factors
Computing extent of coordinates.
extent <- factorsdata |>
st_distance() |>
max() |>
as.numeric()
Computing variograms for each factor.
gg_variog <- factorsdata |>
st_set_geometry(NULL) |>
group_by(factor_label) |>
tidyr::nest() |>
mutate(variogs = map(data,
~ gstat::variogram(factor_value ~ 1, ~ longitude_3857 + latitude_3857, .,
cutoff = extent * 0.7, width = extent / 200)
)
) |>
dplyr::select(-data) |>
tidyr::unnest(col = variogs)
Visualizing variograms per factor.
gg_variog |>
ggplot(aes(dist, gamma)) +
# geom_point(aes(size = np)) +
geom_point(aes(alpha = np)) +
geom_smooth() +
expand_limits(y = 0, x = 0) +
facet_wrap(~factor_label, ncol = 3, scales = "free_y") +
labs(x = "distance", y = "semi-variogram") +
theme_gray(base_size = 7)
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Time to execute the task
Only useful when executed with Rscript
.
proc.time()
#> user system elapsed
#> 3.768 0.642 4.001