EIFA Urban Ipixuna: latent factors


Analise the latent factors obtained from the exploratory item factor analysis (EIFA) with 3 dimensions for different number of dimensions for the urban households of Ipixuna.

Load required libraries and data

rm(list = ls())
library(day2day)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(mirt)
#> Loading required package: stats4
#> Loading required package: lattice
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(purrr)
library(ggplot2)

path_main <- git_path()
path_data <- file.path(path_main, "data")
path_raw <- file.path(path_data, "raw")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")

fidata <- file.path(path_processed, "fi-items-ipixuna-urban.gpkg") |>
    st_read(as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source 
#>   `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg' 
#>   using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS:  WGS 84
eifa_data <- readRDS(file.path(path_modelled, "eifa-ipixuna-urban.rds"))

Summary of select model

We apply varimax transformation to obtain independent factors.

ndim_selected <- 3
rotation <- "varimax"
model_selected <- eifa_data$model[[which(eifa_data$ndim == ndim_selected)]]
summary(model_selected, rotate = rotation, suppress = 0.3)
#> 
#> Rotation:  varimax 
#> 
#> Rotated factor loadings: 
#> 
#>                                        F1     F2     F3    h2
#> item_1_A_worried_that_food_ends    -0.322 -0.639 -0.480 0.742
#> item_2_A_run_out_of_food           -0.486 -0.416 -0.529 0.689
#> item_3_A_ate_few_food_types        -0.761 -0.395 -0.309 0.831
#> item_4_B_skipped_a_meal            -0.764     NA -0.506 0.906
#> item_5_B_ate_less_than_required    -0.630     NA -0.653 0.910
#> item_6_B_hungry_but_did_not_eat    -0.683 -0.353 -0.634 0.993
#> item_7_B_at_most_one_meal_per_day  -0.736     NA -0.457 0.817
#> item_8_C_ate_few_food_types        -0.836     NA     NA 0.825
#> item_9_C_ate_less_than_required    -0.858     NA     NA 0.880
#> item_10_C_decreased_food_quantity  -0.875 -0.322     NA 0.926
#> item_11_C_skipped_a_meal           -0.935     NA     NA 0.985
#> item_12_C_hungry_but_did_not_eat   -0.881     NA -0.353 0.949
#> item_13_C_at_most_one_meal_per_day -0.946     NA     NA 0.994
#> item_14_D_food_just_with_farinha   -0.568     NA -0.640 0.797
#> item_15_D_credit_for_eating        -0.312 -0.753     NA 0.668
#> item_16_D_borrowed_food                NA -0.892 -0.358 0.990
#> item_17_D_had_meals_at_neighbors   -0.375 -0.552 -0.369 0.581
#> item_18_D_reduced_meat_or_fish     -0.541 -0.326 -0.485 0.634
#> 
#> Rotated SS loadings:  8.571 3.31 3.235 
#> 
#> Factor correlations: 
#> 
#>    F1 F2 F3
#> F1  1  0  0
#> F2  0  1  0
#> F3  0  0  1

Distribution of latent factors

Computing the household scores of the latent factors.

factorsdata <- fidata |>
    bind_cols(as_tibble(fscores(model_selected, rotate = rotation))) |>
    dplyr::select(registro, any_children, matches("^F[1-3]$")) |>
    tidyr::pivot_longer(matches("^F[1-3]$"), names_to = "factor_label",
                        values_to = "factor_value")

Global distribution of factors

Normalised histogram per factor.

factorsdata |>
    ggplot(aes(x = factor_value, y = after_stat(density))) +
        geom_histogram(binwidth = 0.2) +
        facet_wrap(~ factor_label) +
        labs(x = "latent factor")

Distribution of factors by children-type households

Normalised histogram per factor by children-type household.

factorsdata |>
    ggplot(aes(x = factor_value, y = after_stat(density))) +
        geom_histogram(aes(fill = any_children)) +
        # facet_wrap(~ any_children + factor_label, scales = "free") +
        facet_grid(any_children ~ factor_label, scales = "free") +
        theme(legend.position = "none") +
        labs(x = "latent factor")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Spatial analysis of latent factors

Spatial distribution

factorsdata |>
    st_jitter(factor = 0.02) |>
    ggplot() +
        geom_sf(aes(col = factor_value), size = 1) +
        facet_wrap(~ factor_label) +
        scale_color_distiller(palette = "RdBu", direction = 1) +
        theme_bw(base_size = 7) +
        theme(panel.background = element_rect(fill = 'gray10'),
              panel.grid.major = element_line(color = 'gray30'),
              panel.grid.minor = element_line(color = 'green', linewidth = 2),
              legend.position = "bottom") +
        labs(col = "")

Association of latent factors and coordinates

factorsdata[c("longitude_3857", "latitude_3857")] <-
    st_coordinates(st_transform(factorsdata, 3857))

factorsdata |>
    ggplot(aes(longitude_3857, factor_value)) +
        geom_jitter() +
        facet_wrap(~ factor_label) +
        theme_gray(base_size = 7)

factorsdata |>
    ggplot(aes(latitude_3857, factor_value)) +
        geom_point() +
        facet_wrap(~ factor_label) +
        theme_gray(base_size = 7)

Variogram of latent factors

Computing extent of coordinates.

extent <- factorsdata |>
  st_distance() |>
  max() |>
  as.numeric()

Computing variograms for each factor.

gg_variog <- factorsdata |>
  st_set_geometry(NULL) |>
  group_by(factor_label) |>
  tidyr::nest() |>
  mutate(variogs = map(data,
    ~ gstat::variogram(factor_value ~ 1, ~ longitude_3857 + latitude_3857, .,
                       cutoff = extent * 0.7, width = extent / 200)
    )
  ) |>
  dplyr::select(-data) |>
  tidyr::unnest(col = variogs)

Visualizing variograms per factor.

gg_variog |>
  ggplot(aes(dist, gamma)) +
    # geom_point(aes(size = np)) +
    geom_point(aes(alpha = np)) +
    geom_smooth() +
    expand_limits(y = 0, x = 0) +
    facet_wrap(~factor_label, ncol = 3, scales = "free_y") +
    labs(x = "distance", y = "semi-variogram") +
    theme_gray(base_size = 7)
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Time to execute the task

Only useful when executed with Rscript.

proc.time()
#>    user  system elapsed 
#>   3.768   0.642   4.001