Urban Ipixuna: Correlation analysis of items


Load required libraries and data

rm(list = ls())
library(day2day)
library(ggplot2)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(corrplot)
#> corrplot 0.92 loaded
library(RColorBrewer)
library(patchwork)
library(purrr)

path_proj <- git_path()
path_data <- file.path(path_proj, "data")
path_processed <- file.path(path_data, "processed")

list.files(file.path(path_proj, "src"), full.names = TRUE) |>
    purrr::walk(source)

fidata <- st_read(file.path(path_processed, "fi-items-ipixuna-urban.gpkg"), as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source 
#>   `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg' 
#>   using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS:  WGS 84

Prepare correlation data

We compute the correlation matrix for:

  • all observations
  • only dry season
  • only wet season
items_cor <- fidata |>
    st_set_geometry(NULL) |>
    dplyr::select(season, matches("^item_[0-9]+_[A-D]")) |>
    rename_with(clean_item_names, matches("^item_[0-9]+_[A-D]")) %>%
    bind_rows(., .) |>
    within(season[(nrow(fidata)+1):(2*nrow(fidata))] <- "all") |>
    mutate(season = factor(season, levels = c("all", "dry", "wet"),
                           labels = c("All", "Dry", "Wet"))) |>
    group_by(season) |>
    tidyr::nest() |>
    arrange(season) |>
    mutate(cor_matrix = map(data, ~ cor(., use = "pairwise.complete.obs")))
items_cor
#> # A tibble: 3 × 3
#> # Groups:   season [3]
#>   season data                cor_matrix     
#>   <fct>  <list>              <list>         
#> 1 All    <tibble [200 × 18]> <dbl [18 × 18]>
#> 2 Dry    <tibble [100 × 18]> <dbl [18 × 18]>
#> 3 Wet    <tibble [100 × 18]> <dbl [18 × 18]>

Visualize correlation

mycorr <- function(x) {
    pal <- colorRampPalette(brewer.pal(11, "RdBu"))
    corrplot(x, tl.cex = 0.5, tl.col = "black", tl.srt = 70,
             col.lim = c(0, 1), col = rep(pal(100), 2))
}

cor_basic <- items_cor |>
    mutate(cor_plot = map(cor_matrix, ~ wrap_elements(~ mycorr(.)))) |>
    mutate(cor_plot = map2(cor_plot, season, ~ .x + ggtitle(paste("Season:", .y))))
reduce(cor_basic$cor_plot, `+`)

Items clustering

mycorrclust <- function(x, ndim) {
    pal <- colorRampPalette(brewer.pal(11, "RdBu"))
    corrplot(x, order = "hclust", addrect = ndim,
             tl.cex = 0.5, tl.col = "black", tl.srt = 70,
             col.lim = c(0, 1), col = rep(pal(100), 2))
}
custom_plot <- function (ndim) {
    cat("\n\n")
    cat(paste0("### Number of clusters: ", ndim , "\n\n"))
    items_cor |>
    mutate(cor_plot = map(cor_matrix, ~ wrap_elements(~ mycorrclust(., ndim)))) |>
    mutate(cor_plot = map2(cor_plot, season, ~ .x + ggtitle(paste("Season:", .y)))) |>
    pull(cor_plot) |>
    reduce(`+`) |>
    print()
}

walk(2:6, ~ custom_plot(.))

Number of clusters: 2

Number of clusters: 3

Number of clusters: 4

Number of clusters: 5

Number of clusters: 6

Time to execute the task

Only useful when executed with Rscript.

proc.time()
#>    user  system elapsed 
#>   7.046   0.381   7.131