Urban Ipixuna: Correlation analysis of items
Load required libraries and data
rm(list = ls())
library(day2day)
library(ggplot2)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(corrplot)
#> corrplot 0.92 loaded
library(RColorBrewer)
library(patchwork)
library(purrr)
path_proj <- git_path()
path_data <- file.path(path_proj, "data")
path_processed <- file.path(path_data, "processed")
list.files(file.path(path_proj, "src"), full.names = TRUE) |>
purrr::walk(source)
fidata <- st_read(file.path(path_processed, "fi-items-ipixuna-urban.gpkg"), as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source
#> `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg'
#> using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS: WGS 84
Prepare correlation data
We compute the correlation matrix for:
- all observations
- only dry season
- only wet season
items_cor <- fidata |>
st_set_geometry(NULL) |>
dplyr::select(season, matches("^item_[0-9]+_[A-D]")) |>
rename_with(clean_item_names, matches("^item_[0-9]+_[A-D]")) %>%
bind_rows(., .) |>
within(season[(nrow(fidata)+1):(2*nrow(fidata))] <- "all") |>
mutate(season = factor(season, levels = c("all", "dry", "wet"),
labels = c("All", "Dry", "Wet"))) |>
group_by(season) |>
tidyr::nest() |>
arrange(season) |>
mutate(cor_matrix = map(data, ~ cor(., use = "pairwise.complete.obs")))
items_cor
#> # A tibble: 3 × 3
#> # Groups: season [3]
#> season data cor_matrix
#> <fct> <list> <list>
#> 1 All <tibble [200 × 18]> <dbl [18 × 18]>
#> 2 Dry <tibble [100 × 18]> <dbl [18 × 18]>
#> 3 Wet <tibble [100 × 18]> <dbl [18 × 18]>
Visualize correlation
mycorr <- function(x) {
pal <- colorRampPalette(brewer.pal(11, "RdBu"))
corrplot(x, tl.cex = 0.5, tl.col = "black", tl.srt = 70,
col.lim = c(0, 1), col = rep(pal(100), 2))
}
cor_basic <- items_cor |>
mutate(cor_plot = map(cor_matrix, ~ wrap_elements(~ mycorr(.)))) |>
mutate(cor_plot = map2(cor_plot, season, ~ .x + ggtitle(paste("Season:", .y))))
reduce(cor_basic$cor_plot, `+`)
Items clustering
mycorrclust <- function(x, ndim) {
pal <- colorRampPalette(brewer.pal(11, "RdBu"))
corrplot(x, order = "hclust", addrect = ndim,
tl.cex = 0.5, tl.col = "black", tl.srt = 70,
col.lim = c(0, 1), col = rep(pal(100), 2))
}
custom_plot <- function (ndim) {
cat("\n\n")
cat(paste0("### Number of clusters: ", ndim , "\n\n"))
items_cor |>
mutate(cor_plot = map(cor_matrix, ~ wrap_elements(~ mycorrclust(., ndim)))) |>
mutate(cor_plot = map2(cor_plot, season, ~ .x + ggtitle(paste("Season:", .y)))) |>
pull(cor_plot) |>
reduce(`+`) |>
print()
}
walk(2:6, ~ custom_plot(.))
Number of clusters: 2
Number of clusters: 3
Number of clusters: 4
Number of clusters: 5
Number of clusters: 6
Time to execute the task
Only useful when executed with Rscript
.
proc.time()
#> user system elapsed
#> 7.046 0.381 7.131