Urban Ipixuna: Descriptive analysis of items


Load required libraries and data

rm(list = ls())
library(day2day)
library(ggplot2)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
path_main <- git_path()
path_data <- file.path(path_main, "data")
path_processed <- file.path(path_data, "processed")

fidata <- st_read(file.path(path_processed, "fi-items-ipixuna-urban.gpkg"), as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source 
#>   `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg' 
#>   using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS:  WGS 84

Food insecurity score

First, we show the empirical distribution of the food insecurity score for all the households. The score between household with an without children can not be compared given that some items can be answered only for families with children.

# Functions to change plot order of bars, change levels in underlying factor
as_count <- function(x) factor(x, levels = 0:max(x, na.rm = TRUE))
ggplot(fidata, aes(as_count(fi_score))) +
    geom_bar() +
    labs(x = "score", title = "Absolute frequency of food insecurity score") +
    facet_wrap(~ any_children, ncol = 1)

Food insecurity proportion

Absolute frequency of food insecurity proportion by group

It is more important to visualize the proportion of endorsed items. The proportion is comparable in households with and without children because the proportion can take a value between 0 and 1.

ggplot(fidata, aes(fi_proportion)) +
    geom_histogram(binwidth = 0.05, color = "white", center = 0.025) +
    labs(x = "proportion", title = "Absolute frequency of food insecurity proportion") +
    facet_wrap(~ any_children, ncol = 1)

Relative frequency of food insecurity proportion by group

Compare the distribution of proportion of endorsed items with and without children. The empirical mean proportion of households with children is higher than households without children.

fi_proportion_by_children <- fidata |>
    st_set_geometry(NULL) |>
    group_by(any_children) |>
    summarise(mean = mean(fi_proportion))

ggplot(fidata, aes(fi_proportion)) +
    geom_histogram(aes(y = stat(width * density)), binwidth = 0.05,
                   color = "white", center = 0.025) +
    geom_vline(aes(xintercept = mean), fi_proportion_by_children, col = 2) +
    labs(x = "proportion", title = "Relative frequency of food insecurity proportion",
         y = "relative frequency") +
    facet_wrap(~ any_children, ncol = 1)
#> Warning: `stat(width * density)` was deprecated in ggplot2 3.4.0.
#> ℹ Please use `after_stat(width * density)` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

Relative frequency of food insecurity proportion by season

Compare the distribution of proportion of endorsed items by season. The proportion mean per season are close; however, it can be seen that there is a high frequency of households high low proportion of endorsed items (low food insecurity).

fi_proportion_by_season <- fidata |>
    st_set_geometry(NULL) |>
    group_by(season) |>
    summarise(mean = mean(fi_proportion))

ggplot(fidata, aes(fi_proportion)) +
    geom_histogram(aes(y = stat(width * density)), binwidth = 0.05,
                   color = "white", center = 0.025) +
    geom_vline(aes(xintercept = mean), fi_proportion_by_season, col = 2) +
    labs(x = "proportion", title = "Relative frequency of food insecurity proportion",
         y = "relative frequency") +
    facet_wrap(~ season, ncol = 1)

Relative frequency of food insecurity proportion by season and children

Compare the distribution of proportion of endorsed items by season and children.

fi_proportion_by_season_children <- fidata |>
    st_set_geometry(NULL) |>
    group_by(season, any_children) |>
    summarise(mean = mean(fi_proportion)) |>
    ungroup()
#> `summarise()` has grouped output by 'season'. You can override using the `.groups` argument.
ggplot(fidata, aes(fi_proportion)) +
    geom_histogram(aes(y = stat(width * density)), binwidth = 0.05,
                   color = "white", center = 0.025) +
    geom_vline(aes(xintercept = mean), fi_proportion_by_season_children, col = 2) +
    labs(x = "proportion", title = "Relative frequency of food insecurity proportion",
         y = "relative frequency") +
    facet_wrap(~ season + any_children, ncol = 1)

Summary of Items by season

Descriptive analysis of items by season.

fi_items <- fidata |>
    st_set_geometry(NULL) |>
    dplyr::select(matches("^item"), registro, season) %>%
    bind_rows(., .) |>
    within(season[(nrow(fidata)+1):(2*nrow(fidata))] <- "all") |>
    tidyr::pivot_longer(-c(registro, season),
                        names_to = c("item", "section", "question"),
                        names_pattern = "item_([[:digit:]]+)_([[:alpha:]]+)_(.+)",
                        values_to = "varvalue") |>
    mutate(item = as.integer(item), question = gsub("_", " ", question))


fi_items |>
    group_by(season, item, section, question) |>
    summarise(
        n_na = sum(is.na(varvalue)),
        mean = mean(varvalue, na.rm = TRUE),
        median = median(varvalue, na.rm = TRUE),
        variance = var(varvalue, na.rm = TRUE)
    ) |>
    ungroup() |>
    tidyr::pivot_longer(n_na:variance, names_to = "vars", values_to = "value") |>
    mutate(vars = paste0(vars, "-", season)) |>
    dplyr::select(- season) |>
    tidyr::pivot_wider(names_from = vars, values_from = value) |>
    relocate(matches("^n_na"), matches("^mean"), matches("^median"), matches("^variance"),
             .after = question) |>
    knitr::kable()
#> `summarise()` has grouped output by 'season', 'item', 'section'. You can override using the
#> `.groups` argument.
item section question n_na-all n_na-dry n_na-wet mean-all mean-dry mean-wet median-all median-dry median-wet variance-all variance-dry variance-wet
1 A worried that food ends 0 0 0 0.5650000 0.5200000 0.6100000 1 1 1 0.2470101 0.2521212 0.2403030
2 A run out of food 0 0 0 0.5200000 0.5800000 0.4600000 1 1 0 0.2508543 0.2460606 0.2509091
3 A ate few food types 0 0 0 0.6400000 0.7100000 0.5700000 1 1 1 0.2315578 0.2079798 0.2475758
4 B skipped a meal 0 0 0 0.3050000 0.2800000 0.3300000 0 0 0 0.2130402 0.2036364 0.2233333
5 B ate less than required 0 0 0 0.4100000 0.4100000 0.4100000 0 0 0 0.2431156 0.2443434 0.2443434
6 B hungry but did not eat 0 0 0 0.2400000 0.2400000 0.2400000 0 0 0 0.1833166 0.1842424 0.1842424
7 B at most one meal per day 0 0 0 0.2600000 0.2500000 0.2700000 0 0 0 0.1933668 0.1893939 0.1990909
8 C ate few food types 25 9 16 0.4857143 0.5604396 0.4047619 0 1 0 0.2512315 0.2490842 0.2438325
9 C ate less than required 25 9 16 0.3142857 0.3296703 0.2976190 0 0 0 0.2167488 0.2234432 0.2115605
10 C decreased food quantity 25 9 16 0.3600000 0.3956044 0.3214286 0 0 0 0.2317241 0.2417582 0.2207401
11 C skipped a meal 25 9 16 0.2285714 0.1978022 0.2619048 0 0 0 0.1773399 0.1604396 0.1956397
12 C hungry but did not eat 25 9 16 0.2000000 0.1538462 0.2500000 0 0 0 0.1609195 0.1316239 0.1897590
13 C at most one meal per day 25 9 16 0.1771429 0.1648352 0.1904762 0 0 0 0.1466010 0.1391941 0.1560528
14 D food just with farinha 0 0 0 0.1650000 0.1700000 0.1600000 0 0 0 0.1384673 0.1425253 0.1357576
15 D credit for eating 0 0 0 0.6750000 0.7100000 0.6400000 1 1 1 0.2204774 0.2079798 0.2327273
16 D borrowed food 0 0 0 0.1400000 0.2100000 0.0700000 0 0 0 0.1210050 0.1675758 0.0657576
17 D had meals at neighbors 0 0 0 0.1750000 0.2300000 0.1200000 0 0 0 0.1451005 0.1788889 0.1066667
18 D reduced meat or fish 0 0 0 0.5350000 0.5500000 0.5200000 1 1 1 0.2500251 0.2500000 0.2521212
fi_items |>
    group_by(season, item, section, question) |>
    filter(season != "all") |>
    summarise(prop = mean(varvalue, na.rm = TRUE)) |>
    mutate(item = factor(paste(item, section, sep = "-"))) |>
    ggplot() +
        geom_col(aes(item, prop, fill = season), position = position_dodge()) +
        theme_bw()
#> `summarise()` has grouped output by 'season', 'item', 'section'. You can override using the
#> `.groups` argument.

Time to execute the task

Only useful when executed with Rscript.

proc.time()
#>    user  system elapsed 
#>   2.725   0.394   2.806