Urban Ipixuna: Descriptive analysis of items
Load required libraries and data
rm(list = ls())
library(day2day)
library(ggplot2)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
path_main <- git_path()
path_data <- file.path(path_main, "data")
path_processed <- file.path(path_data, "processed")
fidata <- st_read(file.path(path_processed, "fi-items-ipixuna-urban.gpkg"), as_tibble = TRUE)
#> Reading layer `fi-items-ipixuna-urban' from data source
#> `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items-ipixuna-urban.gpkg'
#> using driver `GPKG'
#> Simple feature collection with 200 features and 36 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -71.70038 ymin: -7.06058 xmax: -71.68109 ymax: -7.03724
#> Geodetic CRS: WGS 84
Food insecurity score
First, we show the empirical distribution of the food insecurity score for all the households. The score between household with an without children can not be compared given that some items can be answered only for families with children.
# Functions to change plot order of bars, change levels in underlying factor
as_count <- function(x) factor(x, levels = 0:max(x, na.rm = TRUE))
ggplot(fidata, aes(as_count(fi_score))) +
geom_bar() +
labs(x = "score", title = "Absolute frequency of food insecurity score") +
facet_wrap(~ any_children, ncol = 1)
Food insecurity proportion
Absolute frequency of food insecurity proportion by group
It is more important to visualize the proportion of endorsed items. The proportion is comparable in households with and without children because the proportion can take a value between 0 and 1.
ggplot(fidata, aes(fi_proportion)) +
geom_histogram(binwidth = 0.05, color = "white", center = 0.025) +
labs(x = "proportion", title = "Absolute frequency of food insecurity proportion") +
facet_wrap(~ any_children, ncol = 1)
Relative frequency of food insecurity proportion by group
Compare the distribution of proportion of endorsed items with and without children. The empirical mean proportion of households with children is higher than households without children.
fi_proportion_by_children <- fidata |>
st_set_geometry(NULL) |>
group_by(any_children) |>
summarise(mean = mean(fi_proportion))
ggplot(fidata, aes(fi_proportion)) +
geom_histogram(aes(y = stat(width * density)), binwidth = 0.05,
color = "white", center = 0.025) +
geom_vline(aes(xintercept = mean), fi_proportion_by_children, col = 2) +
labs(x = "proportion", title = "Relative frequency of food insecurity proportion",
y = "relative frequency") +
facet_wrap(~ any_children, ncol = 1)
#> Warning: `stat(width * density)` was deprecated in ggplot2 3.4.0.
#> ℹ Please use `after_stat(width * density)` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Relative frequency of food insecurity proportion by season
Compare the distribution of proportion of endorsed items by season. The proportion mean per season are close; however, it can be seen that there is a high frequency of households high low proportion of endorsed items (low food insecurity).
fi_proportion_by_season <- fidata |>
st_set_geometry(NULL) |>
group_by(season) |>
summarise(mean = mean(fi_proportion))
ggplot(fidata, aes(fi_proportion)) +
geom_histogram(aes(y = stat(width * density)), binwidth = 0.05,
color = "white", center = 0.025) +
geom_vline(aes(xintercept = mean), fi_proportion_by_season, col = 2) +
labs(x = "proportion", title = "Relative frequency of food insecurity proportion",
y = "relative frequency") +
facet_wrap(~ season, ncol = 1)
Relative frequency of food insecurity proportion by season and children
Compare the distribution of proportion of endorsed items by season and children.
fi_proportion_by_season_children <- fidata |>
st_set_geometry(NULL) |>
group_by(season, any_children) |>
summarise(mean = mean(fi_proportion)) |>
ungroup()
#> `summarise()` has grouped output by 'season'. You can override using the `.groups` argument.
ggplot(fidata, aes(fi_proportion)) +
geom_histogram(aes(y = stat(width * density)), binwidth = 0.05,
color = "white", center = 0.025) +
geom_vline(aes(xintercept = mean), fi_proportion_by_season_children, col = 2) +
labs(x = "proportion", title = "Relative frequency of food insecurity proportion",
y = "relative frequency") +
facet_wrap(~ season + any_children, ncol = 1)
Summary of Items by season
Descriptive analysis of items by season.
fi_items <- fidata |>
st_set_geometry(NULL) |>
dplyr::select(matches("^item"), registro, season) %>%
bind_rows(., .) |>
within(season[(nrow(fidata)+1):(2*nrow(fidata))] <- "all") |>
tidyr::pivot_longer(-c(registro, season),
names_to = c("item", "section", "question"),
names_pattern = "item_([[:digit:]]+)_([[:alpha:]]+)_(.+)",
values_to = "varvalue") |>
mutate(item = as.integer(item), question = gsub("_", " ", question))
fi_items |>
group_by(season, item, section, question) |>
summarise(
n_na = sum(is.na(varvalue)),
mean = mean(varvalue, na.rm = TRUE),
median = median(varvalue, na.rm = TRUE),
variance = var(varvalue, na.rm = TRUE)
) |>
ungroup() |>
tidyr::pivot_longer(n_na:variance, names_to = "vars", values_to = "value") |>
mutate(vars = paste0(vars, "-", season)) |>
dplyr::select(- season) |>
tidyr::pivot_wider(names_from = vars, values_from = value) |>
relocate(matches("^n_na"), matches("^mean"), matches("^median"), matches("^variance"),
.after = question) |>
knitr::kable()
#> `summarise()` has grouped output by 'season', 'item', 'section'. You can override using the
#> `.groups` argument.
item | section | question | n_na-all | n_na-dry | n_na-wet | mean-all | mean-dry | mean-wet | median-all | median-dry | median-wet | variance-all | variance-dry | variance-wet |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | A | worried that food ends | 0 | 0 | 0 | 0.5650000 | 0.5200000 | 0.6100000 | 1 | 1 | 1 | 0.2470101 | 0.2521212 | 0.2403030 |
2 | A | run out of food | 0 | 0 | 0 | 0.5200000 | 0.5800000 | 0.4600000 | 1 | 1 | 0 | 0.2508543 | 0.2460606 | 0.2509091 |
3 | A | ate few food types | 0 | 0 | 0 | 0.6400000 | 0.7100000 | 0.5700000 | 1 | 1 | 1 | 0.2315578 | 0.2079798 | 0.2475758 |
4 | B | skipped a meal | 0 | 0 | 0 | 0.3050000 | 0.2800000 | 0.3300000 | 0 | 0 | 0 | 0.2130402 | 0.2036364 | 0.2233333 |
5 | B | ate less than required | 0 | 0 | 0 | 0.4100000 | 0.4100000 | 0.4100000 | 0 | 0 | 0 | 0.2431156 | 0.2443434 | 0.2443434 |
6 | B | hungry but did not eat | 0 | 0 | 0 | 0.2400000 | 0.2400000 | 0.2400000 | 0 | 0 | 0 | 0.1833166 | 0.1842424 | 0.1842424 |
7 | B | at most one meal per day | 0 | 0 | 0 | 0.2600000 | 0.2500000 | 0.2700000 | 0 | 0 | 0 | 0.1933668 | 0.1893939 | 0.1990909 |
8 | C | ate few food types | 25 | 9 | 16 | 0.4857143 | 0.5604396 | 0.4047619 | 0 | 1 | 0 | 0.2512315 | 0.2490842 | 0.2438325 |
9 | C | ate less than required | 25 | 9 | 16 | 0.3142857 | 0.3296703 | 0.2976190 | 0 | 0 | 0 | 0.2167488 | 0.2234432 | 0.2115605 |
10 | C | decreased food quantity | 25 | 9 | 16 | 0.3600000 | 0.3956044 | 0.3214286 | 0 | 0 | 0 | 0.2317241 | 0.2417582 | 0.2207401 |
11 | C | skipped a meal | 25 | 9 | 16 | 0.2285714 | 0.1978022 | 0.2619048 | 0 | 0 | 0 | 0.1773399 | 0.1604396 | 0.1956397 |
12 | C | hungry but did not eat | 25 | 9 | 16 | 0.2000000 | 0.1538462 | 0.2500000 | 0 | 0 | 0 | 0.1609195 | 0.1316239 | 0.1897590 |
13 | C | at most one meal per day | 25 | 9 | 16 | 0.1771429 | 0.1648352 | 0.1904762 | 0 | 0 | 0 | 0.1466010 | 0.1391941 | 0.1560528 |
14 | D | food just with farinha | 0 | 0 | 0 | 0.1650000 | 0.1700000 | 0.1600000 | 0 | 0 | 0 | 0.1384673 | 0.1425253 | 0.1357576 |
15 | D | credit for eating | 0 | 0 | 0 | 0.6750000 | 0.7100000 | 0.6400000 | 1 | 1 | 1 | 0.2204774 | 0.2079798 | 0.2327273 |
16 | D | borrowed food | 0 | 0 | 0 | 0.1400000 | 0.2100000 | 0.0700000 | 0 | 0 | 0 | 0.1210050 | 0.1675758 | 0.0657576 |
17 | D | had meals at neighbors | 0 | 0 | 0 | 0.1750000 | 0.2300000 | 0.1200000 | 0 | 0 | 0 | 0.1451005 | 0.1788889 | 0.1066667 |
18 | D | reduced meat or fish | 0 | 0 | 0 | 0.5350000 | 0.5500000 | 0.5200000 | 1 | 1 | 1 | 0.2500251 | 0.2500000 | 0.2521212 |
fi_items |>
group_by(season, item, section, question) |>
filter(season != "all") |>
summarise(prop = mean(varvalue, na.rm = TRUE)) |>
mutate(item = factor(paste(item, section, sep = "-"))) |>
ggplot() +
geom_col(aes(item, prop, fill = season), position = position_dodge()) +
theme_bw()
#> `summarise()` has grouped output by 'season', 'item', 'section'. You can override using the
#> `.groups` argument.
Time to execute the task
Only useful when executed with Rscript
.
proc.time()
#> user system elapsed
#> 2.725 0.394 2.806