Prepare food insecurity data
Select and join demographic and food insecurity variables. The main variables needed are:
- Food insecurity questions from `7_foodinsecurity.RData`.
- Seasonality and household data from `1_demography.RData`.
- Income per capita from `4_1_cashtransfers.RData` and `4_2_earnedincome.RData`.
- Education from `1_demography.RData`.
- Fishing status from `6_bushmeat.RData`.
Load required libraries and data
# NOTE: `rm(list = ls())` was dropped here. It only removes user-created
# objects (loaded packages and options are untouched), so it does not provide
# a clean session. Run this script in a fresh R session (e.g. with Rscript).
library(day2day)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggplot2)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
# Project directories. `git_path()` is not a base R function (presumably
# provided by day2day and returning the repository root -- confirm).
path_main <- git_path()
path_data <- file.path(path_main, "data")
# Processed outputs and raw inputs both live under the data directory.
path_processed <- file.path(path_data, "processed")
path_raw <- file.path(path_data, "raw")
# Raw survey sections of the food security study.
path_fi <- file.path(path_raw, "Food-Security-Amazonia")
# Load each survey section stored under `path_fi`. Wrapping load() in
# parentheses prints its return value, i.e. the names of the restored objects.
(load(file.path(path_fi, "1_demography.RData")))
#> [1] "demo"
(load(file.path(path_fi, "7_foodinsecurity.RData")))
#> [1] "foodins"
(load(file.path(path_fi, "4_1_cashtransfers.RData")))
#> [1] "transfers"
(load(file.path(path_fi, "4_2_earnedincome.RData")))
#> [1] "income"
(load(file.path(path_fi, "6_bushmeat.RData")))
#> [1] "fauna"
Clean and subset the demography section
Check seasonality.
# Cross-tabulate the season code against the survey year, the second character
# of the household id (`registro`) and the survey month.
# `useNA` must be an argument of table(): when it is passed to with() it is
# silently ignored, so missing values would never show up in these checks.
with(demo, table(Lseason, data.ano, useNA = "ifany"))
#> data.ano
#> Lseason 2015 2016
#> 1 549 0
#> 2 0 560
with(demo, table(Lseason, substr(registro, 2, 2), useNA = "ifany"))
#>
#> Lseason 1 2
#> 1 549 0
#> 2 0 560
with(demo, table(Lseason, data.mes, useNA = "ifany"))
#> data.mes
#> Lseason 3 4 5 6 7 8 9 10 11 12
#> 1 0 0 1 0 2 126 93 167 133 27
#> 2 140 138 141 140 0 0 0 0 1 0
# month seems to have some errors, but we do not need the precise date.
Check and clean municipality name.
# Re-encode the municipality names from Latin-1 and fold the combined
# "Jutaí/Fonte Boa" label into "Jutaí", then tabulate the cleaned names.
demo <- demo |>
  mutate(
    municipio = iconv(municipio, "latin1", "utf8"),
    municipio = replace(municipio, municipio == "Jutaí/Fonte Boa", "Jutaí")
  )
with(demo, table(municipio))
#> municipio
#> Caapiranga Ipixuna Jutaí Maués
#> 278 280 272 279
Select and clean variables of interest from demography section.
# Recode season and the children indicator as labelled factors, then keep and
# rename the demographic variables used downstream.
demo <- demo |>
  dplyr::mutate(
    # Use the column through the data mask instead of `demo$Lseason`, so the
    # recoding always refers to the rows of the data frame being piped.
    # The Lseason codes index the labels: 1 -> "dry", 2 -> "wet".
    Lseason = factor(c("dry", "wet")[Lseason]),
    any.children = factor(
      any.children,
      levels = c("no", "yes"),
      labels = c("without children", "with children")
    )
  ) |>
  dplyr::select(
    registro,
    date_year = data.ano,
    date_month = data.mes,
    season = Lseason,
    municipality = municipio,
    latitude = gps_latitude,
    longitude = gps_longitude,
    is_rural = rural,
    n_people,
    any_children = any.children,
    max_edu
  )
Clean and subset predictors
- Maximum education: `max_edu` is in the `demo` section.
- Earned income: `earned.income`
- Recent fishing times: `fishing.recent`
# From each section keep the household id (`registro`) plus the single
# predictor that is needed, renamed to snake_case.
income <- dplyr::select(income, registro, earned_income = earned.income)
fauna <- dplyr::select(
  fauna,
  registro,
  fishing_times = x6_4_2_pesca.vezes.30.dias
)
transfers <- dplyr::select(
  transfers,
  registro,
  transfer_total = transfers.tot.r
)
Join demography, food insecurity and predictors
# Start from the food insecurity items and score, attach the demographic
# variables and the predictors by household id, then derive income totals and
# per-capita values.
fidata <- foodins |>
  dplyr::select(
    registro,
    preocup_1:reduce.meat.fish.numeric,
    fi_score = fi.score
  ) |>
  left_join(demo, by = "registro") |>
  left_join(income, by = "registro") |>
  left_join(fauna, by = "registro") |>
  left_join(transfers, by = "registro") |>
  mutate(
    earned_income_capita = earned_income / n_people,
    # Total income is earned income plus cash transfers.
    total_income = earned_income + transfer_total,
    total_income_capita = total_income / n_people
  )
Fill NA for households without children
# Set the elements of `var` selected by `insert` to NA.
#
# var:    vector to modify.
# insert: index (logical, integer or character) of the elements to blank out.
# Returns `var` with the selected elements replaced by NA.
insert_na <- function(var, insert) {
  replace(var, insert, NA)
}
# Blank out every column whose name starts with "b18" for households recorded
# as "without children".
fidata <- fidata |>
  mutate(
    across(
      matches("^b18"),
      \(item) insert_na(item, any_children == "without children")
    )
  )
# total rural and urban
with(fidata, table(is_rural, municipality, useNA = "ifany"))
#> municipality
#> is_rural Caapiranga Ipixuna Jutaí Maués
#> FALSE 198 200 201 199
#> TRUE 80 80 71 80
# total households w/ and w/o children per rural and urban
with(fidata, table(is_rural, any_children, municipality, useNA = "ifany"))
#> , , municipality = Caapiranga
#>
#> any_children
#> is_rural without children with children
#> FALSE 48 150
#> TRUE 9 71
#>
#> , , municipality = Ipixuna
#>
#> any_children
#> is_rural without children with children
#> FALSE 25 175
#> TRUE 7 73
#>
#> , , municipality = Jutaí
#>
#> any_children
#> is_rural without children with children
#> FALSE 28 173
#> TRUE 8 63
#>
#> , , municipality = Maués
#>
#> any_children
#> is_rural without children with children
#> FALSE 45 154
#> TRUE 18 62
Rename items for better understanding
# Lookup from descriptive labels (item number, questionnaire section letter,
# short description) to the original item column names.
fi_varname <- c(
  "1 A worried that food ends" = "preocup_1",
  "2 A run out of food" = "aliment_2",
  "3 A ate few food types" = "poucos_3",
  "4 B skipped a meal" = "ref_4",
  "5 B ate less than required" = "menos_5",
  "6 B hungry but did not eat" = "forme_6",
  "7 B at most one meal per day" = "refdia_7",
  "8 C ate few food types" = "b18pouco_8",
  "9 C ate less than required" = "b18insf_9",
  "10 C decreased food quantity" = "b18quant_10",
  "11 C skipped a meal" = "b18ref_11",
  "12 C hungry but did not eat" = "b18forme_12",
  "13 C at most one meal per day" = "b18dia_13",
  "14 D food just with farinha" = "farinha",
  "15 D credit for eating" = "credito",
  "16 D borrowed food" = "emprest",
  "17 D had meals at neighbors" = "vizin",
  "18 D reduced meat or fish" = "reduce.meat.fish.numeric"
)
# Turn each label into a column name: prefix "item_" and replace the spaces
# with underscores, e.g. "item_1_A_worried_that_food_ends".
names(fi_varname) <- paste0("item_", gsub(" ", "_", names(fi_varname)))
# Rename the columns: names of the lookup are the new names, values the old.
fidata <- rename(fidata, all_of(fi_varname))
str(fidata)
#> 'data.frame': 1109 obs. of 36 variables:
#> $ registro : chr "11001" "11002" "11003" "11004" ...
#> $ item_1_A_worried_that_food_ends : num 0 0 0 0 1 0 1 1 1 1 ...
#> $ item_2_A_run_out_of_food : num 0 0 0 0 0 0 1 1 1 0 ...
#> $ item_3_A_ate_few_food_types : num 0 0 1 0 0 1 1 1 1 1 ...
#> $ item_4_B_skipped_a_meal : num 0 0 0 0 0 0 0 0 1 0 ...
#> $ item_5_B_ate_less_than_required : num 0 0 0 0 0 0 1 1 1 0 ...
#> $ item_6_B_hungry_but_did_not_eat : num 0 0 0 0 0 0 0 0 1 0 ...
#> $ item_7_B_at_most_one_meal_per_day : num 0 0 0 0 0 0 0 0 1 0 ...
#> $ item_8_C_ate_few_food_types : num 0 0 1 0 0 0 1 NA 1 1 ...
#> $ item_9_C_ate_less_than_required : num 0 0 0 0 0 0 1 NA 1 0 ...
#> $ item_10_C_decreased_food_quantity : num 0 0 0 0 0 0 1 NA 1 0 ...
#> $ item_11_C_skipped_a_meal : num 0 0 0 0 0 0 0 NA 1 0 ...
#> $ item_12_C_hungry_but_did_not_eat : num 0 0 0 0 0 0 0 NA 1 0 ...
#> $ item_13_C_at_most_one_meal_per_day: num 0 0 0 0 0 0 0 NA 1 0 ...
#> $ item_14_D_food_just_with_farinha : num 0 0 0 0 0 0 0 1 1 0 ...
#> $ item_15_D_credit_for_eating : num 0 1 0 0 1 1 1 0 1 1 ...
#> $ item_16_D_borrowed_food : num 0 0 0 0 0 0 0 0 1 0 ...
#> $ item_17_D_had_meals_at_neighbors : num 0 0 0 0 0 0 0 1 1 0 ...
#> $ item_18_D_reduced_meat_or_fish : num 0 0 1 0 0 0 1 1 1 0 ...
#> $ fi_score : num 0 1 3 0 2 2 9 7 18 4 ...
#> $ date_year : int 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
#> $ date_month : num 8 8 8 9 8 8 8 8 8 8 ...
#> $ season : Factor w/ 2 levels "dry","wet": 1 1 1 1 1 1 1 1 1 1 ...
#> $ municipality : chr "Ipixuna" "Ipixuna" "Ipixuna" "Ipixuna" ...
#> $ latitude : num -7.05 -7.05 -7.05 -7.05 -7.05 ...
#> $ longitude : num -71.7 -71.7 -71.7 -71.7 -71.7 ...
#> $ is_rural : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
#> $ n_people : int 4 7 4 6 4 7 5 3 6 5 ...
#> $ any_children : Factor w/ 2 levels "without children",..: 2 2 2 2 2 2 2 1 2 2 ...
#> $ max_edu : num 12 12 12 13 13 12 12 6 5 13 ...
#> $ earned_income : num 2062 0 2000 5000 12000 ...
#> $ fishing_times : num 1 0 0 2 1 6 0 0 15 0 ...
#> $ transfer_total : num 193 4780 0 0 0 112 309 904 330 0 ...
#> $ earned_income_capita : num 516 0 500 833 3000 ...
#> $ total_income : num 2256 4780 2000 5000 12000 ...
#> $ total_income_capita : num 564 683 500 833 3000 ...
Count number of non-missing items
# Number of non-missing item responses per household.
fidata$fi_nitems <- rowSums(
  !is.na(dplyr::select(fidata, matches("^item_[0-9]+")))
)
# Express the score as a proportion of the non-missing items and place both
# new columns right after the score.
fidata <- fidata |>
  mutate(fi_proportion = fi_score / fi_nitems) |>
  relocate(fi_nitems, fi_proportion, .after = fi_score)
Check missingness
Check missing values per covariate by municipality and rurality. Missing values for food insecurity items correspond to section C for households without children.
# Count the missing values of every variable within each combination of
# municipality and rurality, then label each group as
# "<municipality>-rural" or "<municipality>-urban".
miss_aux <- fidata |>
  group_by(municipality, is_rural) |>
  summarise(across(everything(), \(x) sum(is.na(x))), .groups = "drop") |>
  mutate(
    municipality = paste0(municipality, "-", ifelse(is_rural, "rural", "urban"))
  ) |>
  dplyr::select(-is_rural)
# Transpose so that variables are rows and the labelled groups are columns.
miss_aux[, -1] |>
  t() |>
  data.frame() |>
  setNames(miss_aux[[1]])
#> Caapiranga-urban Caapiranga-rural Ipixuna-urban Ipixuna-rural
#> registro 0 0 0 0
#> item_1_A_worried_that_food_ends 0 0 0 0
#> item_2_A_run_out_of_food 0 0 0 0
#> item_3_A_ate_few_food_types 0 0 0 0
#> item_4_B_skipped_a_meal 0 0 0 0
#> item_5_B_ate_less_than_required 0 0 0 0
#> item_6_B_hungry_but_did_not_eat 0 0 0 0
#> item_7_B_at_most_one_meal_per_day 0 0 0 0
#> item_8_C_ate_few_food_types 48 9 25 7
#> item_9_C_ate_less_than_required 48 9 25 7
#> item_10_C_decreased_food_quantity 48 9 25 7
#> item_11_C_skipped_a_meal 48 9 25 7
#> item_12_C_hungry_but_did_not_eat 48 9 25 7
#> item_13_C_at_most_one_meal_per_day 48 9 25 7
#> item_14_D_food_just_with_farinha 0 0 0 0
#> item_15_D_credit_for_eating 0 0 0 0
#> item_16_D_borrowed_food 0 0 0 0
#> item_17_D_had_meals_at_neighbors 0 0 0 0
#> item_18_D_reduced_meat_or_fish 0 0 0 0
#> fi_score 0 0 0 0
#> fi_nitems 0 0 0 0
#> fi_proportion 0 0 0 0
#> date_year 0 0 0 0
#> date_month 0 0 0 0
#> season 0 0 0 0
#> latitude 0 0 0 0
#> longitude 0 0 0 0
#> n_people 0 0 0 0
#> any_children 0 0 0 0
#> max_edu 0 0 0 0
#> earned_income 0 0 0 0
#> fishing_times 0 0 0 0
#> transfer_total 0 0 0 0
#> earned_income_capita 0 0 0 0
#> total_income 0 0 0 0
#> total_income_capita 0 0 0 0
#> Jutaí-urban Jutaí-rural Maués-urban Maués-rural
#> registro 0 0 0 0
#> item_1_A_worried_that_food_ends 0 0 0 0
#> item_2_A_run_out_of_food 0 0 0 0
#> item_3_A_ate_few_food_types 0 0 0 0
#> item_4_B_skipped_a_meal 0 0 0 0
#> item_5_B_ate_less_than_required 0 0 0 0
#> item_6_B_hungry_but_did_not_eat 0 0 0 0
#> item_7_B_at_most_one_meal_per_day 0 0 0 0
#> item_8_C_ate_few_food_types 28 8 45 18
#> item_9_C_ate_less_than_required 28 8 45 18
#> item_10_C_decreased_food_quantity 28 8 45 18
#> item_11_C_skipped_a_meal 28 8 45 18
#> item_12_C_hungry_but_did_not_eat 28 8 45 18
#> item_13_C_at_most_one_meal_per_day 28 8 45 18
#> item_14_D_food_just_with_farinha 0 0 0 0
#> item_15_D_credit_for_eating 0 0 0 0
#> item_16_D_borrowed_food 0 0 0 0
#> item_17_D_had_meals_at_neighbors 0 0 0 0
#> item_18_D_reduced_meat_or_fish 0 0 0 0
#> fi_score 0 0 0 0
#> fi_nitems 0 0 0 0
#> fi_proportion 0 0 0 0
#> date_year 0 0 0 0
#> date_month 0 0 0 0
#> season 0 0 0 0
#> latitude 0 0 0 0
#> longitude 0 0 0 0
#> n_people 0 0 0 0
#> any_children 0 0 0 0
#> max_edu 0 0 0 0
#> earned_income 0 0 0 0
#> fishing_times 0 0 0 0
#> transfer_total 0 0 0 0
#> earned_income_capita 0 0 0 0
#> total_income 0 0 0 0
#> total_income_capita 0 0 0 0
Convert to a spatial dataset
# Turn the table into an sf point layer built from the longitude/latitude
# columns (CRS EPSG:4326) and write it to the processed data folder,
# deleting any existing file first.
fidata <- fidata |>
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)
fidata_path <- file.path(path_processed, "fi-items.gpkg")
st_write(obj = fidata, dsn = fidata_path, delete_dsn = TRUE)
#> Deleting source `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items.gpkg' using driver `GPKG'
#> Writing layer `fi-items' to data source
#> `/home/rstudio/documents/projects/food-insecurity-mapping/data/processed/fi-items.gpkg' using driver `GPKG'
#> Writing 1109 features with 36 fields and geometry type Point.
Visualize locations
# Plot the household points, coloured by municipality, with one panel per
# value of `is_rural`.
ggplot(data = fidata) +
  geom_sf(mapping = aes(col = municipality, shape = is_rural), size = 1) +
  facet_wrap(~ is_rural, ncol = 1) +
  labs(shape = "Is Rural?", color = "Municipality") +
  ggtitle("Surveyed household locations")
Time to execute the task
Only useful when executed with `Rscript`.
# Report the CPU (user/system) and elapsed time used by this R process so far.
proc.time()
#> user system elapsed
#> 1.943 0.376 2.002