Observed data: consolidate land cover dataset
Joining raster data and filtering coordinates inside countries. The variables used are:
- land cover counts per class (
landcover
) - elevation (
elev
) - slope computed with 8 neighbors (
slope_nb8
) - residential population (
pop
) - distance to major cities (
dist
) - gross disposable household income per head (
gdhi
) - country (
country
) - growing degree day (
gdd
) - maximum annual temperature (
max_tmax
) - minimum annual temperature (
min_tmin
) - soil moisture deficit (
smd
) - soil moisture surplus (
sms
)
Load packages, read data and source custom scripts
rm(list = ls())
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(stars)
#> Loading required package: abind
#> Loading required package: sf
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
path_proj <- day2day::git_path()
path_data <- file.path(path_proj, "data")
path_raw <- file.path(path_data, "raw")
path_cleaned <- file.path(path_data, "cleaned")
path_processed <- file.path(path_data, "processed")
source(file.path(path_proj, "src", "38-rasters-to-df.R"))
vars_no_chess <- c("landcover", "elev", "slope_nb8", "pop", "dist", "gdhi", "country")
files_no_chess <- file.path(path_processed, paste0("uk_1km_", vars_no_chess, ".tif"))
chess_regex <- "^uk_1km_chess_.+-mean-annual_1991.+-2011.+\\.tif"
files_chess <- list.files(path_processed, chess_regex, full = TRUE)
files <- c(files_no_chess, files_chess)
vars <- gsub("_20yr.+(\\.tif)", "\\1", basename(files)) %>%
gsub("(uk_1km_chess|uk_1km)_(.+)\\.tif", "\\2", .)
land_labels <- readRDS(file.path(path_cleaned, "landcover_reclass_labels.rds"))
Join datasets
- Join land classes counts and covariates.
- Create id to transform back to raster.
- Remove coordinates outside countries.
land_df <- rasters_to_df(files, vars, additional_vars = TRUE, land_labels = land_labels)
table(land_df$count_total)
#>
#> 1600
#> 244105
head(land_df)
#> x y count_0_outside count_1_arable count_2_wetland count_3_improved_grassland
#> 8346 460500 1217500 57 0 1433 0
#> 8347 461500 1217500 572 0 906 0
#> 8349 463500 1217500 620 0 0 252
#> 9003 460500 1216500 39 0 1481 0
#> 9004 461500 1216500 637 0 856 0
#> 9006 463500 1216500 0 0 481 34
#> count_4_forest count_5_semi_natural_grassland count_6_urban count_7_other elev slope_nb8
#> 8346 0 0 0 110 156.19780 2.9454014
#> 8347 0 0 0 122 93.83371 0.6556401
#> 8349 0 407 0 321 72.43649 5.2689939
#> 9003 0 0 0 80 124.31470 0.7709251
#> 9004 0 0 0 107 61.10265 2.3109703
#> 9006 0 988 0 97 202.09216 2.5376723
#> pop dist gdhi country gdd max_tas max_tasmax maxmonth_tas min_tas min_tasmin smd sms id
#> 8346 0 77103.83 20124 3 NA NA NA NA NA NA NA NA 8346
#> 8347 0 77278.71 20124 3 NA NA NA NA NA NA NA NA 8347
#> 8349 0 77665.95 20124 3 NA NA NA NA NA NA NA NA 8349
#> 9003 0 76118.33 20124 3 NA NA NA NA NA NA NA NA 9003
#> 9004 0 76295.48 20124 3 NA NA NA NA NA NA NA NA 9004
#> 9006 0 76687.68 20124 3 NA NA NA NA NA NA NA NA 9006
#> count_total count_no0 count_no_urban
#> 8346 1600 1543 1600
#> 8347 1600 1028 1600
#> 8349 1600 980 1600
#> 9003 1600 1561 1600
#> 9004 1600 963 1600
#> 9006 1600 1600 1600
dim(land_df)
#> [1] 244105 28
Write to a fast to read format
fst::write_fst(land_df, file.path(path_processed, "uk_1km_dataframe.fst"))
Time to execute the task
Only useful when executed with Rscript
.
proc.time()
#> user system elapsed
#> 39.298 2.329 41.542