Observed data: consolidate land cover dataset


Joining raster data and filtering coordinates inside countries. The variables used are:

  • land cover counts per class (landcover)
  • elevation (elev)
  • slope computed with 8 neighbors (slope_nb8)
  • residential population (pop)
  • distance to major cities (dist)
  • gross disposable household income per head (gdhi)
  • country (country)
  • growing degree day (gdd)
  • maximum annual temperature (max_tmax)
  • minimum annual temperature (min_tmin)
  • soil moisture deficit (smd)
  • soil moisture surplus (sms)

Load packages, read data and source custom scripts

rm(list = ls())
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(stars)
#> Loading required package: abind
#> Loading required package: sf
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
path_proj <- day2day::git_path()
path_data <- file.path(path_proj, "data")
path_raw <- file.path(path_data, "raw")
path_cleaned <- file.path(path_data, "cleaned")
path_processed <- file.path(path_data, "processed")

source(file.path(path_proj, "src", "38-rasters-to-df.R"))

vars_no_chess <- c("landcover", "elev", "slope_nb8", "pop", "dist", "gdhi", "country")
files_no_chess <- file.path(path_processed, paste0("uk_1km_", vars_no_chess, ".tif"))
chess_regex <- "^uk_1km_chess_.+-mean-annual_1991.+-2011.+\\.tif"
files_chess <- list.files(path_processed, chess_regex, full = TRUE)
files <- c(files_no_chess, files_chess)
vars <- gsub("_20yr.+(\\.tif)", "\\1", basename(files)) %>%
    gsub("(uk_1km_chess|uk_1km)_(.+)\\.tif", "\\2", .)
land_labels <- readRDS(file.path(path_cleaned, "landcover_reclass_labels.rds"))

Join datasets

  • Join land classes counts and covariates.
  • Create id to transform back to raster.
  • Remove coordinates outside countries.
land_df <- rasters_to_df(files, vars, additional_vars = TRUE, land_labels = land_labels)
table(land_df$count_total)
#> 
#>   1600 
#> 244105
head(land_df)
#>           x       y count_0_outside count_1_arable count_2_wetland count_3_improved_grassland
#> 8346 460500 1217500              57              0            1433                          0
#> 8347 461500 1217500             572              0             906                          0
#> 8349 463500 1217500             620              0               0                        252
#> 9003 460500 1216500              39              0            1481                          0
#> 9004 461500 1216500             637              0             856                          0
#> 9006 463500 1216500               0              0             481                         34
#>      count_4_forest count_5_semi_natural_grassland count_6_urban count_7_other      elev slope_nb8
#> 8346              0                              0             0           110 156.19780 2.9454014
#> 8347              0                              0             0           122  93.83371 0.6556401
#> 8349              0                            407             0           321  72.43649 5.2689939
#> 9003              0                              0             0            80 124.31470 0.7709251
#> 9004              0                              0             0           107  61.10265 2.3109703
#> 9006              0                            988             0            97 202.09216 2.5376723
#>      pop     dist  gdhi country gdd max_tas max_tasmax maxmonth_tas min_tas min_tasmin smd sms   id
#> 8346   0 77103.83 20124       3  NA      NA         NA           NA      NA         NA  NA  NA 8346
#> 8347   0 77278.71 20124       3  NA      NA         NA           NA      NA         NA  NA  NA 8347
#> 8349   0 77665.95 20124       3  NA      NA         NA           NA      NA         NA  NA  NA 8349
#> 9003   0 76118.33 20124       3  NA      NA         NA           NA      NA         NA  NA  NA 9003
#> 9004   0 76295.48 20124       3  NA      NA         NA           NA      NA         NA  NA  NA 9004
#> 9006   0 76687.68 20124       3  NA      NA         NA           NA      NA         NA  NA  NA 9006
#>      count_total count_no0 count_no_urban
#> 8346        1600      1543           1600
#> 8347        1600      1028           1600
#> 8349        1600       980           1600
#> 9003        1600      1561           1600
#> 9004        1600       963           1600
#> 9006        1600      1600           1600
dim(land_df)
#> [1] 244105     28

Write to a fast to read format

fst::write_fst(land_df, file.path(path_processed, "uk_1km_dataframe.fst"))

Time to execute the task

Only useful when executed with Rscript.

proc.time()
#>    user  system elapsed 
#>  39.298   2.329  41.542