Prepare land suitability dataset to model
Prepare the final data to be used as input for the land suitability model at 1 km resolution.
Load packages, read data and source custom scripts
# Attach the packages used by this script: ggplot2 for the maps, scales for the
# axis label formatting, and stars (which also loads abind and sf).
#
# NOTE: the former `rm(list = ls())` call was removed on purpose. It only
# deletes user objects from the global environment (attached packages, options
# and the working directory are untouched), so it does not provide a clean
# session, and it silently wipes the caller's workspace when the script is
# sourced interactively. Run the script in a fresh R session (e.g. Rscript).
library(ggplot2)
library(scales)
library(stars)
#> Loading required package: abind
#> Loading required package: sf
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
# Build the directory layout relative to the project path returned by
# day2day::git_path() (presumably the git repository root -- confirm):
#   <path_proj>/data/processed
path_proj <- day2day::git_path()
path_data <- file.path(path_proj, "data")
path_processed <- file.path(path_data, "processed")

# Read the `uk_1km_dataframe.fst` file from the processed-data directory.
land_df <- fst::read_fst(
  path = file.path(path_processed, "uk_1km_dataframe.fst")
)
Prepare input data for modelling
Remove all the rows with at least one missing value.
# Keep complete cases only: any row with an NA in any column is dropped.
land_df <- stats::na.omit(land_df)
Visualize full dataset for modelling
Visualize the pixels that will be modelled.
# Map of every row in `land_df`: one tile per (x, y) position, filled by
# country. Axis tick labels show the coordinate scaled by 1/1000 with a " km"
# suffix; axis titles are suppressed and the aspect ratio is fixed.
ggplot(data = land_df, mapping = aes(x = x, y = y)) +
  geom_tile(mapping = aes(fill = factor(country))) +
  scale_x_continuous(labels = label_number(scale = 1 / 1e3, suffix = " km")) +
  scale_y_continuous(labels = label_number(scale = 1 / 1e3, suffix = " km")) +
  coord_fixed() +
  theme_bw() +
  labs(x = NULL, y = NULL, color = NULL, fill = "Country")
Pseudo mask-out urban
# Print the 45th to 100th percentiles (in 5-point steps) of `count_6_urban`.
quantile(
  land_df[["count_6_urban"]],
  probs = seq(from = 0.45, to = 1, by = 0.05)
)
#> 45% 50% 55% 60% 65% 70% 75% 80% 85% 90% 95% 100%
#> 0 2 10 14 22 32 49 81 147 313 813 1600
# Keep only the rows whose `count_6_urban` is strictly below the column's 90th
# percentile; rows at or above that value are excluded from the masked dataset.
land_df_maskout <- subset(
  x = land_df,
  subset = count_6_urban < quantile(count_6_urban, probs = 0.9)
)
Visualize masked out dataset for modelling
# Map of the rows kept in `land_df_maskout`: one tile per (x, y) position,
# filled by country. Axis tick labels show the coordinate scaled by 1/1000 with
# a " km" suffix; axis titles are suppressed and the aspect ratio is fixed.
ggplot(data = land_df_maskout, mapping = aes(x = x, y = y)) +
  geom_tile(mapping = aes(fill = factor(country))) +
  scale_x_continuous(labels = label_number(scale = 1 / 1e3, suffix = " km")) +
  scale_y_continuous(labels = label_number(scale = 1 / 1e3, suffix = " km")) +
  coord_fixed() +
  theme_bw() +
  labs(x = NULL, y = NULL, color = NULL, fill = "Country")
Write data to be modelled
# Save both modelling inputs to the processed-data directory: the complete-case
# dataset and the variant filtered on `count_6_urban`.
fst::write_fst(
  x = land_df,
  path = file.path(path_processed, "uk_1km_dataframe_train_full.fst")
)
fst::write_fst(
  x = land_df_maskout,
  path = file.path(path_processed, "uk_1km_dataframe_train_maskout.fst")
)
Time to execute the task
Only useful when executed with Rscript.
# Print the user, system and elapsed time (in seconds) consumed by the running
# R process so far; as the last statement it approximates the script's runtime.
proc.time()
#> user system elapsed
#> 56.876 1.765 58.559