Births frequency per hydrological week


Summarise the number of births per week and model it using Poisson regression with cyclic cubic splines.

Load packages, read data and source custom scripts

rm(list = ls())
library(ggplot2)
library(bamlss)
#> Loading required package: coda
#> Loading required package: colorspace
#> Loading required package: mgcv
#> Loading required package: nlme
#> This is mgcv 1.8-31. For overview type 'help("mgcv-package")'.
#> 
#> Attaching package: 'bamlss'
#> The following object is masked from 'package:mgcv':
#> 
#>     smooth.construct
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:bamlss':
#> 
#>     n
#> The following object is masked from 'package:nlme':
#> 
#>     collapse
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
path_proj <- day2day::git_path()
path_data <- file.path(path_proj, "data")
path_processed <- file.path(path_data, "processed")
path_modelled <- file.path(path_data, "modelled")
path_summarised <- file.path(path_data, "summarised")

source(file.path(path_proj, "src", "51-bamlss.R"))
source(file.path(path_proj, "src", "56-bamlss-vis-gg.R"))

bwdata_model <- fst::read_fst(file.path(path_processed, "bwdata_41_model.fst"))

Modelling number of births

births_per_rivwk <- bwdata_model %>%
    group_by(rivwk_conception) %>%
    summarise(n = n())
#> `summarise()` ungrouping output (override with `.groups` argument)
birth_model <- bamlss(n ~ s(rivwk_conception, bs = "cc"), "poisson", births_per_rivwk)
#> AICc 2322.906 logPost -1169.95 logLik -1150.36 edf 8.9998 eps 0.0079 iteration   1
#> AICc 718.6427 logPost -356.284 logLik -348.231 edf 8.9981 eps 0.0069 iteration   2
#> AICc 716.5127 logPost -343.868 logLik -347.190 edf 8.9816 eps 0.0002 iteration   3
#> AICc 716.2027 logPost -333.656 logLik -347.269 edf 8.8244 eps 0.0000 iteration   4
#> AICc 716.2027 logPost -333.656 logLik -347.269 edf 8.8244 eps 0.0000 iteration   4
#> elapsed time:  0.33sec
#> Starting the sampler...
#> 
#> |                    |   0%  6.19sec
#> |*                   |   5%  6.08sec  0.32sec
#> |**                  |  10%  5.69sec  0.63sec
#> |***                 |  15%  5.66sec  1.00sec
#> |****                |  20%  5.68sec  1.42sec
#> |*****               |  25%  5.26sec  1.75sec
#> |******              |  30%  4.89sec  2.09sec
#> |*******             |  35%  4.54sec  2.44sec
#> |********            |  40%  4.79sec  3.19sec
#> |*********           |  45%  4.31sec  3.53sec
#> |**********          |  50%  3.87sec  3.87sec
#> |***********         |  55%  3.44sec  4.20sec
#> |************        |  60%  3.03sec  4.54sec
#> |*************       |  65%  2.63sec  4.88sec
#> |**************      |  70%  2.23sec  5.21sec
#> |***************     |  75%  1.85sec  5.54sec
#> |****************    |  80%  1.47sec  5.88sec
#> |*****************   |  85%  1.10sec  6.22sec
#> |******************  |  90%  0.73sec  6.55sec
#> |******************* |  95%  0.36sec  6.88sec
#> |********************| 100%  0.00sec  7.22sec

Visualize births per hydrological week

gg <- gg_smooths_river(
    list(birth_model), births_per_rivwk, "lambda", "s(rivwk_conception)",
    types = factor(c("modelled")), intercept = TRUE, FUN = c95_exp
    ) +
    geom_line(aes(rivwk_conception, n, col = "raw"), births_per_rivwk,
              alpha = 0.5, size = rel(0.5)) +
    labs(y = "Number of births", x = "Seasonal river level index", col = NULL) +
    guides(fill = "none", linetype = "none") +
    theme(legend.position = "bottom", plot.margin = margin(0, 3, 0, 3),
          legend.margin = margin(0, 0, 0, 0), legend.box.margin = margin(-10, 0, 0, 0))
#> Note: Using an external vector in selections is ambiguous.
#> ℹ Use `all_of(var)` instead of `var` to silence this message.
#> ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
#> This message is displayed once per session.
ggsave(file.path(path_summarised, "data-births-per-week.pdf"), gg,
       width = 10, height = 6.5, units = "cm", device = cairo_pdf)

gg

Time to execute the task

Only useful when executed with Rscript.

proc.time()
#>    user  system elapsed 
#>  20.403   0.605  20.980