Load libraries
library(tidyverse)
library(mgcv)
library(mgcViz)
library(lubridate)
Import data
df100<-read_csv("processed_data/nsd_db100_recurse.csv")
Some extra data cleaning
df100$year<-year(df100$timestamp)
df100$juliand<-yday(df100$timestamp)
df100$age_yr<-as.factor(df100$age_yr)
df100$id<-as.factor(df100$id)
df100<-df100%>%filter(id!="3A (Barb Wire)")
df100<-df100%>%filter(id!='0M (N of Mud Lake)')
df100<-df100%>%filter(!(id=="7E (Nora Township colt)"&year==2015&juliand==205))
df100<-df100%>%filter(!(id=="7E (Nora Township colt)"&year==2015&juliand==206))
df100<-df100%>%filter(!(id=="7E (Nora Township colt)"&year==2018&juliand==114))
# Drop hatch year data
df100<-df100[df100$age_yr!='hatch-yr',]
Run the generalized additive model
r5<-gam(revisits~age_yr+s(juliand, by=age_yr, k=20)+s(id, bs="re"), family=nb(), data=df100, REML=TRUE)
This fitted model object has been saved as the file r5.rds.
Model Diagnostics
r5<-getViz(r5)
check(r5, a.qq = list(method='tnorm'), a.hist = list(bins=20), a.respoi = list(size=1))
##
## Method: REML Optimizer: outer newton
## full convergence after 11 iterations.
## Gradient range [-5.172696e-05,0.1325516]
## (score 1824045 & scale 1).
## Hessian positive definite, eigenvalue range [3.517226,198434.4].
## Model rank = 117 / 117
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(juliand):age_yradult 19.0 18.7 0.91 0.62
## s(juliand):age_yrfourth-yr 19.0 14.4 0.91 0.64
## s(juliand):age_yrsecond-yr 19.0 18.9 0.91 0.65
## s(juliand):age_yrthird-yr 19.0 17.6 0.91 0.71
## s(id) 37.0 35.0 NA NA
Create dummy dataset to hold model predictions
newdat<-expand.grid(id=unique(df100$id), age_yr=unique(df100$age_yr), juliand=unique(df100$juliand))
Calculate predictions for each julian day
pred<-predict(r5, newdata = newdat, type = "response", se.fit = TRUE, exclude="s(id)")
pred.db<-as.data.frame(pred)
newdat$predfit<-pred.db$fit
newdat$pred.se<-pred.db$se.fit
newdat<-newdat%>%
mutate(age_yr=case_when(
age_yr=='second-yr'~'Second-Year',
age_yr=='third-yr'~'Third-Year',
age_yr=='fourth-yr'~'Fourth-Year',
age_yr=='adult'~'Adult'
))
newdat$age_yr<-factor(newdat$age_yr, levels = c('Second-Year', 'Third-Year','Fourth-Year','Adult'))
# Remove dates before April 1 and after October 1st
newdat1<-newdat%>%filter(juliand>90)
newdat1<-newdat1%>%filter(juliand<275)
Plot
ggplot(newdat1, aes(x=as.Date(juliand,origin=as.Date("2018-01-01")),y=predfit,color=age_yr))+geom_line(size=1)+scale_x_date(date_breaks='1 month', date_labels="%B")+xlab("")+geom_ribbon(aes(ymin=predfit-1.96*pred.se, ymax=predfit+1.96*pred.se), linetype=1, alpha=0.1)+ylab("Mean number of revisits\n")+xlab('\nMonth')+theme_bw()+labs(col="Age category")
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18362)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] lubridate_1.7.4 mgcViz_0.1.4 rgl_0.100.30 qgam_1.3.0
## [5] mgcv_1.8-28 nlme_3.1-139 forcats_0.4.0 stringr_1.4.0
## [9] dplyr_0.8.3 purrr_0.3.3 readr_1.3.1 tidyr_1.0.0
## [13] tibble_2.1.3 ggplot2_3.2.1 tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] viridis_0.5.1 httr_1.4.1
## [3] viridisLite_0.3.0 jsonlite_1.6
## [5] splines_3.6.0 foreach_1.4.7
## [7] modelr_0.1.5 shiny_1.4.0
## [9] assertthat_0.2.1 cellranger_1.1.0
## [11] yaml_2.2.0 pillar_1.4.2
## [13] backports_1.1.5 lattice_0.20-38
## [15] glue_1.3.1 digest_0.6.22
## [17] manipulateWidget_0.10.0 RColorBrewer_1.1-2
## [19] promises_1.1.0 minqa_1.2.4
## [21] rvest_0.3.4 colorspace_1.4-1
## [23] htmltools_0.4.0 httpuv_1.5.2
## [25] Matrix_1.2-17 plyr_1.8.4
## [27] pkgconfig_2.0.3 broom_0.5.2
## [29] haven_2.1.1 xtable_1.8-4
## [31] scales_1.0.0 webshot_0.5.1
## [33] later_1.0.0 lme4_1.1-21
## [35] generics_0.0.2 withr_2.1.2
## [37] lazyeval_0.2.2 cli_1.1.0
## [39] magrittr_1.5 crayon_1.3.4
## [41] readxl_1.3.1 mime_0.7
## [43] evaluate_0.14 GGally_1.4.0
## [45] MASS_7.3-51.4 doParallel_1.0.15
## [47] xml2_1.2.2 tools_3.6.0
## [49] hms_0.5.1 matrixStats_0.55.0
## [51] lifecycle_0.1.0 gamm4_0.2-5
## [53] munsell_0.5.0 compiler_3.6.0
## [55] rlang_0.4.0 nloptr_1.2.1
## [57] grid_3.6.0 iterators_1.0.12
## [59] rstudioapi_0.10 htmlwidgets_1.5.1
## [61] crosstalk_1.0.0 miniUI_0.1.1.1
## [63] labeling_0.3 rmarkdown_1.16
## [65] boot_1.3-22 gtable_0.3.0
## [67] codetools_0.2-16 reshape_0.8.8
## [69] R6_2.4.0 gridExtra_2.3
## [71] knitr_1.25 fastmap_1.0.1
## [73] zeallot_0.1.0 KernSmooth_2.23-15
## [75] stringi_1.4.3 parallel_3.6.0
## [77] Rcpp_1.0.2 vctrs_0.2.0
## [79] tidyselect_0.2.5 xfun_0.10