Part 1: Fit the model

Load libraries

library(tidyverse)
library(mgcv)
library(mgcViz)
library(lubridate)

Import data

df100<-read_csv("processed_data/nsd_db100_recurse.csv")

Some extra data cleaning

df100$year<-year(df100$timestamp)
df100$juliand<-yday(df100$timestamp)
df100$age_yr<-as.factor(df100$age_yr)
df100$id<-as.factor(df100$id)
df100<-df100%>%filter(id!="3A  (Barb Wire)") 
df100<-df100%>%filter(id!='0M (N of Mud Lake)')
df100<-df100%>%filter(!(id=="7E (Nora Township colt)"&year==2015&juliand==205))
df100<-df100%>%filter(!(id=="7E (Nora Township colt)"&year==2015&juliand==206))  
df100<-df100%>%filter(!(id=="7E (Nora Township colt)"&year==2018&juliand==114))

# Drop hatch year data
df100<-df100[df100$age_yr!='hatch-yr',]

Run the generalized additive model

r5<-gam(revisits~age_yr+s(juliand, by=age_yr, k=20)+s(id, bs="re"), family=nb(), data=df100, REML=TRUE)

This fitted model object has been saved as the file r5.rds.

Model Diagnostics

r5<-getViz(r5)
check(r5,  a.qq = list(method='tnorm'), a.hist = list(bins=20), a.respoi = list(size=1))
## 
## Method: REML   Optimizer: outer newton
## full convergence after 11 iterations.
## Gradient range [-5.172696e-05,0.1325516]
## (score 1824045 & scale 1).
## Hessian positive definite, eigenvalue range [3.517226,198434.4].
## Model rank =  117 / 117 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'  edf k-index p-value
## s(juliand):age_yradult     19.0 18.7    0.91    0.62
## s(juliand):age_yrfourth-yr 19.0 14.4    0.91    0.64
## s(juliand):age_yrsecond-yr 19.0 18.9    0.91    0.65
## s(juliand):age_yrthird-yr  19.0 17.6    0.91    0.71
## s(id)                      37.0 35.0      NA      NA

Part 2: Plot the predictions

Create dummy dataset to hold model predictions

newdat<-expand.grid(id=unique(df100$id), age_yr=unique(df100$age_yr), juliand=unique(df100$juliand))

Calculate predictions for each julian day

pred<-predict(r5, newdata = newdat, type = "response", se.fit = TRUE, exclude="s(id)")
pred.db<-as.data.frame(pred)
newdat$predfit<-pred.db$fit
newdat$pred.se<-pred.db$se.fit

newdat<-newdat%>%
  mutate(age_yr=case_when(
    age_yr=='second-yr'~'Second-Year',
    age_yr=='third-yr'~'Third-Year',
    age_yr=='fourth-yr'~'Fourth-Year',
    age_yr=='adult'~'Adult'
  ))
newdat$age_yr<-factor(newdat$age_yr, levels = c('Second-Year', 'Third-Year','Fourth-Year','Adult'))

# Remove dates before April 1 and after October 1st
newdat1<-newdat%>%filter(juliand>90)
newdat1<-newdat1%>%filter(juliand<275)

Plot

ggplot(newdat1, aes(x=as.Date(juliand,origin=as.Date("2018-01-01")),y=predfit,color=age_yr))+geom_line(size=1)+scale_x_date(date_breaks='1 month', date_labels="%B")+xlab("")+geom_ribbon(aes(ymin=predfit-1.96*pred.se, ymax=predfit+1.96*pred.se), linetype=1, alpha=0.1)+ylab("Mean number of revisits\n")+xlab('\nMonth')+theme_bw()+labs(col="Age category")

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18362)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.7.4 mgcViz_0.1.4    rgl_0.100.30    qgam_1.3.0     
##  [5] mgcv_1.8-28     nlme_3.1-139    forcats_0.4.0   stringr_1.4.0  
##  [9] dplyr_0.8.3     purrr_0.3.3     readr_1.3.1     tidyr_1.0.0    
## [13] tibble_2.1.3    ggplot2_3.2.1   tidyverse_1.2.1
## 
## loaded via a namespace (and not attached):
##  [1] viridis_0.5.1           httr_1.4.1             
##  [3] viridisLite_0.3.0       jsonlite_1.6           
##  [5] splines_3.6.0           foreach_1.4.7          
##  [7] modelr_0.1.5            shiny_1.4.0            
##  [9] assertthat_0.2.1        cellranger_1.1.0       
## [11] yaml_2.2.0              pillar_1.4.2           
## [13] backports_1.1.5         lattice_0.20-38        
## [15] glue_1.3.1              digest_0.6.22          
## [17] manipulateWidget_0.10.0 RColorBrewer_1.1-2     
## [19] promises_1.1.0          minqa_1.2.4            
## [21] rvest_0.3.4             colorspace_1.4-1       
## [23] htmltools_0.4.0         httpuv_1.5.2           
## [25] Matrix_1.2-17           plyr_1.8.4             
## [27] pkgconfig_2.0.3         broom_0.5.2            
## [29] haven_2.1.1             xtable_1.8-4           
## [31] scales_1.0.0            webshot_0.5.1          
## [33] later_1.0.0             lme4_1.1-21            
## [35] generics_0.0.2          withr_2.1.2            
## [37] lazyeval_0.2.2          cli_1.1.0              
## [39] magrittr_1.5            crayon_1.3.4           
## [41] readxl_1.3.1            mime_0.7               
## [43] evaluate_0.14           GGally_1.4.0           
## [45] MASS_7.3-51.4           doParallel_1.0.15      
## [47] xml2_1.2.2              tools_3.6.0            
## [49] hms_0.5.1               matrixStats_0.55.0     
## [51] lifecycle_0.1.0         gamm4_0.2-5            
## [53] munsell_0.5.0           compiler_3.6.0         
## [55] rlang_0.4.0             nloptr_1.2.1           
## [57] grid_3.6.0              iterators_1.0.12       
## [59] rstudioapi_0.10         htmlwidgets_1.5.1      
## [61] crosstalk_1.0.0         miniUI_0.1.1.1         
## [63] labeling_0.3            rmarkdown_1.16         
## [65] boot_1.3-22             gtable_0.3.0           
## [67] codetools_0.2-16        reshape_0.8.8          
## [69] R6_2.4.0                gridExtra_2.3          
## [71] knitr_1.25              fastmap_1.0.1          
## [73] zeallot_0.1.0           KernSmooth_2.23-15     
## [75] stringi_1.4.3           parallel_3.6.0         
## [77] Rcpp_1.0.2              vctrs_0.2.0            
## [79] tidyselect_0.2.5        xfun_0.10