# package names
packages<-c("tidyverse", "here", "mcp", "lubridate", "knitr", "ezknitr", "loo", "flextable")
# install any packages not previously installed
installed_packages<-packages %in% rownames(installed.packages())
if(any(installed_packages == FALSE)){
install.packages(packages[!installed_packages])
}
# load packages
invisible(lapply(packages, library, character.only = TRUE))
Read in accelerometry dataset
df<-read_csv(here::here("DRUM_materials/data_files_for_DRUM/odba_temp.csv"))
df<-df %>%
select(UTC_datetime, acc_x, acc_y, acc_z)
Filter by date and create bursts
# get UTC_datetime in the right format
df$UTC_datetime<-mdy_hm(df$UTC_datetime)
df<-df %>% filter(UTC_datetime>"2021-04-13 15:00:00")
# create burst number
df<-df %>%
group_by(burst=cut(UTC_datetime, "5 min")) %>%
mutate(burst_row=1:n())
df$burstID<-df %>%
group_indices(burst=cut(UTC_datetime, "5 min"))
Now there are unique ID’s for each burst and row numbers within burst
df<-df %>%
group_by(burstID) %>%
mutate(burst_count=n())
table(df$burst_count)
##
## 1 2 6 7 10 12 13 18 19 20 24 25 30
## 1081 1102 84 35 10 72 13 54 95 20 408 50 52650
## 31
## 27001
# only 1.5% of bursts are less than 30, so I'll filter these out
df<-df %>%
filter(burst_count>29)
Calculate overall dynamic body acceleration values
df<-df %>%
group_by(burstID) %>%
mutate(ODBA=sum(abs(acc_x-mean(acc_x)),abs(acc_y-mean(acc_y)),abs(acc_z-mean(acc_z))))
Calculate hourly averages and filter dataset to 1 row per hour.
df$hour_index<-df %>%
group_indices(burst=cut(UTC_datetime, "1 hour"))
df<-df %>%
group_by(hour_index) %>%
mutate(hr_odba=mean(ODBA))
df<-df %>%
distinct(hour_index, hr_odba)
# One outlier to remove. Could be from a predator moving carcass.
df<-df[-96,]
Now run piecewise regression for a segment before and after the time of death.
m1<-list(
hr_odba~1,
~1
)
fit1<-mcp(m1, df[,c("hr_odba", "hour_index")],
par_x="hour_index")
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 228
## Unobserved stochastic nodes: 4
## Total graph size: 2756
##
## Initializing model
m2<-list(
hr_odba~1,
~1+sigma(1)
)
fit2<-mcp(m2, df[, c("hr_odba", "hour_index")],
par_x="hour_index")
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 228
## Unobserved stochastic nodes: 5
## Total graph size: 4121
##
## Initializing model
Cross-validation
fit1$loo<-loo(fit1)
fit2$loo<-loo(fit2)
loo_compare(fit1$loo, fit2$loo)
## elpd_diff se_diff
## model2 0.0 0.0
## model1 -461.3 23.6
The expected log predictive density (ELPD) indicates that the second model with a change in variance performs best.
Model output
summary(fit2)
## Family: gaussian(link = 'identity')
## Iterations: 9000 from 3 chains.
## Segments:
## 1: hr_odba ~ 1
## 2: hr_odba ~ 1 ~ 1 + sigma(1)
##
## Population-level parameters:
## name mean lower upper Rhat n.eff
## cp_1 53 52 53 1 4943
## int_1 3900 3036 4815 1 6240
## int_2 372 358 386 1 5617
## sigma_1 3379 2770 4011 1 5237
## sigma_2 95 85 106 1 5321
Rhat values indicate good convergence of chains. The changepoint parameter indicates that the break in the time-series (representative of the mortality) was at 54 hours.
Plot the model
plot(fit2, q_fit = T)+
ggtitle("Overall Dynamic Body Acceleration")+
xlab("")+ylab("Mean hourly ODBA value")
We now show an additional application of piecewise regression that is also capable of providing evidence for whether a collared individual died or had the collar malfunction.
Import data
temp<-read_csv(here("DRUM_materials/data_files_for_DRUM/odba_temp.csv"))
Format and filter.
temp$UTC_datetime<-mdy_hm(temp$UTC_datetime)
temp<-temp %>% filter(datatype=="GPS")
temp<-temp %>% filter(UTC_datetime>"2021-04-13 15:00:00")
temp<-temp %>%
select(UTC_datetime, temperature_C)
temp<-temp %>% drop_na()
Create burst IDs for each hour
temp$hour_index<-temp %>%
group_indices(burst=cut(UTC_datetime, "1 hour"))
temp<-temp %>%
group_by(hour_index) %>%
mutate(burst_count=n())
Check that the time-series isn’t missing a lot of data. There should be 4 data point for each hourly burst because sensors were programmed to take a temperature every 15 minutes.
table(temp$burst_count)
##
## 3 4
## 3 1436
Looks good.
Calculate hourly temperature values and filter down to just hourly values
temp<-temp %>%
group_by(hour_index) %>%
mutate(hr_temp=mean(temperature_C))
temp<-temp %>%
distinct(hr_temp, hour_index)
# filter so the time period is the same as for the accelerometer data
temp<-temp %>%
filter(hour_index<236)
Plot the data
ggplot(temp, aes(hour_index, hr_temp))+geom_point()
This example is much less straightforward than the ODBA example. One thing we have observed about sensor data in the case of mortalities is that the collar will shift from tracking the temperature on the swan (not internal temp, but not ambient) to tracking the ambient air temperature which produces a more prolonged cyclical pattern. A common approach for fitting these kind of patterns is to use autoregressive terms, which fit to ‘lagged correlation’.
acf(temp$hr_temp, main="Autocorrelation Function of Temperature time series")
A plot of the autocorrelation function (ACF) of the time series confirms that there is a high degree of autocorrelation. The correlation between points at varying lag lengths are shown. Values beyond the dashed blue lines indicate statistically significant correlations. Data without autocorrelation should have ACF values near 0.
Partial autocorrelation functions (PACF) can be useful for choosing an appropriate time series model. A PACF plot shows the correlation between any two observation that shorter lags do not explain. Because we are interested in modeling the change in autocorrelation, I’ll look at the PACF between the two suspected segments before and after the point of mortality using the change point location from the ODBA model.
# plot separately based on changepoint from odba
pacf(temp$hr_temp[1:50], main="Partial Autocorrelation Function before potential mortality")
The PACF plot suggests that a first-order autoregressive term is most appropriate for the first segment.
pacf(temp$hr_temp[51:235], main="Partial Autocorrelation Function after potential mortality")
The PACF plot suggests that a third-order autoregressive term is most appropriate for the second segment.
Below, I fit a series of different time series models to see which best fits the data.
# Model syntax
# Simple change in intercepts
m1<-list(
hr_temp~1,
~1
)
# Change in variability
m2<-list(
hr_temp~1,
~1+sigma(1)
)
# Change in autocorrelation
# Separate first order AR term for each segment
m3<-list(
hr_temp~1+ar(1),
~1+ar(1)
)
# Going off the PACF plots, a first-order AR term for the
# first segment and a third-order term for the second segment.
m4<-list(
hr_temp~1+ar(1),
~1+ar(3)
)
# Fit models
f1<-mcp(m1, temp, par_x="hour_index")
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 235
## Unobserved stochastic nodes: 4
## Total graph size: 2840
##
## Initializing model
f2<-mcp(m2, temp, par_x="hour_index")
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 235
## Unobserved stochastic nodes: 5
## Total graph size: 4247
##
## Initializing model
f3<-mcp(m3, temp, par_x="hour_index")
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 235
## Unobserved stochastic nodes: 6
## Total graph size: 4486
##
## Initializing model
f4<-mcp(m4, temp, par_x="hour_index")
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 235
## Unobserved stochastic nodes: 8
## Total graph size: 5422
##
## Initializing model
temp_mods<-list(f1, f2, f3, f4)
Leave-one-out cross validation for model comparison.
# Calculate expected log predictive density using loo
for(i in 1:length(temp_mods)){
temp_mods[[i]]$loo<-loo(temp_mods[[i]])
}
loos<-loo_compare(temp_mods[[1]]$loo,
temp_mods[[2]]$loo,
temp_mods[[3]]$loo,
temp_mods[[4]]$loo)
Visualize cross validation info in a table.
loo_df<-as.data.frame(loos[1:4, 1:2])
loo_df[,1:2]<-lapply(loo_df[,1:2], round, 2)
loo_df<-cbind.data.frame('Model Syntax'=c("AR(1) / AR(3)",
"AR(1) / AR(1)",
"Change in Intercept",
"Change in Intercept and Variance"), loo_df)
loo_df %>% flextable() %>%
align(part = "all") %>% # left align
set_header_labels(values=list("Model Syntax" = "Model Syntax",
"elpd_diff" = "ELPD Difference",
"se_diff" = "SE of Difference")) %>%
set_caption(caption = "Leave-one-out Cross Validation was used to compute the Estimated Log Predictive Density (ELPD) for 4 models Note, the models with autoregressive (AR) terms also have a change in the intercept for of the two segments. The highest ELPD for the model with change in intercepts as well as a first-order autoregressive term in the first segment and a third-order autoregressive term in the second segment indicates that it has the highest predictive accuracy.") %>%
# font(fontname = "Calibri (Body)", part = "all") %>%
fontsize(size = 12, part = "body") %>%
# add footer if you want
# add_footer_row(values = "* p < 0.05. ** p < 0.01. *** p < 0.001.",
# colwidths = 4) %>%
theme_booktabs() %>% # default theme
border_outer() %>%
vline(part="header") %>%
align(align ='center', part = 'all') %>%
bg(part="header", bg="#dbdbdb") %>%
bold(part="header") %>%
colformat_num(digits = 2) %>%
autofit()
Model Syntax | ELPD Difference | SE of Difference |
AR(1) / AR(3) | 0.0 | 0.0 |
AR(1) / AR(1) | -14.8 | 5.6 |
Change in Intercept | -230.1 | 25.0 |
Change in Intercept and Variance | -241.1 | 23.4 |
The model with a AR(1) term in the first segment and a AR(3) term in the second segment had the lowest expected log predictive density (ELPD) and was therefore chosen as best.
Model summary
summary(temp_mods[[4]])
## Family: gaussian(link = 'identity')
## Iterations: 9000 from 3 chains.
## Segments:
## 1: hr_temp ~ 1 + ar(1)
## 2: hr_temp ~ 1 ~ 1 + ar(3)
##
## Population-level parameters:
## name mean lower upper Rhat n.eff
## ar1_1 0.69 0.57 0.81 1 4812
## ar1_2 0.96 0.88 1.00 1 765
## ar2_2 0.49 0.23 0.75 1 221
## ar3_2 -0.59 -0.81 -0.37 1 248
## cp_1 50.50 50.04 50.99 1 7115
## int_1 17.52 15.83 19.27 1 4151
## int_2 3.59 1.58 5.78 1 3843
## sigma_1 2.11 1.93 2.32 1 5391
Plot fitted model
plot(temp_mods[[4]], q_fit=T)
The first segment, in which the live swan temperature is correlated with high variability can be expressed as: \[ \mu_{i}=\beta_{0}+\epsilon_{i} \] with \[ \epsilon_{i}=\phi\epsilon_{i-1}+\omega_{i} \] and \[ \omega_{i}\sim_{iid}\mathcal{N}(0, \sigma^{2}) \]
where \(\phi\), is the partial autocorrelation parameter.
The second segment can be modeled as: \[ \mu_{i}=\beta_{0}+\epsilon_{i} \] with \[ \epsilon_{i}=\phi_{1}\epsilon_{i-1}+\phi_{2}\epsilon_{i-2}+\phi_{3}\epsilon_{i-3}+\omega_{i} \] and \[ \omega_{i}\sim_{iid}\mathcal{N}(0, \sigma^{2}) \]
Plot both models to verify that they each identify the correct breakpoint at the point of mortality.
p1<-plot(fit2, q_fit=T)+
ggtitle("Overall Dynamic Body Acceleration")+
xlab("Hourly Index")+ylab("Mean hourly ODBA value")+
theme_bw(base_size = 14)+
theme(plot.title=element_text(hjust=0.5))+
theme(axis.title.x=element_blank())
p2<-plot(temp_mods[[4]], q_fit = T)+
ggtitle("Temperature")+
xlab("\nHourly Index")+ylab("Mean hourly temp (in Celsius)\n")+
theme_bw(base_size = 14)+
theme(plot.title=element_text(hjust=0.5))
cowplot::plot_grid(p1, p2, align = "v", axis="l", ncol=1)
Footer
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19043)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] flextable_0.6.8 loo_2.4.1 ezknitr_0.6 knitr_1.30
## [5] lubridate_1.8.0 mcp_0.3.2 here_0.1 forcats_0.5.1
## [9] stringr_1.4.0 dplyr_1.0.8 purrr_0.3.4 readr_2.1.2
## [13] tidyr_1.2.0 tibble_3.1.6 tidyverse_1.3.1 ggplot2_3.3.5
##
## loaded via a namespace (and not attached):
## [1] matrixStats_0.57.0 fs_1.5.0 httr_1.4.2
## [4] rprojroot_1.3-2 tensorA_0.36.2 tools_4.0.2
## [7] backports_1.1.10 bslib_0.3.1 utf8_1.1.4
## [10] R6_2.4.1 ggdist_3.1.1 DBI_1.1.1
## [13] colorspace_1.4-1 withr_2.3.0 tidyselect_1.1.2
## [16] compiler_4.0.2 cli_3.2.0 rvest_1.0.2
## [19] HDInterval_0.2.2 arrayhelpers_1.1-0 xml2_1.3.2
## [22] officer_0.4.0 labeling_0.3 posterior_1.2.1
## [25] sass_0.4.0 scales_1.1.1 checkmate_2.0.0
## [28] systemfonts_0.2.3 digest_0.6.28 rmarkdown_2.11
## [31] base64enc_0.1-3 pkgconfig_2.0.3 htmltools_0.5.2
## [34] dbplyr_2.1.1 fastmap_1.1.0 rlang_1.0.2
## [37] readxl_1.3.1 rstudioapi_0.13 farver_2.0.3
## [40] jquerylib_0.1.4 generics_0.0.2 svUnit_1.0.3
## [43] jsonlite_1.8.0 distributional_0.3.0 zip_2.1.1
## [46] magrittr_2.0.1 patchwork_1.1.1 Rcpp_1.0.8
## [49] munsell_0.5.0 fansi_0.4.1 abind_1.4-5
## [52] gdtools_0.2.2 lifecycle_1.0.1 stringi_1.5.3
## [55] yaml_2.2.1 grid_4.0.2 parallel_4.0.2
## [58] crayon_1.5.0 lattice_0.20-41 cowplot_1.1.1
## [61] haven_2.3.1 hms_1.1.1 pillar_1.7.0
## [64] uuid_0.1-4 codetools_0.2-16 reprex_2.0.1
## [67] glue_1.6.2 evaluate_0.14 data.table_1.13.0
## [70] modelr_0.1.8 vctrs_0.3.8 tzdb_0.2.0
## [73] cellranger_1.1.0 gtable_0.3.0 assertthat_0.2.1
## [76] xfun_0.26 broom_0.7.12 coda_0.19-3
## [79] rjags_4-10 tidybayes_3.0.2 ellipsis_0.3.2