boostrap_d2c.utf8.md

Cluster-level bootstrap for distance-to-centroid values

Load libraries

library(tidyverse)
library(lubridate)
library(tlocoh)
library(sp)
library(reshape2)

Source external script for ‘distance-to-centroid’ function

source("scripts/distance_to_centroid_function.R")

Import dataset

trim<-read_csv("processed_data/nsd_db100_recurse.csv")
df<-read_csv("processed_data/nsd_db100.csv")

A few housekeeping issues and a few outliers not caught in the data cleaning script

colnames(df)[5]<-'timestamp'
colnames(df)[1]<-'id'
trim1<-left_join(trim, df[,c('id', 'timestamp', 'utm_x', 'utm_y')])
trim1$juliand<-yday(trim1$timestamp)
trim1$year<-year(trim1$timestamp)

trim1<-trim1%>%filter(id!="3A  (Barb Wire)") # issues with data feed; included locations before capture
trim1<-trim1%>%filter(id!='0M (N of Mud Lake)')
trim1<-trim1%>%filter(!(id=="7E (Nora Township colt)"&year==2015&juliand==205)) #day of capture; locations before capture included
trim1<-trim1%>%filter(!(id=="7E (Nora Township colt)"&year==2015&juliand==206))  # day after capture
trim1<-trim1%>%filter(!(id=="7E (Nora Township colt)"&year==2018&juliand==114)) # an outlier

The function that calculuates the distance to centroid is an edit of a function in the t-locoh package, so it’s easiest to create a t-locoh object out of the dataset before further filtering.

# make spatial object
df.sp<-SpatialPoints(trim1[,c('utm_x', 'utm_y')], 
                     proj4string = CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5
                                  +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs " ))
utms<-coordinates(df.sp)

dt<-trim1$timestamp
# create t-locoh object
df.lxy<-xyt.lxy(xy = utms, dt = dt,
                  id=trim1$id, 
                proj4string = CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5
                                  +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs " ))

Now loop through all ID’s and years and pull out the D2C information and put everything back together. Code is show commented out to suppress the ‘distance-to-centroid’ output from each individual being shown. If rerunning, remove all hash marks from this chunk or delete the whole thing and change the chunk options in the next chunk to include generate output (i.e. delete ‘include=FALSE’)

# trim1$year<-year(trim1$timestamp)
# 
# uids<-unique(trim1$id)
# results<-NULL
# tmp.yr<-NULL
# tmp.db<-NULL
# tmp.loco<-NULL
# tmp.d2c<-NULL
# tmp.sp<-NULL
# utms<-NULL
# years<-NULL
# tmp.crane.db<-NULL
# 
# for(i in 1:length(uids)){
#   tmp.db<-trim1%>%filter(id==uids[i])
#   years<-unique(tmp.db$year)
#   for(j in 1:length(years)){
#     tmp.yr<-tmp.db%>%filter(year==years[j])
#     tmp.sp<-SpatialPoints(tmp.yr[,c('utm_x', 'utm_y')], 
#                           proj4string = CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5
#                                             +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs " ))
#     utms<-coordinates(tmp.sp)
#     tmp.loco<-xyt.lxy(xy=utms, dt = tmp.yr$timestamp, id=tmp.yr$id,
#                       proj4string =CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5
#                                   +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs "))
#     tmp.d2c<-d2ctr(tmp.loco)
#     colnames(tmp.d2c)[1]<-'timestamp'
#     tmp.yr<-left_join(tmp.yr, tmp.d2c)
#     tmp.crane.db<-rbind(tmp.crane.db, tmp.yr)
#     }
#   results<-rbind(results, tmp.crane.db)
#   tmp.crane.db<-NULL
# }

As far as modeling d2c as response values, a gam with a smoother term including julian day by age-category didn’t converge after 7 hours.
A much more stripped down version with monthly d2c averages by age category didn’t have enough variation to portray the relationship well.

So, instead, here is a cluster-level bootstrap where the level of ‘cluster’ is each different age category.

set.seed(1234)
results$month<-month(results$timestamp)
res<-results
uid<-unique(res$id)
ncranes<-length(uid)
boot.out<-NULL
nboots<-1000

df1<-res%>%
  group_by(id, month)%>%
  mutate(mean.d2c=mean(dist2centroid/1000),
         med.d2c=median(dist2centroid/1000))

for(i in 1:nboots){
  # create bootstrap
  bootIDs <- data.frame(id = sample(x = uid, size = ncranes, replace = TRUE))
  bootDat <- left_join(bootIDs, df1)
  
  #cluster-level is the age categories
  tmp<-bootDat%>%
    group_by(age_yr, month)%>%
    summarise(month.means=mean(mean.d2c),
              month.medians=mean(med.d2c))
  boot.out<-rbind(boot.out, tmp) 
}

std.err<-function(x){
  sd(x)/sqrt(sum(!is.na(x)))
}
bc<-boot.out%>%
  group_by(age_yr, month)%>%
  summarize(pop.mean.d2c=mean(month.means),
            pop.median.d2c=median(month.medians),
            pop.mean.se=std.err(month.means),
            pop.median.se=std.err(month.medians))

Now to do some wrangling to get the dataframe into what ggplot expects.

bc$month<-as.factor(bc$month)
bc1<-melt(bc)

## Using age_yr, month as id variables

# make separate columns for lower and upper CI limits
bc1$CI.lower<-NA
bc1$CI.upper<-NA

# mean d2c  (the average of the age-category mean of the mean individual monthly d2c value)
for(i in 1:44){
  bc1[i,5]<-bc1[i,4]-1.96*bc1[i+88,4]
  bc1[i,6]<-bc1[i,4]+1.96*bc1[i+88,4]
}

# median d2c (the median of the age-category mean of the individual median monthly d2c value)
for(i in 45:88){
  bc1[i,5]<-bc1[i,4]-1.96*bc1[i+88,4]
  bc1[i,6]<-bc1[i,4]+1.96*bc1[i+88,4]
}

# don't need the rows with standard errors anymore
bc2<-bc1[1:88,]

# Don't need October-December since this is focusing on breeding season
bc2$month<-as.numeric(as.character(bc2$month))
bc3<-bc2%>%filter(month<10)

# Just retain the means, discard the medians
means<-bc3%>%filter(variable=='pop.mean.d2c')
means$month<-as.factor(means$month)

# rename the age categories values to not be abbreviated
means<-means%>%
  mutate(age_yr=case_when(
    age_yr=='hatch-yr'~'Hatch-Year',
    age_yr=='second-yr'~'Second-Year',
    age_yr=='third-yr'~'Third-Year',
    age_yr=='fourth-yr'~'Fourth-Year',
    age_yr=='adult'~'Adult'
  ))
means$age_yr<-factor(means$age_yr, levels=c('Hatch-Year', 'Second-Year', 'Third-Year', 'Fourth-Year', 'Adult'))

# Remove hatch-year data
sub<-means[means$age_yr!='Hatch-Year',]
sub$age_yr<-factor(sub$age_yr, levels=c('Second-Year', 'Third-Year', 'Fourth-Year', 'Adult'))

Plot

ggplot(sub, aes(x=month, y=value, color=age_yr, group=age_yr))+
  geom_errorbar(aes(ymin=CI.lower, ymax=CI.upper), width=0.1)+
  geom_line(size=0.8)+geom_point()+ylab('Distance-to-Centroid (in kilometers)\n')+
  scale_x_discrete(labels=c('3'='March', '4'='April', '5'='May', '6'='June', '7'='July', '8'='August', '9'='September'))+
  labs(color='Age Category')+xlab("Month")+theme_bw()

sessionInfo()

## R version 3.6.0 (2019-04-26)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18362)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] reshape2_1.4.3  tlocoh_1.40.07  sp_1.3-1        lubridate_1.7.4
##  [5] forcats_0.4.0   stringr_1.4.0   dplyr_0.8.3     purrr_0.3.3    
##  [9] readr_1.3.1     tidyr_1.0.0     tibble_2.1.3    ggplot2_3.2.1  
## [13] tidyverse_1.2.1
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_0.2.5 xfun_0.10        pbapply_1.4-2    haven_2.1.1     
##  [5] lattice_0.20-38  colorspace_1.4-1 vctrs_0.2.0      generics_0.0.2  
##  [9] htmltools_0.4.0  yaml_2.2.0       rlang_0.4.0      pillar_1.4.2    
## [13] glue_1.3.1       withr_2.1.2      modelr_0.1.5     readxl_1.3.1    
## [17] plyr_1.8.4       lifecycle_0.1.0  munsell_0.5.0    gtable_0.3.0    
## [21] cellranger_1.1.0 rvest_0.3.4      evaluate_0.14    labeling_0.3    
## [25] knitr_1.25       parallel_3.6.0   broom_0.5.2      Rcpp_1.0.2      
## [29] scales_1.0.0     backports_1.1.5  jsonlite_1.6     FNN_1.1.3       
## [33] hms_0.5.1        digest_0.6.22    stringi_1.4.3    grid_3.6.0      
## [37] rgdal_1.4-6      cli_1.1.0        tools_3.6.0      magrittr_1.5    
## [41] lazyeval_0.2.2   crayon_1.3.4     pkgconfig_2.0.3  zeallot_0.1.0   
## [45] xml2_1.2.2       assertthat_0.2.1 rmarkdown_1.16   httr_1.4.1      
## [49] rstudioapi_0.10  R6_2.4.0         nlme_3.1-139     compiler_3.6.0