Data augmentation

Primary Investigator: Althea ArchMiller

Collaborators: J. Fieberg, B. Dorazio, K. St. Clair

Description This document takes all of the operational survey and then augments it for analysis.

Preamble

# Documentation-related
library(devtools)
library(knitr)
library(ezknitr)

# Analysis-related
library(doBy)
library(tidyr)
library(dplyr)

# Remove environment and set seed
remove(list=ls())
set.seed(4587)

Year(s) of analysis

year <- 2005:2016

Load Files

Operational survey data

Note: This data has been updated by the DNR post-publication, some slight changes from QA/QC processing

obsdat <- read.csv("data/moose0416.csv")

Operational survey sampling characteristics

srates <- read.csv("data/srates0416.csv")

Only keep the appropriate year(s) for analysis

srates <- srates[srates$year %in% year,]
obsdat <- obsdat[obsdat$year %in% year,]

Delete out stratum 4

obsdat <- obsdat[obsdat$stratum!=4,]
srates <- srates[srates$stratum!=4,]

Define year code relative to first year of analysis

srates$yrcode <- srates$year - (min(year)-1)
obsdat$yrcode <- obsdat$year - (min(year)-1)

Data processing

Sampling characteristics

nh <- srates$nh # number of sampled plots
Nh <- srates$Nh # number of total plots
yrh <- srates$yrcode 
nyrL <- length(year) # total number of years in model
nstrata <- unique(srates$stratum)
nstrataL <- length(nstrata)

Variables from operational survey data

y <- obsdat$total
x <- obsdat$voc/100
h <- obsdat$stratum
plotid <- obsdat$subunit
yr <- obsdat$yrcode

Model parameters

for these, have to update for the records that were from plots that were sampled but NO moose were detected

q <- ifelse(y==0, NA, 1) # latent variable for presence (re: data augmentation)
z <- ifelse(y==0, 0, 1) # latent variable for detection 
y <- ifelse(y==0, NA, y) # when no moose were detected, change group size to NA

Data augmentation

Bh is the size of the augmented population in each plots given the stratum h. When a plot was sampled and moose were detected, augment with Bh-mi animals; when a plot was sampled, and no moose were detected, augment with Bh-1 animals

Augmented data will have:

B <- c(40, 60, 100)

Augment data

for(yy in 1:nyrL){
  # Operational survey for year yy
  odyr <- obsdat[obsdat$yrcode==yy,]

  # Sampled plots in year yy
  samp.plot <- unique(odyr$subunit)

  # for each sampled plot pp
  for(pp in 1:length(samp.plot)){
    # Data from plot pp (will be empty if no moose were detected)
    tempd <- odyr[odyr$subunit==samp.plot[pp] & odyr$total!=0,]
    temph <- unique(odyr$stratum[odyr$subunit==samp.plot[pp]])

    # number of detected moose per plot pp
    mi <- nrow(tempd)

    ### if no moose were observed, nm = Bh-1
    ### if moose were observed, nm = Bh-mi
    nm <- ifelse(mi==0, B[temph] - 1, B[temph] - mi)

    # Augment data 
    y <- c(y, rep(NA, nm))
    x <- c(x, rep(NA, nm))
    q <- c(q, rep(NA, nm))
    z <- c(z, rep(0, nm))
    plotid <- c(plotid, rep(samp.plot[pp], nm))
    h <- c(h, rep(temph, nm))
    yr <- c(yr, rep(yy, nm))
  }
}

Variable updating and sorting for models

Total number of groups in augmented dataset

Ngroups <- length(y)

Shift y for Poisson

ym1 <- y-1

Sort data

yrsort <- order(yr)
y <- y[yrsort]
x <- x[yrsort]
h <- h[yrsort]
q <- q[yrsort]
z <- z[yrsort]
yr <- yr[yrsort]
ym1 <- ym1[yrsort]
plotid <- plotid[yrsort]

Add 0.001 to x

x[x==0] <- 0.001

Other variables for model

plot.yr <- paste(plotid, yr, sep=":")
Nplots.yr <- length(unique(plot.yr))
year.aug <- yr + (min(year)-1)

Bind augmented operational data together

operdata.augmented.temp <- cbind.data.frame(year.aug, yr, plotid, h, 
                                            y, ym1, x, z, q, plot.yr)

View examples of augmented operational data set

For plot 1, should have h=1 for all sampled years

table(operdata.augmented.temp$h[operdata.augmented.temp$plotid==1])
## 
##   1 
## 120
table(operdata.augmented.temp$year.aug[operdata.augmented.temp$plotid==1])
## 
## 2006 2008 2013 
##   40   40   40

For plot 7, operational data-designated stratum changes from year to year (although for 2016-GIS data, stratum 1 for every year)–see discrepancies in data/transmittals/toDNR20170310/

table(operdata.augmented.temp$h[operdata.augmented.temp$plotid==7])
## 
##  1  2 
## 40 60
table(operdata.augmented.temp$year.aug[operdata.augmented.temp$plotid==7])
## 
## 2007 2011 
##   60   40

Convert augmented operational survey dataset into plot-level data

library(dplyr)
plotdata.augmented <- 
  operdata.augmented.temp %>% distinct(year.aug, plotid, .keep_all = TRUE)

plotdata.augmented[plotdata.augmented$plotid==1,]
##     year.aug yr plotid h  y ym1  x z  q plot.yr
## 37      2006  2      1 1 NA  NA NA 0 NA     1:2
## 114     2008  4      1 1 NA  NA NA 0 NA     1:4
## 314     2013  9      1 1 NA  NA NA 0 NA     1:9
plotdata.augmented[plotdata.augmented$plotid==7,]
##     year.aug yr plotid h  y ym1  x z  q plot.yr
## 74      2007  3      7 2 NA  NA NA 0 NA     7:3
## 234     2011  7      7 1 NA  NA NA 0 NA     7:7

Add sequential integer as an index across years

plotdata.augmented$plot.index.xYrs <- 1:nrow(plotdata.augmented)

Remove columns from plot level data except plot.yr, plotid, year, stratum, index

colnames(plotdata.augmented)
##  [1] "year.aug"        "yr"              "plotid"         
##  [4] "h"               "y"               "ym1"            
##  [7] "x"               "z"               "q"              
## [10] "plot.yr"         "plot.index.xYrs"
plotdata.augmented <- plotdata.augmented[,c(1,2,3,4,10,11)]
colnames(plotdata.augmented)
## [1] "year.aug"        "yr"              "plotid"          "h"              
## [5] "plot.yr"         "plot.index.xYrs"

Add index back onto augmented dataset

operdata.augmented <- left_join(operdata.augmented.temp, 
                                plotdata.augmented, 
                                by=c("year.aug", "yr", "plotid", "h", "plot.yr"))

View examples of augmented operational data set

For plot 1, should have h=1 for all sampled years

table(operdata.augmented$h[operdata.augmented$plotid==1])
## 
##   1 
## 120
table(operdata.augmented$year.aug[operdata.augmented$plotid==1])
## 
## 2006 2008 2013 
##   40   40   40
table(operdata.augmented$plot.index.xYrs[operdata.augmented$plotid==1])
## 
##  37 114 314 
##  40  40  40

For plot 7, operational data-designated stratum changes from year to year (although for 2016-GIS data, stratum 1 for every year)–see discrepancies in data/transmittals/toDNR20170310/

table(operdata.augmented$h[operdata.augmented$plotid==7])
## 
##  1  2 
## 40 60
table(operdata.augmented$year.aug[operdata.augmented$plotid==7])
## 
## 2007 2011 
##   60   40
table(operdata.augmented$plot.index.xYrs[operdata.augmented$plotid==7])
## 
##  74 234 
##  60  40

Save data

save(operdata.augmented, file="data/augdata.Rdata")
save(plotdata.augmented, file="data/augdata_plotlvl.Rdata")

Footer

Session Information

devtools::session_info()
## Session info --------------------------------------------------------------
##  setting  value                       
##  version  R version 3.3.2 (2016-10-31)
##  system   x86_64, darwin13.4.0        
##  ui       RStudio (1.0.136)           
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2017-03-29
## Packages ------------------------------------------------------------------
##  package     * version date       source        
##  abind         1.4-5   2016-07-21 CRAN (R 3.3.0)
##  assertthat    0.1     2013-12-06 CRAN (R 3.3.0)
##  boot          1.3-18  2016-02-23 CRAN (R 3.3.2)
##  coda        * 0.19-1  2016-12-08 CRAN (R 3.3.2)
##  DBI           0.5-1   2016-09-10 CRAN (R 3.3.0)
##  devtools    * 1.12.0  2016-06-24 CRAN (R 3.3.0)
##  digest        0.6.12  2017-01-27 CRAN (R 3.3.2)
##  doBy        * 4.5-15  2016-03-31 CRAN (R 3.3.0)
##  dplyr       * 0.5.0   2016-06-24 CRAN (R 3.3.0)
##  evaluate      0.10    2016-10-11 CRAN (R 3.3.0)
##  ezknitr     * 0.6     2016-09-16 CRAN (R 3.3.0)
##  knitr       * 1.15.1  2016-11-22 CRAN (R 3.3.2)
##  lattice       0.20-34 2016-09-06 CRAN (R 3.3.2)
##  lazyeval      0.2.0   2016-06-12 CRAN (R 3.3.0)
##  magrittr      1.5     2014-11-22 CRAN (R 3.3.0)
##  markdown      0.7.7   2015-04-22 CRAN (R 3.3.0)
##  MASS          7.3-45  2016-04-21 CRAN (R 3.3.2)
##  Matrix        1.2-7.1 2016-09-01 CRAN (R 3.3.2)
##  memoise       1.0.0   2016-01-29 CRAN (R 3.3.0)
##  R.methodsS3   1.7.1   2016-02-16 CRAN (R 3.3.0)
##  R.oo          1.21.0  2016-11-01 CRAN (R 3.3.0)
##  R.utils       2.5.0   2016-11-07 CRAN (R 3.3.0)
##  R2jags      * 0.5-7   2015-08-23 CRAN (R 3.3.0)
##  R2WinBUGS     2.1-21  2015-07-30 CRAN (R 3.3.0)
##  R6            2.2.0   2016-10-05 CRAN (R 3.3.0)
##  Rcpp          0.12.9  2017-01-14 CRAN (R 3.3.2)
##  rjags       * 4-6     2016-02-19 CRAN (R 3.3.0)
##  rstudioapi    0.6     2016-06-27 CRAN (R 3.3.0)
##  stringi       1.1.2   2016-10-01 CRAN (R 3.3.0)
##  stringr       1.1.0   2016-08-19 CRAN (R 3.3.0)
##  tibble        1.2     2016-08-26 CRAN (R 3.3.0)
##  tidyr       * 0.6.1   2017-01-10 CRAN (R 3.3.2)
##  withr         1.0.2   2016-06-20 CRAN (R 3.3.0)

spun with: ezknitr::ezspin(“programs/ms_programs/b_augment_data.R”, out_dir = “output”, fig_dir = “figures”, keep_md = F)