#' # Analysis of Elk data from WA
#'
#' Programmer:  John Fieberg
#' 
#' R Code in support of Fieberg et al. Do Capture and Survey Methods Influence Whether Marked Animals are Representative of Unmarked Animals?  

#' Steps:  
#'  1.  Created new input files (without headers or lines at the end, so I could read in the data using functions in RMark)
#'  2.  Transformed the capture history character variable into a "long" data frame with 1 observation for each survey in which the capture field was not "."
#'  3.  Created a Year variable assoicated with each survey.  The first 2 obs. go with year 1, the next two with year 2, etc...
#'  4.  Created a capure year variable:  the value was set equal to the first year in which the capture history was a 0 or 1.
#'  5.  Modeled detection (0,1) as a function of time since capture, included a random effect for individual, and other relevant predictors.
#'  
  rm(list = ls())

#+ warning=FALSE, message=FALSE
# Load libraries
  library(RMark)
  library(knitr)
  library(lme4)
  library(ggplot2)
  library(doBy)
  
# set global option for output
  opts_chunk$set(comment=NA, fig.width=6, fig.height=6)

#' ## Mt. St. Helens data 
  MSH<-import.chdata("../Data/MSH.inp", header=F, field.names=c("ch", "AdF", "BA"))
  head(MSH)
  
#' ## Create data set for R
#' There are 8 capture histories (some of which are missing), create a "long" data set from this file.
#+ warning=F 
  cdat.MSH<-NULL
  for(i in 1:nrow(MSH)){
    temp<-MSH[i,]
    first.cap=NA
    for(j in 1:8){
      temp2<-as.numeric(substr(temp$ch,j,j))
      if(is.na(temp2)==F){
        if(is.na(first.cap)==T){first.cap<-(j+1)%/%2}#first.cap<-j  note, first captures were all in week 1
        cdat.MSH<-rbind(cdat.MSH, data.frame(id=rownames(temp), ID=i, year=(j+1)%/%2, week=(j+1)%%2+1, observed=temp2, first.cap=first.cap, IAdF=temp$AdF))}
    }
  }
  
# Time since capture (assumes week=1 or 2 does not matter...)
  cdat.MSH$time.cap<-cdat.MSH$year-cdat.MSH$first.cap
  head(cdat.MSH, 20)
  
#' ### Plots and analyses
#' Lets just plot the proportion seen versus year since capture. 
#' 
  par(bty="L")
  plot(0:3,prop.table(table(cdat.MSH$observed, cdat.MSH$time.cap), 2)[2,], ylim=c(0,1), xlab="Years since capture", ylab="Proportion observed")  

#' No apparent relationship, but to be sure, lets fit a few mixed binary regression models.
#' 
#' ## Mixed models 
#' First, look at a model with Year (as a factor), Week, ID (random)+time since capture + AdF
  fit.temp1a<-glmer(observed~time.cap+as.factor(week)+as.factor(year)+(1|id)+IAdF, family=binomial(), data=cdat.MSH)
  summary(fit.temp1a)

#'  Not much there, but perhaps a significant sex effect (AdF).  Also, there is quite a bit of variation from individual to individual.
#'  Lets try modeling year and week as random
  fit.temp1b<-glmer(observed~time.cap+(1 | year/week)  +(1|id)+IAdF, family=binomial(), data=cdat.MSH)
  summary(fit.temp1b)

  
#'  Lets try dropping the year and week effects, which have small variance components.  
#'  Lets also consider a random slope effect for time since capture.  I.e., are there some individuals that
#'  happen to respond behaviorally more than others?  
  fit.temp2<-glmer(observed~time.cap+(1+time.cap|id)+IAdF, family=binomial(), data=cdat.MSH)
  summary(fit.temp2)
  
  
#' Conculsion regarding time.cap is robust to how we look at the data.  Lets construct a plot to look at
#'  individual-level variation in predicted probabilities of detection.  Appears to be significant variation
#' among individuals in their propensity to be detected. 
   cdat.MSH$phat<-predict(fit.temp2, type="resp")
   qplot(time.cap, phat, data=cdat.MSH, geom="line", group=id, colour=IAdF, xlab="Years since capture", ylab="Predicted probability of being observed")

 
# Look at distribution of p_observed across individuals  
  pobs<-summaryBy(IAdF+observed~id, data=cdat.MSH)
  pobs[,2]<-pobs[,2]-1
  pobs[,2]<-as.character(pobs[,2])
  ggplot(pobs, aes(observed.mean))+geom_histogram(aes(y=..density..), binwidth=0.01)+facet_grid(IAdF.mean~.)
  
     
#' ## Nooksack data 
#' 
#+warning=FALSE 
  Nook<-import.chdata("../Data/Nook.inp", header=F, field.names=c("ch", "Female", "Male"))
  head(Nook)
  
#' ## Create data set for R
#' There are 8 capture histories (some of which are missing), create a "long" data set from this file.
#+ warning=F 
  cdat.NS<-NULL
  for(i in 1:nrow(Nook)){
    temp<-Nook[i,]
    first.cap=NA
    for(j in 1:16){
      temp2<-as.numeric(substr(temp$ch,j,j))
      if(is.na(temp2)==F){
        if(is.na(first.cap)==T){first.cap<-(j+1)%/%2}#first.cap<-j  note, first captures were all in week 1
        cdat.NS<-rbind(cdat.NS, data.frame(id=rownames(temp), ID=i, year=(j+1)%/%2, week=(j+1)%%2+1, observed=temp2, first.cap=first.cap, Female=temp$Female))}
    }
  }
  
# Time since capture (assumes week=1 or 2 does not matter...)
  cdat.NS$time.cap<-cdat.NS$year-cdat.NS$first.cap
  cdat.NS$Female<-as.factor(cdat.NS$Female)
  head(cdat.NS, 20)
  
#' ### Plots and analyses
#' Lets just plot the proportion seen versus year since capture. 
#' 
  par(bty="L")
  plot(0:7,prop.table(table(cdat.NS$observed, cdat.NS$time.cap), 2)[2,], ylim=c(0,1), xlab="Years since capture", ylab="Proportion observed")  

#' Nothing too strong there... 
#' 
#' ## Mixed models 
#' First, look at a model with Year (as a factor), Week, ID (random)+time since capture + sex
  fit.temp1a<-glmer(observed~time.cap+as.factor(week)+as.factor(year)+(1|id)+Female, family=binomial(), data=cdat.NS)
  summary(fit.temp1a)

#'  Pretty big year-to-year variation, females also more likely to be seen.  Week also marginallky significant.  Lets turn year and week into a random effects.
  fit.temp1a<-glmer(observed~time.cap+(1|year/week)+(1|id)+Female, family=binomial(), data=cdat.NS)
  summary(fit.temp1a)

  
#'  Now,  consider a random slope for time since capture along with the other randome effects
#' Big difference between males and females, and significant animal-to-animal variation.  No strong signature wrt time since capture.
  fit.temp2<-glmer(observed~time.cap+(1+time.cap|id)+Female+(1|year/week), family=binomial(), data=cdat.NS)
  summary(fit.temp2)
  
  
#' Lets create a plot to look at individual-level variation in predicted probabilities of detectoin.  Appears to be significant variation
#' among individuals in their propensity to be detected. 
  b.id<-ranef(fit.temp2)$id 
  names(b.id)<-c("Int", "beta.time.cap")
  b.id$id<-rownames(b.id)
  beta.hat<-fixef(fit.temp2)
  cdat.NS.new<-merge(cdat.NS, b.id, by="id", all=T)
  cdat.NS.new$phat<-plogis(cdat.NS.new$Int+beta.hat[1]+cdat.NS.new$time.cap*(cdat.NS.new$beta.time.cap+beta.hat[2]))


# Similar plots  
  qplot(time.cap, phat, data=cdat.NS.new, geom=c("line", "point"), group=id, colour=Female, xlab="Years since capture", ylab="Predicted probability of being observed")

# Look at distribution of p_observed across individuals  
  pobs<-summaryBy(Female+observed~id, data=cdat.NS)
  pobs[,2]<-pobs[,2]-1
  pobs[,2]<-as.character(pobs[,2])
  ggplot(pobs, aes(observed.mean))+geom_histogram(aes(y=..density..), binwidth=0.01)+facet_grid(Female.mean~.)
  
  
#' Now, combine everything into 1 plot
#+ fig.height=12, fig.width=12
  #x11(width=4, height=4)
  par(mfrow=c(2,2), bty="L", cex.lab=1.8, cex.axis=1.8, mar=c(5,6,4.1,2.1) , xpd=T, oma=c(0,1,0,0)) 
  with(cdat.MSH,plot(time.cap, phat, type="n", xlab="Years since capture", ylab="Probability of detection"))
  uid<-unique(cdat.MSH$id)
  luid<-length(uid)
  cols<-c("black", "gray")
  lwds<-c(2,1)
  for(i in 1:luid){
    tempdat<-cdat.MSH[cdat.MSH$i==uid[i],]
    with(tempdat,lines(time.cap, phat, col=cols[as.numeric(IAdF)], lwd=lwds[as.numeric(IAdF)]))
  }
  legend("top", inset=-3, col=c("black", "gray"), lty=1, lwd=c(2,1),bty="n", legend=c("No", "Yes"), title="Adult Female", ncol=2, cex=1.4)
  mtext(outer=F, line=2, side=3, cex=1.4, "A)", adj=0)
  
# Look at distribution of p_observed across individuals  
  pobs.MSH<-with(cdat.MSH, tapply(observed,id,mean))
  hist(pobs.MSH, xlab="", ylab="Distribution", main="")
  mtext(outer=F, line=2, side=3, cex=1.4, "B) Proportion of times observed", adj=0)
  
  with(cdat.NS.new,plot(time.cap, phat, type="n", xlab="Years since capture", ylab="Probability of Detection"))
  uid<-unique(cdat.NS.new$id)
  luid<-length(uid)
  cols<-c("black", "gray")
  lwds<-c(2,1)
  for(i in 1:luid){
    tempdat<-cdat.NS.new[cdat.NS.new$i==uid[i],]
    with(tempdat,lines(time.cap, phat,lwd=lwds[as.numeric(Female)], col=cols[as.numeric(Female)]))
  }
  mtext(outer=F, line=2, side=3, cex=1.4, "C)", adj=0)
  
# Look at distribution of p_observed across individuals  
  pobs.NS<-with(cdat.NS.new, tapply(observed,id,mean))
  hist(pobs.NS, xlab="", ylab="Distribution", main="")
  mtext(outer=F, line=2, side=3, cex=1.4, "D) Proportion of times observed", adj=0)
  
# Print out Session info
  sessionInfo()