#' # Analysis of Elk data from WA #' #' Programmer: John Fieberg #' #' R Code in support of Fieberg et al. Do Capture and Survey Methods Influence Whether Marked Animals are Representative of Unmarked Animals? #' Steps: #' 1. Created new input files (without headers or lines at the end, so I could read in the data using functions in RMark) #' 2. Transformed the capture history character variable into a "long" data frame with 1 observation for each survey in which the capture field was not "." #' 3. Created a Year variable assoicated with each survey. The first 2 obs. go with year 1, the next two with year 2, etc... #' 4. Created a capure year variable: the value was set equal to the first year in which the capture history was a 0 or 1. #' 5. Modeled detection (0,1) as a function of time since capture, included a random effect for individual, and other relevant predictors. #' rm(list = ls()) #+ warning=FALSE, message=FALSE # Load libraries library(RMark) library(knitr) library(lme4) library(ggplot2) library(doBy) # set global option for output opts_chunk$set(comment=NA, fig.width=6, fig.height=6) #' ## Mt. St. Helens data MSH<-import.chdata("../Data/MSH.inp", header=F, field.names=c("ch", "AdF", "BA")) head(MSH) #' ## Create data set for R #' There are 8 capture histories (some of which are missing), create a "long" data set from this file. #+ warning=F cdat.MSH<-NULL for(i in 1:nrow(MSH)){ temp<-MSH[i,] first.cap=NA for(j in 1:8){ temp2<-as.numeric(substr(temp$ch,j,j)) if(is.na(temp2)==F){ if(is.na(first.cap)==T){first.cap<-(j+1)%/%2}#first.cap<-j note, first captures were all in week 1 cdat.MSH<-rbind(cdat.MSH, data.frame(id=rownames(temp), ID=i, year=(j+1)%/%2, week=(j+1)%%2+1, observed=temp2, first.cap=first.cap, IAdF=temp$AdF))} } } # Time since capture (assumes week=1 or 2 does not matter...) cdat.MSH$time.cap<-cdat.MSH$year-cdat.MSH$first.cap head(cdat.MSH, 20) #' ### Plots and analyses #' Lets just plot the proportion seen versus year since capture. #' par(bty="L") plot(0:3,prop.table(table(cdat.MSH$observed, cdat.MSH$time.cap), 2)[2,], ylim=c(0,1), xlab="Years since capture", ylab="Proportion observed") #' No apparent relationship, but to be sure, lets fit a few mixed binary regression models. #' #' ## Mixed models #' First, look at a model with Year (as a factor), Week, ID (random)+time since capture + AdF fit.temp1a<-glmer(observed~time.cap+as.factor(week)+as.factor(year)+(1|id)+IAdF, family=binomial(), data=cdat.MSH) summary(fit.temp1a) #' Not much there, but perhaps a significant sex effect (AdF). Also, there is quite a bit of variation from individual to individual. #' Lets try modeling year and week as random fit.temp1b<-glmer(observed~time.cap+(1 | year/week) +(1|id)+IAdF, family=binomial(), data=cdat.MSH) summary(fit.temp1b) #' Lets try dropping the year and week effects, which have small variance components. #' Lets also consider a random slope effect for time since capture. I.e., are there some individuals that #' happen to respond behaviorally more than others? fit.temp2<-glmer(observed~time.cap+(1+time.cap|id)+IAdF, family=binomial(), data=cdat.MSH) summary(fit.temp2) #' Conculsion regarding time.cap is robust to how we look at the data. Lets construct a plot to look at #' individual-level variation in predicted probabilities of detection. Appears to be significant variation #' among individuals in their propensity to be detected. cdat.MSH$phat<-predict(fit.temp2, type="resp") qplot(time.cap, phat, data=cdat.MSH, geom="line", group=id, colour=IAdF, xlab="Years since capture", ylab="Predicted probability of being observed") # Look at distribution of p_observed across individuals pobs<-summaryBy(IAdF+observed~id, data=cdat.MSH) pobs[,2]<-pobs[,2]-1 pobs[,2]<-as.character(pobs[,2]) ggplot(pobs, aes(observed.mean))+geom_histogram(aes(y=..density..), binwidth=0.01)+facet_grid(IAdF.mean~.) #' ## Nooksack data #' #+warning=FALSE Nook<-import.chdata("../Data/Nook.inp", header=F, field.names=c("ch", "Female", "Male")) head(Nook) #' ## Create data set for R #' There are 8 capture histories (some of which are missing), create a "long" data set from this file. #+ warning=F cdat.NS<-NULL for(i in 1:nrow(Nook)){ temp<-Nook[i,] first.cap=NA for(j in 1:16){ temp2<-as.numeric(substr(temp$ch,j,j)) if(is.na(temp2)==F){ if(is.na(first.cap)==T){first.cap<-(j+1)%/%2}#first.cap<-j note, first captures were all in week 1 cdat.NS<-rbind(cdat.NS, data.frame(id=rownames(temp), ID=i, year=(j+1)%/%2, week=(j+1)%%2+1, observed=temp2, first.cap=first.cap, Female=temp$Female))} } } # Time since capture (assumes week=1 or 2 does not matter...) cdat.NS$time.cap<-cdat.NS$year-cdat.NS$first.cap cdat.NS$Female<-as.factor(cdat.NS$Female) head(cdat.NS, 20) #' ### Plots and analyses #' Lets just plot the proportion seen versus year since capture. #' par(bty="L") plot(0:7,prop.table(table(cdat.NS$observed, cdat.NS$time.cap), 2)[2,], ylim=c(0,1), xlab="Years since capture", ylab="Proportion observed") #' Nothing too strong there... #' #' ## Mixed models #' First, look at a model with Year (as a factor), Week, ID (random)+time since capture + sex fit.temp1a<-glmer(observed~time.cap+as.factor(week)+as.factor(year)+(1|id)+Female, family=binomial(), data=cdat.NS) summary(fit.temp1a) #' Pretty big year-to-year variation, females also more likely to be seen. Week also marginallky significant. Lets turn year and week into a random effects. fit.temp1a<-glmer(observed~time.cap+(1|year/week)+(1|id)+Female, family=binomial(), data=cdat.NS) summary(fit.temp1a) #' Now, consider a random slope for time since capture along with the other randome effects #' Big difference between males and females, and significant animal-to-animal variation. No strong signature wrt time since capture. fit.temp2<-glmer(observed~time.cap+(1+time.cap|id)+Female+(1|year/week), family=binomial(), data=cdat.NS) summary(fit.temp2) #' Lets create a plot to look at individual-level variation in predicted probabilities of detectoin. Appears to be significant variation #' among individuals in their propensity to be detected. b.id<-ranef(fit.temp2)$id names(b.id)<-c("Int", "beta.time.cap") b.id$id<-rownames(b.id) beta.hat<-fixef(fit.temp2) cdat.NS.new<-merge(cdat.NS, b.id, by="id", all=T) cdat.NS.new$phat<-plogis(cdat.NS.new$Int+beta.hat[1]+cdat.NS.new$time.cap*(cdat.NS.new$beta.time.cap+beta.hat[2])) # Similar plots qplot(time.cap, phat, data=cdat.NS.new, geom=c("line", "point"), group=id, colour=Female, xlab="Years since capture", ylab="Predicted probability of being observed") # Look at distribution of p_observed across individuals pobs<-summaryBy(Female+observed~id, data=cdat.NS) pobs[,2]<-pobs[,2]-1 pobs[,2]<-as.character(pobs[,2]) ggplot(pobs, aes(observed.mean))+geom_histogram(aes(y=..density..), binwidth=0.01)+facet_grid(Female.mean~.) #' Now, combine everything into 1 plot #+ fig.height=12, fig.width=12 #x11(width=4, height=4) par(mfrow=c(2,2), bty="L", cex.lab=1.8, cex.axis=1.8, mar=c(5,6,4.1,2.1) , xpd=T, oma=c(0,1,0,0)) with(cdat.MSH,plot(time.cap, phat, type="n", xlab="Years since capture", ylab="Probability of detection")) uid<-unique(cdat.MSH$id) luid<-length(uid) cols<-c("black", "gray") lwds<-c(2,1) for(i in 1:luid){ tempdat<-cdat.MSH[cdat.MSH$i==uid[i],] with(tempdat,lines(time.cap, phat, col=cols[as.numeric(IAdF)], lwd=lwds[as.numeric(IAdF)])) } legend("top", inset=-3, col=c("black", "gray"), lty=1, lwd=c(2,1),bty="n", legend=c("No", "Yes"), title="Adult Female", ncol=2, cex=1.4) mtext(outer=F, line=2, side=3, cex=1.4, "A)", adj=0) # Look at distribution of p_observed across individuals pobs.MSH<-with(cdat.MSH, tapply(observed,id,mean)) hist(pobs.MSH, xlab="", ylab="Distribution", main="") mtext(outer=F, line=2, side=3, cex=1.4, "B) Proportion of times observed", adj=0) with(cdat.NS.new,plot(time.cap, phat, type="n", xlab="Years since capture", ylab="Probability of Detection")) uid<-unique(cdat.NS.new$id) luid<-length(uid) cols<-c("black", "gray") lwds<-c(2,1) for(i in 1:luid){ tempdat<-cdat.NS.new[cdat.NS.new$i==uid[i],] with(tempdat,lines(time.cap, phat,lwd=lwds[as.numeric(Female)], col=cols[as.numeric(Female)])) } mtext(outer=F, line=2, side=3, cex=1.4, "C)", adj=0) # Look at distribution of p_observed across individuals pobs.NS<-with(cdat.NS.new, tapply(observed,id,mean)) hist(pobs.NS, xlab="", ylab="Distribution", main="") mtext(outer=F, line=2, side=3, cex=1.4, "D) Proportion of times observed", adj=0) # Print out Session info sessionInfo()