source('ZebraFuncs.R') #numerous functions used to format the raw data
#source("../Code/ZebraFuncs.R") #alternative path to the same functions
B <- 1e4 #bootstrap size; the manuscript uses 10^4 -- increase for more precision

library(dplyr)
library(MASS)
library(stringr)
library(ggplot2)
library(rgdal)
library(Distance)
library(spatstat)
library(mgcv)
library(RColorBrewer)
library(kableExtra)
library(numDeriv)
library(nloptr)

###This chunk reads in the datasets, then formats them for our analyses###

#read in the datasets
Hab <- read.csv(file="HabitatCharacterizat_DATA_2017-10-12_0821.csv", na.string="9999") #habitat data
Trans <- read.csv(file="TransectData_DATA_2017-10-23_1150.csv", na.string="9999") #transect data
ZM <- read.csv(file="ZebraMusselEncouter_DATA_2017-10-12_0823.csv", na.string="9999") #encounter data

#rename some transect columns for consistency with the encounter and habitat datasets,
#then subset to Lake Sylvia and Lake Burgan (spelled "Burgen" in the raw data)
Trans <- Trans %>%
  rename(Region.Label = lake_name) %>%
  rename(gps_transect_n = "gps_location_n") %>%
  rename(gps_transect_w = "gps_location_w") %>%
  subset(Region.Label == "Sylvia" | Region.Label == "Burgen")

#rename encounter columns to match what the distance analyses expect, create a
#record_id for each detection, and convert detection distances to meters
ZM <- ZM %>%
  rename(distance = perp_distance) %>%
  rename(encounter_id = record_id) %>%
  rename(size = number_mussels) %>%
  mutate(record_id = as.factor(str_split(encounter_id, pattern="_E", n=2, simplify=TRUE)[,1])) %>%
  mutate(distance = distance/100)

#impute missing values
#Trans[which(is.na(Trans$clarity)),]$clarity <- mean(Trans$clarity, na.rm=TRUE)
#interpolate missing Trans data in Lake Sylvia
Trans <- interpolate(Trans)

#merge transect information and encounter information
ZM <- merge(Trans, ZM, by='record_id')

#determine which encounters are associated with the second observers,
#and link those encounters to observers 5 & 6
for(i in 1:length(ZM$encounter_id)) {
  temp <- str_sub(ZM$encounter_id[i],
                  start=nchar(as.character(ZM$encounter_id[i]))-1,
                  end=nchar(as.character(ZM$encounter_id[i])))
  if(temp == "_2") {
    ZM$observer___1[i] <- 0
    ZM$observer___2[i] <- 0
    ZM$observer___5[i] <- 1
    ZM$observer___6[i] <- 1
  }
}

#this section links habitat type to each encounter
ZM <- cbind(ZM,
            substrate1=rep(NA, dim(ZM)[1]), substrate2=rep(NA, dim(ZM)[1]),
            substrate3=rep(NA, dim(ZM)[1]), substrate4=rep(NA, dim(ZM)[1]),
            substrate5=rep(NA, dim(ZM)[1]), substrate6=rep(NA, dim(ZM)[1]),
            substrate7=rep(NA, dim(ZM)[1]), plantcover=rep(NA, dim(ZM)[1]),
            Effort=rep(NA, dim(ZM)[1]), Area=rep(NA, dim(ZM)[1]))

#fix one mislabeled entry
Hab$record_id <- recode(Hab$record_id, "21004900_T03_30"="21004900_T03_D30")

#clean up the habitat dataset, e.g. adding in effort for transects without any observations
temp <- Link.HabCounts(Hab, ZM)
HabZM <- temp[[1]] #linked habitat/encounter data
Hab <- temp[[2]] #original habitat data

temp <- matrix(NA, dim(HabZM)[1], 2)
for(i in 1:dim(HabZM)[1]) {
  temp[i,] <- detection.location(HabZM[i,])
}
HabZM <- mutate(HabZM, gps_detect_w=temp[,1], gps_detect_n=temp[,2])

#encounter data for each lake
SylviaDat <- filter(HabZM, Region.Label == "Sylvia")
BurgenDat <- filter(HabZM, Region.Label == "Burgen")
#create an observer variable for Lake Burgan
BurgenDat <- mutate(BurgenDat, observer=(BurgenDat$observer___1==1) + 2*(BurgenDat$observer___5==1))
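#For reference before the fitting below: the detection-function forms assumed
#throughout. The actual halfnormal.func and Two.piece.normal.func live in
#ZebraFuncs.R; these commented sketches show the forms they presumably
#implement (a standard half-normal, and a two-piece normal with separate
#scales on either side of the mode mu):
#halfnormal.func <- function(sig, x) exp(-x^2/(2*sig^2))
#Two.piece.normal.func <- function(sig1, sig2, mu, x) {
#  ifelse(x < mu, exp(-(x - mu)^2/(2*sig1^2)), exp(-(x - mu)^2/(2*sig2^2)))
#}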
#create segments out of the transect data; used in the count analysis
SegDat <- create.Seg(Hab, Trans, lakename="Burgen")
#match observations made by each observer, using the distance criterion described in the manuscript
BurgenDDF <- match.function(BurgenDat, horDis=0.2)
#make the count data in each segment
ObsDat <- create.Obs(BurgenDat, SegDat=SegDat, lakename="Burgen")

##Sylvia detection analysis
#fit the half-normal detection function to Sylvia
sylvia.onepiece.fit <- optimize(f=onepiece.lik, interval=c(1e-6, 100),
                                y=SylviaDat$distance, maximum=FALSE)

#this section fits the two-piece normal detection functions to Sylvia
opts <- list("algorithm"="NLOPT_LN_SBPLX", "xtol_rel"=1.0e-8, maxeval=1e4)
x0 <- c(log(0.2), log(0.4), log(0.4)) #initial guesses
sylvia.twopiece.fit <- nloptr(x0=x0, eval_f=twopiece.lik, y=SylviaDat$distance,
                              lb=c(log(0.001), -Inf, -Inf),
                              ub=c(log(.9), log(100), log(10)), opts=opts)
sylvia.twopiece.fit.same <- nloptr(x0=c(log(0.2), log(0.5)), eval_f=twopiece.lik.same,
                                   y=SylviaDat$distance,
                                   lb=c(log(0.001), -Inf),
                                   ub=c(log(.9), log(10)), opts=opts)

#calculate the Hessian matrix for each detection model
sylvia.onepiece.fit$hessian <- numDeriv::hessian(func=onepiece.lik,
                                                 x=sylvia.onepiece.fit$minimum,
                                                 y=SylviaDat$distance)
sylvia.twopiece.fit$hessian <- numDeriv::hessian(func=twopiece.lik,
                                                 x=sylvia.twopiece.fit$solution,
                                                 y=SylviaDat$distance)
sylvia.twopiece.fit.same$hessian <- numDeriv::hessian(func=twopiece.lik.same,
                                                      x=sylvia.twopiece.fit.same$solution,
                                                      y=SylviaDat$distance,
                                                      method.args=list(eps=1e-10, d=0.001,
                                                                       zero.tol=sqrt(.Machine$double.eps),
                                                                       r=10, v=2, show.details=FALSE))

#calculate the AIC value for each detection model
sylvia.onepiece.fit$AIC <- 2*sylvia.onepiece.fit$objective + 2
sylvia.twopiece.fit$AIC <- 2*sylvia.twopiece.fit$objective + 2*length(sylvia.twopiece.fit$x0)
sylvia.twopiece.fit.same$AIC <- 2*sylvia.twopiece.fit.same$objective + 2*length(sylvia.twopiece.fit.same$x0)

#get estimates from the best (half-normal) model
MCDS.sylvia.varcov <- solve(sylvia.onepiece.fit$hessian)
MCDS.sylvia.est <- sylvia.onepiece.fit$minimum

#this section calculates the mean and standard error of the average detection
#probability (the integral of the detection function over [0, 1]) by a
#parametric bootstrap
dis.vec <- seq(0, 1, length.out=1e3)
par.draw <- rnorm(B, MCDS.sylvia.est, sqrt(as.numeric(MCDS.sylvia.varcov))) #rnorm takes a standard deviation, not a variance
onepiece.sylvia.boot <- vector('numeric', B)
for(i in 1:B) {
  onepiece.sylvia.boot[i] <- sum(halfnormal.func(sig=par.draw[i], dis.vec))*(dis.vec[2]-dis.vec[1])
}
sylvia.int <- mean(onepiece.sylvia.boot)
sylvia.int.sd <- sd(onepiece.sylvia.boot)

#now estimate the detection function for Lake Burgan
BurgenDDF <- BurgenDDF[-which(is.na(BurgenDDF$clarity)),] #remove data points with missing covariates

#keep only one record per detected object for the distance-only fits
#(the mark-recapture data has one row per object per observer; object is
#the detection ID shared across observers in the standard mrds io format)
BurganUnique <- BurgenDDF
for(i in 1:69) {
  ind <- which(BurganUnique$object == i)
  if(length(ind) > 1) {
    BurganUnique <- BurganUnique[-ind[-1],] #drop all but the first row
  }
}

#fit the sight-resight (mark-recapture) component of the detection function
ddf.dsModel4 <- ddf(method="io",
                    dsmodel=~cds(key="hn", formula=~1),
                    mrmodel=~glm(link="logit", formula=~observer + plantcover + clarity),
                    data=BurgenDDF, meta.data=list(width=1))

#fit the distance component of the detection function
burgan.onepiece.fit <- optimize(f=onepiece.lik, interval=c(1e-6, 100),
                                y=BurganUnique$distance, maximum=FALSE)
opts <- list("algorithm"="NLOPT_LN_NELDERMEAD", "xtol_rel"=1.0e-8, maxeval=1e4)
x0 <- c(log(0.2), log(0.4), log(0.4)) #initial parameter guesses
burgan.twopiece.fit <- nloptr(x0=x0, eval_f=twopiece.lik, y=BurganUnique$distance,
                              lb=c(log(0.01), log(0.01), log(0.01)),
                              ub=c(log(.9), log(10), log(10)), opts=opts)
burgan.twopiece.fit.same <- nloptr(x0=c(log(0.2), log(0.4)), eval_f=twopiece.lik.same,
                                   y=BurganUnique$distance,
                                   lb=c(log(0.01), -Inf),
                                   ub=c(log(.9), log(10)), opts=opts)
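#For reference, a hedged sketch of the negative log-likelihoods minimized
#above (the actual onepiece.lik, twopiece.lik, and twopiece.lik.same are in
#ZebraFuncs.R). Assuming detections are truncated at w = 1 m, the half-normal
#negative log-likelihood would take a form like:
#onepiece.lik <- function(sig, y) {
#  -sum(log(halfnormal.func(sig, y)) -
#       log(integrate(halfnormal.func, 0, 1, sig=sig)$value))
#}
#i.e. each term is log g(y_i) minus the log of the normalizing integral over
#[0, w]; twopiece.lik presumably does the same with Two.piece.normal.func and
#log-scale parameters c(log(mu), log(sig1), log(sig2)).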
#calculate the estimated Hessian matrices
burgan.twopiece.fit$hessian <- numDeriv::hessian(func=twopiece.lik,
                                                 x=burgan.twopiece.fit$solution,
                                                 y=BurganUnique$distance)
burgan.twopiece.fit.same$hessian <- numDeriv::hessian(func=twopiece.lik.same,
                                                      x=burgan.twopiece.fit.same$solution,
                                                      y=BurganUnique$distance)

#calculate AIC values for the estimated models
burgan.onepiece.fit$AIC <- 2*burgan.onepiece.fit$objective + 2
burgan.twopiece.fit$AIC <- 2*burgan.twopiece.fit$objective + 2*length(burgan.twopiece.fit$x0)
burgan.twopiece.fit.same$AIC <- 2*burgan.twopiece.fit.same$objective + 2*length(burgan.twopiece.fit.same$x0)

#get estimates from the selected (two-piece normal) model
MCDS.burgan.varcov <- solve(burgan.twopiece.fit$hessian)
MCDS.burgan.est <- burgan.twopiece.fit$solution

#draw from the mark-recapture detection model to get the mean and variance of phat
pars <- ddf.dsModel4$mr$par
varcov <- solve(ddf.dsModel4$mr$hessian)
p1.nocover <- p1.cover <- p2.nocover <- p2.cover <- vector('numeric', B)
for(i in 1:B) {
  p1.nocover[i] <- logit.rand(pars, varcov, x=c(1, 0, 0, mean(BurgenDDF$clarity)))
  p1.cover[i]   <- logit.rand(pars, varcov, x=c(1, 0, 1, mean(BurgenDDF$clarity)))
  p2.nocover[i] <- logit.rand(pars, varcov, x=c(1, 1, 0, mean(BurgenDDF$clarity)))
  p2.cover[i]   <- logit.rand(pars, varcov, x=c(1, 1, 1, mean(BurgenDDF$clarity)))
}

#parametric bootstrap of the average detection probability for Burgan
dis.vec <- seq(0, 1, length.out=1e3)
par.draw <- rmvn(B, MCDS.burgan.est, MCDS.burgan.varcov)
twopiece.burgan.boot <- vector('numeric', B)
for(i in 1:B) {
  twopiece.burgan.boot[i] <- sum(Two.piece.normal.func(sig1=exp(par.draw[i,2]),
                                                       sig2=exp(par.draw[i,3]),
                                                       mu=exp(par.draw[i,1]),
                                                       dis.vec))*(dis.vec[2]-dis.vec[1])
}
burgan.int <- mean(twopiece.burgan.boot)
burgan.int.sd <- sd(twopiece.burgan.boot)

#pdf(file="Fig3_stackedHist.pdf", height=4)
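#Quick sanity check on the Sylvia bootstrap above (assuming halfnormal.func
#is the standard g(x) = exp(-x^2/(2*sig^2))): the average detection
#probability over [0, 1] has the closed form sig*sqrt(2*pi)*(pnorm(1/sig) - 0.5),
#which should sit close to the bootstrap mean sylvia.int.
p.closed <- MCDS.sylvia.est*sqrt(2*pi)*(pnorm(1/MCDS.sylvia.est) - 0.5)
c(closed.form=p.closed, bootstrap.mean=sylvia.int)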
###This section calculates the detection functions, evaluated at the MLEs###
dis.vec <- seq(0, 1, length.out=100)
sigma <- exp(ddf.dsModel4$ds$par)
sig1.burgan <- exp(MCDS.burgan.est[2])
sig2.burgan <- exp(MCDS.burgan.est[3])
mew.burgan <- exp(MCDS.burgan.est[1])
p1nc <- mean(p1.nocover)
p1c <- mean(p1.cover)
p2nc <- mean(p2.nocover)
p2c <- mean(p2.cover)
twopiece.burgan <- Two.piece.normal.func(sig1=sig1.burgan, sig2=sig2.burgan, mu=mew.burgan, dis.vec)
onepiece.sylvia <- halfnormal.func(sig=MCDS.sylvia.est, dis.vec)
detect.data <- data.frame(Distance=c(dis.vec, dis.vec),
                          Prob=c(p1nc*twopiece.burgan, p2nc*twopiece.burgan),
                          Team=as.factor(c(rep(1, length(dis.vec)), rep(2, length(dis.vec)))))

#make a count-level summary for each segment in Burgan
count.dat <- create.CountSumm.twopiece(SegDat, ObsDat, BurgenDDF,
                                       ddf.model=ddf.dsModel4,
                                       MCDS.est=MCDS.burgan.est, det=TRUE)

#fit density models using REML and ML
fit.dens4.reml <- gam(Counts ~ depth + plant_cover + substrate___3,
                      offset=log(phat*Area), family=nb(link="log"),
                      data=count.dat, method="REML")
fit.dens4 <- gam(Counts ~ depth + plant_cover + substrate___3,
                 offset=log(phat*Area), family=nb(link="log"),
                 data=count.dat, method="ML")
fit.dens4.gam <- gam(Counts ~ depth + plant_cover + substrate___3 + s(utm_easting, utm_northing),
                     offset=log(phat*Area), family=nb(link="log"),
                     data=count.dat, method="ML")

#an offset passed via the offset= argument (rather than in the formula) is
#not included by predict(), hence the manual multiplication below
offset <- count.dat$phat*count.dat$Area
Area <- count.dat$Area
count.est <- predict(fit.dens4, type='response')*offset
count.se <- predict(fit.dens4, type='response', se.fit=TRUE)$se.fit*offset
coef.est <- summary(fit.dens4)$p.coeff #fixed-effect coefficient estimates
coef.se <- summary(fit.dens4)$se       #their standard errors

ml.X <- exp(predict(fit.dens4, se.fit=TRUE, type="link")$fit)*count.dat$Area*count.dat$phat #estimated counts
ml.N <- ml.X/count.dat$phat #estimated population size

#variance-covariance matrices for the fitted values, via the delta method
X <- model.matrix(fit.dens4) #design matrix
predict.VarCov <- X%*%fit.dens4$Vp%*%t(X)
predictX.varcov <- diag(ml.X)%*%predict.VarCov%*%diag(ml.X)
predictN.varcov <- diag(ml.N)%*%predict.VarCov%*%diag(ml.N)
predictD.varcov <- diag(ml.N/count.dat$Area)%*%predict.VarCov%*%diag(ml.N/count.dat$Area)

#parametric bootstraps of the detectability correction (theta) and counts to get covariances
theta.out <- create.phatVar(count.dat, ddf.dsModel4, burgan.twopiece.fit,
                            SegDat, ObsDat, B=B, type="inverse")

#estimate counts, then bootstrap to get variance estimates;
#glm.nb gives us the uncertainty in the size parameter, which gam does not expose
nb.temp <- glm.nb(Counts ~ depth + plant_cover + substrate___3 + offset(log(phat*Area)),
                  data=count.dat, link=log)
modX <- model.matrix(nb.temp) #covariate matrix
beta <- coef(nb.temp) #coefficient estimates
offset <- count.dat$phat*count.dat$Area #term used to correct the survey counts
opts <- list("algorithm"="NLOPT_LN_NELDERMEAD", "xtol_rel"=1.0e-8, maxeval=1e4)
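#For reference, a plausible sketch of the sourced nbin.regression objective
#used in the re-estimation below (a negative binomial negative log-likelihood
#with log link and offset, with log(theta) as the last element of par); the
#actual definition is in ZebraFuncs.R:
#nbin.regression <- function(par, X, y, offset) {
#  beta <- par[-length(par)]
#  theta <- exp(par[length(par)])
#  mu <- exp(X %*% beta + log(offset))
#  -sum(dnbinom(y, mu=mu, size=theta, log=TRUE))
#}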
#re-estimate the regression so we know the standard error of theta on the log scale
nb.reg <- nloptr(x0=c(beta, log(nb.temp$theta)), eval_f=nbin.regression,
                 X=modX, y=count.dat$Counts, offset=offset, opts=opts)
beta <- nb.reg$solution[-length(nb.reg$solution)]
theta <- exp(nb.reg$solution[length(nb.reg$solution)])

#get variances (the Hessian must be evaluated at the solution, not at the starting values)
nb.reg$hessian <- numDeriv::hessian(func=nbin.regression, x=nb.reg$solution,
                                    X=modX, y=count.dat$Counts, offset=offset)
nb.reg$varcov <- solve(nb.reg$hessian)

#parametric bootstrap of the negative binomial counts
nbin.draw <- mvrnorm(B, nb.reg$solution, nb.reg$varcov)
count.draw <- nbin.mean <- matrix(NA, B, dim(count.dat)[1])
for(i in 1:B) {
  nbin.mean[i,] <- modX%*%nbin.draw[i,-dim(nbin.draw)[2]]
  count.draw[i,] <- rnbinom(length(nbin.mean[1,]),
                            mu=exp(nbin.mean[i,] + log(offset)),
                            size=exp(nbin.draw[i,dim(nbin.draw)[2]]))
}
corcounts <- count.draw%*%diag(1/count.dat$phat)
count.cov <- cov(count.draw, use="complete")
corcount.cov <- diag(1/count.dat$phat)%*%count.cov%*%diag(1/count.dat$phat)
theta.varcov <- theta.out[[2]]

#assign segments to strata (transects 3 and 12-18 form stratum 2)
strata.vec <- rep(1, length(count.dat$transect_no))
strata.vec[which(count.dat$transect_no %in% c(3, 12, 13, 14, 15, 16, 17, 18))] <- 2
weight.vec <- c(10, 1)
frac.vec <- c(length(count.dat$Area[strata.vec==1]),
              length(count.dat$Area[strata.vec==2]))/length(count.dat$Area)
weight.vec <- weight.vec/sum(weight.vec)

count.est.strata <- dens.est.strata <- abund.est.strata <- dens.sd.strata <- vector('numeric', 2)
count.var.strata <- theta.var.strata <- abund.var.strata <- dens.var.strata <-
  abund.totvar.strata <- dens.var.strata.detect <- matrix(NA, 2, 2)
area.vec <- c(sum(count.dat$Area[strata.vec==1]), sum(count.dat$Area[strata.vec==2]))

#stratum-level abundance and density estimates
abund.est.strata[1] <- sum(ml.N[which(strata.vec==1)])
abund.est.strata[2] <- sum(ml.N[which(strata.vec==2)])
dens.est.strata[1] <- abund.est.strata[1]/sum(count.dat$Area[which(strata.vec==1)])
dens.est.strata[2] <- abund.est.strata[2]/sum(count.dat$Area[which(strata.vec==2)])

#aggregate the segment-level covariances to the stratum level
count.var.strata[1,2] <- count.var.strata[2,1] <- sum(count.cov[which(strata.vec==1), which(strata.vec==2)])
count.var.strata[1,1] <- sum(count.cov[which(strata.vec==1), which(strata.vec==1)])
count.var.strata[2,2] <- sum(count.cov[which(strata.vec==2), which(strata.vec==2)])
abund.var.strata[1,2] <- abund.var.strata[2,1] <- sum(corcount.cov[which(strata.vec==1), which(strata.vec==2)])
abund.var.strata[1,1] <- sum(corcount.cov[which(strata.vec==1), which(strata.vec==1)])
abund.var.strata[2,2] <- sum(corcount.cov[which(strata.vec==2), which(strata.vec==2)])
theta.var.strata[1,2] <- theta.var.strata[2,1] <- sum(theta.varcov[which(strata.vec==1), which(strata.vec==2)])
theta.var.strata[1,1] <- sum(theta.varcov[which(strata.vec==1), which(strata.vec==1)])
theta.var.strata[2,2] <- sum(theta.varcov[which(strata.vec==2), which(strata.vec==2)])

abund.totvar.strata <- abund.var.strata + theta.var.strata
dens.totvar.strata <- diag(1/area.vec)%*%abund.totvar.strata%*%diag(1/area.vec)
dens.var.strata.detect <- diag(1/area.vec)%*%theta.var.strata%*%diag(1/area.vec)
strata.area <- c(sum(count.dat$Area[strata.vec==1]), sum(count.dat$Area[strata.vec==2]))

#overall density as a weighted combination of the stratum densities
density.overall <- sum(dens.est.strata*weight.vec)
density.var.overall <- t(weight.vec)%*%dens.totvar.strata%*%weight.vec
density.var.overall.detect <- t(weight.vec)%*%dens.var.strata.detect%*%weight.vec
density.se <- sqrt(density.var.overall)
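#The overall density above is the weighted combination D.hat = sum_s w_s * D.hat_s,
#with variance w' Sigma w where Sigma = dens.totvar.strata; printing the pieces
#makes the aggregation easy to audit.
round(c(D.stratum1=dens.est.strata[1], D.stratum2=dens.est.strata[2],
        D.overall=density.overall, SE=as.numeric(density.se)), 4)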
#project segment coordinates from UTM to longitude/latitude
xynew <- data.frame(ID=1:length(count.dat$Counts), counts=count.dat$Counts,
                    X=count.dat$utm_easting, Y=count.dat$utm_northing)
coordinates(xynew) <- c("X", "Y")
proj4string(xynew) <- CRS("+proj=utm +zone=15 +ellps=WGS84")
xycount <- spTransform(xynew, CRS("+proj=longlat +datum=WGS84"))

xynew <- data.frame(ID=1:length(fit.dens4$residuals), residuals=fit.dens4$residuals,
                    X=count.dat$utm_easting, Y=count.dat$utm_northing)
coordinates(xynew) <- c("X", "Y")
proj4string(xynew) <- CRS("+proj=utm +zone=15 +ellps=WGS84")
xyresid <- spTransform(xynew, CRS("+proj=longlat +datum=WGS84"))

##power calculation: two-sided power to distinguish the estimated density
##from a doubling (power2) or quadrupling (power4) of density
se <- density.se
a <- 2*density.overall - 1.96*se
b <- 2*density.overall + 1.96*se
power2 <- pnorm(a, density.overall, se) + (1 - pnorm(b, density.overall, se))
a <- 4*density.overall - 1.96*se
b <- 4*density.overall + 1.96*se
power4 <- pnorm(a, density.overall, se) + (1 - pnorm(b, density.overall, se))

save(list=c('SylviaDat', 'density.se', 'sylvia.int', 'sylvia.int.sd',
            'sylvia.onepiece.fit', 'sylvia.twopiece.fit', 'sylvia.twopiece.fit.same',
            'burgan.int', 'burgan.int.sd', 'onepiece.sylvia',
            'burgan.onepiece.fit', 'burgan.twopiece.fit', 'burgan.twopiece.fit.same',
            'p1.cover', 'p1.nocover', 'p2.cover', 'p2.nocover',
            'MCDS.sylvia.est', 'MCDS.sylvia.varcov', 'mew.burgan', 'MCDS.burgan.varcov',
            'sig1.burgan', 'sig2.burgan', 'count.dat',
            'density.overall', 'density.var.overall', 'density.var.overall.detect',
            'dens.est.strata', 'dens.totvar.strata',
            'fit.dens4.gam', 'fit.dens4', 'ddf.dsModel4', 'power4', 'power2'),
     file="ZeebDensityResults.Rdata")
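#Note: rgdal was retired from CRAN in 2023. Should it become unavailable, a
#minimal sketch of the same UTM -> lon/lat conversion with the sf package
#(assuming WGS84 / UTM zone 15N, EPSG:32615) would look like:
#library(sf)
#pts <- st_as_sf(data.frame(X=count.dat$utm_easting, Y=count.dat$utm_northing),
#                coords=c("X", "Y"), crs=32615)
#xycount.sf <- st_transform(pts, crs=4326) #EPSG:4326 = WGS84 lon/lat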