# Set of functions useful for analyzing the zebra mussel dataset.

# Negative log-likelihood of a two-piece (split) normal detection function for
# perpendicular distances y in [0, 1].  par.vec holds log(mu), log(sig1),
# log(sig2); the log scale keeps the optimizer unconstrained.  The density is
# a normal kernel with scale sig1 left of the mode mu and sig2 right of it,
# renormalized by its integral over [0, 1].  Returns the NEGATIVE
# log-likelihood so it can be passed directly to optim()/nlm().
twopiece.lik <- function(par.vec, y) {
  mu <- exp(par.vec[1])
  sig1 <- exp(par.vec[2])
  sig2 <- exp(par.vec[3])
  # indicators for which side of the mode each observation falls on
  ind1 <- y < mu
  ind2 <- y >= mu
  # unnormalized normal kernel (integration variable x shadows nothing here)
  integrand <- function(x, mu, sig) {
    exp(-(x - mu)^2/(2*sig^2))
  }
  # normalizing constant: integral of each piece over its half of [0, 1]
  int1 <- integrate(f=integrand, lower=0, upper=mu, mu=mu, sig=sig1)$value
  int2 <- integrate(f=integrand, lower=mu, upper=1, mu=mu, sig=sig2)$value
  # (sqrt(2)*sig)^2 == 2*sig^2, the usual normal exponent denominator;
  # ind1/ind2 select the per-observation scale
  ll <- sum(-(y - mu)^2/(sqrt(2)*(ind1*sig1 + ind2*sig2))^2) -
    length(y)*log(int1+int2) #- sum(ind1*log(int1)) - sum(ind2*log(int2))
  return(-ll)
}

# Same model as twopiece.lik but with a single shared scale (sig2 forced equal
# to sig1), i.e. a symmetric normal detection function with estimated mode.
# par.vec holds log(mu), log(sig1).  Returns the NEGATIVE log-likelihood.
twopiece.lik.same <- function(par.vec, y) {
  mu <- exp(par.vec[1])
  sig1 <- exp(par.vec[2])
  sig2 <- sig1
  ind1 <- y < mu
  ind2 <- y >= mu
  integrand <- function(x, mu, sig) {
    exp(-(x - mu)^2/(2*sig^2))
  }
  int1 <- integrate(f=integrand, lower=0, upper=mu, mu=mu, sig=sig1)$value
  int2 <- integrate(f=integrand, lower=mu, upper=1, mu=mu, sig=sig2)$value
  ll <- sum(-(y - mu)^2/(sqrt(2)*(ind1*sig1 + ind2*sig2))^2) -
    length(y)*log(int1+int2) #sum(ind1*log(int1+int2)) - sum(ind2*log(int2+int1))
  return(-ll)
}

# Negative log-likelihood of a half-normal detection function (mode fixed at
# zero) truncated to [0, 1].  NOTE: here x is the scale parameter sigma, not
# a distance; y holds the observed distances.
onepiece.lik <- function(x, y) {
  sig1 <- x
  # half-normal kernel; the mu argument is unused (mode is 0)
  integrand <- function(x, mu, sig) {
    exp(-x^2/(2*sig^2))
  }
  int1 <- integrate(f=integrand, lower=0, upper=1, mu=0, sig=sig1)$value
  ll <- sum(-y^2/(sqrt(2)*sig1)^2) - length(y)*log(int1)
  return(-ll)
}

# This interpolates the missing location data for Lake Sylvia by averaging the
# locations of the six transects that follow the missing block, then laying
# out the missing transects at fixed offsets from that averaged point.
# Requires dplyr (filter) and sp (coordinates, proj4string, CRS, spTransform).
interpolate <- function(Trans) {
  Sylvia.trans <- filter(Trans, Region.Label=="Sylvia")
  # rows whose transect coordinates were never recorded
  fillins <- which(is.na(Sylvia.trans$gps_transect_n))
  # the six transects immediately after the missing block; assumes the
  # missing rows are a contiguous block at the top of the Sylvia subset
  nextthree <- (length(fillins)+1):(length(fillins)+3)
  nextnextthree <- (length(fillins)+4):(length(fillins)+6)
  avgN <- mean(Sylvia.trans[c(nextthree, nextnextthree),]$gps_transect_n)
  avgW <- mean(Sylvia.trans[c(nextthree, nextnextthree),]$gps_transect_w)
  bearing <- mean(Sylvia.trans[c(nextthree, nextnextthree),]$bearing)
  # project the averaged long/lat point into UTM so offsets are in meters
  xy <- data.frame(ID = 1, X = c(avgW), Y = c(avgN))
  coordinates(xy) <- c("X", "Y")
  # NOTE(review): this PROJ string contains an embedded line break in the
  # original source; elsewhere in this file the same CRS is written on one
  # line as "+proj=longlat +datum=WGS84" -- confirm it parses identically.
  proj4string(xy) <- CRS("+proj=longlat 
+datum=WGS84") ## for example
  res <- spTransform(xy, CRS("+proj=utm +zone=15 ellps=WGS84"))
  # place 8 transects at +/- 1.5, 4.5, 7.5, 10.5 m along the bearing axis.
  # NOTE(review): newx uses cos() and newy uses sin() of (-bearing), the
  # opposite of the sin/cos convention used in detection.location -- the
  # original author flagged the ordering as a guess; verify against a map.
  newx <- coordinates(res)[1] + cos(-pi/180*(bearing))*c(-1,-1, -1, -1, 1, 1, 1, 1)*c(1.5, 4.5, 7.5, 10.5, 1.5, 4.5, 7.5, 10.5)
  newy <- coordinates(res)[2] + sin(-pi/180*(bearing))*c(-1,-1, -1, -1, 1, 1, 1, 1)*c(1.5, 4.5, 7.5, 10.5, 1.5, 4.5, 7.5, 10.5)
  # just guessing on the order (original author's note)
  xynew <- data.frame(ID = 1:8, X=newx, Y=newy)
  coordinates(xynew) <- c("X", "Y")
  proj4string(xynew) <- CRS("+proj=utm +zone=15 ellps=WGS84") ## for example
  # back to long/lat and write the interpolated values into the missing rows
  xytrans <- spTransform(xynew, CRS("+proj=longlat +datum=WGS84"))
  Sylvia.trans[fillins,]$gps_transect_n <- coordinates(xytrans)[,2]
  Sylvia.trans[fillins,]$gps_transect_w <- coordinates(xytrans)[,1]
  Sylvia.trans[fillins,]$bearing <- bearing
  Trans[which(Trans$Region.Label=="Sylvia"),] <- Sylvia.trans
  return(Trans)
}

# Draw one parameter vector from MVN(pars, varcov) and return the logistic of
# its inner product with covariate vector x (a parametric-bootstrap draw of a
# detection probability).  NOTE(review): relies on an rmvn() function being in
# scope (e.g. mgcv-style) -- confirm which package supplies it.
logit.rand <- function(pars, varcov, x) {
  parDraw <- rmvn(1, pars, varcov)
  plogis(sum(parDraw*x))
}

# Based on the location of the transect and the distance along the transect at
# which a mussel was discovered, the next function calculates the GPS
# coordinates of the detection event.
# Compute the long/lat position of one detection from its transect origin,
# transect bearing, distance along the transect, side (1 or 2), and
# perpendicular distance.  Returns c(longitude, latitude), or c(NA, NA) when
# the transect has no recorded coordinates.
# Requires sp (coordinates, proj4string, CRS, spTransform).
detection.location <- function(HabZM_entry) {
  gps_detect_w <- gps_detect_n <- NULL
  # no transect origin recorded -> cannot locate the detection
  if(is.na(HabZM_entry$gps_transect_w)) {return(c(NA,NA))}
  # project the transect origin into UTM so offsets are in meters
  xy <- data.frame(ID = 1, X = HabZM_entry$gps_transect_w, Y = HabZM_entry$gps_transect_n)
  coordinates(xy) <- c("X", "Y")
  proj4string(xy) <- CRS("+proj=longlat +datum=WGS84") ## for example
  res <- spTransform(xy, CRS("+proj=utm +zone=15 ellps=WGS84"))
  # move mussel_distance_along meters along the bearing, then offset
  # perpendicular to it; 2*(side-1.5) maps side 1 -> -1 and side 2 -> +1
  newx <- coordinates(res)[1] + sin(HabZM_entry$bearing*pi/180)*HabZM_entry$mussel_distance_along + 2*(HabZM_entry$side-1.5)*HabZM_entry$distance*sin((HabZM_entry$bearing-90)*pi/180)
  newy <- coordinates(res)[2] + cos(HabZM_entry$bearing*pi/180)*HabZM_entry$mussel_distance_along + 2*(HabZM_entry$side-1.5)*HabZM_entry$distance*cos((HabZM_entry$bearing-90)*pi/180)
  # convert the detection point back to long/lat
  xynew <- data.frame(ID = 1, X=newx, Y=newy)
  coordinates(xynew) <- c("X", "Y")
  proj4string(xynew) <- CRS("+proj=utm +zone=15 ellps=WGS84") ## for example
  xytrans <- spTransform(xynew, CRS("+proj=longlat +datum=WGS84"))
  gps_detect_w <- coordinates(xytrans)[1]
  gps_detect_n <- coordinates(xytrans)[2]
  return(c(gps_detect_w, gps_detect_n))
}

# Links together habitat and zebra mussel count data so that it is clear what
# the habitat was at each detection event.  Returns a two-element list:
# [[1]] the annotated detection data (HabZM), [[2]] the cleaned habitat data.
# Requires dplyr (rename, mutate, filter, recode) and stringr (str_split,
# str_detect).
Link.HabCounts <- function(Hab, ZM) {
  # Clean habitat dataset: split "recordid_D..." ids back into the transect
  # record id, and infer the observer number from a "_2" suffix (1 or 2).
  Hab <- Hab %>% rename(hab_id=record_id) %>% mutate(record_id = as.factor(str_split(hab_id, pattern="_D", n=2, simplify = TRUE)[,1])) %>% mutate(Observer=str_detect(hab_id, "_2")+1)
  # NOTE(review): hard-coded swap of rows 62 and 63 -- presumably fixing a
  # data-entry ordering error in this particular dataset; confirm it still
  # applies if the input data are regenerated.
  Hab <- Hab[c(1:61, 63, 62, 64:dim(Hab)[1]), ]
  # habitat measurements from observer 1 only
  Hab1 <- Hab %>% filter(Observer==1)
  HabZM <- ZM
  for(i in 1:dim(ZM)[1]) {
    currRec <- as.character(ZM[i,]$record_id)
    currDis <- ZM[i,]$mussel_distance_along
    # habitat rows belonging to this detection's transect
    currSet <- which(as.character(Hab1$record_id) == currRec)
    # findInterval picks the habitat sample bracketing the detection's
    # distance along the transect.
    # NOTE(review): findInterval is computed over the subset
    # Hab1[currSet,]$distance_along but the resulting index is then used to
    # row-index the FULL Hab1 (not Hab1[currSet,]) -- this looks like it
    # should be currSet[findInterval(...) + 1]; verify against the data.
    # Columns 56:62 <- 4:10 are position-based; confirm the column layout.
    HabZM[i, 56:62] <- Hab1[findInterval(x=currDis, vec=Hab1[currSet,]$distance_along) + 1, 4:10]
    HabZM[i,]$plantcover <- Hab1$plant_cover[findInterval(x=currDis, vec=Hab1[currSet,]$distance_along) + 1]
    # last distance_along on the transect is the cumulative transect length;
    # multiply by 2 later to get the sampled area (strip on both sides)
    HabZM$Effort[i] <- Hab1[currSet[length(currSet)],]$distance_along #multiply the transect length by 2 to get the sampled area. This is the cumulative transect length.
  }
  #insert missing effort
  #HabZM <- effort.missing(ZM, HabZM)
  # Survey areas: sum per-transect efforts (hard-coded 24 Sylvia transects,
  # 18 Burgen transects), doubled because the strip covers both sides.
  temp <- vector("numeric", 24)
  for(i in 1:24) {
    temp[i] <- filter(HabZM, Region.Label == "Sylvia" & transect_no == i)$Effort[1]
  }
  HabZM$Area[which(HabZM$Region.Label == "Sylvia")] <- 2*sum(temp)
  temp <- vector("numeric", 18)
  for(i in 1:18) {
    temp[i] <- filter(HabZM, Region.Label == "Burgen"& transect_no == i)$Effort[1]
  }
  HabZM$Area[which(HabZM$Region.Label == "Burgen")] <- 2*sum(temp)
  # Turn cover into a presence/absence factor and rename substrate columns.
  HabZM$plantcover <- as.factor(HabZM$plantcover)
  HabZM$plantcover <- recode(HabZM$plantcover, '0'="Absent", '1'="Present", '2'="Present", '3'="Present", '4'="Present")
  HabZM <- HabZM %>% rename(Mud=substrate1, Sand=substrate2, Gravel=substrate3, Pebble=substrate4, Rock=substrate5, Silt=substrate6, Other=substrate7)
  # treat the substrate indicators as factors
  HabZM$Mud <- as.factor(HabZM$Mud)
  HabZM$Sand <- as.factor(HabZM$Sand)
  HabZM$Gravel <- as.factor(HabZM$Gravel)
  HabZM$Pebble <- as.factor(HabZM$Pebble)
  HabZM$Rock <- as.factor(HabZM$Rock)
  HabZM$Silt <- as.factor(HabZM$Silt)
  HabZM$Other <- as.factor(HabZM$Other)
  HabZM <- HabZM %>% rename(Sample.Label = 'record_id')
  returnList <- list()
  returnList[[1]] <- HabZM
  returnList[[2]] <- Hab
  return(returnList)
}

# Insert missing effort into Sylvia Lake.
# This data was not properly entered into the database for some reason, so
# transects with no detections (hence no rows) are re-inserted here with
# their known efforts.  Hard-coded record ids and transect lengths are
# specific to the 2 lakes in this study.  Requires dplyr (filter, add_row).
effort.missing <- function(ZM, HabZM) {
  # transect numbers with no detection rows in each lake
  sylvia.missing <- setdiff(1:24, filter(ZM, Region.Label=="Sylvia")$transect_no)
  burgen.missing <- setdiff(1:18, filter(ZM, Region.Label=="Burgen")$transect_no)
  # known transect lengths for the missing transects, in the same order as
  # the setdiff results above -- NOTE(review): this pairing assumes exactly
  # 10 Sylvia and 1 Burgen transects are missing; confirm before reuse.
  sylvia.missing.distance <- c(15, 10, 15, 15, 15, 15, 15, 27, 15, 18.5)
  burgen.missing.distance <- c(30)
  HabZM <- HabZM %>% add_row(record_id="21004900_T10", Region.Label="Burgen", observer___1=1, observer___2=1, observer___6=0, observer___7=0, transect_no=burgen.missing, Effort=burgen.missing.distance)
  for(i in 1:length(sylvia.missing)) {
    HabZM <- HabZM %>% add_row(record_id=paste("73024900_T", sylvia.missing[i], sep=''), Region.Label="Sylvia", observer___1=1, observer___2=1, observer___6=0, observer___7=0, transect_no=sylvia.missing[i], Effort=sylvia.missing.distance[i])
  }
  return(HabZM)
}

# This function classifies two detections from different observers as either
# being the same group or not.
# horDis is the max distance away from the transect used as criterion for
# grouping observations; vertDis is the max distance along the transect.
# Both the actual horizontal distance must be less than horDis and the actual
# vertical distance must be less than vertDis.
# Output is a mark-recapture-distance-sampling style table: every matched or
# unmatched detection is emitted twice (once per observer) with detected=0/1.
# Requires dplyr (mutate, filter) and spatstat's crossdist().
match.function <- function(BurgenDat, horDis=0.1, vertDis=0.25) {
  # Pass 1: for every detection, record the distance to the nearest detection
  # made by the OTHER observer on the same transect.
  BurgenDat <- BurgenDat %>% mutate(nearest=rep(NA,dim(BurgenDat)[1]), horDis=rep(NA,dim(BurgenDat)[1]), vertDis=rep(NA,dim(BurgenDat)[1]))
  for(i in 1:dim(BurgenDat)[1]) {
    currTrans <- BurgenDat$transect_no[i]
    currObs <- which(BurgenDat$transect_no == BurgenDat$transect_no[i])
    currLabel <- BurgenDat[i,]$observer
    # only detection on this transect -> no neighbor to compare against
    if(length(currObs) == 1) {
      BurgenDat$nearest[i] <- NA
      BurgenDat$horDis[i] <- NA
      BurgenDat$vertDis[i] <- NA
      next()
    }
    # detections by the other observer; switch(1,2,1)=2 and switch(2,2,1)=1
    obsOtherDat <- filter(BurgenDat, transect_no==currTrans, observer==switch(currLabel, 2, 1))
    if(dim(obsOtherDat)[1] == 0) {
      next()
    }
    # x-coordinate is the SIGNED cross-track distance: 2*(side-1.5) maps
    # side 1 -> -1 and side 2 -> +1
    tmp <- crossdist(X=BurgenDat$distance[i]*2*(BurgenDat$side[i]-1.5), Y=BurgenDat$mussel_distance_along[i], x2=obsOtherDat$distance*2*(obsOtherDat$side-1.5), y2=obsOtherDat$mussel_distance_along)
    min.index <- which.min(tmp[1,])
    BurgenDat$nearest[i] <- min(tmp[1,])
    # get minimum componentwise distances to that nearest neighbor
    BurgenDat$horDis[i] <- abs(obsOtherDat[min.index,]$distance*2*(obsOtherDat[min.index,]$side-1.5) - BurgenDat$distance[i]*2*(BurgenDat$side[i]-1.5))
    BurgenDat$vertDis[i] <- abs(BurgenDat$mussel_distance_along[i] - obsOtherDat[min.index,]$mussel_distance_along)
  }
  # I am just defining a value that determines how close two sightings by
  # different observers are that need to be clustered. Working on a new
  # method for this.  (original author's note)
  BurgenDat <- BurgenDat %>% mutate(object=rep(NA, dim(BurgenDat)[1]), detected=rep(NA, dim(BurgenDat)[1]))
  # drop detections with no recorded perpendicular distance
  if(any(is.na(BurgenDat$distance))) {
    BurgenDat <- BurgenDat[-which(is.na(BurgenDat$distance)),]
  }
  BurgenNew <- NULL
  # Pass 2: walk each transect and pair up detections by the two observers,
  # assigning a shared object id to matched pairs and emitting an
  # observer-complement row with detected=0 for unmatched detections.
  object.count <- 1
  for(i in unique(BurgenDat$transect_no)) {
    tempDat <- filter(BurgenDat, transect_no == i)
    if(dim(tempDat)[1] == 1) {
      # single detection on the transect: seen by one observer, missed by
      # the other (complement row gets detected=0)
      tempDat$object <- object.count
      BurgenNew <- rbind(BurgenNew, tempDat[1,])
      BurgenNew[dim(BurgenNew)[1],]$detected <- 1
      BurgenNew <- rbind(BurgenNew, tempDat[1,])
      BurgenNew[dim(BurgenNew)[1],]$object <- object.count
      BurgenNew[dim(BurgenNew)[1],]$observer <- switch(tempDat$observer, 2, 1)
      BurgenNew[dim(BurgenNew)[1],]$detected <- 0
      object.count <- object.count + 1
    } else {
      for(k in 1:dim(tempDat)[1]) {
        # already matched to an earlier detection -> skip
        if(!is.na(tempDat[k,]$object)) {
          next()
        }
        # this is the last observation: nothing after it to match with
        if(k == dim(tempDat)[1]) {
          #print('last one')
          BurgenNew <- rbind(BurgenNew, tempDat[k,])
          BurgenNew[dim(BurgenNew)[1],]$detected <- 1
          BurgenNew[dim(BurgenNew)[1],]$object <- object.count
          BurgenNew <- rbind(BurgenNew, tempDat[k,])
          BurgenNew[dim(BurgenNew)[1],]$object <- object.count
          BurgenNew[dim(BurgenNew)[1],]$observer <- switch(tempDat[k,]$observer, 2, 1)
          BurgenNew[dim(BurgenNew)[1],]$detected <- 0
          tempDat[k,]$object <- object.count
          object.count <- object.count + 1
          next()
        }
        # distances from detection k to every detection on the transect:
        # full euclidean, horizontal-only, and vertical-only components
        tmp <- crossdist(X=tempDat$distance[k]*2*(tempDat$side[k]-1.5), Y=tempDat$mussel_distance_along[k], x2=tempDat$distance*2*(tempDat$side-1.5), y2=tempDat$mussel_distance_along)
        tmpHor <- crossdist(X=tempDat$distance[k]*2*(tempDat$side[k]-1.5), Y=0, x2=tempDat$distance*2*(tempDat$side-1.5), y2=rep(0, length(tempDat$mussel_distance_along)))
        tmpVer <- crossdist(X=0, Y=tempDat$mussel_distance_along[k], x2=rep(0, length(tempDat$distance*2*(tempDat$side-1.5))), y2=tempDat$mussel_distance_along)
        # mask self-distance; only entries after k are considered below
        tmp[k] <- NA
        tmpHor[k] <- NA
        tmpVer[k] <- NA
        nearest <- min(tmp[1, (k+1):dim(tmp)[2]], na.rm=T)
        which.nearest <- which.min(tmp[1, (k+1):dim(tmp)[2]])
        nearestHor <- min(tmpHor[1,(k+1):dim(tmp)[2]], na.rm=T)
        which.nearestHor <- which.min(tmpHor[1, (k+1):dim(tmp)[2]])
        nearestVer <- min(tmpVer[1, (k+1):dim(tmp)[2]], na.rm=T)
        which.nearestVer <- which.min(tmpVer[1, (k+1):dim(tmp)[2]])
        # NOTE(review): `&` binds tighter than `|` in R, so this condition is
        # (is.na(nearest) & nearestHor > horDis) | (nearestVer > vertDis).
        # The comment below suggests the intent may have been an all-OR
        # condition -- confirm the intended precedence before changing.
        if(is.na(tempDat[k,]$nearest) & nearestHor > horDis | nearestVer > vertDis) {
          # there is no nearest neighbor, and this detection has not been
          # already counted: emit it as seen by its observer, missed by the
          # other
          BurgenNew <- rbind(BurgenNew, tempDat[k,])
          BurgenNew[dim(BurgenNew)[1],]$detected <- 1
          BurgenNew[dim(BurgenNew)[1],]$object <- object.count
          BurgenNew <- rbind(BurgenNew, tempDat[k,])
          BurgenNew[dim(BurgenNew)[1],]$object <- object.count
          BurgenNew[dim(BurgenNew)[1],]$observer <- switch(tempDat[k,]$observer, 2, 1)
          BurgenNew[dim(BurgenNew)[1],]$detected <- 0
          tempDat[k,]$object <- object.count
          object.count <- object.count + 1
        } else {
          # there is a nearest neighbor and it doesn't yet have a partner;
          # check that all observations within the criteria are observed by
          # different observers
          valid.match <- which(tmpHor[1, (k+1):dim(tmpHor)[2]] <= horDis & tmpVer[1, (k+1):dim(tmpVer)[2]] <= vertDis)
          valid.match <- valid.match[which(tempDat$observer[k+valid.match] != tempDat$observer[k])]
          if(length(valid.match) == 0) {
            # there are no valid nearest neighbors: treat as a solo sighting
            BurgenNew <- rbind(BurgenNew, tempDat[k,])
            BurgenNew[dim(BurgenNew)[1],]$detected <- 1
            BurgenNew[dim(BurgenNew)[1],]$object <- object.count
            BurgenNew <- rbind(BurgenNew, tempDat[k,])
            BurgenNew[dim(BurgenNew)[1],]$object <- object.count
            BurgenNew[dim(BurgenNew)[1],]$observer <- switch(tempDat[k,]$observer, 2, 1)
            BurgenNew[dim(BurgenNew)[1],]$detected <- 0
            tempDat[k,]$object <- object.count
            object.count <- object.count + 1
            next()
          }
          #print('match made')
          # assign the nearest neighbor of the above set as the partner of
          # the observation: both rows share the object id, detected=1
          best.match <- valid.match[which.min(tempDat$nearest[k + valid.match])]
          BurgenNew <- rbind(BurgenNew, tempDat[k,])
          BurgenNew[dim(BurgenNew)[1],]$detected <- 1
          BurgenNew[dim(BurgenNew)[1],]$object <- object.count
          BurgenNew <- rbind(BurgenNew, tempDat[k,])
          BurgenNew[dim(BurgenNew)[1],]$detected <- 1
          BurgenNew[dim(BurgenNew)[1],]$object <- object.count
          BurgenNew[dim(BurgenNew)[1],]$observer <- switch(tempDat[k,]$observer, 2, 1)
          tempDat[k,]$object <- object.count
          # mark the partner so it is skipped when the loop reaches it
          tempDat[k+best.match,]$object <- object.count
          object.count <- object.count + 1
        }#end else
      }#for k
    }#else
  }
  BurgenNew$observer <- as.factor(BurgenNew$observer)
  return(BurgenNew)
}

# Create habitat segments, used in density/count analysis for mrds.  Assumes
# that habitat does not vary within the segment.  See segment.data in ?dsm.
# Requires dplyr, stringr, and sp.
create.Seg <- function(Hab, Trans, lakename) {
  # assumes habitat measurements from observer 1 are more accurate
  SegDat <- merge(Hab, Trans, by="record_id") %>% rename(Sample.Label=hab_id) %>% subset(Observer == 1)
  # extract the transect number from labels of the form "<record>_T<n>"
  SegDat <- SegDat %>% mutate(Effort = rep(NA, dim(SegDat)[1]), Transect=as.numeric(str_split(Sample.Label, pattern="_T", n=2, simplify = TRUE)[,2])) %>% subset(Region.Label==lakename)
  SegDat$Region.Label <- factor(SegDat$Region.Label)
  SegDat$Sample.Label <- as.factor(as.character(SegDat$Sample.Label))
  # order by transect then by distance so diff() below gives segment lengths
  SegDat <- SegDat[order(SegDat$record_id, SegDat$distance_along),]
  for(curr in SegDat$record_id) {
    ind <- which(SegDat$record_id == curr)
    #if(SegDat$record_id[ind]=="21004900_T17") {stop()}
    # per-segment effort = gap between consecutive distance_along values
    dis.vec <- c(0, SegDat$distance_along[ind])
    SegDat$Effort[ind] <- diff(dis.vec)
  }
  # get locations of the segments; use UTM coordinates for smoothing later
  SegDat <- SegDat %>% mutate(utm_easting=rep(NA, dim(SegDat)[1]), utm_northing=rep(NA, dim(SegDat)[1]))
  for(i in 1:dim(SegDat)[1]) {
    bearing <- SegDat$bearing[i]
    gpsN <- SegDat$gps_transect_n[i]
    gpsW <- SegDat$gps_transect_w[i]
    xy <- data.frame(ID = 1, X = c(gpsW), Y = c(gpsN))
    coordinates(xy) <- c("X", "Y")
    proj4string(xy) <- CRS("+proj=longlat +datum=WGS84") ## for example
    res <- spTransform(xy, CRS("+proj=utm +zone=15 ellps=WGS84"))
    # NOTE(review): uses sin/cos of (-bearing), whereas detection.location
    # uses sin/cos of (+bearing) for the same along-transect offset --
    # confirm which sign convention matches the recorded bearings.
    SegDat$utm_easting[i] <- coordinates(res)[1] + sin(-pi/180*(bearing))*SegDat$distance_along[i]
    SegDat$utm_northing[i] <- coordinates(res)[2] + cos(-pi/180*(bearing))*SegDat$distance_along[i]
  }
  return(SegDat)
}

# This creates the ObsDat format needed by the dsm package.
# See observation.data in ?dsm.
# Build the observation table for dsm: one row per detection, linked to the
# habitat segment (Sample.Label) that contains it.  Requires dplyr.
create.Obs <- function(ObsDat, SegDat, lakename) {
  ObsDat <- ObsDat %>% subset(!is.na(size)) %>% rename(Transect.Label = Sample.Label, Lake=Region.Label)
  ObsDat <- ObsDat %>% subset(Lake==lakename)
  ObsDat$Lake <- factor(ObsDat$Lake)
  ObsDat <- ObsDat %>% mutate(Sample.Label=rep(NA, dim(ObsDat)[1]), object=1:dim(ObsDat)[1])
  # get the proper Sample.Label for each observation to link with segdata
  for(i in 1:dim(ObsDat)[1]) {
    currTransect <- ObsDat$Transect.Label[i]
    # all segments belonging to this transect label
    possHabs <- which(currTransect == as.character(SegDat$record_id))
    # first segment whose end distance covers the detection's position
    currInd <- which(ObsDat$mussel_distance_along[i] <= SegDat$distance_along[possHabs])[1]
    ObsDat$Sample.Label[i] <- as.character(SegDat$Sample.Label[possHabs[currInd]])
  }
  ObsDat$Sample.Label <- as.factor(ObsDat$Sample.Label)
  return(ObsDat)
}

# Total counts per segment for a single-observer analysis.
# NOTE(review): phat is left as a vector of zeros here (placeholder), so
# Effective.area is also zero -- presumably filled in downstream; confirm.
# Requires dplyr.
create.CountSumm.single <- function(SegDat, ObsDat, region="Sylvia", ds.model) {
  segment.data <- subset(SegDat, Region.Label==region)
  segment.data <- segment.data %>% mutate(Area = abs(Effort*2))
  segment.data$plant_cover <- as.factor(segment.data$plant_cover)
  segment.data$plant_cover <- recode(segment.data$plant_cover, '0'="None", '1'="Present", '2'="Present", '3'="Present", '4'="Present")
  # for each segment get the total counts
  levels(ObsDat$Sample.Label) <- levels(segment.data$Sample.Label)
  count.vec <- vector('numeric', dim(segment.data)[1])
  for(i in 1:dim(segment.data)[1]) {
    currSeg <- segment.data$Sample.Label[i]
    count.vec[i] <- sum(ObsDat$size[(which(ObsDat$Sample.Label == currSeg))])
  }
  phat.pred <- vector('numeric', dim(segment.data)[1])
  segment.data <- mutate(segment.data, phat=phat.pred, Counts=count.vec, Effective.area=phat.pred*segment.data$Area)
  return(segment.data)
}

# Total counts and predicted detection probabilities (phat) per segment for
# the half-normal ("one piece") detection model, Burgen lake only.
# MCDS.est is log(sigma); ddf.model$mr$par holds the logistic-regression
# coefficients of the mark-recapture component.  Relies on the globals
# `dthresh` (right-truncation distance) and the helpers logit() and
# halfnormal.func() defined in this file.  Requires dplyr.
create.CountSumm.onepiece <- function(SegDat, ObsDat, BurgenDDF, ddf.model, MCDS.est, det=FALSE) {
  segment.data <- subset(SegDat, Region.Label=="Burgen")
  segment.data <- segment.data %>% mutate(Area = Effort*2*(1-dthresh))
  segment.data$plant_cover <- as.factor(segment.data$plant_cover)
  segment.data$plant_cover <- recode(segment.data$plant_cover, '0'="None", '1'="Present", '2'="Present", '3'="Present", '4'="Present")
  # for each segment get the total counts
  count.vec <- vector('numeric', dim(segment.data)[1])
  for(i in 1:dim(segment.data)[1]) {
    currSeg <- as.character(segment.data$Sample.Label[i])
    count.vec[i] <- sum(ObsDat$size[(which(as.character(ObsDat$Sample.Label) == currSeg))])
  }
  # generate p-hats
  phat.pred <- vector('numeric', dim(segment.data)[1])
  p0.mean <- ddf.model$mr$par
  sig <- exp(MCDS.est)
  # integration grid over [0, 1-dthresh]; loop-invariant, so hoisted
  dis.vec <- seq(0, 1-dthresh, length=10000)
  dx <- dis.vec[2] - dis.vec[1]
  for(i in 1:dim(segment.data)[1]) {
    # covariates: intercept, observer-2 effect, plant cover, clarity
    cov.vec <- c(1, 1, segment.data$plant_cover[i]=="Present", segment.data$clarity[i])
    p0.obs2 <- p0.mean*cov.vec[1:length(p0.mean)]
    # observer 1 lacks the observer-2 effect (coefficient 2)
    p0.obs1 <- p0.obs2[-2]
    p1 <- logit(sum(p0.obs1))
    p2 <- logit(sum(p0.obs2))
    # probability at least one observer detects at distance 0
    p0 <- p1 + p2 - p1*p2
    # Riemann sum of p0 * half-normal over the truncated strip
    phat.pred[i] <- sum(dx*p0*halfnormal.func(sig=sig, dis.vec))
  }
  segment.data <- mutate(segment.data, phat=phat.pred, Counts=count.vec, Effective.area=phat.pred*segment.data$Area)
  # remove entries with missing clarity (covariate needed above)
  if(any(is.na(segment.data$clarity))) {
    segment.data <- segment.data[-which(is.na(segment.data$clarity)),]
  }
  return(segment.data)
}

# Parametric-bootstrap variance of the per-segment phats under the
# half-normal model: draws B parameter vectors and re-evaluates phat.
# NOTE(review): MCDS.fit$minimum is the argmin if the fit came from
# stats::optimize(), but the objective VALUE if it came from nlm() --
# confirm which optimizer produced MCDS.fit.  Also relies on the globals
# `count.est` and `Area` for the final covariance scaling; confirm they are
# defined when this is called.  Requires MASS::mvrnorm and dplyr.
create.phatVar.onepiece <- function(count.dat, ddf.dsModel, MCDS.fit, SegDat, ObsDat, B=1e2, type="inverse") {
  phat.mat <- matrix(NA, dim(count.dat)[1], B)
  p0.varcov <- solve(ddf.dsModel$mr$hessian)
  p0.mean <- ddf.dsModel$mr$par
  MCDS.varcov <- solve(MCDS.fit$hessian)
  MCDS.est <- c(MCDS.fit$minimum)
  pars2.draw <- mvrnorm(n=B, mu=p0.mean, Sigma=p0.varcov)
  # BUG FIX: rnorm's `sd` argument is a standard deviation; MCDS.varcov is a
  # variance (inverse Hessian), so take its square root.
  MCDS.draw <- rnorm(n=B, mean=MCDS.est, sd=sqrt(MCDS.varcov))
  for(i in 1:B) {
    phat.mat[,i] <- resample.detection.onepiece(countDat=count.dat, pars2.draw=pars2.draw[i,], MCDS.draw[i], SegDat=SegDat, ObsDat=ObsDat, det=FALSE)$phat
  }
  # delta-method style scaling of the covariance of 1/phat by count/Area
  phat.varcov <- diag(count.est/Area)%*%cov(t(1/phat.mat))%*%diag(count.est/Area)
  return(list(phat.mat, phat.varcov))
}

# Negative log-likelihood of a negative-binomial regression with log link and
# a log-offset.  pars = c(beta, log(theta)); X is the design matrix.
nbin.regression <- function(pars, X, y, offset) {
  beta <- pars[-length(pars)]
  # size/dispersion parameter kept positive via log parameterization
  theta <- exp(pars[length(pars)])
  mu <- exp(X%*%beta + log(offset))
  return(-sum(dnbinom(y, size=theta, mu=mu, log=TRUE)))
}

## Randomly drawn phats from the two-piece normal model: recompute the
## per-segment phat for one bootstrap draw of the mark-recapture coefficients
## (pars2.draw) and the detection-function parameters (MCDS.draw =
## c(log mu, log sig1, log sig2)).  Requires dplyr.
resample.detection.twopiece <- function(countDat, pars2.draw, MCDS.draw, SegDat, ObsDat, det=FALSE) {
  segment.data <- subset(SegDat, Region.Label=="Burgen")
  segment.data <- segment.data %>% mutate(Area = Effort*2)
  segment.data$plant_cover <- as.factor(segment.data$plant_cover)
  segment.data$plant_cover <- recode(segment.data$plant_cover, '0'="None", '1'="Present", '2'="Present", '3'="Present", '4'="Present")
  # for each segment get the total counts
  levels(ObsDat$Sample.Label) <- levels(segment.data$Sample.Label)
  count.vec <- vector('numeric', dim(segment.data)[1])
  for(i in 1:dim(segment.data)[1]) {
    currSeg <- segment.data$Sample.Label[i]
    count.vec[i] <- sum(ObsDat$size[(which(ObsDat$Sample.Label == currSeg))])
  }
  # generate p-hats for this draw
  phat.pred <- vector('numeric', dim(segment.data)[1])
  mew <- exp(MCDS.draw[1])
  sig1 <- exp(MCDS.draw[2])
  sig2 <- exp(MCDS.draw[3])
  # Riemann grid over [0, 1); drop the endpoint so dx*length == 1
  dis <- seq(0, 1, length=1e3)
  dis <- dis[-length(dis)]
  dx <- dis[2] - dis[1]
  # integral of the (unnormalized) two-piece detection function
  p.int <- sum(dx*Two.piece.normal.func(sig1=sig1, sig2=sig2, mu=mew, dis.vec=dis))
  # covariates: intercept, observer-2 effect, plant cover
  cov.mat <- cbind(rep(1, dim(segment.data)[1]), rep(1, dim(segment.data)[1]), segment.data$plant_cover=="Present")
  for(i in 1:dim(segment.data)[1]) {
    pars2 <- pars2.draw*cov.mat[i,]
    pars1 <- pars2[-2]
    p1 <- logit(sum(pars1))
    p2 <- logit(sum(pars2))
    p0 <- p1 + p2 - p1*p2
    phat.pred[i] <- p0*p.int
  }
  segment.data <- mutate(segment.data, phat=phat.pred, Counts=count.vec, Effective.area=phat.pred*segment.data$Area)
  return(segment.data)
}

# Same as resample.detection.twopiece but for the half-normal model:
# MCDS.draw is a single log(sigma) draw.  Relies on the global `dthresh`.
# Requires dplyr.
resample.detection.onepiece <- function(countDat, pars2.draw, MCDS.draw, SegDat, ObsDat, det=FALSE) {
  segment.data <- subset(SegDat, Region.Label=="Burgen")
  segment.data <- segment.data %>% mutate(Area = Effort*2)
  segment.data$plant_cover <- as.factor(segment.data$plant_cover)
  segment.data$plant_cover <- recode(segment.data$plant_cover, '0'="None", '1'="Present", '2'="Present", '3'="Present", '4'="Present")
  # for each segment get the total counts
  levels(ObsDat$Sample.Label) <- levels(segment.data$Sample.Label)
  count.vec <- vector('numeric', dim(segment.data)[1])
  for(i in 1:dim(segment.data)[1]) {
    currSeg <- segment.data$Sample.Label[i]
    count.vec[i] <- sum(ObsDat$size[(which(ObsDat$Sample.Label == currSeg))])
  }
  # generate p-hats for this draw
  phat.pred <- vector('numeric', dim(segment.data)[1])
  sig <- exp(MCDS.draw)
  dis <- seq(0, 1-dthresh, length=1e3)
  dis <- dis[-length(dis)]
  dx <- dis[2] - dis[1]
  # BUG FIX: the original referenced the undefined `par.draw[i]` here, which
  # would error at runtime; the drawn scale is `sig` computed above.
  p.int <- dx*sum(halfnormal.func(sig=sig, dis))
  cov.mat <- cbind(rep(1, dim(segment.data)[1]), rep(1, dim(segment.data)[1]), segment.data$plant_cover=="Present")
  for(i in 1:dim(segment.data)[1]) {
    pars2 <- pars2.draw*cov.mat[i,]
    pars1 <- pars2[-2]
    p1 <- logit(sum(pars1))
    p2 <- logit(sum(pars2))
    p0 <- p1 + p2 - p1*p2
    phat.pred[i] <- p0*p.int
  }
  segment.data <- mutate(segment.data, phat=phat.pred, Counts=count.vec, Effective.area=phat.pred*segment.data$Area)
  return(segment.data)
}

# Scaled error function (integral of a normal density): erf(x, 1) equals the
# classical erf(x); a general sigma gives erf(x/sigma).
erf <- function(x, sigma) {
  2*pnorm(x*sqrt(2), 0, sigma) - 1
}

# Count up the observations in each segment, then return a dataframe with
# segment counts; used to model segment counts.
# Uses the two-piece normal detection model: MCDS.est = c(log mu, log sig1,
# log sig2), ddf.model$mr$par holds the mark-recapture coefficients.
# Requires dplyr and the helpers logit() / Two.piece.normal.func().
create.CountSumm.twopiece <- function(SegDat, ObsDat, BurgenDDF, ddf.model, MCDS.est, det=FALSE) {
  segment.data <- subset(SegDat, Region.Label=="Burgen")
  segment.data <- segment.data %>% mutate(Area = Effort*2)
  segment.data$plant_cover <- as.factor(segment.data$plant_cover)
  segment.data$plant_cover <- recode(segment.data$plant_cover, '0'="None", '1'="Present", '2'="Present", '3'="Present", '4'="Present")
  # for each segment get the total counts
  count.vec <- vector('numeric', dim(segment.data)[1])
  for(i in 1:dim(segment.data)[1]) {
    currSeg <- as.character(segment.data$Sample.Label[i])
    count.vec[i] <- sum(ObsDat$size[(which(as.character(ObsDat$Sample.Label) == currSeg))])
  }
  # generate p-hats
  phat.pred <- vector('numeric', dim(segment.data)[1])
  p0.mean <- ddf.model$mr$par
  mew <- exp(MCDS.est[1])
  sig1 <- exp(MCDS.est[2])
  sig2 <- exp(MCDS.est[3])
  # integration grid is loop-invariant, so hoisted out of the loop
  dis <- seq(0, 1, length=1e3)
  dis <- dis[-length(dis)]
  dx <- dis[2] - dis[1]
  for(i in 1:dim(segment.data)[1]) {
    # covariates: intercept, observer-2 effect, plant cover
    cov.vec <- c(1, 1, segment.data$plant_cover[i]=="Present")
    p0.obs2 <- p0.mean*cov.vec[1:length(p0.mean)]
    p0.obs1 <- p0.obs2[-2]
    p1 <- logit(sum(p0.obs1))
    p2 <- logit(sum(p0.obs2))
    p0 <- p1 + p2 - p1*p2
    phat.pred[i] <- sum(dx*p0*Two.piece.normal.func(sig1=sig1, sig2=sig2, mu=mew, dis.vec=dis))
  }
  segment.data <- mutate(segment.data, phat=phat.pred, Counts=count.vec, Effective.area=phat.pred*segment.data$Area)
  # remove missing data entries
  if(any(is.na(segment.data$clarity))) {
    segment.data <- segment.data[-which(is.na(segment.data$clarity)),]
  }
  return(segment.data)
}

# Logistic (inverse-logit) of x.  NOTE(review): despite the name this is the
# INVERSE logit (equivalent to plogis), not the logit; the name is kept
# because callers throughout this file depend on it.
logit <- function(x) {
  exp(x)/(1 + exp(x))
}

# Gavin Simpson's simulate function for mgcv GAMs: draws nsim coefficient
# vectors from the posterior and returns the corresponding linear predictors
# for newdata (rows = newdata rows, columns = simulations).
# Requires mgcv (mroot, and the gam methods for coef/vcov/predict).
simulate.gam <- function(object, nsim=1, seed=NULL, newdata, freq=FALSE, unconditional=FALSE) {
  # standard simulate() RNG bookkeeping: record/restore .Random.seed
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
    runif(1)
  if (is.null(seed))
    RNGstate <- get(".Random.seed", envir = .GlobalEnv)
  else {
    R.seed <- get(".Random.seed", envir = .GlobalEnv)
    set.seed(seed)
    RNGstate <- structure(seed, kind = as.list(RNGkind()))
    on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
  }
  if (missing(newdata)) {
    newdata <- object$model
  }
  ## random multivariate Gaussian
  ## From ?predict.gam in mgcv package
  ## Copyright Simon N Wood
  ## GPL >= 2
  rmvn <- function(n, mu, sig) { ## MVN random deviates
    L <- mroot(sig)
    m <- ncol(L)
    t(mu + L %*% matrix(rnorm(m * n), m, n))
  }
  Rbeta <- rmvn(n = nsim, mu = coef(object), sig = vcov(object, freq=freq, unconditional=unconditional))
  Xp <- predict(object, newdata = newdata, type = "lpmatrix")
  sims <- Xp %*% t(Rbeta)
  sims
}

# Bootstrap count-prediction covariances: simulate counts from the fitted
# density GAM and scale the empirical covariance elementwise by
# theta.est[j]*theta.est[k].
create.abundVar <- function(fit.dens4, theta.est, nsim=1e4, offset) {
  # rows = prediction points, columns = simulation draws
  count.mat <- exp(simulate.gam(fit.dens4, nsim=nsim))*offset
  # cov() across draws with elementwise theta scaling; algebraically
  # identical to the original O(n^2) double loop
  # sum((x_j - m_j)(x_k - m_k))/(nsim-1)*theta_j*theta_k.
  # (The original also computed an unused `count.bias` from the global
  # `count.est`; that dead code has been removed.)
  count.varcov <- cov(t(count.mat)) * (theta.est %o% theta.est)
  return(count.varcov)
}

## Draw new phats under the two-piece model: parametric bootstrap over both
## the mark-recapture coefficients and the detection-function parameters.
## Relies on the globals `count.est` and `Area` for the final scaling --
## confirm they are defined when this is called.  Requires MASS::mvrnorm
## and dplyr.
create.phatVar <- function(count.dat, ddf.dsModel, MCDS.fit, SegDat, ObsDat, B=1e2, type="inverse") {
  phat.mat <- matrix(NA, dim(count.dat)[1], B)
  p0.varcov <- solve(ddf.dsModel$mr$hessian)
  p0.mean <- ddf.dsModel$mr$par
  MCDS.varcov <- solve(MCDS.fit$hessian)
  MCDS.est <- c(MCDS.fit$solution)
  pars2.draw <- mvrnorm(n=B, mu=p0.mean, Sigma=p0.varcov)
  MCDS.draw <- mvrnorm(n=B, mu=MCDS.est, Sigma=MCDS.varcov)
  for(i in 1:B) {
    # BUG FIX: the original passed matrix(MCDS.est, B, 3) -- the replicated
    # point estimate -- so the MCDS.draw bootstrap draws were never used.
    # Pass the i-th draw, consistent with create.phatVar.onepiece.
    phat.mat[,i] <- resample.detection.twopiece(countDat=count.dat, pars2.draw=pars2.draw[i,], MCDS.draw[i,], SegDat=SegDat, ObsDat=ObsDat, det=FALSE)$phat
  }
  phat.varcov <- diag(count.est/Area)%*%cov(t(1/phat.mat))%*%diag(count.est/Area)
  return(list(phat.mat, phat.varcov))
}

# Unnormalized two-piece normal detection function evaluated on dis.vec:
# scale sig1 left of the mode mu, sig2 at/right of it.  Assumes dis.vec is
# sorted ascending (pieces are concatenated, not re-interleaved).
Two.piece.normal.func <- function(sig1, sig2, mu, dis.vec) {
  dis1.vec <- dis.vec[dis.vec < mu]
  dis2.vec <- dis.vec[dis.vec >= mu]
  return(c(exp(-(dis1.vec - mu)^2/(2*sig1^2)), exp(-(dis2.vec - mu)^2/(2*sig2^2))))
}

# Unnormalized half-normal detection function evaluated on dis.vec.
halfnormal.func <- function(sig, dis.vec) {
  return(exp(-(dis.vec)^2/(2*sig^2)))
}

# Primary coded as 1, secondary coded as 2.  The secondary observer sees all
# that the primary does, plus possibly more (removal configuration).
# Expands each observation into the observer/detected row pairs expected by
# mrds removal-type double-observer models.  Requires dplyr.
create.removal.Observer <- function(transect.dat, obs.dat) {
  primary <- transect.dat$`Primary observer (double observer survey)`
  secondary <- transect.dat$`Secondary observer (double observer survey)`
  # index observer names by transect; assumes transect numbers are
  # consecutive starting at min(obs.dat$`Transect #`)
  obs.dat <- obs.dat %>% mutate(primary=primary[obs.dat$`Transect #`- min(obs.dat$`Transect #`)+1], secondary=secondary[obs.dat$`Transect #`- min(obs.dat$`Transect #`)+1], observer = rep(1, dim(obs.dat)[1]))
  for(i in 1:dim(obs.dat)[1]) {
    curr.tran <- obs.dat[i,]$`Transect #`
    # 1 if this observation was made by the transect's primary observer
    obs.dat[i,]$observer <- as.numeric(obs.dat[i,]$`Observer name` == transect.dat[transect.dat$`Transect number` == curr.tran,]$`Primary observer (double observer survey)`)
  }
  # observations made by the secondary that were NOT made by the primary:
  # primary missed them (detected=0), secondary saw them (detected=1)
  temp01.obs <- obs.dat[obs.dat$observer==0,]
  if(dim(temp01.obs)[1]) {
    temp01.obs$observer <- 1
    temp01.obs$detected <- 0
  }
  temp012.obs <- temp01.obs
  if(dim(temp012.obs)[1]) {
    temp012.obs$observer <- 0
    temp012.obs$detected <- 1
  }
  # copy all observations made by the primary observer to the secondary
  # observer (removal assumption: secondary sees everything primary does)
  temp11.obs <- obs.dat[obs.dat$observer==1,]
  temp11.obs$observer <- 0
  temp11.obs$detected <- 1
  temp112.obs <- temp11.obs
  temp112.obs$observer <- 1
  temp112.obs$detected <- 1
  obs.dat <- rbind(temp01.obs, temp012.obs, temp11.obs, temp112.obs)
  # recode observer 0 -> 2 (mrds expects observers 1 and 2)
  obs.dat$observer[which(obs.dat$observer==0)] <- 2
  obs.dat <- obs.dat[order(obs.dat$object),]
  return(obs.dat)
}

# Expand group detections into individuals: each row with size > 1 is
# duplicated size-1 additional times (appended at the end), so every row
# afterwards represents one individual of the original group.
doubleObs.duplicate <- function(obs.dat) {
  num.detect <- which(obs.dat$size > 1)
  if(length(num.detect) == 0) {
    return(obs.dat)
  }
  for(i in 1:length(num.detect)) {
    temp.dat <- obs.dat[num.detect[i],]
    # build size copies of the row, then append all but the original
    for(j in 2:obs.dat[num.detect[i],]$size) {
      temp.dat <- rbind(temp.dat, temp.dat[1,])
    }
    obs.dat <- rbind(obs.dat, temp.dat[-1,])
  }
  return(obs.dat)
}