#set of functions that are useful for analyzing the zebra mussel survey dataset ##formats raw data, then estimates densities using distance survey ##inputs are the lake names used to extract info from survey (curr.lake) and transect (curr.lake2) data. ##output is an object that contains estimated density, probability of detection, data summaries, survey information, and mrds output. distance.dens.est <- function(curr.lake, curr.lake2) { distance.dat <- read_csv(file="Encounters - Distance.csv", show_col_types = F) distance.dat <- distance.dat %>% subset(`Lake name` == curr.lake[1]) distance.dat <- distance.dat[order(distance.dat$`Transect #`),] distance.dat <- dplyr::rename(distance.dat, size="Number of mussels in cluster", distance="Perpendicular distance from transect (cm)") distance.dat$distance <- distance.dat$distance/100 if(curr.lake == "Little Birch Lake") { distance.dat <- distance.dat %>% filter(`Transect #` > 11) #ensure just the counts from before the reproductive event are included in this analysis } #remove 'empty' observations, these are transects that had no detections. sometimes coded if(any(is.na(distance.dat$`Distance along transect (m)`))) { rm.vec <- which(is.na(distance.dat$`Distance along transect (m)`)) distance.dat <- distance.dat[-rm.vec,] } varS <- var(distance.dat$size)/ length(distance.dat$size) eS <- mean(distance.dat$size) distanceorig.dat <- distance.dat transect.dat <- read_csv(file="Transect datasheets.csv", show_col_types = F) transect.dat <- transect.dat %>% subset(`Lake name:` == curr.lake2[1]) transect.dat <- transect.dat %>% subset(`Survey type` == "Double observer distance") transect.dat <- transect.dat[order(transect.dat$"Transect number"),] setup.time <- 60^2*hour(transect.dat$`Setup time`) + 60*minute(transect.dat$`Setup time`) + second(transect.dat$`Setup time`) hab.time <- 60^2*hour(transect.dat$`Habitat time`) + 60*minute(transect.dat$`Habitat time`) + second(transect.dat$`Habitat time`) enc.time <- 60^2*hour(transect.dat$`Encounter time`) + 60*minute(transect.dat$`Encounter time`) + second(transect.dat$`Encounter time`) trans.length <- sum(transect.dat$`Transect length (if transect survey)`) trans.area <- trans.length*2 distance.dat <- distance.dat %>% mutate(detected = rep(1, dim(distance.dat)[1]), object=1:dim(distance.dat)[1]) distance.dat <- create.removal.Observer(transect.dat=transect.dat, obs.dat=distance.dat) dis.totdetect <- dim(distance.dat)[1] distance.dat$`Observer name` <- as.factor(distance.dat$`Observer name`) if(curr.lake == "Little Birch Lake") { dis.detect.model <- ddf(method="rem", dsmodel=~cds(key="hn", formula=~1), mrmodel=~glm(formula=~1), data=distance.dat, meta.data=list(width=1)) #hr appears to fit better but estimates are consistent with hn } else { dis.detect.model <- ddf(method="rem", dsmodel=~cds(key="hn"), mrmodel=~glm(formula=~1), data=distance.dat, meta.data=list(width=1)) } phat <- summary(dis.detect.model)$average.p phat.se <- summary(dis.detect.model)$average.p.se[1,1] #estimate density ##assume poisson model count.vec <- detect.vec <- rep(NA, 15) for(i in transect.dat$`Transect number`) { count.vec[i] <- sum(distanceorig.dat[which(distanceorig.dat$`Transect #` == i),]$size) detect.vec[i] <- dim(distanceorig.dat[which(distanceorig.dat$`Transect #` == i),])[1] } names(count.vec) <- names(detect.vec) <- transect.dat$`Transect number` if(any(is.na(count.vec))) { count.vec <- count.vec[-which(is.na(count.vec))] detect.vec <- detect.vec[-which(is.na(detect.vec))] } area.vec <- t(as.matrix(2*transect.dat$`Transect length (if transect survey)`))[1,] names(area.vec) <- transect.dat$`Transect number` lambda <- sum(count.vec, na.rm=T)/sum(area.vec) dhat <- dis.detect.model$Nhat/sum(area.vec)*eS dhat.detect <- sum(detect.vec/phat, na.rm=TRUE)/sum(area.vec) df <- data.frame(Detections=detect.vec, Mussels=count.vec, D=dhat, Time=setup.time+hab.time+enc.time, t.set=setup.time, t.hab=hab.time, t.enc=enc.time, Area=area.vec, Length=area.vec/2, Type=rep("Distance", length(area.vec)), Lake=rep(curr.lake, length(area.vec)), Lat=transect.dat$`GPS latitude`, Long=transect.dat$`GPS longitude`) K <- length(detect.vec) l <- area.vec/2 L <- sum(l) n <- detect.vec N <- sum(detect.vec) var.n <- L/(K-1)*sum(l*(n/l - N/L)^2) var.n <- L/(K-1)*sum(l*(n/l - N/L)^2) dhat.se <- sqrt(dhat^2*(var.n/N^2 + varS/K/eS^2 + phat.se^2/phat^2)) return.list <- list(Dhat=dhat, Dhat.se=dhat.se, phat=phat, phat.se=phat.se, Transects=length(area.vec), Area=area.vec, Detections=detect.vec, Mussels=count.vec, Time=setup.time+hab.time+enc.time, t.set=setup.time, t.hab=hab.time, t.enc=enc.time, df=df, ddf=dis.detect.model, distance.dat=distance.dat, eS=eS, varS=varS) return(return.list) } ##estimate density from quadrats ##inputs are the lake names used to extract info from survey (curr.lake) and transect (curr.lake2) data as well as quadrat size. ##output is an object that contains estimate d density, probability of detection, data summaries, and survey information. quadrat.dens.est <- function(curr.lake, curr.lake2, quad.side=0.5) { quadrat.dat <- read_csv(file="Encounters - Quadrats.csv", show_col_types = F) quadrat.dat <- quadrat.dat %>% subset(`Lake name` == curr.lake) if(curr.lake == "Little Birch Lake") { quadrat.dat <- quadrat.dat %>% filter(`Transect #` > 11) #ensure just the counts from before the reproductive event are included in this analysis } quadrat.dat <- quadrat.dat[order(quadrat.dat$`Transect #`),] if(any(is.na(quadrat.dat$`Number of mussels in quadrat`))) { quadrat.dat$`Number of mussels in quadrat`[which(is.na(quadrat.dat$`Number of mussels in quadrat`))] <- 0 } transect.dat <- read_csv(file="Transect datasheets.csv", show_col_types = F) transect.dat <- transect.dat %>% subset(`Lake name:` == curr.lake2) transect.dat <- transect.dat %>% subset(`Survey type` == "Quadrat survey") transect.dat <- transect.dat[order(transect.dat$"Transect number"),] if(curr.lake == "Little Birch Lake") { transect.dat <- transect.dat %>% filter(`Transect number` > 11) #ensure just the counts from before the reproductive event are included in this analysis } setup.time <- 60^2*hour(transect.dat$`Setup time`) + 60*minute(transect.dat$`Setup time`) + second(transect.dat$`Setup time`) hab.time <- 60^2*hour(transect.dat$`Habitat time`) + 60*minute(transect.dat$`Habitat time`) + second(transect.dat$`Habitat time`) enc.time <- 60^2*hour(transect.dat$`Encounter time`) + 60*minute(transect.dat$`Encounter time`) + second(transect.dat$`Encounter time`) #get number of quadrats per transect trans.quads <- trans.counts <- rep(NA, max(quadrat.dat$`Transect #`)) for(i in unique(quadrat.dat$`Transect #`)) { trans.quads[i] <- sum(quadrat.dat$`Transect #` == i) trans.counts[i] <- sum(quadrat.dat[quadrat.dat$`Transect #` == i,]$`Number of mussels in quadrat`) } if(any(is.na(trans.quads))) { rm.vec <- which(is.na(trans.quads)) trans.quads <- trans.quads[-rm.vec] trans.counts <- trans.counts[-rm.vec] } trans.num <- dim(transect.dat)[1] quad.counts <- (quadrat.dat$`Number of mussels in quadrat`) quad.area <- quad.side^2 names(trans.counts) <- transect.dat$`Transect number` area.vec <- trans.quads*quad.area names(area.vec) <- transect.dat$`Transect number` dhat <- sum(trans.counts)/sum(area.vec) df <- data.frame(Detections=trans.counts, Mussels=trans.counts, D=trans.counts/area.vec, Time=setup.time+hab.time+enc.time, t.set=setup.time, t.hab=hab.time, t.enc=enc.time, Area=area.vec, Length=transect.dat$`Transect length (if transect survey)`, Type=rep("Quadrat", length(trans.quads)), Lake=rep(curr.lake, length(trans.quads)), Lat=transect.dat$`GPS latitude`, Long=transect.dat$`GPS longitude`) K <- length(trans.counts) l <- df$Length #sqrt(trans.quads) L <- sum(df$Length) #sum(sqrt(trans.quads)) nl <- trans.counts/l N <- sum(trans.counts) NL <- N/L var.n <- L/(K-1)* sum(l*(nl - NL)^2) dhat.se <- sqrt(dhat^2*(var.n/N^2)) quad.list <- list(Quadrats=trans.quads, Area=area.vec, Mussels=trans.counts, Dhat=dhat, Dhat.se=dhat.se, Time=setup.time + hab.time + enc.time, df=df) return(quad.list) } ##formats raw data, then estimates densities using removal survey ##inputs are the lake names used to extract info from survey (curr.lake) and transect (curr.lake2) data. ##output is an object that contains estimated density, probability of detection, data summaries, survey information, and mrds output. removal.dens.est <- function(curr.lake, curr.lake2) { removal.dat <- read_csv(file="Encounters - Removal.csv", show_col_types = F) removal.dat <- removal.dat %>% subset(`Lake name` == curr.lake) removal.dat <- removal.dat[order(removal.dat$`Transect #`),] removal.dat <- dplyr::rename(removal.dat, size="Number of mussels in cluster") removalorig.dat <- removal.dat if(curr.lake == "Little Birch Lake") { removal.dat <- subset(removal.dat, `Transect #` == 15 | `Transect #` == 14 | `Transect #` == 13 | `Transect #` == 12) } if(any(removal.dat$size==0)) { removal.dat <- removal.dat[-which(removal.dat$size==0),] } if(any(is.na(removal.dat$size))) { removal.dat <- removal.dat[-which(is.na(removal.dat$size)),] } eS <- mean(removal.dat$size, na.rm=T) varS <- var(removal.dat$size, na.rm=T)/length(removal.dat$size) transect.dat <- read_csv(file="Transect datasheets.csv", show_col_types = F) transect.dat <- transect.dat %>% subset(`Lake name:` == curr.lake2) transect.dat <- transect.dat %>% subset(`Survey type` == "Double observer no distance") transect.dat <- transect.dat[order(transect.dat$"Transect number"),] if(curr.lake == "Little Birch Lake") { transect.dat <- subset(transect.dat, `Transect number` == 15 | `Transect number` == 14 | `Transect number` == 13 | `Transect number` == 12) } setup.time <- 60^2*hour(transect.dat$`Setup time`) + 60*minute(transect.dat$`Setup time`) + second(transect.dat$`Setup time`) hab.time <- 60^2*hour(transect.dat$`Habitat time`) + 60*minute(transect.dat$`Habitat time`) + second(transect.dat$`Habitat time`) enc.time <- 60^2*hour(transect.dat$`Encounter time`) + 60*minute(transect.dat$`Encounter time`) + second(transect.dat$`Encounter time`) trans.length <- transect.dat$`Transect length (if transect survey)` trans.area <- trans.length #get number of obsevations made by primary and secondary observers count.primary <- count.secondary <- detect.primary <- detect.secondary <- rep(NA, max(transect.dat$`Transect number`)) for(i in unique(transect.dat$`Transect number`)) { curr.prim <- transect.dat[transect.dat$`Transect number` == i,]$`Primary observer (double observer survey)` curr.dat <- removal.dat[removal.dat$`Transect #`== i,] count.primary[i] <- max(sum(curr.dat[curr.dat$`Observer name` == curr.prim,]$size), 0, na.rm=T) count.secondary[i] <- max(sum(curr.dat[curr.dat$`Observer name` != curr.prim,]$size), 0, na.rm=T) detect.primary[i] <- max(dim(curr.dat[curr.dat$`Observer name` == curr.prim,])[1], 0, na.rm=T) detect.secondary[i] <- max(dim(curr.dat[curr.dat$`Observer name` != curr.prim,])[1], 0, na.rm=T) } if(any(is.na(count.primary))) { rm.vec <- which(is.na(count.primary)) count.primary <- count.primary[-rm.vec] count.secondary <- count.secondary[-rm.vec] detect.primary <- detect.primary[-rm.vec] detect.secondary <- detect.secondary[-rm.vec] } removal.dat <- removal.dat %>% mutate(detected = rep(1, dim(removal.dat)[1]), distance=rep(0.99, dim(removal.dat)[1]), object=1:dim(removal.dat)[1]) removal.dat$object <- 1:dim(removal.dat)[1] removal.dat <- create.removal.Observer(transect.dat=transect.dat, obs.dat=removal.dat) removalDetect.model <- ddf(method="rem.fi", mrmodel=~glm(formula=~1), dsmodel=~cds(key="unif"), data=removal.dat, meta.data=list(width=1)) phat <- summary(removalDetect.model)$average.p0.1 phat.se <- summary(removalDetect.model)$average.p0.1.se[1,1] count.vec <- count.primary + count.secondary detect.vec <- detect.primary + detect.secondary count.vec <- t(as.matrix(count.vec)) dimnames(count.vec)[[2]] <- transect.dat$`Transect number` area.vec <- trans.length names(area.vec) <- transect.dat$`Transect number` trans.area <- sum(transect.dat$`Transect length (if transect survey)`) dhat <- removalDetect.model$Nhat/trans.area*eS df <- data.frame(Detections=detect.vec, Mussels=count.vec[1,], D=dhat, Time=setup.time+hab.time+enc.time, t.set=setup.time, t.hab=hab.time, t.enc=enc.time, Area=area.vec, Length=area.vec, Type=rep("removal", length(trans.area)), Lake=rep(curr.lake, length(trans.area)), Lat=transect.dat$`GPS latitude`, Long=transect.dat$`GPS longitude`) K <- length(detect.vec) l <- area.vec L <- sum(l) n <- detect.vec N <- sum(detect.vec) var.n <- L/(K-1)*sum(l*(n/l - N/L)^2) dhat.se <- sqrt(dhat^2*(var.n/N^2 + varS/K/eS^2 + phat.se^2/phat^2)) return.list <- list(Dhat=dhat, Dhat.se=dhat.se, phat=phat, phat.se=phat.se, Area=area.vec, Detections=detect.primary+detect.secondary, Mussels=count.vec, Time=setup.time+hab.time+enc.time, t.set=setup.time, t.hab=hab.time, t.enc=enc.time, df=df, ddf=removalDetect.model, eS=eS, varS=varS) return(return.list) } #primary coded as 1 #secondary coded as 2 #secondary sees all that 1 does but then a bit more #creates create.removal.Observer <- function(transect.dat, obs.dat) { primary <- transect.dat$`Primary observer (double observer survey)` secondary <- transect.dat$`Secondary observer (double observer survey)` obs.dat <- obs.dat %>% mutate(primary=primary[obs.dat$`Transect #`- min(obs.dat$`Transect #`)+1], secondary=secondary[obs.dat$`Transect #`- min(obs.dat$`Transect #`)+1], observer = rep(1, dim(obs.dat)[1])) for(i in 1:dim(obs.dat)[1]) { curr.tran <- obs.dat[i,]$`Transect #` obs.dat[i,]$observer <- as.numeric(obs.dat[i,]$`Observer name` == transect.dat[transect.dat$`Transect number` == curr.tran,]$`Primary observer (double observer survey)`) } #all observations made by secondary that were not made by primary temp01.obs <- obs.dat[obs.dat$observer==0,] if(dim(temp01.obs)[1]) { temp01.obs$observer <- 1 temp01.obs$detected <- 0 } #print(dim(temp01.obs)[1]) temp012.obs <- temp01.obs if(dim(temp012.obs)[1]) { temp012.obs$observer <- 0 temp012.obs$detected <- 1 } #obs.dat <- rbind(obs.dat, temp.obs) #copy all observations made by the primary observer to the secondary observer temp11.obs <- obs.dat[obs.dat$observer==1,] temp11.obs$observer <- 0 temp11.obs$detected <- 1 temp112.obs <- temp11.obs temp112.obs$observer <- 1 temp112.obs$detected <- 1 #obs.dat <- rbind(obs.dat, temp.obs) obs.dat <- rbind(temp01.obs, temp012.obs, temp11.obs, temp112.obs) obs.dat$observer[which(obs.dat$observer==0)] <- 2 obs.dat$primary <- as.factor(obs.dat$primary) obs.dat <- obs.dat[order(obs.dat$object),] return(obs.dat) }