# In this file, we run variation 1 of the occupancy model, as described in the
# manuscript. Except where noted, everything is as in the spOcc.R file, so
# annotations have been trimmed for brevity.
library(spOccupancy)
library(coda)
library(stars)
library(ggplot2)
library(tidyverse)
library(rjags)
library(runjags)
library(sf)
library(Matrix)

ssw_data = readRDS("ssw_data_final")

# In this deviation, we remove the five lakes with no documented detections of
# starry stonewort in any survey to see whether doing so notably changes our
# parameter estimates.
ssw_data = ssw_data %>%
  filter(!lake %in% c("Carnelian", "Grand", "LowerNemahbin", "Pleasant", "Sylvia"))

# This cuts out some lakes and points, so we need to renumber all the point ids.
max.pt = length(unique(ssw_data$pt.id)) ## The highest id we should have afterwards.
ssw_data$pt.id2 = NA ## Temporary column for the new ids.
for(i in max(ssw_data$pt.id):1) { # Go backwards from the highest point id.
  if(length(which(ssw_data$pt.id == i)) > 0) { # If this id still exists...
    ssw_data$pt.id2[ssw_data$pt.id == i] = max.pt ## ...assign the current max.pt value...
    max.pt = max.pt - 1 # ...and reduce the max point value by 1.
  }
}
ssw_data$pt.id = ssw_data$pt.id2 # Overwrite the pt ids with the new ones.

# Relative to those in the "main" model, these min/max distance values are
# probably somewhat different as a result of losing several lakes. However,
# they are calculated the same way.
coords = st_coordinates(ssw_data)
ssw_data$X = coords[,1]
ssw_data$Y = coords[,2]
min.dist = vector()
max.dist = vector()
for(l in 1:length(unique(ssw_data$lake))) {
  tmp1 = filter(ssw_data, lake == unique(ssw_data$lake)[l]) %>%
    distinct(geometry)
  tmp.dists = dist(st_coordinates(tmp1))
  min.dist = c(min(tmp.dists, na.rm = TRUE), min.dist)
  max.dist = c(max(tmp.dists, na.rm = TRUE), max.dist)
}
max.mindist = max(min.dist)
max.maxdist = max(max.dist)

ssw_data = ssw_data %>%
  st_drop_geometry() %>%
  dplyr::arrange(pt.id, survey_year, survey_dateStart)

ssw_data = ssw_data %>%
  mutate(depth_m = depth_ft / 3.281) # Convert feet to meters.

# Getting y and det.covs ---------------------------------------------------------
num_pts = max(ssw_data$pt.id)
num_years = max(ssw_data$survey_year - min(ssw_data$survey_year) + 1)
num_reps = ssw_data %>%
  group_by(pt.id, survey_year) %>%
  summarize(count = n())
num_reps = max(num_reps$count)

y = array(NA, dim = c(num_pts, num_years, num_reps))
plant_density = array(NA, dim = c(num_pts, num_years, num_reps))
chara_density = array(NA, dim = c(num_pts, num_years, num_reps))
day_of_year = array(NA, dim = c(num_pts, num_years, num_reps))

for(p in 1:num_pts) {
  tmp.dat1 = ssw_data %>% filter(pt.id == p)
  years2for = unique(tmp.dat1$survey_year)
  for(yr in 1:length(years2for)) {
    tmp.dat2 = tmp.dat1 %>% filter(survey_year == years2for[yr])
    surveys2for = unique(tmp.dat2$survey_dateStart)
    for(s in 1:length(surveys2for)) {
      tmp.dat3 = tmp.dat2 %>% filter(survey_dateStart == surveys2for[s])
      this_year = tmp.dat3$survey_year - min(ssw_data$survey_year) + 1
      y[p, this_year, s] = tmp.dat3$SSW
      plant_density[p, this_year, s] = tmp.dat3$Plant.Density
      chara_density[p, this_year, s] = tmp.dat3$Characeae
      day_of_year[p, this_year, s] = tmp.dat3$days0
    }
  }
  if(p %% 100 == 0) { print(p) } # Progress report.
}

det.covs = list(
  plant_density = plant_density,
  chara_density = chara_density,
  day_of_year = day_of_year
)
# saveRDS(det.covs, "det_covs")
# det.covs = readRDS("det_covs")
# saveRDS(y, "y_mod")
# y = readRDS("y_mod")
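
# Optional sanity checks on the rebuilt arrays (a sketch, not part of the
# original pipeline; it assumes only the pt.id column and binary SSW detection
# data built above). After renumbering, point ids should run 1:num_pts with no
# gaps, and the detection array should contain only 0, 1, or NA.
# stopifnot(all(sort(unique(ssw_data$pt.id)) == 1:num_pts))
# stopifnot(all(y %in% c(0, 1, NA)))
# mean(!is.na(y)) # Proportion of (point x year x rep) cells actually surveyed.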
# Getting occ.covs --------------------------------------------------------
depth = array(NA, dim = c(num_pts, 1))
access_numPUB = array(NA, dim = c(num_pts, 1))
access_numPRIV = array(NA, dim = c(num_pts, 1))
fetch = array(NA, dim = c(num_pts, 1))
dist2access = array(NA, dim = c(num_pts, 1))
inf_year = array(NA, dim = c(num_pts, num_years))
access_type = array(NA, dim = c(num_pts, 1))
lake = array(NA, dim = c(num_pts, 1))

for(p in 1:num_pts) {
  tmp.dat1 = ssw_data %>% filter(pt.id == p)
  depth[p] = median(tmp.dat1$depth_m)
  access_numPUB[p] = median(tmp.dat1$access.numPUB)
  access_numPRIV[p] = median(tmp.dat1$access.numPRIV)
  fetch[p] = median(tmp.dat1$max.fetch)
  dist2access[p] = median(tmp.dat1$min.dist)
  access_type[p] = first(tmp.dat1$type)
  lake[p] = as.numeric(as.factor(first(tmp.dat1$lake)))
  years2for = unique(tmp.dat1$survey_year)
  for(yr in 1:length(years2for)) {
    tmp.dat2 = tmp.dat1 %>% filter(survey_year == years2for[yr])
    if(nrow(tmp.dat2) > 1) {
      tmp.dat3 = tmp.dat2 %>%
        group_by(pt.id, survey_year) %>%
        summarize(inf_year = mean(inf.year), .groups = "drop")
    } else {
      tmp.dat3 = tmp.dat2 %>% rename(inf_year = inf.year)
    }
    this_year = tmp.dat3$survey_year - min(ssw_data$survey_year) + 1
    inf_year[p, this_year] = tmp.dat3$inf_year
  }
  if(p %% 100 == 0) { print(p) } # Progress report.
}

depthxfetch = scale(depth) * scale(fetch)

# Back-fill years with no surveys: walk each row back from its first observed
# inf_year value, decrementing by one year per column.
for(row in 1:nrow(inf_year)) {
  for(col in 1:ncol(inf_year)) {
    if(is.na(inf_year[row, col])) {
      first.val.spot = min(which(!is.na(inf_year[row,])))
      first.val = inf_year[row, first.val.spot]
      current.val = first.val - (first.val.spot - col)
      inf_year[row, col:first.val.spot] = current.val:first.val
    }
  }
}

# Recode access type as a binary indicator (1 = private, 0 = public).
access_type[access_type == "PRIVATE"] = 1
access_type[access_type == "PUBLIC"] = 0
access_type = as.numeric(access_type)

occ.covs = list(
  depth = depth,
  access_numPUB = access_numPUB,
  access_numPRIV = access_numPRIV,
  fetch = fetch,
  dist2access = dist2access,
  inf_year = inf_year,
  depthxfetch = depthxfetch,
  access_type = access_type,
  lake = as.numeric(lake)
)
# saveRDS(occ.covs, "occ_covs")
# occ.covs = readRDS("occ_covs")

condensed_dat = ssw_data %>%
  group_by(pt.id) %>%
  summarize(X = mean(X), Y = mean(Y)) %>%
  arrange(pt.id)
coords = matrix(NA, nrow = nrow(condensed_dat), ncol = 2)
coords[,1] = condensed_dat$X
coords[,2] = condensed_dat$Y

ssw.occ.formula = ~ scale(depth) + scale(fetch) + scale(dist2access) +
  access_type + scale(access_numPUB) + scale(access_numPRIV) +
  depthxfetch + scale(inf_year) + (1|lake)
ssw.det.formula = ~ scale(plant_density) + scale(day_of_year) + scale(chara_density)

ssw.mod.dat = list(
  y = y,
  occ.covs = occ.covs,
  det.covs = det.covs,
  coords = coords
)

ssw.inits = list(
  alpha = c(0, 0, 0, 0),
  beta = c(0, 0, 0, 0, 0, 0, 0, 0, 0),
  z = apply(y, c(1, 2), function(a) { as.numeric(sum(a, na.rm = TRUE) > 0) }),
  sigma.sq = 2,
  phi = mean(c(3/(max.maxdist/2), 1/(max.mindist))),
  w = rep(0, nrow(y)),
  sigma.sq.psi = 2
)

ssw.priors = list(
  alpha.normal = list(mean = 0, var = 2.72),
  beta.normal = list(mean = 0, var = 2.72),
  phi.unif = c(3/(max.maxdist/2), 1/(max.mindist)),
  sigma.sq.unif = c(0.001, 10),
  sigma.sq.psi.ig = list(a = 0.1, b = 0.1)
)

batch.length = 25
n.batch = 10000
n.thin = 50
n.chains = 3
n.neighbors = 15

out.mod1 = stPGOcc(ssw.occ.formula, ssw.det.formula, ssw.mod.dat,
                   inits = ssw.inits,
                   n.batch = n.batch,
                   batch.length = batch.length,
                   priors = ssw.priors,
                   cov.model = "exponential",
                   n.neighbors = n.neighbors,
                   n.thin = n.thin,
                   n.chains = n.chains,
                   n.report = 100,
                   n.omp.threads = 4,
                   ar1 = FALSE)
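
# Quick convergence screen before the fuller analytics below (a sketch: the
# slot names $rhat, $ESS, and $beta.samples follow spOccupancy's documented
# model-object structure, and summary() reports the same diagnostics).
# max(unlist(out.mod1$rhat), na.rm = TRUE)     # Want values near 1 (roughly < 1.1).
# min(unlist(out.mod1$ESS))                    # Low values suggest longer runs or more thinning.
# plot(out.mod1$beta.samples, density = FALSE) # coda traceplots of occupancy effects.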
## Analytics:
# summary(out.mod1)
#
# beta.samples <- out.mod1$beta.samples
# saveRDS(beta.samples, file = 'beta-samplesEXP1.rds')
#
# alpha.samples <- out.mod1$alpha.samples
# saveRDS(alpha.samples, file = 'alpha-samplesEXP1.rds')
#
# z.samples <- out.mod1$z.samples
# saveRDS(z.samples, file = 'z-samplesEXP1.rds')
#
# beta.star.samples <- out.mod1$beta.star.samples
# saveRDS(beta.star.samples, file = 'beta.star-samplesEXP1.rds')
#
# theta.samples <- out.mod1$theta.samples
# saveRDS(theta.samples, file = 'theta-samplesEXP1.rds')
#
# sigma.sq.psi.samples <- out.mod1$sigma.sq.psi.samples
# saveRDS(sigma.sq.psi.samples, file = 'sigma.sq.psiEXP1.samples.rds')

# Posterior predictive checks
# ppc.out1 <- ppcOcc(out.mod1, fit.stat = 'freeman-tukey', group = 1)
# saveRDS(ppc.out1, "PPC_tukey_grp1.rds")
#
# ppc.out2 <- ppcOcc(out.mod1, fit.stat = 'freeman-tukey', group = 2)
# saveRDS(ppc.out2, "PPC_tukey_grp2.rds")
#
# ppc.out3 <- ppcOcc(out.mod1, fit.stat = 'chi-squared', group = 1)
# saveRDS(ppc.out3, "PPC_chi_grp1.rds")
#
# ppc.out4 <- ppcOcc(out.mod1, fit.stat = 'chi-squared', group = 2)
# saveRDS(ppc.out4, "PPC_chi_grp2.rds")
#
# summary(ppc.out1); summary(ppc.out2); summary(ppc.out3); summary(ppc.out4)
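
# Fit for this deviation could additionally be compared against the main model
# via WAIC (a sketch using spOccupancy's waicOcc(); the output filename is
# hypothetical):
# waic.mod1 <- waicOcc(out.mod1) # Returns elpd, pD, and WAIC; lower WAIC = better expected fit.
# saveRDS(waic.mod1, "WAIC_EXP1.rds")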