#In this file, we run variation 2 on the occupancy model, as described in the manuscript. Except where noted, all else is as in the spOcc.R file, so annotations have been eliminated for brevity. library(spOccupancy) library(coda) library(stars) library(ggplot2) library(tidyverse) library(rjags) library(runjags) library(sf) library(Matrix) ssw_data = readRDS("ssw_data_final") #In this Deviation, we remove all observations from Wind lake, which had a particularly infested private access. ssw_data = ssw_data %>% filter(!lake %in% c("Wind")) # This cuts out some lakes and pts, so we need to renumber all the pt ids here. max.pt = length(unique(ssw_data$pt.id)) ##The highest value we should have afterwards. ssw_data$pt.id2 = NA ## Temp column for(i in max(ssw_data$pt.id):1) { #Go backwards from the highest point if(length(which(ssw_data$pt.id == i)) > 0) { #Assuming something of this value still exists... ssw_data$pt.id2[ssw_data$pt.id == i] = max.pt ##Make this equal to whatever the max.pt value is at this moment. max.pt = max.pt-1 #Reduce the max point value by 1. } } ssw_data$pt.id = ssw_data$pt.id2 #Overwrite the pt id data with the new ones. #Relative to those in the "main" model, these min/max distance values are probably somewhat different as a result of losing a lake. However, the ways in which they are calculated are the same. coords = st_coordinates(ssw_data) ssw_data$X=coords[,1] ssw_data$Y=coords[,2] min.dist = vector() max.dist = vector() for(l in 1:length(unique(ssw_data$lake))) { tmp1 = filter(ssw_data, ssw_data$lake == unique(ssw_data$lake)[l]) %>% distinct(geometry) tmp.dists = dist(st_coordinates(tmp1)) min.dist = c(min(tmp.dists, na.rm = T), min.dist) max.dist = c(max(tmp.dists, na.rm=T), max.dist) } max.mindist = max(min.dist) max.maxdist = max(max.dist) ssw_data = ssw_data %>% st_drop_geometry() %>% dplyr::arrange(pt.id, survey_year, survey_dateStart) ssw_data = ssw_data %>% mutate(depth_m = depth_ft / 3.281) # Getting y and det.covs --------------------------------------------------------- num_pts = max(ssw_data$pt.id) num_years = max(ssw_data$survey_year - min(ssw_data$survey_year) + 1) num_reps = ssw_data %>% group_by(pt.id, survey_year) %>% summarize(count = n()) num_reps = max(num_reps$count) y = array(NA, dim=c(num_pts, num_years, num_reps)) plant_density = array(NA, dim=c(num_pts, num_years, num_reps)) chara_density = array(NA, dim=c(num_pts, num_years, num_reps)) day_of_year = array(NA, dim=c(num_pts, num_years, num_reps)) for(p in 1:num_pts) { tmp.dat1 = ssw_data %>% filter(pt.id == p) years2for = unique(tmp.dat1$survey_year) for(yr in 1:length(years2for)) { tmp.dat2 = tmp.dat1 %>% filter(survey_year == years2for[yr]) surveys2for = unique(tmp.dat2$survey_dateStart) for(s in 1:length(surveys2for)) { tmp.dat3 = tmp.dat2 %>% filter(survey_dateStart == surveys2for[s]) this_year = tmp.dat3$survey_year - min(ssw_data$survey_year) + 1 y[p, this_year, s] = tmp.dat3$SSW plant_density[p, this_year, s] = tmp.dat3$Plant.Density chara_density[p, this_year, s] = tmp.dat3$Characeae day_of_year[p, this_year, s] = tmp.dat3$days0 } } if(p %% 100 == 0) { print(p) } } det.covs = list( plant_density = plant_density, chara_density = chara_density, day_of_year = day_of_year ) # saveRDS(det.covs, "det_covs") # det.covs = readRDS("det_covs") # # saveRDS(y, "y_mod") # y = readRDS("y_mod") # Getting occ.covs -------------------------------------------------------- depth = array(NA, dim=c(num_pts,1)) access_numPUB = array(NA, dim=c(num_pts,1)) access_numPRIV = array(NA, dim=c(num_pts,1)) fetch = array(NA, dim=c(num_pts,1)) dist2access = array(NA, dim=c(num_pts,1)) inf_year = array(NA, dim=c(num_pts, num_years)) access_type = array(NA, dim=c(num_pts,1)) lake = array(NA, dim=c(num_pts,1)) for(p in 1:num_pts) { tmp.dat1 = ssw_data %>% filter(pt.id == p) depth[p] = median(tmp.dat1$depth_m) access_numPUB[p] = median(tmp.dat1$access.numPUB) access_numPRIV[p] = median(tmp.dat1$access.numPRIV) fetch[p] = median(tmp.dat1$max.fetch) dist2access[p] = median(tmp.dat1$min.dist) access_type[p] = first(tmp.dat1$type) lake[p] = as.numeric(as.factor(first(tmp.dat1$lake))) years2for = unique(tmp.dat1$survey_year) for(yr in 1:length(years2for)) { tmp.dat2 = tmp.dat1 %>% filter(survey_year == years2for[yr]) if(nrow(tmp.dat2) > 1) { tmp.dat3 = tmp.dat2 %>% group_by(pt.id, survey_year) %>% summarize(inf_year = mean(inf.year), .groups = "drop") } else { tmp.dat3 = tmp.dat2 %>% rename(inf_year = inf.year) } this_year = tmp.dat3$survey_year - min(ssw_data$survey_year) + 1 inf_year[p, this_year] = tmp.dat3$inf_year } if(p %% 100 == 0) { print(p) } } depthxfetch = scale(depth) * scale(fetch) for(row in 1:nrow(inf_year)) { for(col in 1:ncol(inf_year)) { if(is.na(inf_year[row,col])) { first.val.spot = min(which(!is.na(inf_year[row,]))) first.val = inf_year[row, first.val.spot] current.val = first.val - (first.val.spot - col) inf_year[row, col:first.val.spot] = current.val:first.val } } } access_type[access_type == "PRIVATE"] = 1 access_type[access_type == "PUBLIC"] = 0 access_type = as.numeric(access_type) occ.covs = list( depth = depth, access_numPUB = access_numPUB, access_numPRIV = access_numPRIV, fetch = fetch, dist2access = dist2access, inf_year = inf_year, depthxfetch = depthxfetch, access_type = access_type, lake = as.numeric(lake) ) # saveRDS(occ.covs, "occ_covs") # occ.covs = readRDS("occ_covs") condensed_dat = ssw_data %>% group_by(pt.id) %>% summarize(X = mean(X), Y = mean(Y)) %>% arrange(pt.id) coords = matrix(NA, nrow=nrow(condensed_dat), ncol=2) coords[,1] = condensed_dat$X coords[,2] = condensed_dat$Y ssw.occ.formula = ~ scale(depth) + scale(fetch) + scale(dist2access) + access_type + scale(access_numPUB) + scale(access_numPRIV) + depthxfetch + scale(inf_year) + (1|lake) ssw.det.formula = ~ scale(plant_density) + scale(day_of_year) + scale(chara_density) ssw.mod.dat = list( y = y, occ.covs = occ.covs, det.covs = det.covs, coords = coords ) ssw.inits = list( alpha = c(0,0,0,0), beta = c(0,0,0,0,0,0,0,0, 0), z = apply(y, c(1, 2), function(a) { as.numeric(sum(a, na.rm = TRUE) > 0) }), sigma.sq = 2, phi = mean(c(3/(max.maxdist/2), 1/(max.mindist))), w = rep(0, nrow(y)), sigma.sq.psi = 2 ) ssw.priors = list(alpha.normal = list( mean = 0, var = 2.72 ), beta.normal = list( mean = 0, var = 2.72 ), phi.unif = c(3/(max.maxdist/2), 1/(max.mindist)), sigma.sq.unif = c(0.001, 10), sigma.sq.psi.ig = list(a = 0.1, b = 0.1) ) batch.length = 25 n.batch = 10000 n.thin = 50 n.chains = 3 n.neighbors = 15 out.mod1 = stPGOcc(ssw.occ.formula, ssw.det.formula, ssw.mod.dat, inits = ssw.inits, n.batch = n.batch, batch.length = batch.length, priors = ssw.priors, cov.model = "exponential", n.neighbors = n.neighbors, n.thin = n.thin, n.chains = n.chains, n.report = 100, n.omp.threads = 4, ar1 = FALSE) ## Analytics: summary(out.mod1) # beta.samples <- out.mod1$beta.samples # saveRDS(beta.samples, file = 'beta-samplesEXP2.rds') # # alpha.samples <- out.mod1$alpha.samples # saveRDS(alpha.samples, file = 'alpha-samplesEXP2.rds') # # z.samples <- out.mod1$z.samples # saveRDS(z.samples, file = 'z-samplesEXP2.rds') # # beta.star.samples <- out.mod1$beta.star.samples # saveRDS(beta.star.samples, file = 'theta.samples-samplesEXP2.rds') # # theta.samples <- out.mod1$theta.samples # saveRDS(theta.samples, file = 'theta-samplesEXP2.rds') # # sigma.sq.psi.samples <- out.mod1$sigma.sq.psi.samples # saveRDS(sigma.sq.psi.samples, file = 'sigma.sq.psiEXP2.samples.rds') # # #Posterior predictive checks # ppc.out1 <- ppcOcc(out.mod1, fit.stat = 'freeman-tukey', group = 1) # saveRDS(ppc.out1, "PPC_tukey_grp1.rds") # # ppc.out2 <- ppcOcc(out.mod1, fit.stat = 'freeman-tukey', group = 2) # saveRDS(ppc.out2, "PPC_tukey_grp2.rds") # # ppc.out3 <- ppcOcc(out.mod1, fit.stat = 'chi-squared', group = 1) # saveRDS(ppc.out3, "PPC_chi_grp1.rds") # # ppc.out4 <- ppcOcc(out.mod1, fit.stat = 'chi-squared', group = 2) # saveRDS(ppc.out4, "PPC_chi_grp2.rds") # # summary(ppc.out1); summary(ppc.out2); summary(ppc.out3); summary(ppc.out4)