#In this file, we run variation 3 on the occupancy model, as described in the manuscript. Except where noted, all else is as in the spOcc.R file, so annotations have been eliminated for brevity. library(spOccupancy) library(coda) library(stars) library(ggplot2) library(tidyverse) library(rjags) library(runjags) library(sf) library(Matrix) ssw_data = readRDS("ssw_data_final") #In this Deviation, we add an interaction between time since first infestation and distance to the nearest access to the occupancy level of the model. Nothing else is changed. coords = st_coordinates(ssw_data) ssw_data$X=coords[,1] ssw_data$Y=coords[,2] min.dist = vector() max.dist = vector() for(l in 1:length(unique(ssw_data$lake))) { tmp1 = filter(ssw_data, ssw_data$lake == unique(ssw_data$lake)[l]) %>% distinct(geometry) tmp.dists = dist(st_coordinates(tmp1)) min.dist = c(min(tmp.dists, na.rm = T), min.dist) max.dist = c(max(tmp.dists, na.rm=T), max.dist) } max.mindist = max(min.dist) max.maxdist = max(max.dist) ssw_data = ssw_data %>% st_drop_geometry() %>% dplyr::arrange(pt.id, survey_year, survey_dateStart) ssw_data = ssw_data %>% mutate(depth_m = depth_ft / 3.281) # Getting y and det.covs --------------------------------------------------------- num_pts = max(ssw_data$pt.id) num_years = max(ssw_data$survey_year - min(ssw_data$survey_year) + 1) num_reps = ssw_data %>% group_by(pt.id, survey_year) %>% summarize(count = n()) num_reps = max(num_reps$count) y = array(NA, dim=c(num_pts, num_years, num_reps)) plant_density = array(NA, dim=c(num_pts, num_years, num_reps)) chara_density = array(NA, dim=c(num_pts, num_years, num_reps)) day_of_year = array(NA, dim=c(num_pts, num_years, num_reps)) for(p in 1:num_pts) { tmp.dat1 = ssw_data %>% filter(pt.id == p) years2for = unique(tmp.dat1$survey_year) for(yr in 1:length(years2for)) { tmp.dat2 = tmp.dat1 %>% filter(survey_year == years2for[yr]) surveys2for = unique(tmp.dat2$survey_dateStart) for(s in 1:length(surveys2for)) { tmp.dat3 = tmp.dat2 %>% filter(survey_dateStart == surveys2for[s]) this_year = tmp.dat3$survey_year - min(ssw_data$survey_year) + 1 y[p, this_year, s] = tmp.dat3$SSW plant_density[p, this_year, s] = tmp.dat3$Plant.Density chara_density[p, this_year, s] = tmp.dat3$Characeae day_of_year[p, this_year, s] = tmp.dat3$days0 } } if(p %% 100 == 0) { print(p) } } det.covs = list( plant_density = plant_density, chara_density = chara_density, day_of_year = day_of_year ) # saveRDS(det.covs, "det_covs") # det.covs = readRDS("det_covs") # # saveRDS(y, "y_mod") # y = readRDS("y_mod") # Getting occ.covs -------------------------------------------------------- depth = array(NA, dim=c(num_pts,1)) access_numPUB = array(NA, dim=c(num_pts,1)) access_numPRIV = array(NA, dim=c(num_pts,1)) fetch = array(NA, dim=c(num_pts,1)) dist2access = array(NA, dim=c(num_pts,1)) inf_year = array(NA, dim=c(num_pts, num_years)) access_type = array(NA, dim=c(num_pts,1)) lake = array(NA, dim=c(num_pts,1)) for(p in 1:num_pts) { tmp.dat1 = ssw_data %>% filter(pt.id == p) depth[p] = median(tmp.dat1$depth_m) access_numPUB[p] = median(tmp.dat1$access.numPUB) access_numPRIV[p] = median(tmp.dat1$access.numPRIV) fetch[p] = median(tmp.dat1$max.fetch) dist2access[p] = median(tmp.dat1$min.dist) access_type[p] = first(tmp.dat1$type) lake[p] = as.numeric(as.factor(first(tmp.dat1$lake))) years2for = unique(tmp.dat1$survey_year) for(yr in 1:length(years2for)) { tmp.dat2 = tmp.dat1 %>% filter(survey_year == years2for[yr]) if(nrow(tmp.dat2) > 1) { tmp.dat3 = tmp.dat2 %>% group_by(pt.id, survey_year) %>% summarize(inf_year = mean(inf.year), .groups = "drop") } else { tmp.dat3 = tmp.dat2 %>% rename(inf_year = inf.year) } this_year = tmp.dat3$survey_year - min(ssw_data$survey_year) + 1 inf_year[p, this_year] = tmp.dat3$inf_year } if(p %% 100 == 0) { print(p) } } depthxfetch = scale(depth) * scale(fetch) for(row in 1:nrow(inf_year)) { for(col in 1:ncol(inf_year)) { if(is.na(inf_year[row,col])) { first.val.spot = min(which(!is.na(inf_year[row,]))) first.val = inf_year[row, first.val.spot] current.val = first.val - (first.val.spot - col) inf_year[row, col:first.val.spot] = current.val:first.val } } } #We also need to create the year x dist interaction term, which we do using apply so that we can ensure the scaling happens correctly. yearxdist = apply(scale(inf_year), 2, function(x) { x * scale(c(dist2access)) }) access_type[access_type == "PRIVATE"] = 1 access_type[access_type == "PUBLIC"] = 0 access_type = as.numeric(access_type) occ.covs = list( depth = depth, access_numPUB = access_numPUB, access_numPRIV = access_numPRIV, fetch = fetch, dist2access = dist2access, inf_year = inf_year, depthxfetch = depthxfetch, access_type = access_type, lake = as.numeric(lake), yearxdist = yearxdist ) # saveRDS(occ.covs, "occ_covs") # occ.covs = readRDS("occ_covs") condensed_dat = ssw_data %>% group_by(pt.id) %>% summarize(X = mean(X), Y = mean(Y)) %>% arrange(pt.id) coords = matrix(NA, nrow=nrow(condensed_dat), ncol=2) coords[,1] = condensed_dat$X coords[,2] = condensed_dat$Y ssw.occ.formula = ~ scale(depth) + scale(fetch) + scale(dist2access) + access_type + scale(access_numPUB) + scale(access_numPRIV) + depthxfetch + scale(inf_year) + yearxdist + (1|lake) #Note addition here. ssw.det.formula = ~ scale(plant_density) + scale(day_of_year) + scale(chara_density) ssw.mod.dat = list( y = y, occ.covs = occ.covs, det.covs = det.covs, coords = coords ) ssw.inits = list( alpha = c(0,0,0,0), beta = c(0,0,0,0,0,0,0,0,0,0), #Extra 0 added here. z = apply(y, c(1, 2), function(a) { as.numeric(sum(a, na.rm = TRUE) > 0) }), sigma.sq = 2, phi = mean(c(3/(max.maxdist/2), 1/(max.mindist))), w = rep(0, nrow(y)), sigma.sq.psi = 2 ) ssw.priors = list(alpha.normal = list( mean = 0, var = 2.72 ), beta.normal = list( mean = 0, var = 2.72 ), phi.unif = c(3/(max.maxdist/2), 1/(max.mindist)), sigma.sq.unif = c(0.001, 10), sigma.sq.psi.ig = list(a = 0.1, b = 0.1) ) batch.length = 25 n.batch = 10000 n.thin = 50 n.chains = 3 n.neighbors = 15 out.mod1 = stPGOcc(ssw.occ.formula, ssw.det.formula, ssw.mod.dat, inits = ssw.inits, n.batch = n.batch, batch.length = batch.length, priors = ssw.priors, cov.model = "exponential", n.neighbors = n.neighbors, n.thin = n.thin, n.chains = n.chains, n.report = 100, n.omp.threads = 4, ar1 = FALSE) ## Analytics: summary(out.mod1) # # #Assess whether effective sample size for the spatial random effects + occupancy intercept are sufficiently high. # beta.0.w.samples <- out.mod1$beta.samples[, 1] + out.mod1$w.samples # ess.beta.0.w <- coda::effectiveSize(beta.0.w.samples) # summary(ess.beta.0.w) # # beta.samples <- out.mod1$beta.samples # saveRDS(beta.samples, file = 'beta-samplesEXP3.rds') # # alpha.samples <- out.mod1$alpha.samples # saveRDS(alpha.samples, file = 'alpha-samplesEXP3.rds') # # z.samples <- out.mod1$z.samples # saveRDS(z.samples, file = 'z-samplesEXP3.rds') # # beta.star.samples <- out.mod1$beta.star.samples # saveRDS(beta.star.samples, file = 'theta.samples-samplesEXP3.rds') # # theta.samples <- out.mod1$theta.samples # saveRDS(theta.samples, file = 'theta-samplesEXP3.rds') # # sigma.sq.psi.samples <- out.mod1$sigma.sq.psi.samples # saveRDS(sigma.sq.psi.samples, file = 'sigma.sq.psiEXP3.samples.rds') # # #Posterior predictive checks # ppc.out1 <- ppcOcc(out.mod1, fit.stat = 'freeman-tukey', group = 1) # saveRDS(ppc.out1, "PPC_tukey_grp1.rds") # # ppc.out2 <- ppcOcc(out.mod1, fit.stat = 'freeman-tukey', group = 2) # saveRDS(ppc.out2, "PPC_tukey_grp2.rds") # # ppc.out3 <- ppcOcc(out.mod1, fit.stat = 'chi-squared', group = 1) # saveRDS(ppc.out3, "PPC_chi_grp1.rds") # # ppc.out4 <- ppcOcc(out.mod1, fit.stat = 'chi-squared', group = 2) # saveRDS(ppc.out4, "PPC_chi_grp2.rds") # # summary(ppc.out1); summary(ppc.out2); summary(ppc.out3); summary(ppc.out4)