#In this file, we run variation 3 on the occupancy model, as described in the manuscript. Except where noted, all else is as in the spOcc.R file, so annotations have been eliminated for brevity. 

library(spOccupancy)
library(coda)
library(stars)
library(ggplot2)
library(tidyverse)
library(rjags) 
library(runjags) 
library(sf)
library(Matrix)

ssw_data = readRDS("ssw_data_final")

#In this Deviation, we add an interaction between time since first infestation and distance to the nearest access to the occupancy level of the model. Nothing else is changed.

coords = st_coordinates(ssw_data)
ssw_data$X=coords[,1] 
ssw_data$Y=coords[,2]

min.dist = vector()
max.dist = vector()

for(l in 1:length(unique(ssw_data$lake))) {

  tmp1 = filter(ssw_data, ssw_data$lake == unique(ssw_data$lake)[l]) %>% distinct(geometry)
  
  tmp.dists = dist(st_coordinates(tmp1))
 
  min.dist = c(min(tmp.dists, na.rm = T), min.dist)
  
  max.dist = c(max(tmp.dists, na.rm=T), max.dist)
}

max.mindist = max(min.dist)
max.maxdist = max(max.dist)

ssw_data = ssw_data %>% 
  st_drop_geometry() %>% 
  dplyr::arrange(pt.id, survey_year, survey_dateStart)

ssw_data = ssw_data %>% mutate(depth_m = depth_ft / 3.281)


# Getting y and det.covs ---------------------------------------------------------

num_pts = max(ssw_data$pt.id)
num_years = max(ssw_data$survey_year - min(ssw_data$survey_year) + 1)
num_reps = ssw_data %>% 
  group_by(pt.id, survey_year) %>% 
  summarize(count = n())
num_reps = max(num_reps$count)

y = array(NA, dim=c(num_pts, num_years, num_reps))
plant_density = array(NA, dim=c(num_pts, num_years, num_reps))
chara_density = array(NA, dim=c(num_pts, num_years, num_reps))
day_of_year = array(NA, dim=c(num_pts, num_years, num_reps))

for(p in 1:num_pts) {

  tmp.dat1 = ssw_data %>%
    filter(pt.id == p)

  years2for = unique(tmp.dat1$survey_year)

  for(yr in 1:length(years2for)) {

    tmp.dat2 = tmp.dat1 %>%
      filter(survey_year == years2for[yr])

    surveys2for = unique(tmp.dat2$survey_dateStart)

    for(s in 1:length(surveys2for)) {

      tmp.dat3 = tmp.dat2 %>%
        filter(survey_dateStart == surveys2for[s])

      this_year = tmp.dat3$survey_year - min(ssw_data$survey_year) + 1

      y[p, this_year, s] = tmp.dat3$SSW
      plant_density[p, this_year, s] = tmp.dat3$Plant.Density
      chara_density[p, this_year, s] = tmp.dat3$Characeae
      day_of_year[p, this_year, s] = tmp.dat3$days0

    }
  }
  if(p %% 100 == 0) { print(p) } 
}

det.covs = list(
  plant_density = plant_density,
  chara_density = chara_density,
  day_of_year = day_of_year
)

# saveRDS(det.covs, "det_covs")
# det.covs = readRDS("det_covs")
# 
# saveRDS(y, "y_mod")
# y = readRDS("y_mod")

# Getting occ.covs --------------------------------------------------------

depth = array(NA, dim=c(num_pts,1))
access_numPUB = array(NA, dim=c(num_pts,1))
access_numPRIV = array(NA, dim=c(num_pts,1))
fetch = array(NA, dim=c(num_pts,1))
dist2access = array(NA, dim=c(num_pts,1))
inf_year = array(NA, dim=c(num_pts, num_years))
access_type = array(NA, dim=c(num_pts,1))
lake = array(NA, dim=c(num_pts,1))

 for(p in 1:num_pts) {

   tmp.dat1 = ssw_data %>%
     filter(pt.id == p)

   depth[p] = median(tmp.dat1$depth_m)
   access_numPUB[p] = median(tmp.dat1$access.numPUB)
   access_numPRIV[p] = median(tmp.dat1$access.numPRIV)
   fetch[p] = median(tmp.dat1$max.fetch)
   dist2access[p] = median(tmp.dat1$min.dist)
   access_type[p] = first(tmp.dat1$type)
   lake[p] = as.numeric(as.factor(first(tmp.dat1$lake)))

   years2for = unique(tmp.dat1$survey_year)

   for(yr in 1:length(years2for)) {

    tmp.dat2 = tmp.dat1 %>%
       filter(survey_year == years2for[yr])

     if(nrow(tmp.dat2) > 1) {
       tmp.dat3 = tmp.dat2 %>%
       group_by(pt.id, survey_year) %>%
       summarize(inf_year = mean(inf.year),
                 .groups = "drop")
     } else {
        tmp.dat3 = tmp.dat2 %>%
           rename(inf_year = inf.year)
       }

     this_year = tmp.dat3$survey_year - min(ssw_data$survey_year) + 1

     inf_year[p, this_year] = tmp.dat3$inf_year

   }
   if(p %% 100 == 0) { print(p) } 
 }

depthxfetch = scale(depth) * scale(fetch)

 for(row in 1:nrow(inf_year)) { 
   for(col in 1:ncol(inf_year)) {

     if(is.na(inf_year[row,col])) {

       first.val.spot = min(which(!is.na(inf_year[row,])))
       first.val = inf_year[row, first.val.spot]

      current.val = first.val - (first.val.spot - col)

       inf_year[row, col:first.val.spot] = current.val:first.val
     }
   }
 }

#We also need to create the year x dist interaction term, which we do using apply so that we can ensure the scaling happens correctly.
yearxdist = apply(scale(inf_year), 2, function(x) {
  x * scale(c(dist2access))
})

 access_type[access_type == "PRIVATE"] = 1
 access_type[access_type == "PUBLIC"] = 0
 access_type = as.numeric(access_type)

 occ.covs =  list(
   depth = depth,
   access_numPUB = access_numPUB,
   access_numPRIV = access_numPRIV,
   fetch = fetch,
   dist2access = dist2access,
   inf_year = inf_year,
   depthxfetch = depthxfetch,
   access_type = access_type,
   lake = as.numeric(lake),
   yearxdist = yearxdist
   )

# saveRDS(occ.covs, "occ_covs")
# occ.covs = readRDS("occ_covs")

condensed_dat = ssw_data %>% 
  group_by(pt.id) %>% 
  summarize(X = mean(X),
            Y = mean(Y)) %>% 
  arrange(pt.id)

coords = matrix(NA, 
                nrow=nrow(condensed_dat),
                ncol=2)
coords[,1] = condensed_dat$X
coords[,2] = condensed_dat$Y

ssw.occ.formula = ~ scale(depth) + scale(fetch) + scale(dist2access) +
  access_type + scale(access_numPUB) + scale(access_numPRIV) + 
  depthxfetch + scale(inf_year) + yearxdist + (1|lake) #Note addition here.
ssw.det.formula = ~ scale(plant_density) + scale(day_of_year) + scale(chara_density)

ssw.mod.dat = list(
  y = y,
  occ.covs = occ.covs,
  det.covs = det.covs,
  coords = coords
)

ssw.inits = list(
  alpha = c(0,0,0,0), 
  beta = c(0,0,0,0,0,0,0,0,0,0), #Extra 0 added here.
  z = apply(y, c(1, 2), function(a) { as.numeric(sum(a, na.rm = TRUE) > 0) }),
  sigma.sq = 2, 
  phi = mean(c(3/(max.maxdist/2), 1/(max.mindist))),
  w = rep(0, nrow(y)),
  sigma.sq.psi = 2
)

ssw.priors = list(alpha.normal = list( 
                    mean = 0, var = 2.72 
                  ),
                  beta.normal = list( 
                    mean = 0, var = 2.72
                  ),
                  phi.unif = c(3/(max.maxdist/2), 1/(max.mindist)), 
                  sigma.sq.unif = c(0.001, 10),
                  sigma.sq.psi.ig = list(a = 0.1, b = 0.1)
                )

batch.length = 25 
n.batch = 10000
n.thin = 50 
n.chains = 3 
n.neighbors = 15 

out.mod1 = stPGOcc(ssw.occ.formula, 
        ssw.det.formula,
        ssw.mod.dat,
        inits = ssw.inits,
        n.batch = n.batch,
        batch.length = batch.length,
        priors = ssw.priors,
        cov.model = "exponential",
        n.neighbors = n.neighbors,
        n.thin = n.thin,
        n.chains = n.chains,
        n.report = 100, 
        n.omp.threads = 4,
        ar1 = FALSE) 


## Analytics:
summary(out.mod1)
# 
# #Assess whether effective sample size for the spatial random effects + occupancy intercept are sufficiently high.
# beta.0.w.samples <- out.mod1$beta.samples[, 1] + out.mod1$w.samples
# ess.beta.0.w <- coda::effectiveSize(beta.0.w.samples)
# summary(ess.beta.0.w)
# 

# beta.samples <- out.mod1$beta.samples
# saveRDS(beta.samples, file = 'beta-samplesEXP3.rds')
# 
# alpha.samples <- out.mod1$alpha.samples
# saveRDS(alpha.samples, file = 'alpha-samplesEXP3.rds')
# 
# z.samples <- out.mod1$z.samples
# saveRDS(z.samples, file = 'z-samplesEXP3.rds')
# 
# beta.star.samples <- out.mod1$beta.star.samples
# saveRDS(beta.star.samples, file = 'theta.samples-samplesEXP3.rds')
# 
# theta.samples <- out.mod1$theta.samples
# saveRDS(theta.samples, file = 'theta-samplesEXP3.rds')
# 
# sigma.sq.psi.samples <- out.mod1$sigma.sq.psi.samples
# saveRDS(sigma.sq.psi.samples, file = 'sigma.sq.psiEXP3.samples.rds')
# 
# #Posterior predictive checks
# ppc.out1 <- ppcOcc(out.mod1, fit.stat = 'freeman-tukey', group = 1)
# saveRDS(ppc.out1, "PPC_tukey_grp1.rds")
# 
# ppc.out2 <- ppcOcc(out.mod1, fit.stat = 'freeman-tukey', group = 2)
# saveRDS(ppc.out2, "PPC_tukey_grp2.rds")
# 
# ppc.out3 <- ppcOcc(out.mod1, fit.stat = 'chi-squared', group = 1)
# saveRDS(ppc.out3, "PPC_chi_grp1.rds")
# 
# ppc.out4 <- ppcOcc(out.mod1, fit.stat = 'chi-squared', group = 2)
# saveRDS(ppc.out4, "PPC_chi_grp2.rds")
# 
# summary(ppc.out1); summary(ppc.out2); summary(ppc.out3); summary(ppc.out4)