#In this file, we run variation 2 on the occupancy model, as described in the manuscript. Except where noted, all else is as in the spOcc.R file, so annotations have been eliminated for brevity. 

library(spOccupancy)
library(coda)
library(stars)
library(ggplot2)
library(tidyverse)
library(rjags) 
library(runjags) 
library(sf)
library(Matrix)

ssw_data = readRDS("ssw_data_final")

#In this Deviation, we remove all observations from Wind lake, which had a particularly infested private access.
ssw_data = ssw_data %>% filter(!lake %in% c("Wind"))

# This cuts out some lakes and pts, so we need to renumber all the pt ids here.

max.pt = length(unique(ssw_data$pt.id)) ##The highest value we should have afterwards.

ssw_data$pt.id2 = NA ## Temp column

for(i in max(ssw_data$pt.id):1) { #Go backwards from the highest point
  
  if(length(which(ssw_data$pt.id == i)) > 0) { #Assuming something of this value still exists...
  ssw_data$pt.id2[ssw_data$pt.id == i] = max.pt ##Make this equal to whatever the max.pt value is at this moment.
  max.pt = max.pt-1 #Reduce the max point value by 1.
  }
}

ssw_data$pt.id = ssw_data$pt.id2 #Overwrite the pt id data with the new ones. 

#Relative to those in the "main" model, these min/max distance values are probably somewhat different as a result of losing a lake. However, the ways in which they are calculated are the same. 
coords = st_coordinates(ssw_data)
ssw_data$X=coords[,1] 
ssw_data$Y=coords[,2]

min.dist = vector()
max.dist = vector()

for(l in 1:length(unique(ssw_data$lake))) {

  tmp1 = filter(ssw_data, ssw_data$lake == unique(ssw_data$lake)[l]) %>% distinct(geometry)
  
  tmp.dists = dist(st_coordinates(tmp1))
  
  min.dist = c(min(tmp.dists, na.rm = T), min.dist)

  max.dist = c(max(tmp.dists, na.rm=T), max.dist)
}

max.mindist = max(min.dist)
max.maxdist = max(max.dist)

ssw_data = ssw_data %>% 
  st_drop_geometry() %>% 
  dplyr::arrange(pt.id, survey_year, survey_dateStart)

ssw_data = ssw_data %>% mutate(depth_m = depth_ft / 3.281)

# Getting y and det.covs ---------------------------------------------------------

num_pts = max(ssw_data$pt.id)
num_years = max(ssw_data$survey_year - min(ssw_data$survey_year) + 1)
num_reps = ssw_data %>% 
  group_by(pt.id, survey_year) %>% 
  summarize(count = n())
num_reps = max(num_reps$count)

y = array(NA, dim=c(num_pts, num_years, num_reps))
plant_density = array(NA, dim=c(num_pts, num_years, num_reps))
chara_density = array(NA, dim=c(num_pts, num_years, num_reps))
day_of_year = array(NA, dim=c(num_pts, num_years, num_reps))

for(p in 1:num_pts) {

  tmp.dat1 = ssw_data %>%
    filter(pt.id == p)

  years2for = unique(tmp.dat1$survey_year)

  for(yr in 1:length(years2for)) {

    tmp.dat2 = tmp.dat1 %>%
      filter(survey_year == years2for[yr])

    surveys2for = unique(tmp.dat2$survey_dateStart)

    for(s in 1:length(surveys2for)) {

      tmp.dat3 = tmp.dat2 %>%
        filter(survey_dateStart == surveys2for[s])

      this_year = tmp.dat3$survey_year - min(ssw_data$survey_year) + 1

      y[p, this_year, s] = tmp.dat3$SSW
      plant_density[p, this_year, s] = tmp.dat3$Plant.Density
      chara_density[p, this_year, s] = tmp.dat3$Characeae
      day_of_year[p, this_year, s] = tmp.dat3$days0

    }
  }
  if(p %% 100 == 0) { print(p) }
}

det.covs = list(
  plant_density = plant_density,
  chara_density = chara_density,
  day_of_year = day_of_year
)

# saveRDS(det.covs, "det_covs")
# det.covs = readRDS("det_covs")
# 
# saveRDS(y, "y_mod")
# y = readRDS("y_mod")

# Getting occ.covs --------------------------------------------------------

depth = array(NA, dim=c(num_pts,1))
access_numPUB = array(NA, dim=c(num_pts,1))
access_numPRIV = array(NA, dim=c(num_pts,1))
fetch = array(NA, dim=c(num_pts,1))
dist2access = array(NA, dim=c(num_pts,1))
inf_year = array(NA, dim=c(num_pts, num_years))
access_type = array(NA, dim=c(num_pts,1))
lake = array(NA, dim=c(num_pts,1))

 for(p in 1:num_pts) {

   tmp.dat1 = ssw_data %>%
     filter(pt.id == p)

   depth[p] = median(tmp.dat1$depth_m)
   access_numPUB[p] = median(tmp.dat1$access.numPUB)
   access_numPRIV[p] = median(tmp.dat1$access.numPRIV)
   fetch[p] = median(tmp.dat1$max.fetch)
   dist2access[p] = median(tmp.dat1$min.dist)
   access_type[p] = first(tmp.dat1$type)
   lake[p] = as.numeric(as.factor(first(tmp.dat1$lake)))

   years2for = unique(tmp.dat1$survey_year)

   for(yr in 1:length(years2for)) {

    tmp.dat2 = tmp.dat1 %>%
       filter(survey_year == years2for[yr])

     if(nrow(tmp.dat2) > 1) {
       tmp.dat3 = tmp.dat2 %>%
       group_by(pt.id, survey_year) %>%
       summarize(inf_year = mean(inf.year),
                 .groups = "drop")
     } else {
        tmp.dat3 = tmp.dat2 %>%
           rename(inf_year = inf.year)
       }

     this_year = tmp.dat3$survey_year - min(ssw_data$survey_year) + 1

     inf_year[p, this_year] = tmp.dat3$inf_year

   }
   if(p %% 100 == 0) { print(p) }
 }

depthxfetch = scale(depth) * scale(fetch)

 for(row in 1:nrow(inf_year)) { 
   for(col in 1:ncol(inf_year)) {

     if(is.na(inf_year[row,col])) {

       first.val.spot = min(which(!is.na(inf_year[row,])))
       first.val = inf_year[row, first.val.spot]

      current.val = first.val - (first.val.spot - col)

       inf_year[row, col:first.val.spot] = current.val:first.val
     }
   }
 }

 access_type[access_type == "PRIVATE"] = 1
 access_type[access_type == "PUBLIC"] = 0
 access_type = as.numeric(access_type)

 occ.covs =  list(
   depth = depth,
   access_numPUB = access_numPUB,
   access_numPRIV = access_numPRIV,
   fetch = fetch,
   dist2access = dist2access,
   inf_year = inf_year,
   depthxfetch = depthxfetch,
   access_type = access_type,
   lake = as.numeric(lake)
   )

# saveRDS(occ.covs, "occ_covs")
# occ.covs = readRDS("occ_covs")

condensed_dat = ssw_data %>% 
  group_by(pt.id) %>% 
  summarize(X = mean(X),
            Y = mean(Y)) %>% 
  arrange(pt.id)

coords = matrix(NA, 
                nrow=nrow(condensed_dat),
                ncol=2)
coords[,1] = condensed_dat$X
coords[,2] = condensed_dat$Y

ssw.occ.formula = ~ scale(depth) + scale(fetch) + scale(dist2access) +
  access_type + scale(access_numPUB) + scale(access_numPRIV) + 
  depthxfetch + scale(inf_year) + (1|lake)
ssw.det.formula = ~ scale(plant_density) + scale(day_of_year) + scale(chara_density)

ssw.mod.dat = list(
  y = y,
  occ.covs = occ.covs,
  det.covs = det.covs,
  coords = coords
)

ssw.inits = list(
  alpha = c(0,0,0,0), 
  beta = c(0,0,0,0,0,0,0,0, 0), 
  z = apply(y, c(1, 2), function(a) { as.numeric(sum(a, na.rm = TRUE) > 0) }), 
  sigma.sq = 2, 
  phi = mean(c(3/(max.maxdist/2), 1/(max.mindist))),
  w = rep(0, nrow(y)),
  sigma.sq.psi = 2
)

ssw.priors = list(alpha.normal = list( 
                    mean = 0, var = 2.72
                  ),
                  beta.normal = list( 
                    mean = 0, var = 2.72
                  ),
                  phi.unif = c(3/(max.maxdist/2), 1/(max.mindist)),
                  sigma.sq.unif = c(0.001, 10),
                  sigma.sq.psi.ig = list(a = 0.1, b = 0.1)
                )

batch.length = 25 
n.batch = 10000 
n.thin = 50 
n.chains = 3 
n.neighbors = 15 

out.mod1 = stPGOcc(ssw.occ.formula, 
        ssw.det.formula,
        ssw.mod.dat,
        inits = ssw.inits,
        n.batch = n.batch,
        batch.length = batch.length,
        priors = ssw.priors,
        cov.model = "exponential",
        n.neighbors = n.neighbors,
        n.thin = n.thin,
        n.chains = n.chains,
        n.report = 100, 
        n.omp.threads = 4,
        ar1 = FALSE) 


## Analytics:
summary(out.mod1)

# beta.samples <- out.mod1$beta.samples
# saveRDS(beta.samples, file = 'beta-samplesEXP2.rds')
# 
# alpha.samples <- out.mod1$alpha.samples
# saveRDS(alpha.samples, file = 'alpha-samplesEXP2.rds')
# 
# z.samples <- out.mod1$z.samples
# saveRDS(z.samples, file = 'z-samplesEXP2.rds')
# 
# beta.star.samples <- out.mod1$beta.star.samples
# saveRDS(beta.star.samples, file = 'theta.samples-samplesEXP2.rds')
# 
# theta.samples <- out.mod1$theta.samples
# saveRDS(theta.samples, file = 'theta-samplesEXP2.rds')
# 
# sigma.sq.psi.samples <- out.mod1$sigma.sq.psi.samples
# saveRDS(sigma.sq.psi.samples, file = 'sigma.sq.psiEXP2.samples.rds')
# 
# #Posterior predictive checks
# ppc.out1 <- ppcOcc(out.mod1, fit.stat = 'freeman-tukey', group = 1)
# saveRDS(ppc.out1, "PPC_tukey_grp1.rds")
# 
# ppc.out2 <- ppcOcc(out.mod1, fit.stat = 'freeman-tukey', group = 2)
# saveRDS(ppc.out2, "PPC_tukey_grp2.rds")
# 
# ppc.out3 <- ppcOcc(out.mod1, fit.stat = 'chi-squared', group = 1)
# saveRDS(ppc.out3, "PPC_chi_grp1.rds")
# 
# ppc.out4 <- ppcOcc(out.mod1, fit.stat = 'chi-squared', group = 2)
# saveRDS(ppc.out4, "PPC_chi_grp2.rds")
# 
# summary(ppc.out1); summary(ppc.out2); summary(ppc.out3); summary(ppc.out4)