# In this file, we run variation 1 of the occupancy model, as described in the
# manuscript. Except where noted, everything is as in the spOcc.R file, so
# annotations have been trimmed for brevity.
library(spOccupancy)
library(coda)
library(stars)
library(ggplot2)
library(tidyverse)
library(rjags)
library(runjags)
library(sf)
library(Matrix)

ssw_data = readRDS("ssw_data_final")

# In this deviation, we remove the five lakes with no documented detections of
# starry stonewort in any survey to see whether doing so notably changes our
# parameter estimates.
ssw_data = ssw_data %>%
  filter(!lake %in% c("Carnelian", "Grand", "LowerNemahbin", "Pleasant", "Sylvia"))

# This cuts out some lakes and points, so we need to renumber all the point ids.
max.pt = length(unique(ssw_data$pt.id)) ## The highest id we should have afterwards.
ssw_data$pt.id2 = NA ## Temporary column for the new ids.
for(i in max(ssw_data$pt.id):1) { # Go backwards from the highest point id.
  if(length(which(ssw_data$pt.id == i)) > 0) { # If this id still exists...
    ssw_data$pt.id2[ssw_data$pt.id == i] = max.pt ## ...assign the current max.pt value...
    max.pt = max.pt - 1 # ...and reduce the max point value by 1.
  }
}
ssw_data$pt.id = ssw_data$pt.id2 # Overwrite the pt ids with the new ones.

# Relative to those in the "main" model, these min/max distance values are
# probably somewhat different as a result of losing several lakes. However,
# they are calculated the same way.
coords = st_coordinates(ssw_data)
ssw_data$X = coords[,1]
ssw_data$Y = coords[,2]
min.dist = vector()
max.dist = vector()
for(l in 1:length(unique(ssw_data$lake))) {
  tmp1 = filter(ssw_data, lake == unique(ssw_data$lake)[l]) %>%
    distinct(geometry)
  tmp.dists = dist(st_coordinates(tmp1))
  min.dist = c(min(tmp.dists, na.rm = TRUE), min.dist)
  max.dist = c(max(tmp.dists, na.rm = TRUE), max.dist)
}
max.mindist = max(min.dist)
max.maxdist = max(max.dist)

ssw_data = ssw_data %>%
  st_drop_geometry() %>%
  dplyr::arrange(pt.id, survey_year, survey_dateStart)

ssw_data = ssw_data %>%
  mutate(depth_m = depth_ft / 3.281) # Convert feet to meters.

# Getting y and det.covs ---------------------------------------------------------
num_pts = max(ssw_data$pt.id)
num_years = max(ssw_data$survey_year - min(ssw_data$survey_year) + 1)
num_reps = ssw_data %>%
  group_by(pt.id, survey_year) %>%
  summarize(count = n())
num_reps = max(num_reps$count)

y = array(NA, dim = c(num_pts, num_years, num_reps))
plant_density = array(NA, dim = c(num_pts, num_years, num_reps))
chara_density = array(NA, dim = c(num_pts, num_years, num_reps))
day_of_year = array(NA, dim = c(num_pts, num_years, num_reps))

for(p in 1:num_pts) {
  tmp.dat1 = ssw_data %>% filter(pt.id == p)
  years2for = unique(tmp.dat1$survey_year)
  for(yr in 1:length(years2for)) {
    tmp.dat2 = tmp.dat1 %>% filter(survey_year == years2for[yr])
    surveys2for = unique(tmp.dat2$survey_dateStart)
    for(s in 1:length(surveys2for)) {
      tmp.dat3 = tmp.dat2 %>% filter(survey_dateStart == surveys2for[s])
      this_year = tmp.dat3$survey_year - min(ssw_data$survey_year) + 1
      y[p, this_year, s] = tmp.dat3$SSW
      plant_density[p, this_year, s] = tmp.dat3$Plant.Density
      chara_density[p, this_year, s] = tmp.dat3$Characeae
      day_of_year[p, this_year, s] = tmp.dat3$days0
    }
  }
  if(p %% 100 == 0) { print(p) } # Progress report.
}

det.covs = list(
  plant_density = plant_density,
  chara_density = chara_density,
  day_of_year = day_of_year
)
# saveRDS(det.covs, "det_covs")
# det.covs = readRDS("det_covs")
# saveRDS(y, "y_mod")
# y = readRDS("y_mod")
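
# Optional sanity checks on the rebuilt arrays (a sketch, not part of the
# original pipeline; it assumes only the pt.id column and binary SSW detection
# data built above). After renumbering, point ids should run 1:num_pts with no
# gaps, and the detection array should contain only 0, 1, or NA.
# stopifnot(all(sort(unique(ssw_data$pt.id)) == 1:num_pts))
# stopifnot(all(y %in% c(0, 1, NA)))
# mean(!is.na(y)) # Proportion of (point x year x rep) cells actually surveyed.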
# Getting occ.covs --------------------------------------------------------
depth = array(NA, dim = c(num_pts, 1))
access_numPUB = array(NA, dim = c(num_pts, 1))
access_numPRIV = array(NA, dim = c(num_pts, 1))
fetch = array(NA, dim = c(num_pts, 1))
dist2access = array(NA, dim = c(num_pts, 1))
inf_year = array(NA, dim = c(num_pts, num_years))
access_type = array(NA, dim = c(num_pts, 1))
lake = array(NA, dim = c(num_pts, 1))

for(p in 1:num_pts) {
  tmp.dat1 = ssw_data %>% filter(pt.id == p)
  depth[p] = median(tmp.dat1$depth_m)
  access_numPUB[p] = median(tmp.dat1$access.numPUB)
  access_numPRIV[p] = median(tmp.dat1$access.numPRIV)
  fetch[p] = median(tmp.dat1$max.fetch)
  dist2access[p] = median(tmp.dat1$min.dist)
  access_type[p] = first(tmp.dat1$type)
  lake[p] = as.numeric(as.factor(first(tmp.dat1$lake)))
  years2for = unique(tmp.dat1$survey_year)
  for(yr in 1:length(years2for)) {
    tmp.dat2 = tmp.dat1 %>% filter(survey_year == years2for[yr])
    if(nrow(tmp.dat2) > 1) {
      tmp.dat3 = tmp.dat2 %>%
        group_by(pt.id, survey_year) %>%
        summarize(inf_year = mean(inf.year), .groups = "drop")
    } else {
      tmp.dat3 = tmp.dat2 %>% rename(inf_year = inf.year)
    }
    this_year = tmp.dat3$survey_year - min(ssw_data$survey_year) + 1
    inf_year[p, this_year] = tmp.dat3$inf_year
  }
  if(p %% 100 == 0) { print(p) } # Progress report.
}

depthxfetch = scale(depth) * scale(fetch)

# Back-fill years with no surveys: walk each row back from its first observed
# inf_year value, decrementing by one year per column.
for(row in 1:nrow(inf_year)) {
  for(col in 1:ncol(inf_year)) {
    if(is.na(inf_year[row, col])) {
      first.val.spot = min(which(!is.na(inf_year[row,])))
      first.val = inf_year[row, first.val.spot]
      current.val = first.val - (first.val.spot - col)
      inf_year[row, col:first.val.spot] = current.val:first.val
    }
  }
}

# Recode access type as a binary indicator (1 = private, 0 = public).
access_type[access_type == "PRIVATE"] = 1
access_type[access_type == "PUBLIC"] = 0
access_type = as.numeric(access_type)

occ.covs = list(
  depth = depth,
  access_numPUB = access_numPUB,
  access_numPRIV = access_numPRIV,
  fetch = fetch,
  dist2access = dist2access,
  inf_year = inf_year,
  depthxfetch = depthxfetch,
  access_type = access_type,
  lake = as.numeric(lake)
)
# saveRDS(occ.covs, "occ_covs")
# occ.covs = readRDS("occ_covs")

condensed_dat = ssw_data %>%
  group_by(pt.id) %>%
  summarize(X = mean(X), Y = mean(Y)) %>%
  arrange(pt.id)
coords = matrix(NA, nrow = nrow(condensed_dat), ncol = 2)
coords[,1] = condensed_dat$X
coords[,2] = condensed_dat$Y

ssw.occ.formula = ~ scale(depth) + scale(fetch) + scale(dist2access) +
  access_type + scale(access_numPUB) + scale(access_numPRIV) +
  depthxfetch + scale(inf_year) + (1|lake)
ssw.det.formula = ~ scale(plant_density) + scale(day_of_year) + scale(chara_density)

ssw.mod.dat = list(
  y = y,
  occ.covs = occ.covs,
  det.covs = det.covs,
  coords = coords
)

ssw.inits = list(
  alpha = c(0, 0, 0, 0),
  beta = c(0, 0, 0, 0, 0, 0, 0, 0, 0),
  z = apply(y, c(1, 2), function(a) { as.numeric(sum(a, na.rm = TRUE) > 0) }),
  sigma.sq = 2,
  phi = mean(c(3/(max.maxdist/2), 1/(max.mindist))),
  w = rep(0, nrow(y)),
  sigma.sq.psi = 2
)

ssw.priors = list(
  alpha.normal = list(mean = 0, var = 2.72),
  beta.normal = list(mean = 0, var = 2.72),
  phi.unif = c(3/(max.maxdist/2), 1/(max.mindist)),
  sigma.sq.unif = c(0.001, 10),
  sigma.sq.psi.ig = list(a = 0.1, b = 0.1)
)

batch.length = 25
n.batch = 10000
n.thin = 50
n.chains = 3
n.neighbors = 15

out.mod1 = stPGOcc(ssw.occ.formula, ssw.det.formula, ssw.mod.dat,
                   inits = ssw.inits,
                   n.batch = n.batch,
                   batch.length = batch.length,
                   priors = ssw.priors,
                   cov.model = "exponential",
                   n.neighbors = n.neighbors,
                   n.thin = n.thin,
                   n.chains = n.chains,
                   n.report = 100,
                   n.omp.threads = 4,
                   ar1 = FALSE)
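
# Quick convergence screen before the fuller analytics below (a sketch: the
# slot names $rhat, $ESS, and $beta.samples follow spOccupancy's documented
# model-object structure, and summary() reports the same diagnostics).
# max(unlist(out.mod1$rhat), na.rm = TRUE)     # Want values near 1 (roughly < 1.1).
# min(unlist(out.mod1$ESS))                    # Low values suggest longer runs or more thinning.
# plot(out.mod1$beta.samples, density = FALSE) # coda traceplots of occupancy effects.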
## Analytics:
# summary(out.mod1)
#
# beta.samples <- out.mod1$beta.samples
# saveRDS(beta.samples, file = 'beta-samplesEXP1.rds')
#
# alpha.samples <- out.mod1$alpha.samples
# saveRDS(alpha.samples, file = 'alpha-samplesEXP1.rds')
#
# z.samples <- out.mod1$z.samples
# saveRDS(z.samples, file = 'z-samplesEXP1.rds')
#
# beta.star.samples <- out.mod1$beta.star.samples
# saveRDS(beta.star.samples, file = 'beta.star-samplesEXP1.rds')
#
# theta.samples <- out.mod1$theta.samples
# saveRDS(theta.samples, file = 'theta-samplesEXP1.rds')
#
# sigma.sq.psi.samples <- out.mod1$sigma.sq.psi.samples
# saveRDS(sigma.sq.psi.samples, file = 'sigma.sq.psiEXP1.samples.rds')

# Posterior predictive checks
# ppc.out1 <- ppcOcc(out.mod1, fit.stat = 'freeman-tukey', group = 1)
# saveRDS(ppc.out1, "PPC_tukey_grp1.rds")
#
# ppc.out2 <- ppcOcc(out.mod1, fit.stat = 'freeman-tukey', group = 2)
# saveRDS(ppc.out2, "PPC_tukey_grp2.rds")
#
# ppc.out3 <- ppcOcc(out.mod1, fit.stat = 'chi-squared', group = 1)
# saveRDS(ppc.out3, "PPC_chi_grp1.rds")
#
# ppc.out4 <- ppcOcc(out.mod1, fit.stat = 'chi-squared', group = 2)
# saveRDS(ppc.out4, "PPC_chi_grp2.rds")
#
# summary(ppc.out1); summary(ppc.out2); summary(ppc.out3); summary(ppc.out4)
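
# Fit for this deviation could additionally be compared against the main model
# via WAIC (a sketch using spOccupancy's waicOcc(); the output filename is
# hypothetical):
# waic.mod1 <- waicOcc(out.mod1) # Returns elpd, pD, and WAIC; lower WAIC = better expected fit.
# saveRDS(waic.mod1, "WAIC_EXP1.rds")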