# Version history: 
#
# v17: Removed intercept for all models.
# SL is included in the model
rm(list = ls())

setwd("simulations/")

library(TwoStepCLogit)
library(mclogit)
library(survival)
library(INLA)
library(stringr)
library(glue)
library(scales)
library(geoR)
library(raster)
library(mvtnorm)
library(tidyverse)
library(lubridate)
library(parallel)
library(amt)  
library(glmmTMB)
system("rm -r data")

n_runs <- 500
n_cores <- 8


simulate_ssf <- function(n_steps, n_ch, l = 1, coef, xy0, resc) {
  sl <- rexp(n_steps * n_ch, rate = l)
  ta <- runif(n_steps * n_ch, -pi, pi)
  steps <- rep(1:n_steps, each = n_ch)
  x_0 <- xy0[1]
  y_0 <- xy0[2]
  x_s <- sl * sin(ta)
  y_s <- sl * cos(ta)
  x <- rep(NA, n_steps)
  y <- rep(NA, n_steps)
  x[1] <- x_0
  y[1] <- y_0
  # multiply resources with selection coef
  for (i in 1:length(coef)) resc[[i]] <- resc[[i]] * coef[i]
  for (i in 2:n_steps) {
    x_pos <- x[i - 1] + sl[steps == i] * sin(ta[steps == i])
    y_pos <- y[i - 1] + sl[steps == i] * cos(ta[steps == i])
    p <- exp(rowSums(raster::extract(resc, cbind(x_pos, y_pos)))) 
    w <- sample(n_ch, 1, prob = p)
    x[i] <- x_pos[w]
    y[i] <- y_pos[w]
  }
  data_frame(x = x, y = y)
}



random_steps_static <- function(x, n_control, rate) {
  # Generate random points
  ns <- nrow(x)  # number of steps
  case_for_control <- rep(1:ns, each = n_control)
  
  slr <-  rexp(ns * n_control, rate = rate)
  tar <- x[case_for_control, ]$ta_ + stats::runif(ns * n_control, -pi, pi)  # turning angles for new stps
  
  # control points
  xy_cc <- x[case_for_control, ]
  xy_cc["x2_"] <- xy_cc$x1_ + slr * cos(tar)
  xy_cc["y2_"] <- xy_cc$y1_ + slr * sin(tar)
  
  xy_cc$case_ <- FALSE
  xy_cc$step_id_ <- rep(1:ns, each = n_control)
  xy_cc$sl_ <- slr
  xy_cc$ta_ <- tar
  
  x$case_ <- TRUE
  x$step_id_ <- 1:ns
  
  suppressWarnings(out <- bind_rows(x, xy_cc))
  
  class(out) <- c("random_steps", class(out))
  out
}

# Generate environmental covariates

set.seed(1222411)
dir.create("data/")



mclapply(1:n_runs, function(i) {
  
  dem <- raster(grf(ncell(r), grid="reg", nx=400, ny=400, xlims=c(0,400), ylims=c(0,400), cov.pars=c(0.1, 50)))
  h <- raster(grf(ncell(r), grid="reg", nx=400, ny=400, xlims=c(0,400), ylims=c(0,400), cov.pars=c(0.1, 50)))
  
  # Standardize covariates
  dem[] <- (dem[] - mean(dem[])) / sd(dem[])
  h[] <- (h[] - mean(h[])) / sd(h[]) 
  
  # Simulation setup
  resc <- stack(dem, h)
  names(resc) <- c("dem", "hab")
  
  # Selection coefficients
  S <- cbind(c(10, 0), c(0, 5))
  omgs <- rmvnorm(20, mean = c(-4, 4), sigma = S)
  
  stps <- map(1:20, function(i) 
#    simulate_ssf(round(runif(1, 25, 200)), 200, 2, 1, omgs[i, ], c(200, 200), resc)
    simulate_ssf(200, 200, 1, omgs[i, ], c(200, 200), resc)
  )
  
  # Generate random points and steps
  animals <- data_frame(
    id = paste0("a", 1:20), 
    track = map(stps, ~ track(x = .$x, y = .$y, t = ymd_hm("2017-01-01 00:00") + hours(1:nrow(.))))
  )
  
  # Random steps
  animals <- animals %>% 
    mutate(rnd_stps = map(track, ~ steps(.) %>%  
                            random_steps_static(n_control = 9, rate = 1 / (2 * mean(.$sl_, na.rm = TRUE))) %>% 
                            extract_covariates(resc)))
  animals %>% select(id, rnd_stps) %>% unnest() %>% write_rds(sprintf("data/simulations_random_steps_%s.rds", i))
  return(NULL)
}, mc.cores = n_cores)

fls <- sample(list.files("data", pattern ="^s.*rds$", full.names = TRUE))

mclapply(fls, function(fl) {
  id <- str_extract(fl, "[0-9]{1,4}")
  
  if (!file.exists(glue("data/fix_res_{id}.rds"))) {
    print("-----------------")
    print(fl)
    print("-----------------")
    dd.steps <- readRDS(fl)
    res <- list()
    
    ### Rearrange a little
    dd.steps <- dd.steps[,c("id", "step_id_","case_", "dem","hab", "sl_")]
    dd.steps <- dd.steps[!is.na(dd.steps$dem), ]
    names(dd.steps) <- c("id", "step","y", "dem","hab", "sl")
    dd.steps$id0 <- dd.steps$id1 <- dd.steps$id2 <- dd.steps$id <- as.numeric(as.factor(dd.steps$id))
    dd.steps$weights <- ifelse(dd.steps$y == 1, 1, 1000)
    
    dd.steps$y <- as.numeric(dd.steps$y)
    dd.steps <- dd.steps[!is.na(dd.steps$dem) ,]
    dd.steps$stratumID <- paste(dd.steps$id, dd.steps$step, sep = "_")
    
    dd.steps$dem <- scale(dd.steps$dem,scale=FALSE)
    dd.steps$hab <- scale(dd.steps$hab,scale=FALSE)
    
    ###################################################################
    ### 1) clogit
    ###################################################################
    
    tclog <- system.time(
      clog <- clogit(y ~ dem + hab + sl + strata(stratumID), 
                     data=dd.steps, method = "efron"))
    res[[1]] <- data.frame(
      run_time = as.vector(tclog[3]),
      rep = id,
      method = "mle", 
      name = "clogit", 
      weights = FALSE,
      summary = "mle",
      terms = c("dem", "hab", "sl"), 
      estimates = as.vector(coef(clog))
    )
    
    ###################################################################
    ### 2) glmmTMB
    ###################################################################
    
    ## glmmTMB: estimate stratum var
    run_time <- system.time({
      m1 <- glmmTMB(y ~ -1 + dem + hab + sl + (1|stratumID) + (0 + dem|id) + (0 + hab|id),
                    family=poisson, 
                    data=dd.steps)})
    
    res[[2]] <- data.frame(
      run_time = as.vector(run_time[3]),
      rep = id,
      method = "glmmTMB", 
      name = "glmmTMB_0",
      weights = FALSE,
      summary = "mean", 
      terms = c("dem", "hab", "sl", "var_dem", "var_hab"), 
      estimates = c(summary(m1)$coefficients$cond[, 1], sapply(glmmTMB::VarCorr(m1)[[1]], "[[", 1)[-1])
    )
    
    
    ## glmmTMB: fixed var and unweighted
    TMBStruc = glmmTMB(y ~ -1 + dem + hab + sl + (1|stratumID) + (0 + dem|id) + (0 + hab|id),
                       family=poisson, 
                       data=dd.steps, 
                       doFit=FALSE) 
    
    # set the value of the log sd (not variance) of the RE
    TMBStruc$parameters$theta[1] = log(1e6) 
    
    # tell TMB not to change theta1, all other values are freely estimated (and are different from each other)
    # this number is 6 here, because we have 3 variables, thus a covariance matrix with 3*4/2=6 parameters is to be estimated
    TMBStruc$mapArg = list(theta=factor(c(NA, 1:2)))
    run_time <- system.time(m1 <- glmmTMB:::fitTMB(TMBStruc))
    
    res[[3]] <- data.frame(
      run_time = as.vector(run_time[3]),
      rep = id,
      method = "glmmTMB", 
      name = "glmmTMB_1",
      weights = FALSE,
      summary = "mean", 
      terms = c("dem", "hab", "sl", "var_dem", "var_hab"), 
      estimates = c(summary(m1)$coefficients$cond[, 1], sapply(glmmTMB::VarCorr(m1)[[1]], "[[", 1)[-1])
    )
    
    
    ###################################################################
    ### 3) INLA
    ###################################################################
     inla.setOption(enable.inla.argument.weights=TRUE)
     
     formula = y ~ -1 + dem + hab + sl +
       f(stratumID, model="iid",hyper=list(theta=list(initial=log(1e-6),fixed=TRUE))) + 
       f(id0, dem, values = 1:20, model="iid", 
         hyper = list(theta = list(initial = log(0.5), fixed = FALSE, prior = "pc.prec", param = c(10, 0.05)))) +
       f(id1, hab, values= 1:20, model="iid", 
         hyper = list(theta = list(initial = log(0.5), fixed = FALSE, prior = "pc.prec", param = c(5, 0.05)))) 
     
     run_time <- system.time(
       inla_res <- inla(formula, family = "Poisson", data = dd.steps,  
                        control.inla = list(correct = TRUE, correct.factor = 1), 
                        verbose = FALSE, num.threads = 2)) 
     
     # summary of the random effects (actually the precisions, which are 1/variances)
     var_mode <- c(
       inla.mmarginal(inla.tmarginal(function(x) 1/x, inla_res$marginals.hyperpar$"Precision for id0")),
       inla.mmarginal(inla.tmarginal(function(x) 1/x, inla_res$marginals.hyperpar$"Precision for id1")))
     
     res[[4]] <- data.frame(
       run_time = as.vector(run_time[3]),
       rep = id,
       method = "inla", 
       name = "INLA",
       weights = FALSE,
       summary = "mode",
       terms = c("dem", "hab", "sl", "var_dem", "var_hab"), 
       estimates = c(inla_res$summary.fixed$mode, var_mode)
     )
     
    
    ###################################################################
    ### 4) TwoStep
    ###################################################################
    
    run_time <- system.time(ts <-  Ts.estim(formula = y ~ dem + hab + sl + 
                                              strata(stratumID) +
                                              cluster(id), 
                                            data = dd.steps, random = ~ dem + hab ,
                                            all.m.1=TRUE, D="UN(1)"))
    res[[5]] <- data.frame(
      run_time = as.vector(run_time[3]),
      method = "mle", 
      name = "Two step 1",
      weights = FALSE,
      summary = "mle",
      terms = c("dem", "hab", "sl", "var_dem", "var_hab"), 
      estimates = as.vector(c(ts$beta, diag(ts$D)[1:2]))
    )
    saveRDS(bind_rows(res), glue("data/fix_res_{id}.rds"))
  }
}, mc.cores = n_cores)


library(tidyverse)
fls <- list.files("data", pattern ="^fix_res", full.names = TRUE)
res <- lapply(fls, read_rds)
res <- do.call(rbind, res)
write_rds(res, "results_balanced17.rds")


# local -------------------------------------------------------------------
library(tidyverse)
library(glue)
a <- read_rds("simulations/data/results_balanced17.rds")
n <- sum(a$name == "clogit") / 3 

tc <- data_frame(terms = c("dem", "hab", "var_dem", "var_hab"),  val = c(-4, 4, 10, 5))

p1 <- filter(a, !terms %in% c("int", "sl")) %>% 
  ggplot(., aes(name, estimates)) + geom_boxplot() +
  facet_wrap(~ terms, scale = "free", drop = FALSE) +
  geom_hline(aes(yintercept = val), data = tc) + labs(title = glue("Sampling distribution (n = {n})")) +
  stat_summary(fun.y = mean, geom = "point", col = "red")

p1
ggsave("tmp/plot1.png")