### ------------------------------------------------------------------- ###
###              LOAD LIBRARIES AND DEFINE FUNCTIONS                    ###
### ------------------------------------------------------------------- ###
library(glmnet)
library(data.table)
library(INLA)
library(dplyr)

## -- Define function 'inla_tab' to summarize and write out model results
inla_tab <- function(inla_model, out) {
  # coefficients in a dataframe
  inla_tab <- data.frame(inla_model$summary.fixed)
  
  # grab the coefficients
  colnames(inla_tab)<-colnames(inla_model$summary.fixed)
  
  # calculate relative effect sizes
  inla_tab$b_se = inla_tab[,1]/inla_tab[,2]
  inla_tab$abs_b_se = abs(inla_tab$b_se)
  
  # format the table
  summary=format(inla_tab, autoformat=1)
  
  # write as a csv file
  write.csv(summary, out)
}

## -- Define function 'exp.decay' to perform exponential decay transformation
exp.decay = function(d, alpha) {
  exp((-d)/alpha)
}

## -- Define function 'run.inla' to run hierarchical model in INLA with default priors
run.inla = function(form, data, link) {
  mod = inla(form, data=data, family="binomial", control.compute=list(waic=TRUE, cpo=TRUE),
             control.predictor=list(compute=TRUE, link=link))
  return(mod)
}

## -- Define function 'random.rows' to randomly subset rows from a data frame
randomRows = function(df,m) {return(df[sample(nrow(df),m),])}

### ------------------------------------------------------------------- ###
###             SET WORKING DIRECTORY AND LOAD DATA FILE                ###
### ------------------------------------------------------------------- ###

setwd("C:\\Users\\soneil\\Documents") # change me
wdat = read.csv("data.csv")
## -- Make a copy of the data frame to store transformed variables
wdat1 = wdat
## -- Exponential decay functions for distance variables
wdat1$DDWC = exp.decay(wdat1$DDWC, mean(wdat1$DDWC[wdat1$Used==1]))
wdat1$HWY = exp.decay(wdat1$HWY, mean(wdat1$HWY[wdat1$Used==1]))
## -- Standardize continuous variables around their mean & standard deviation
wdat1[,7:23] = apply(wdat1[,7:23], 2, scale)
wdat1$Year = as.factor(wdat1$Year)
# -- Identify correlations between candidate predictors
corMat = cor(wdat1[,7:23])
corMat = subset(melt(abs(corMat)), value > 0.6 & value < 1)
corMat = corMat[order(corMat$value, decreasing=TRUE),]
corMat

### ------------------------------------------------------------------- ###
###      IDENTIFY THE MOST IMPORTANT PREDICTORS WITH LASSO (GLMNET)     ###
###       USE MONTE CARLO TO CHECK CONSISTENCY OF MODEL RESULTS         ###
### ------------------------------------------------------------------- ###

m1 = glm(Used ~ DDWC + STREAM + ELEV + SLOPE + ROUGH +
           EDGE + OPEN + WATER + HWY + ROAD + IMPERV + 
           AG + PLAND + TRASP + SNOW + SNOW_LTA, 
         data=wdat1, family=binomial)
X = model.matrix(m1)
Y = as.factor(wdat1$Used)
n = 1000
m = 5000
mat = matrix(nrow=ncol(X), ncol=n)
lams.min = lams.se = numeric(n)

## -- Run glmnet 1000 times, each time subsetting 5000 rows to operate on
set.seed(0123)
for (i in 1:n) {
  print(paste("refitting glmnet for subset #", paste(i)))
  dat_sub = randomRows(wdat1, m) 
  Y = as.factor(dat_sub$Used)    
  X = model.matrix(m1$formula, data=dat_sub)
  cv = cv.glmnet(x=X, y=Y, alpha=0.90, family="binomial")
  lams.min[i] = lam.min = cv$lambda.min
  lams.se[i] = lam.se = cv$lambda.1se
  fits = glmnet(x=X, y=Y, alpha=0.90, family="binomial")
  result = predict(fits, s=lam.se, type="coefficients")
  result = as.vector(result[,1])
  result = result[2:(ncol(X)+1)]
  for (z in 1:length(result)) { 
    mat[z,i]<-result[z]
  }
}

## -- Summarize the output
p<-numeric(ncol(X))
for (i in 1:ncol(X)) {
  m<-mat[i,]!=0
  p[i]<-sum(m)/n
}

## -- Coefficients with p > 0.5 are retained for further modeling
probs = which(p>0.5)
names(p) = colnames(X)
p
p[probs]

### ------------------------------------------------------------------- ###
###                      MODEL FORMULAE                                 ###
### ------------------------------------------------------------------- ###

## -- 1) Hierarchical model with random pack coefficients
wdat1$Pack5=wdat1$Pack4=wdat1$Pack3=wdat1$Pack2=wdat1$Pack1=wdat1$Pack
wdat1$Pack9=wdat1$Pack8=wdat1$Pack7=wdat1$Pack6=wdat1$Pack
link=rep(1, nrow(wdat1))
form.inla.1 = Used ~ 
  # fixed effects
  DDWC + SNOW_LTA + STREAM + EDGE + ROAD + IMPERV + PLAND + ELEV + SLOPE + 
  # random intercepts
  f(Year, model="iid") + f(Pack, model = "iid") + 
  # random pack coefficients
  f(Pack1, DDWC, model="iid") +
  f(Pack2, IMPERV, model="iid") + 
  f(Pack3, EDGE, model="iid") + 
  f(Pack4, ELEV, model="iid") + 
  f(Pack5, PLAND, model="iid") + 
  f(Pack6, ROAD, model="iid") + 
  f(Pack7, SLOPE, model="iid") + 
  f(Pack8, STREAM, model="iid") + 
  f(Pack9, SNOW_LTA, model="iid")

## -- 2) Hierarchical model with random pack coefficients + density-dependent interactions
form.inla.2 = Used ~ 
  # fixed effects (includes interactions with wolf density)
  (DDWC + SNOW_LTA + STREAM + EDGE + ROAD + IMPERV + PLAND + ELEV + SLOPE)*WDENS + 
  # random intercepts
  f(Year, model="iid") + f(Pack, model = "iid") + 
  # random pack coefficients
  f(Pack1, DDWC, model="iid") +
  f(Pack2, IMPERV, model="iid") + 
  f(Pack3, EDGE, model="iid") + 
  f(Pack4, ELEV, model="iid") + 
  f(Pack5, PLAND, model="iid") + 
  f(Pack6, ROAD, model="iid") + 
  f(Pack7, SLOPE, model="iid") + 
  f(Pack8, STREAM, model="iid") + 
  f(Pack9, SNOW_LTA, model="iid")

### ------------------------------------------------------------------- ###
###                EVALUATE THE MODELS USING 'INLA'                     ###
###         WARNING: THIS STEP MAY TAKE SEVERAL HOURS TO COMPLETE       ###                                                     
### ------------------------------------------------------------------- ###

M1 = run.inla(form.inla.1, wdat1, link)
inla_tab(M1, "result_M1.csv")

M2 = run.inla(form.inla.2, wdat1, link)
inla_tab(M2, "result_M2.csv")

### ------------------------------------------------------------------- ###
###                MODEL RESULTS & DIAGNOSTICS                          ###                                                   
### ------------------------------------------------------------------- ###

library(pROC)
library(sp)
library(ncf)
library(foreign)

lcpo<-function(n,cpo) {(-1/n)*sum(log(cpo))}

#-- Diagnostics (No density dependent interactions)
n = nrow(wdat1)
fitted = M1$summary.fitted.values$mean
summary(fitted)
plot(wdat1$Used ~ fitted); cor(wdat1$Used, fitted, use="complete.obs")
cpo<-M1$cpo$cpo
lcpo(nrow(wdat1),cpo) # LCPO
M1$waic$waic
roccurve = roc(wdat1$Used ~ fitted); roccurve

# Diagnostics (Density-dependent interactions)
n = nrow(wdat1)
fitted = M2$summary.fitted.values$mean
summary(fitted)
plot(wdat1$Used ~ fitted); cor(wdat1$Used, fitted, use="complete.obs")
cpo<-M2$cpo$cpo
lcpo(nrow(wdat1),cpo) # LCPO
M2$waic$waic
roccurve = roc(wdat1$Used ~ fitted); roccurve