### ------------------------------------------------------------------- ### ### LOAD LIBRARIES AND DEFINE FUNCTIONS ### ### ------------------------------------------------------------------- ### library(glmnet) library(data.table) library(INLA) library(dplyr) ## -- Define function 'inla_tab' to summarize and write out model results inla_tab <- function(inla_model, out) { # coefficients in a dataframe inla_tab <- data.frame(inla_model$summary.fixed) # grab the coefficients colnames(inla_tab)<-colnames(inla_model$summary.fixed) # calculate relative effect sizes inla_tab$b_se = inla_tab[,1]/inla_tab[,2] inla_tab$abs_b_se = abs(inla_tab$b_se) # format the table summary=format(inla_tab, autoformat=1) # write as a csv file write.csv(summary, out) } ## -- Define function 'exp.decay' to perform exponential decay transformation exp.decay = function(d, alpha) { exp((-d)/alpha) } ## -- Define function 'run.inla' to run hierarchical model in INLA with default priors run.inla = function(form, data, link) { mod = inla(form, data=data, family="binomial", control.compute=list(waic=TRUE, cpo=TRUE), control.predictor=list(compute=TRUE, link=link)) return(mod) } ## -- Define function 'random.rows' to randomly subset rows from a data frame randomRows = function(df,m) {return(df[sample(nrow(df),m),])} ### ------------------------------------------------------------------- ### ### SET WORKING DIRECTORY AND LOAD DATA FILE ### ### ------------------------------------------------------------------- ### setwd("C:\\Users\\soneil\\Documents") # change me wdat = read.csv("data.csv") ## -- Make a copy of the data frame to store transformed variables wdat1 = wdat ## -- Exponential decay functions for distance variables wdat1$DDWC = exp.decay(wdat1$DDWC, mean(wdat1$DDWC[wdat1$Used==1])) wdat1$HWY = exp.decay(wdat1$HWY, mean(wdat1$HWY[wdat1$Used==1])) ## -- Standardize continuous variables around their mean & standard deviation wdat1[,7:23] = apply(wdat1[,7:23], 2, scale) wdat1$Year = as.factor(wdat1$Year) # -- Identify correlations between candidate predictors corMat = cor(wdat1[,7:23]) corMat = subset(melt(abs(corMat)), value > 0.6 & value < 1) corMat = corMat[order(corMat$value, decreasing=TRUE),] corMat ### ------------------------------------------------------------------- ### ### IDENTIFY THE MOST IMPORTANT PREDICTORS WITH LASSO (GLMNET) ### ### USE MONTE CARLO TO CHECK CONSISTENCY OF MODEL RESULTS ### ### ------------------------------------------------------------------- ### m1 = glm(Used ~ DDWC + STREAM + ELEV + SLOPE + ROUGH + EDGE + OPEN + WATER + HWY + ROAD + IMPERV + AG + PLAND + TRASP + SNOW + SNOW_LTA, data=wdat1, family=binomial) X = model.matrix(m1) Y = as.factor(wdat1$Used) n = 1000 m = 5000 mat = matrix(nrow=ncol(X), ncol=n) lams.min = lams.se = numeric(n) ## -- Run glmnet 1000 times, each time subsetting 5000 rows to operate on set.seed(0123) for (i in 1:n) { print(paste("refitting glmnet for subset #", paste(i))) dat_sub = randomRows(wdat1, m) Y = as.factor(dat_sub$Used) X = model.matrix(m1$formula, data=dat_sub) cv = cv.glmnet(x=X, y=Y, alpha=0.90, family="binomial") lams.min[i] = lam.min = cv$lambda.min lams.se[i] = lam.se = cv$lambda.1se fits = glmnet(x=X, y=Y, alpha=0.90, family="binomial") result = predict(fits, s=lam.se, type="coefficients") result = as.vector(result[,1]) result = result[2:(ncol(X)+1)] for (z in 1:length(result)) { mat[z,i]<-result[z] } } ## -- Summarize the output p<-numeric(ncol(X)) for (i in 1:ncol(X)) { m<-mat[i,]!=0 p[i]<-sum(m)/n } ## -- Coefficients with p > 0.5 are retained for further modeling probs = which(p>0.5) names(p) = colnames(X) p p[probs] ### ------------------------------------------------------------------- ### ### MODEL FORMULAE ### ### ------------------------------------------------------------------- ### ## -- 1) Hierarchical model with random pack coefficients wdat1$Pack5=wdat1$Pack4=wdat1$Pack3=wdat1$Pack2=wdat1$Pack1=wdat1$Pack wdat1$Pack9=wdat1$Pack8=wdat1$Pack7=wdat1$Pack6=wdat1$Pack link=rep(1, nrow(wdat1)) form.inla.1 = Used ~ # fixed effects DDWC + SNOW_LTA + STREAM + EDGE + ROAD + IMPERV + PLAND + ELEV + SLOPE + # random intercepts f(Year, model="iid") + f(Pack, model = "iid") + # random pack coefficients f(Pack1, DDWC, model="iid") + f(Pack2, IMPERV, model="iid") + f(Pack3, EDGE, model="iid") + f(Pack4, ELEV, model="iid") + f(Pack5, PLAND, model="iid") + f(Pack6, ROAD, model="iid") + f(Pack7, SLOPE, model="iid") + f(Pack8, STREAM, model="iid") + f(Pack9, SNOW_LTA, model="iid") ## -- 2) Hierarchical model with random pack coefficients + density-dependent interactions form.inla.2 = Used ~ # fixed effects (includes interactions with wolf density) (DDWC + SNOW_LTA + STREAM + EDGE + ROAD + IMPERV + PLAND + ELEV + SLOPE)*WDENS + # random intercepts f(Year, model="iid") + f(Pack, model = "iid") + # random pack coefficients f(Pack1, DDWC, model="iid") + f(Pack2, IMPERV, model="iid") + f(Pack3, EDGE, model="iid") + f(Pack4, ELEV, model="iid") + f(Pack5, PLAND, model="iid") + f(Pack6, ROAD, model="iid") + f(Pack7, SLOPE, model="iid") + f(Pack8, STREAM, model="iid") + f(Pack9, SNOW_LTA, model="iid") ### ------------------------------------------------------------------- ### ### EVALUATE THE MODELS USING 'INLA' ### ### WARNING: THIS STEP MAY TAKE SEVERAL HOURS TO COMPLETE ### ### ------------------------------------------------------------------- ### M1 = run.inla(form.inla.1, wdat1, link) inla_tab(M1, "result_M1.csv") M2 = run.inla(form.inla.2, wdat1, link) inla_tab(M2, "result_M2.csv") ### ------------------------------------------------------------------- ### ### MODEL RESULTS & DIAGNOSTICS ### ### ------------------------------------------------------------------- ### library(pROC) library(sp) library(ncf) library(foreign) lcpo<-function(n,cpo) {(-1/n)*sum(log(cpo))} #-- Diagnostics (No density dependent interactions) n = nrow(wdat1) fitted = M1$summary.fitted.values$mean summary(fitted) plot(wdat1$Used ~ fitted); cor(wdat1$Used, fitted, use="complete.obs") cpo<-M1$cpo$cpo lcpo(nrow(wdat1),cpo) # LCPO M1$waic$waic roccurve = roc(wdat1$Used ~ fitted); roccurve # Diagnostics (Density-dependent interactions) n = nrow(wdat1) fitted = M2$summary.fitted.values$mean summary(fitted) plot(wdat1$Used ~ fitted); cor(wdat1$Used, fitted, use="complete.obs") cpo<-M2$cpo$cpo lcpo(nrow(wdat1),cpo) # LCPO M2$waic$waic roccurve = roc(wdat1$Used ~ fitted); roccurve