This document provides R code to extend the BLR model to allow for random logistic intercepts (see Appendix S2). This hierarchical model is fit to the three years of data in DNR_Data.csv. R session info and package versions can be found at the end of this document.

library(R2jags)
library(R2WinBUGS)
library(mcmcplots)
library(ggplot2)
library(KernSmooth)
library(coda)
library(gridExtra)
library(devtools)
install_github(repo='johnbaums/jagstools')
library(jagstools)
library(loo)

Read in MDNR data:

State.dat <- read.csv("DNR_Data.csv",header=T) # DNR data

# Resave variables
lake <- as.factor(State.dat$lakeID)
Year <- as.factor(State.dat$Year) # 2009, 2010, 2011
TPug <- State.dat$TPug  # ug/L
lnTPug <- log(TPug)
Chla <- State.dat$Chla  # ug/L
lnChla <- log(Chla)
SAVms <- State.dat$SAVms      # avg SAV (kg) in each lake
lnSAVms <- log(SAVms+.02)

Rename variables

lnP <- lnTPug
ind.rm <- which(is.na(lnP)) # which observations missing
lnP <- lnP[-ind.rm]
P <- TPug; P <- P[-ind.rm]
lnC <- lnChla; lnC <- lnC[-ind.rm]
Chl <- Chla; Chl <- Chl[-ind.rm]
SAV <- SAVms; SAV <- SAV[-ind.rm]
lnSAV <- lnSAVms; lnSAV <- lnSAV[-ind.rm]
lakes <- lake; lakes <- lakes[-ind.rm]

SAV.mean <- mean(SAV)
SAVc <- SAV-SAV.mean # centered SAV

Visualize data

pl1 <- ggplot(NULL, aes(x=P, y=Chl)) + geom_point(aes(size=SAV)) + scale_size_continuous(range=c(2,5))
pl2 <- ggplot(NULL, aes(x=lnP, y=lnC)) + geom_point( aes(size=SAV))+ scale_size_continuous(range=c(2,5))
grid.arrange(pl1, pl2, ncol=2)

JAGS code for Bayesian latent variable regression with random logistic intercepts:

latent_reg <-function(){
  
  
  # Priors for logistic regression relating SAV to P(turbid)

  # Priors for the random logistic intercepts
  for (i in 1:n.lakes){     
    alpha[i] ~ dnorm(mu.alpha, tau.alpha) #  random intercept for logistic regression 
  }

  # Hyperpriors for logistic intercepts
  mu.alpha ~ dnorm(0,.01) #  mean intercept for logistic regression 
  sigma.alpha ~ dunif(0, 10)  # standard deviation of logistic random intercept
  tau.alpha <- 1 / (sigma.alpha * sigma.alpha)
 
  beta <- -1*negbeta  # slope for logistic regression
  negbeta ~ dlnorm( .5, 1/1^2 ) # prior for -1*beta

  
  # Priors for TP/Chla regression
  a0 ~  dnorm(0,0.01) # clear TP/Chla intercept
  a1adj ~  dnorm(0,0.1)  # difference between clear and turbid intercepts
  a1 <- a1adj+a0 # turbid TP/Chla intercept

  b0 ~ dunif(0,10)  # clear TP/Chla slope
  b1 ~ dunif(0,10)  # turbid TP/Chla slope
  b1adj <- b1-b0 # difference between clear and turbid slopes

  sigma1 ~ dunif(0, 20)  # Residual standard deviation of clear lakes
  sigma2 ~ dunif(0, 20)  # Residual standard deviation of turbid lakes
  tau1 <- 1 / ( sigma1 * sigma1)
  tau2 <- 1 / ( sigma2 * sigma2)
  
  b2 <- (a1+b1*logp1-a0-b0*logp2) /(logp1-logp2) # slope of unstable line
  constraint <- step(b2)  

  # Priors for TP thresholds
  logp1 ~ dunif(0,6.5)   # left bifurcation point (tip down) on log scale
  logp2 ~ dunif(0,6.5)   # right bifurcation point (tip up) on log scale
  p1 <- exp(logp1)
  p2 <- exp(logp2)
  
  
  # Likelihood
  for(i in 1:nsamp){
    lnC[i] ~ dnorm(mu[i], tau1*(1-S[i]) + tau2*S[i])
    mu[i] <- a0+a1adj*S[i] + b0*lnP[i]*(1-S[i])+b1*lnP[i]*S[i] # state-dependent linear relationship between log(TP) and log(Chla)
    
    S[i] ~ dbern(p[i]) # latent state (clear/turbid) for observation i
    logit(ptmp[i])<-alpha[Lake.num[i]]+beta*SAVc[i]  # logistic regression relating SAV to the probability observation i is turbid with random intercept for each lake       
    p[i] <- ptmp[i]*step(lnP[i]-logp1)*step(logp2-lnP[i])+step(lnP[i]-logp2) # sets p to 0 if TP less than p1, p depends on SAV if between p1 and p2, and p is 1 if TP greater than p2
    
    ForceNeg[i] ~ dbern(constraint) # Force the slope of the unstable line connecting the turbid line at logp1 to the clear line at logp2 to be negative so the fit resembles a bifurcation diagram
    
    log_lik[i] <- log( dnorm(lnC[i], mu[i], tau1*(1-S[i]) + tau2*S[i]) ) # contribution of observation i to log-likelihood for PSIS-LOO computations
       
  }
}



# Bundle data
ForceNeg <- rep(0,length(lnC))
nsamp <- length(lnC)
Lake.num <- as.numeric(as.factor(lakes)) # numerical index for each lake
n.lakes <- length(unique(Lake.num))

win.data <- list("lnC", "lnP", "nsamp", "SAVc", "ForceNeg", "n.lakes", "Lake.num")
  
# Inits function
inits <- function(){ list(a0=0.12, a1adj=-0.67, logp1=3.78, logp2=6.09, b0=.40, b1=.90, mu.alpha=-0.97, sigma.alpha=1.60, negbeta=5.93, sigma1=runif(1,.5, .7), sigma2=runif(1,.4,.65))} 
# initial values informed by posteriors from previous run

# Parameters to estimate
params <- c("a0","a1","a1adj", "b0", "b1","b1adj","b2","alpha","mu.alpha","sigma.alpha","beta","p","p1","p2","logp1","logp2", "sigma1", "sigma2", "S", "log_lik") 

# MCMC settings
nc <- 3
ni <- 1000000 
nb <-  200000  
nt <- 240      

Start MCMC sampler

WARNING: For 1 million iterations (as specified above), JAGS takes several hours to run, even using parallel computing. The model has already been run and saved as “DNR_randomLogist_JAGSout_1mil_240thin_LOO.rds”, which can be loaded using the function ‘readRDS’ (see Markdown file for code).

set.seed(27)

out.BLR <- do.call(jags.parallel, list(win.data, inits, params, latent_reg, nc, ni, nb, nt))

# If desired, save MCMC run to be loaded later
#saveRDS(out.BLR, "DNR_randomLogist_JAGSout_1mil_240thin_LOO.rds")

Trace plots

traplot(out.BLR ,c("a0","a1","b0","b1","mu.alpha","sigma.alpha","beta","logp1","logp2"))

Posterior density plots

denplot(out.BLR ,c("a0","a1","b0","b1","mu.alpha","sigma.alpha","beta","p1","p2", "sigma1", "sigma2"))

Threshold posterior modes and medians

dKS.logp1 <- bkde(out.BLR$BUGSoutput$sims.list$logp1)
(logp1.modeKS <- dKS.logp1$x[which.max(dKS.logp1$y)]) # lower threshold mode on log scale
## [1] 3.79
dKS.logp2 <- bkde(out.BLR$BUGSoutput$sims.list$logp2)
(logp2.modeKS <- dKS.logp2$x[which.max(dKS.logp2$y)]) # upper threshold mode on log scale
## [1] 6.08
ylim.logp <- max(c(max(dKS.logp1$y), max(dKS.logp2$y)))

exp(logp1.modeKS) # exponentiate to get p1 mode
## [1] 44.3
exp(logp2.modeKS) # exponentiate to get p2 mode
## [1] 437
# Save posterior medians for thresholds
logp1med <- out.BLR$BUGSoutput$median$logp1
logp2med <- out.BLR$BUGSoutput$median$logp2
p1med <- out.BLR$BUGSoutput$median$p1
p2med <- out.BLR$BUGSoutput$median$p2

Density plots of threshold posteriors

par(mar= c(5, 5, 2, 2), cex.lab=1.8, cex.axis=1.6)
plot(dKS.logp1, xlab="Threshold", las=1, lwd=3, xlim=c(0,6.5), main="", col="#009a9a", ylim=c(0,ylim.logp+1), type="l", ylab="Density")
lines(dKS.logp2, lwd=3, col="#9a0000")
abline(h=1/6.5, lwd=2, col="black")
abline(v=c(logp1.modeKS,logp2.modeKS), col="darkorchid", lty=3, lwd=3) 
abline(v=c(logp1med,logp2med), col="dodgerblue", lty=3, lwd=3) 
legend("topleft",c("Lower Threshold Posterior","Upper Threshold Posterior","Prior","Mode","Median"),lwd=3, col=c("#009a9a","#9a0000","black","darkorchid","dodgerblue"), bty='n', cex=1.3, lty=c(1,1,1,2,3,3))

Plot of prior and posterior distributions for \(\beta\) (this is \(\gamma_1\) in the paper; i.e., the slope parameter for the logistic regression relating SAV to the probability a lake is turbid)

par(mar= c(5, 5, 2, 2), cex.lab=1.7, cex.axis=1.6)
plot(density(out.BLR$BUGSoutput$sims.list$beta),xlim=c(-40,0),ylim=c(0,.4),las=1,xlab=expression(beta),main="",lwd=3, col="firebrick3")
lines(density(-1*rlnorm(10000,meanlog=.5,sdlog=1)), col="dodgerblue3",lwd=3)
legend("topleft",c("Prior","Posterior"),cex=1.3,lty=1,lwd=3,col=c("dodgerblue3","firebrick3"))

Pull out posterior means/medians and classify discrete states

# posterior means for regression parameters
a0m <- out.BLR$BUGSoutput$mean$a0
a1m <- out.BLR$BUGSoutput$mean$a1
a1adjm <- out.BLR$BUGSoutput$mean$a1adj
b0m <- out.BLR$BUGSoutput$mean$b0
b1m <- out.BLR$BUGSoutput$mean$b1
b1adjm <- out.BLR$BUGSoutput$mean$b1adj
alpham <- out.BLR$BUGSoutput$mean$mu.alpha # posterior mean estimate for mean parameter of random logistic intercept distribution (i.e., value of alpha for a typical lake with a random effect of zero)
betam <- out.BLR$BUGSoutput$mean$beta

# posterior medians for regression parameters
a0med <- out.BLR$BUGSoutput$median$a0
a1med <- out.BLR$BUGSoutput$median$a1
a1adjmed <- out.BLR$BUGSoutput$median$a1adj
b0med <- out.BLR$BUGSoutput$median$b0
b1med <- out.BLR$BUGSoutput$median$b1
b1adjmed <- out.BLR$BUGSoutput$median$b1adj
alphamed <- out.BLR$BUGSoutput$median$mu.alpha # posterior median estimate for mean parameter of random logistic intercept distribution (i.e., value of alpha for a typical lake with a random effect of zero)
betamed <- out.BLR$BUGSoutput$median$beta

# 95% credible interval bounds for TP thresholds on log scale (medians were previously saved)
logp1.row <- which(row.names(out.BLR$BUGSoutput$summary)=="logp1")
logp1.lowbound <- out.BLR$BUGSoutput$summary[logp1.row,3] # 2.5
logp1.upbound <- out.BLR$BUGSoutput$summary[logp1.row,7] # 97.5
logp2.row <- which(row.names(out.BLR$BUGSoutput$summary)=="logp2")
logp2.lowbound <- out.BLR$BUGSoutput$summary[logp2.row,3] # 2.5
logp2.upbound <- out.BLR$BUGSoutput$summary[logp2.row,7] # 97.5

# Put parameter estimates in a dataframe
coefs <- data.frame(Int.Mean=c(a0m,a1m),Int.Med=c(a0med,a1med),Slope.Mean=c(b0m,b1m), Slope.Med=c(b0med,b1med),State=c("Clear","Turbid"), Bifur.LogMed=c(logp1med,logp2med),Bifur.LogMode=c(logp1.modeKS,logp2.modeKS),  Bifur.Log.LowBound=c(logp1.lowbound,logp2.lowbound), Bifur.Log.UpBound=c(logp1.upbound,logp2.upbound))

coefs
##   Int.Mean Int.Med Slope.Mean Slope.Med  State Bifur.LogMed Bifur.LogMode Bifur.Log.LowBound Bifur.Log.UpBound
## 1  -0.0749 -0.0725      0.458     0.458  Clear         3.81          3.79               3.74              3.96
## 2  -0.3036 -0.3683      0.869     0.879 Turbid         6.09          6.08               6.02              6.17
# Classify Discrete State: 0 if mean(State)<.5 ; 1 if mean(State) > .5
Latent.State <- as.factor(as.numeric(out.BLR$BUGSoutput$mean$S > .5))

Plot average estimated logistic curve for a lake with a random effect of zero

  mcmc.l<-out.BLR$BUGSoutput$sims.list
  Ep.PE <- Ep.LCL <- Ep.UCL <- rep(NA,length(SAV))
  
  for(i in 1:length(SAV)){
     jags.Ep.temp<-plogis(mcmc.l$mu.alpha-SAV.mean*mcmc.l$beta + mcmc.l$beta*SAV[i])
     Ep.PE[i]<-mean(jags.Ep.temp)
     Ep.LCL[i]<-quantile(jags.Ep.temp, prob=0.025)   
     Ep.UCL[i]<-quantile(jags.Ep.temp, prob=0.975)    
  }

state.num <- as.numeric(out.BLR$BUGSoutput$mean$S > .5)

ggplot(NULL, aes(x=SAV, y=state.num) ) + geom_point(size=3)+ geom_line(aes(x=SAV, y=Ep.PE)) + geom_ribbon(aes(x=SAV,ymin=Ep.LCL, ymax=Ep.UCL),alpha=0.3) +theme(axis.text=element_text(colour="black", size=15),axis.title=element_text(colour="black", size=16),legend.text=element_text(colour="black", size=15),legend.title=element_text(colour="black", size=15),axis.title.y=element_text(vjust=1.5),axis.title.x=element_text(vjust=0))+ coord_cartesian(ylim = c(-.05,1.05)) + ylab("P(turbid)") + xlab("SAV (kg)")

Plot average estimated regression line for each state

Black solid (dashed) lines represent average (2.5th, 97.5th quantiles) estimated steady state relationships across all MCMC samples. Steady state lines end at the TP threshold point estimates (posterior modes), and gray bands represent 95% credible intervals for TP thresholds. Triangular points represent lakes classified as clear (>50% of MCMC sampled states were clear), and circular points represent lakes classified as turbid (>50% of MCMC sampled states were turbid). The average MCMC sampled state for each lake is shown on a blue to green color gradient (0=clear, 1=turbid). Point size is proportional to submerged aquatic vegetation (SAV, units: average kg/sample).

Discrete.State <- as.numeric(Latent.State)-1
Discrete.State[Discrete.State==0] <- "Clear" ; Discrete.State[Discrete.State==1] <- "Turbid"
Continuous.State <- out.BLR$BUGSoutput$mean$S

log.TP <- lnP
log.Chla <- lnC
log.SAV <- lnSAV
cbPalette <- c("royalblue4","seagreen")

a0.samps <- out.BLR$BUGSoutput$sims.list$a0
a1.samps <- out.BLR$BUGSoutput$sims.list$a1
b0.samps <- out.BLR$BUGSoutput$sims.list$b0
b1.samps <- out.BLR$BUGSoutput$sims.list$b1

num.sims <- length(a0.samps)
logp2.modeKS <- round(logp2.modeKS, digits=2)

xclear <- seq(1.4,logp2.modeKS, .01)
xclear.pred <- matrix(rep(NA, length(xclear)*num.sims), nrow=length(xclear))

for (i in 1:length(xclear)){
  x <- xclear[i]
  xclear.pred[i,] <- a0.samps + b0.samps*x
}

clear.means <- apply(xclear.pred, 1, mean)
clear.lower <- apply(xclear.pred, 1, quantile, probs = 0.025)
clear.upper <- apply(xclear.pred, 1, quantile, probs = 0.975)


logp1.modeKS <- round(logp1.modeKS, digits=2)
xturbid <- seq(logp1.modeKS,7, .01)
xturbid.pred <- matrix(rep(NA, length(xturbid)*num.sims), nrow=length(xturbid))

for (i in 1:length(xturbid)){
  x <- xturbid[i]
  xturbid.pred[i,] <- a1.samps + b1.samps*x
}

turbid.means <- apply(xturbid.pred, 1, mean)
turbid.lower <- apply(xturbid.pred, 1, quantile, probs = 0.025)
turbid.upper <- apply(xturbid.pred, 1, quantile, probs = 0.975)


ggplot(NULL, aes(x=log.TP, y=log.Chla , color=Continuous.State )) +
  
  annotate("rect", xmin=coefs$Bifur.Log.LowBound[1], xmax=coefs$Bifur.Log.UpBound[1], ymin=-Inf, ymax=Inf, alpha=0.15, fill="gray20") + annotate("rect", xmin=coefs$Bifur.Log.LowBound[2], xmax=coefs$Bifur.Log.UpBound[2], ymin=-Inf, ymax=Inf, alpha=0.15, fill="gray20") + 
 
  geom_point( aes(shape=Discrete.State, size=SAV)) + 
  geom_line(aes(x=xturbid, y=turbid.means), size=1.1, colour="gray20") +   
  geom_line(aes(x=xclear, y=clear.means), size=1.1, colour="gray20")  + 
  geom_line(aes(x=xturbid, y=turbid.lower), size=1.1, colour="gray20", linetype=2) + 
  geom_line(aes(x=xturbid, y=turbid.upper), size=1.1, colour="gray20", linetype=2) + 
  geom_line(aes(x=xclear, y=clear.lower), size=1.1, colour="gray20", linetype=2) + 
  geom_line(aes(x=xclear, y=clear.upper), size=1.1, colour="gray20", linetype=2)+

  scale_size(range=c(2,6),  limits=range(SAV)) + 
  
  theme(legend.position="right", panel.border=element_rect(fill=NA, colour = "black") , panel.background = element_rect(fill = "white"), axis.text=element_text(colour="black", size=15),axis.title=element_text(colour="black", size=16),legend.text=element_text(colour="black", size=13),legend.title=element_text(colour="black", size=14),axis.title.y=element_text(vjust=1.5),axis.title.x=element_text(vjust=0), plot.title = element_text(size = rel(1.5), hjust = 0.5),   legend.key.height=unit(1.2, 'lines')) +coord_cartesian(ylim = c(-1,6))+ 
  
  coord_cartesian(ylim = c(-1,6)) + 
  
  ylab("log(Chla)") + xlab("log(TP)") + 
  
  scale_colour_gradient2( mid="royalblue3",high="seagreen3", limits=c(0,1)) + 
  
  guides(colour = guide_colourbar(order = 1, label.vjust=1, title.vjust=.85, barwidth=unit(1, 'lines'), barheight=unit(7, 'lines')), shape = guide_legend(order = 2), size = guide_legend(order = 3) )

R session info

sessionInfo()
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1
## 
## locale:
## [1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252    LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                           LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] loo_1.0.0            jagstools_1.6.0.9000 devtools_1.12.0      gridExtra_2.2.1      KernSmooth_2.23-15   ggplot2_2.2.1       
##  [7] mcmcplots_0.4.2      R2WinBUGS_2.1-21     boot_1.3-18          R2jags_0.5-7         rjags_4-6            coda_0.19-1         
## 
## loaded via a namespace (and not attached):
##  [1] denstrip_1.5.3     Rcpp_0.12.9        git2r_0.18.0       plyr_1.8.4         tools_3.3.2        digest_0.6.12      evaluate_0.10     
##  [8] memoise_1.0.0      tibble_1.3.3       gtable_0.2.0       lattice_0.20-34    rlang_0.1.1        curl_2.3           yaml_2.1.14       
## [15] parallel_3.3.2     withr_1.0.2        stringr_1.2.0      httr_1.2.1         knitr_1.15.1       rprojroot_1.2      grid_3.3.2        
## [22] R6_2.2.0           rmarkdown_1.3      magrittr_1.5       matrixStats_0.51.0 backports_1.0.5    scales_0.4.1       htmltools_0.3.5   
## [29] sfsmisc_1.1-0      abind_1.4-5        colorspace_1.3-2   labeling_0.3       stringi_1.1.2      lazyeval_0.2.0     munsell_0.4.3