This document provides R code to extend the BLR model to allow for random logistic intercepts, random TP/Chla intercepts, and random TP thresholds (see Appendix S2). This hierarchical model is fit to the three years of data in DNR_Data.csv. R session info and package versions can be found at the end of this document.

library(R2jags)
library(R2WinBUGS)
library(mcmcplots)
library(ggplot2)
library(KernSmooth)
library(coda)
library(gridExtra)
library(devtools)
install_github(repo='johnbaums/jagstools')
library(jagstools)
library(loo)

Read in MDNR data:

State.dat <- read.csv("DNR_Data.csv",header=T) # DNR data

# Resave variables
lake <- as.factor(State.dat$lakeID)
Year <- as.factor(State.dat$Year) # 2009, 2010, 2011
TPug <- State.dat$TPug  # ug/L
lnTPug <- log(TPug)
Chla <- State.dat$Chla  # ug/L
lnChla <- log(Chla)
SAVms <- State.dat$SAVms      # avg SAV (kg) in each lake
lnSAVms <- log(SAVms+.02)

Rename variables

lnP <- lnTPug
ind.rm <- which(is.na(lnP)) # which observations missing
lnP <- lnP[-ind.rm]
P <- TPug; P <- P[-ind.rm]
lnC <- lnChla; lnC <- lnC[-ind.rm]
Chl <- Chla; Chl <- Chl[-ind.rm]
SAV <- SAVms; SAV <- SAV[-ind.rm]
lnSAV <- lnSAVms; lnSAV <- lnSAV[-ind.rm]
lakes <- lake; lakes <- lakes[-ind.rm]

SAV.mean <- mean(SAV)
SAVc <- SAV-SAV.mean # centered SAV

Visualize data

pl1 <- ggplot(NULL, aes(x=P, y=Chl)) + geom_point(aes(size=SAV)) + scale_size_continuous(range=c(2,5))
pl2 <- ggplot(NULL, aes(x=lnP, y=lnC)) + geom_point( aes(size=SAV))+ scale_size_continuous(range=c(2,5))
grid.arrange(pl1, pl2, ncol=2)

JAGS code for Bayesian latent variable regression with random logistic intercepts, random TP/Chla intercepts, and random TP thresholds:

latent_reg <-function(){
  
  # Priors for random parameters
  for (i in 1:n.lakes){     
   alpha[i] ~ dnorm(mu.alpha, tau.alpha) # random intercept for logistic regression

   logp1[i] ~ dnorm(mu.logp1, tau.logp1)    # Random tip down threshold
   logp2[i] ~ dnorm(mu.logp2, tau.logp2)    # Random tip up threshold
    
   a0[i] ~ dnorm(mu.a0, tau.a0)           # Random TP/Chla intercept for clear state
   a1adj[i] ~ dnorm(mu.a1adj, tau.a1adj)    # Random difference between clear and turbid intercepts
   a1[i] <- a0[i] + a1adj[i]              # Random TP/Chla intercept for turbid state
  }

  # Hyperpriors for logistic intercepts
  mu.alpha ~ dnorm(0,.01) # mean intercept for logistic regression 
  sigma.alpha ~ dunif(0, 10)  # standard deviation of logistic random intercept
  tau.alpha <- 1 / (sigma.alpha * sigma.alpha)

  
  # Hyperpriors for TP thresholds
  mu.logp1 ~ dunif(0,6.5)   # mean tip down threshold (log scale)
  mu.p1 <- exp(mu.logp1)
  tau.logp1 <- 1 / (sigma.logp1 * sigma.logp1)
  sigma.logp1 ~ dunif(0, 10)  # standard deviation of random tip down threshold
   
  mu.logp2 ~ dunif(0,6.5)   # mean tip up threshold (log scale)
  mu.p2 <- exp(mu.logp2)
  tau.logp2 <- 1 / (sigma.logp2 * sigma.logp2)
  sigma.logp2 ~ dunif(0, 10)  # standard deviation of random tip up threshold
  
  
  # Hyperpriors for TP/Chla intercepts
  mu.a0 ~ dnorm(0, 0.01)         # mean clear TP/Chla intercept
  tau.a0 <- 1 / (sigma.a0 * sigma.a0)
  sigma.a0 ~ dunif(0, 10)  # standard deviation of clear random intercept
 
  mu.a1adj ~ dnorm(0, 0.01)      # mean of difference between clear and turbid intercepts
  tau.a1adj <- 1 / (sigma.a1adj * sigma.a1adj)
  sigma.a1adj ~ dunif(0, 10)  # standard deviation of random difference between intercepts
  
  mu.a1 <- mu.a1adj+mu.a0    # mean turbid TP/Chla intercept


  # Priors for TP/Chla slopes
  b0 ~ dunif(0,10)   # clear TP/Chla slope
  b1 ~ dunif(0,10)   # turbid TP/Chla slope
  b1adj <- b1-b0     # difference between clear and turbid slopes

  
  # Prior for logistic slope (SAV/State relationship)
  beta <- -1*negbeta  # slope for logistic regression
  negbeta ~ dlnorm( .5, 1/1^2 ) # prior for -1*beta
  
  
  sigma1 ~ dunif(0, 10)  # Residual standard deviation of clear lakes
  sigma2 ~ dunif(0, 10)  # Residual standard deviation of turbid lakes
  tau1 <- 1 / ( sigma1 * sigma1)
  tau2 <- 1 / ( sigma2 * sigma2)
  
  
  b2 <- (mu.a1+b1*mu.logp1-mu.a0-b0*mu.logp2) /(mu.logp1-mu.logp2) # slope of unstable line for a typical lake (random effects are all zero)
  constraint <- step(b2)  
  
 
  
  # Likelihood
  for(i in 1:nsamp){
    lnC[i] ~ dnorm(mu[i], tau1*(1-S[i]) + tau2*S[i])
    mu[i] <- a0[Lake.num[i]]+a1adj[Lake.num[i]]*S[i] + b0*lnP[i]*(1-S[i])+b1*lnP[i]*S[i] # state-dependent linear relationship between log(TP) and log(Chla) with random intercepts for each lake
    
    S[i] ~ dbern(p[i]) # latent state (clear/turbid) for observation i
    logit(ptmp[i])<-alpha[Lake.num[i]]+beta*SAVc[i] # logistic regression relating SAV to the probability observation i is turbid with random intercept for each lake  
    p[i] <- ptmp[i]*step(lnP[i]-logp1[Lake.num[i]])*step(logp2[Lake.num[i]]-lnP[i])+step(lnP[i]-logp2[Lake.num[i]]) # sets p to 0 if TP less than p1, p depends on SAV if between p1 and p2, and p is 1 if TP greater than p2
    
    ForceNeg[i] ~ dbern(constraint) # Force the slope of the unstable line connecting the turbid line at logp1 to the clear line at logp2 to be negative for a typical lake (random effects are all zero) so the typical fit resembles a bifurcation diagram
    
    log_lik[i] <- log( dnorm(lnC[i], mu[i], tau1*(1-S[i]) + tau2*S[i]) ) # contribution of observation i to log-likelihood for PSIS-LOO computations
       
  }
}


# Bundle data
ForceNeg <- rep(0,length(lnC))
nsamp <- length(lnC)
Lake.num <- as.numeric(as.factor(lakes)) # numerical index for each lake
n.lakes <- length(unique(Lake.num))

win.data <- list("lnC", "lnP", "nsamp", "SAVc", "ForceNeg", "n.lakes", "Lake.num")
  
# Inits function
inits <- function(){ list(mu.a0=-.078, sigma.a0=.1, mu.a1adj=-0.32, sigma.a1adj=.2, mu.logp1=3.86, sigma.logp1=0.56, mu.logp2=6.13, sigma.logp2=0.19, b0=.46, b1=.88, mu.alpha=-1.96, sigma.alpha=2.23, negbeta=12.8, sigma1=runif(1,.6, .74), sigma2=runif(1,.45,.69))} 
# initial values informed by posteriors from previous run

# Parameters to estimate
params <- c("a0","a1adj", "mu.a0", "mu.a1adj", "mu.a1", "sigma.a0", "b0", "b1","b2","alpha","mu.alpha","sigma.alpha","beta","mu.p1","mu.p2","logp1","logp2", "mu.logp1", "mu.logp2", "sigma.logp1", "sigma.logp2", "sigma1", "sigma2", "S", "log_lik") 

# MCMC settings
nc <- 3
ni <- 1000000 
nb <-  200000  
nt <- 240       

Start MCMC sampler

WARNING: For 1 million iterations (as specified above), JAGS takes several hours to run, even using parallel computing. The model has already been run and saved as “DNR_randomIntLogistThresh_JAGSout_1mil_240thin_LOO.rds”, which can be loaded using the function ‘readRDS’ (see Markdown file for code).

set.seed(27)

out.BLR <- do.call(jags.parallel, list(win.data, inits, params, latent_reg, nc, ni, nb, nt))

# If desired, save MCMC run to be loaded later
#saveRDS(out.BLR, "DNR_randomIntLogistThresh_JAGSout_1mil_240thin_LOO.rds")

Trace plots

traplot(out.BLR ,c("mu.a0","mu.a1","b0","b1","mu.alpha","beta","mu.logp1","mu.logp2"))

Posterior density plots

denplot(out.BLR ,c("mu.a0","mu.a1","sigma.a0", "sigma.a1adj",  "b0", "b1", "mu.alpha", "sigma.alpha","beta","mu.p1","mu.p2", "mu.logp1", "mu.logp2", "sigma.logp1", "sigma.logp2",  "sigma1", "sigma2"))

Posterior modes and medians for mean threshold parameters (i.e., thresholds for a typical lake with random effects set to zero)

dKS.logp1 <- bkde(out.BLR$BUGSoutput$sims.list$mu.logp1)
(logp1.modeKS <- dKS.logp1$x[which.max(dKS.logp1$y)]) # mode of mean random lower threshold parameter on log scale
## [1] 3.87
dKS.logp2 <- bkde(out.BLR$BUGSoutput$sims.list$mu.logp2)
(logp2.modeKS <- dKS.logp2$x[which.max(dKS.logp2$y)]) # mode of mean random upper threshold parameter on log scale
## [1] 6.11
ylim.logp <- max(c(max(dKS.logp1$y), max(dKS.logp2$y)))

exp(logp1.modeKS) # exponentiate to get p1 mode
## [1] 48.1
exp(logp2.modeKS) # exponentiate to get p2 mode
## [1] 449
# Save posterior medians for mean threshold parameters
logp1med <- out.BLR$BUGSoutput$median$mu.logp1
logp2med <- out.BLR$BUGSoutput$median$mu.logp2
p1med <- out.BLR$BUGSoutput$median$mu.p1
p2med <- out.BLR$BUGSoutput$median$mu.p2

Density plots of posteriors for mean threshold parameters

par(mar= c(5, 5, 2, 2), cex.lab=1.8, cex.axis=1.6)
plot(dKS.logp1, xlab="Threshold", las=1, lwd=3, xlim=c(0,6.5), main="", col="#009a9a", ylim=c(0,ylim.logp+1), type="l", ylab="Density")
lines(dKS.logp2, lwd=3, col="#9a0000")
abline(h=1/6.5, lwd=2, col="black")
abline(v=c(logp1.modeKS,logp2.modeKS), col="darkorchid", lty=3, lwd=3) 
abline(v=c(logp1med,logp2med), col="dodgerblue", lty=3, lwd=3) 
legend("topleft",c("Lower Threshold Posterior","Upper Threshold Posterior","Prior","Mode","Median"),lwd=3, col=c("#009a9a","#9a0000","black","darkorchid","dodgerblue"), bty='n', cex=1.3, lty=c(1,1,1,2,3,3))

Plot of prior and posterior distributions for \(\beta\) (this is \(\gamma_1\) in the paper; i.e., the slope parameter for the logistic regression relating SAV to the probability a lake is turbid)

par(mar= c(5, 5, 2, 2), cex.lab=1.7, cex.axis=1.6)
plot(density(out.BLR$BUGSoutput$sims.list$beta),xlim=c(-40,0),ylim=c(0,.4),las=1,xlab=expression(beta),main="",lwd=3, col="firebrick3")
lines(density(-1*rlnorm(10000,meanlog=.5,sdlog=1)), col="dodgerblue3",lwd=3)
legend("topleft",c("Prior","Posterior"),cex=1.3,lty=1,lwd=3,col=c("dodgerblue3","firebrick3"))

Pull out posterior means/medians and classify discrete states

# posterior means for regression parameters
a0m <- out.BLR$BUGSoutput$mean$mu.a0 # posterior mean estimate for mean parameter of random clear intercept distribution (i.e., value of a0 for a typical lake with a random effect of zero)
a1m <- out.BLR$BUGSoutput$mean$mu.a1  # posterior mean estimate for mean parameter of random turbid intercept distribution (i.e., value of a1 for a typical lake with a random effect of zero)
a1adjm <- out.BLR$BUGSoutput$mean$mu.a1adj
b0m <- out.BLR$BUGSoutput$mean$b0
b1m <- out.BLR$BUGSoutput$mean$b1
b1adjm <- out.BLR$BUGSoutput$mean$b1adj
alpham <- out.BLR$BUGSoutput$mean$mu.alpha # posterior mean estimate for mean parameter of random logistic intercept distribution (i.e., value of alpha for a typical lake with a random effect of zero)
betam <- out.BLR$BUGSoutput$mean$beta

# posterior medians for regression parameters
a0med <- out.BLR$BUGSoutput$median$mu.a0 # posterior median estimate for mean parameter of random clear intercept distribution (i.e., value of a0 for a typical lake with a random effect of zero)
a1med <- out.BLR$BUGSoutput$median$mu.a1 # posterior median estimate for mean parameter of random turbid intercept distribution (i.e., value of a1 for a typical lake with a random effect of zero)
a1adjmed <- out.BLR$BUGSoutput$median$mu.a1adj
b0med <- out.BLR$BUGSoutput$median$b0
b1med <- out.BLR$BUGSoutput$median$b1
b1adjmed <- out.BLR$BUGSoutput$median$b1adj
alphamed <- out.BLR$BUGSoutput$median$mu.alpha # posterior median estimate for mean parameter of random logistic intercept distribution (i.e., value of alpha for a typical lake with a random effect of zero)
betamed <- out.BLR$BUGSoutput$median$beta

# 95% credible interval bounds for mean TP threshold parameters on log scale (medians were previously saved)
logp1.row <- which(row.names(out.BLR$BUGSoutput$summary)=="mu.logp1")
logp1.lowbound <- out.BLR$BUGSoutput$summary[logp1.row,3] # 2.5
logp1.upbound <- out.BLR$BUGSoutput$summary[logp1.row,7] # 97.5
logp2.row <- which(row.names(out.BLR$BUGSoutput$summary)=="mu.logp2")
logp2.lowbound <- out.BLR$BUGSoutput$summary[logp2.row,3] # 2.5
logp2.upbound <- out.BLR$BUGSoutput$summary[logp2.row,7] # 97.5

# Put parameter estimates in a dataframe
coefs <- data.frame(Int.Mean=c(a0m,a1m),Int.Med=c(a0med,a1med),Slope.Mean=c(b0m,b1m), Slope.Med=c(b0med,b1med),State=c("Clear","Turbid"), Bifur.LogMed=c(logp1med,logp2med),Bifur.LogMode=c(logp1.modeKS,logp2.modeKS),  Bifur.Log.LowBound=c(logp1.lowbound,logp2.lowbound), Bifur.Log.UpBound=c(logp1.upbound,logp2.upbound))

coefs
##   Int.Mean Int.Med Slope.Mean Slope.Med  State Bifur.LogMed Bifur.LogMode Bifur.Log.LowBound Bifur.Log.UpBound
## 1   0.0146  0.0122      0.423     0.423  Clear         3.90          3.87               3.56              4.34
## 2  -0.2333 -0.2709      0.848     0.854 Turbid         6.14          6.11               5.90              6.47
# Classify Discrete State: 0 if mean(State)<.5 ; 1 if mean(State) > .5
Latent.State <- as.factor(as.numeric(out.BLR$BUGSoutput$mean$S > .5))

Plot average estimated logistic curve for a lake with a random effect of zero

  mcmc.l<-out.BLR$BUGSoutput$sims.list
  Ep.PE <- Ep.LCL <- Ep.UCL <- rep(NA,length(SAV))
  
  for(i in 1:length(SAV)){
     jags.Ep.temp<-plogis(mcmc.l$mu.alpha-SAV.mean*mcmc.l$beta + mcmc.l$beta*SAV[i])
     Ep.PE[i]<-mean(jags.Ep.temp)
     Ep.LCL[i]<-quantile(jags.Ep.temp, prob=0.025)   
     Ep.UCL[i]<-quantile(jags.Ep.temp, prob=0.975)    
  }

state.num <- as.numeric(out.BLR$BUGSoutput$mean$S > .5)

ggplot(NULL, aes(x=SAV, y=state.num) ) + geom_point(size=3)+ geom_line(aes(x=SAV, y=Ep.PE)) + geom_ribbon(aes(x=SAV,ymin=Ep.LCL, ymax=Ep.UCL),alpha=0.3) +theme(axis.text=element_text(colour="black", size=15),axis.title=element_text(colour="black", size=16),legend.text=element_text(colour="black", size=15),legend.title=element_text(colour="black", size=15),axis.title.y=element_text(vjust=1.5),axis.title.x=element_text(vjust=0))+ coord_cartesian(ylim = c(-.05,1.05)) + ylab("P(turbid)") + xlab("SAV (kg)")

Plot average estimated regression line for each state for a typical lake (i.e., all random effects set to zero)

Black solid (dashed) lines represent average (2.5th, 97.5th quantiles) estimated steady state relationships across all MCMC samples for a typical lake. Steady state lines end at the mean TP threshold parameter point estimates (posterior modes), and gray bands represent 95% credible intervals for mean TP threshold parameters. Triangular points represent lakes classified as clear (>50% of MCMC sampled states were clear), and circular points represent lakes classified as turbid (>50% of MCMC sampled states were turbid). The average MCMC sampled state for each lake is shown on a blue to green color gradient (0=clear, 1=turbid). Point size is proportional to submerged aquatic vegetation (SAV, units: average kg/sample).

Discrete.State <- as.numeric(Latent.State)-1
Discrete.State[Discrete.State==0] <- "Clear" ; Discrete.State[Discrete.State==1] <- "Turbid"
Continuous.State <- out.BLR$BUGSoutput$mean$S

log.TP <- lnP
log.Chla <- lnC
log.SAV <- lnSAV
cbPalette <- c("royalblue4","seagreen")

a0.samps <- out.BLR$BUGSoutput$sims.list$mu.a0
a1.samps <- out.BLR$BUGSoutput$sims.list$mu.a1
b0.samps <- out.BLR$BUGSoutput$sims.list$b0
b1.samps <- out.BLR$BUGSoutput$sims.list$b1

num.sims <- length(a0.samps)
logp2.modeKS <- round(logp2.modeKS, digits=2)

xclear <- seq(1.4,logp2.modeKS, .01)
xclear.pred <- matrix(rep(NA, length(xclear)*num.sims), nrow=length(xclear))

for (i in 1:length(xclear)){
  x <- xclear[i]
  xclear.pred[i,] <- a0.samps + b0.samps*x
}

clear.means <- apply(xclear.pred, 1, mean)
clear.lower <- apply(xclear.pred, 1, quantile, probs = 0.025)
clear.upper <- apply(xclear.pred, 1, quantile, probs = 0.975)


logp1.modeKS <- round(logp1.modeKS, digits=2)
xturbid <- seq(logp1.modeKS,7, .01)
xturbid.pred <- matrix(rep(NA, length(xturbid)*num.sims), nrow=length(xturbid))

for (i in 1:length(xturbid)){
  x <- xturbid[i]
  xturbid.pred[i,] <- a1.samps + b1.samps*x
}

turbid.means <- apply(xturbid.pred, 1, mean)
turbid.lower <- apply(xturbid.pred, 1, quantile, probs = 0.025)
turbid.upper <- apply(xturbid.pred, 1, quantile, probs = 0.975)


ggplot(NULL, aes(x=log.TP, y=log.Chla , color=Continuous.State )) +
  
  annotate("rect", xmin=coefs$Bifur.Log.LowBound[1], xmax=coefs$Bifur.Log.UpBound[1], ymin=-Inf, ymax=Inf, alpha=0.15, fill="gray20") + annotate("rect", xmin=coefs$Bifur.Log.LowBound[2], xmax=coefs$Bifur.Log.UpBound[2], ymin=-Inf, ymax=Inf, alpha=0.15, fill="gray20") + 
 
  geom_point( aes(shape=Discrete.State, size=SAV)) + 
  geom_line(aes(x=xturbid, y=turbid.means), size=1.1, colour="gray20") +   
  geom_line(aes(x=xclear, y=clear.means), size=1.1, colour="gray20")  + 
  geom_line(aes(x=xturbid, y=turbid.lower), size=1.1, colour="gray20", linetype=2) + 
  geom_line(aes(x=xturbid, y=turbid.upper), size=1.1, colour="gray20", linetype=2) + 
  geom_line(aes(x=xclear, y=clear.lower), size=1.1, colour="gray20", linetype=2) + 
  geom_line(aes(x=xclear, y=clear.upper), size=1.1, colour="gray20", linetype=2)+

  scale_size(range=c(2,6),  limits=range(SAV)) + 
  
  theme(legend.position="right", panel.border=element_rect(fill=NA, colour = "black") , panel.background = element_rect(fill = "white"), axis.text=element_text(colour="black", size=15),axis.title=element_text(colour="black", size=16),legend.text=element_text(colour="black", size=13),legend.title=element_text(colour="black", size=14),axis.title.y=element_text(vjust=1.5),axis.title.x=element_text(vjust=0), plot.title = element_text(size = rel(1.5), hjust = 0.5),   legend.key.height=unit(1.2, 'lines')) +coord_cartesian(ylim = c(-1,6))+ 
  
  coord_cartesian(ylim = c(-1,6)) + 
  
  ylab("log(Chla)") + xlab("log(TP)") + 
  
  scale_colour_gradient2( mid="royalblue3",high="seagreen3", limits=c(0,1)) + 
  
  guides(colour = guide_colourbar(order = 1, label.vjust=1, title.vjust=.85, barwidth=unit(1, 'lines'), barheight=unit(7, 'lines')), shape = guide_legend(order = 2), size = guide_legend(order = 3) )

R session info

sessionInfo()
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1
## 
## locale:
## [1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252    LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                           LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] loo_1.0.0            jagstools_1.6.0.9000 devtools_1.12.0      gridExtra_2.2.1      KernSmooth_2.23-15   ggplot2_2.2.1       
##  [7] mcmcplots_0.4.2      R2WinBUGS_2.1-21     boot_1.3-18          R2jags_0.5-7         rjags_4-6            coda_0.19-1         
## 
## loaded via a namespace (and not attached):
##  [1] denstrip_1.5.3     Rcpp_0.12.9        git2r_0.18.0       plyr_1.8.4         tools_3.3.2        digest_0.6.12      evaluate_0.10     
##  [8] memoise_1.0.0      tibble_1.3.3       gtable_0.2.0       lattice_0.20-34    rlang_0.1.1        curl_2.3           yaml_2.1.14       
## [15] parallel_3.3.2     withr_1.0.2        stringr_1.2.0      httr_1.2.1         knitr_1.15.1       rprojroot_1.2      grid_3.3.2        
## [22] R6_2.2.0           rmarkdown_1.3      magrittr_1.5       matrixStats_0.51.0 backports_1.0.5    scales_0.4.1       htmltools_0.3.5   
## [29] sfsmisc_1.1-0      abind_1.4-5        colorspace_1.3-2   labeling_0.3       stringi_1.1.2      lazyeval_0.2.0     munsell_0.4.3