library(R2jags)
library(R2WinBUGS)
library(mcmcplots)
library(ggplot2)
library(KernSmooth)
library(coda)
library(gridExtra)
library(devtools)
install_github(repo='johnbaums/jagstools')
library(jagstools)
library(loo)
State.dat <- read.csv("DNR_Data.csv",header=T) # DNR data
# Resave variables
lake <- as.factor(State.dat$lakeID)
Year <- as.factor(State.dat$Year) # 2009, 2010, 2011
TPug <- State.dat$TPug # ug/L
lnTPug <- log(TPug)
Chla <- State.dat$Chla # ug/L
lnChla <- log(Chla)
SAVms <- State.dat$SAVms # avg SAV (kg) in each lake
lnSAVms <- log(SAVms+.02)
lnP <- lnTPug
ind.rm <- which(is.na(lnP)) # which observations missing
lnP <- lnP[-ind.rm]
P <- TPug; P <- P[-ind.rm]
lnC <- lnChla; lnC <- lnC[-ind.rm]
Chl <- Chla; Chl <- Chl[-ind.rm]
SAV <- SAVms; SAV <- SAV[-ind.rm]
lnSAV <- lnSAVms; lnSAV <- lnSAV[-ind.rm]
lakes <- lake; lakes <- lakes[-ind.rm]
SAV.mean <- mean(SAV)
SAVc <- SAV-SAV.mean # centered SAV
pl1 <- ggplot(NULL, aes(x=P, y=Chl)) + geom_point(aes(size=SAV)) + scale_size_continuous(range=c(2,5))
pl2 <- ggplot(NULL, aes(x=lnP, y=lnC)) + geom_point( aes(size=SAV))+ scale_size_continuous(range=c(2,5))
grid.arrange(pl1, pl2, ncol=2)
latent_reg <-function(){
# Priors for logistic regression relating SAV to P(turbid)
# Priors for the random logistic intercepts
for (i in 1:n.lakes){
alpha[i] ~ dnorm(mu.alpha, tau.alpha) # random intercept for logistic regression
}
# Hyperpriors for logistic intercepts
mu.alpha ~ dnorm(0,.01) # mean intercept for logistic regression
sigma.alpha ~ dunif(0, 10) # standard deviation of logistic random intercept
tau.alpha <- 1 / (sigma.alpha * sigma.alpha)
beta <- -1*negbeta # slope for logistic regression
negbeta ~ dlnorm( .5, 1/1^2 ) # prior for -1*beta
# Priors for TP/Chla regression
a0 ~ dnorm(0,0.01) # clear TP/Chla intercept
a1adj ~ dnorm(0,0.1) # difference between clear and turbid intercepts
a1 <- a1adj+a0 # turbid TP/Chla intercept
b0 ~ dunif(0,10) # clear TP/Chla slope
b1 ~ dunif(0,10) # turbid TP/Chla slope
b1adj <- b1-b0 # difference between clear and turbid slopes
sigma1 ~ dunif(0, 20) # Residual standard deviation of clear lakes
sigma2 ~ dunif(0, 20) # Residual standard deviation of turbid lakes
tau1 <- 1 / ( sigma1 * sigma1)
tau2 <- 1 / ( sigma2 * sigma2)
b2 <- (a1+b1*logp1-a0-b0*logp2) /(logp1-logp2) # slope of unstable line
constraint <- step(b2)
# Priors for TP thresholds
logp1 ~ dunif(0,6.5) # left bifurcation point (tip down) on log scale
logp2 ~ dunif(0,6.5) # right bifurcation point (tip up) on log scale
p1 <- exp(logp1)
p2 <- exp(logp2)
# Likelihood
for(i in 1:nsamp){
lnC[i] ~ dnorm(mu[i], tau1*(1-S[i]) + tau2*S[i])
mu[i] <- a0+a1adj*S[i] + b0*lnP[i]*(1-S[i])+b1*lnP[i]*S[i] # state-dependent linear relationship between log(TP) and log(Chla)
S[i] ~ dbern(p[i]) # latent state (clear/turbid) for observation i
logit(ptmp[i])<-alpha[Lake.num[i]]+beta*SAVc[i] # logistic regression relating SAV to the probability observation i is turbid with random intercept for each lake
p[i] <- ptmp[i]*step(lnP[i]-logp1)*step(logp2-lnP[i])+step(lnP[i]-logp2) # sets p to 0 if TP less than p1, p depends on SAV if between p1 and p2, and p is 1 if TP greater than p2
ForceNeg[i] ~ dbern(constraint) # Force the slope of the unstable line connecting the turbid line at logp1 to the clear line at logp2 to be negative so the fit resembles a bifurcation diagram
log_lik[i] <- log( dnorm(lnC[i], mu[i], tau1*(1-S[i]) + tau2*S[i]) ) # contribution of observation i to log-likelihood for PSIS-LOO computations
}
}
# Bundle data
ForceNeg <- rep(0,length(lnC))
nsamp <- length(lnC)
Lake.num <- as.numeric(as.factor(lakes)) # numerical index for each lake
n.lakes <- length(unique(Lake.num))
win.data <- list("lnC", "lnP", "nsamp", "SAVc", "ForceNeg", "n.lakes", "Lake.num")
# Inits function
inits <- function(){ list(a0=0.12, a1adj=-0.67, logp1=3.78, logp2=6.09, b0=.40, b1=.90, mu.alpha=-0.97, sigma.alpha=1.60, negbeta=5.93, sigma1=runif(1,.5, .7), sigma2=runif(1,.4,.65))}
# initial values informed by posteriors from previous run
# Parameters to estimate
params <- c("a0","a1","a1adj", "b0", "b1","b1adj","b2","alpha","mu.alpha","sigma.alpha","beta","p","p1","p2","logp1","logp2", "sigma1", "sigma2", "S", "log_lik")
# MCMC settings
nc <- 3
ni <- 1000000
nb <- 200000
nt <- 240
WARNING: For 1 million iterations (as specified above), JAGS takes several hours to run, even using parallel computing. The model has already been run and saved as “DNR_randomLogist_JAGSout_1mil_240thin_LOO.rds”, which can be loaded using the function ‘readRDS’ (see Markdown file for code).
set.seed(27)
out.BLR <- do.call(jags.parallel, list(win.data, inits, params, latent_reg, nc, ni, nb, nt))
# If desired, save MCMC run to be loaded later
#saveRDS(out.BLR, "DNR_randomLogist_JAGSout_1mil_240thin_LOO.rds")
jagsresults(x=out.BLR, params=c("logp1","logp2","p1","p2","a0","a1","b0","b1","mu.alpha","sigma.alpha","beta","sigma1","sigma2"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## a0 -0.0749 0.1819 -0.436 -0.199 -0.0725 0.0491 0.2792 1 10000
## a1 -0.3036 0.6619 -1.446 -0.768 -0.3683 0.1135 1.1345 1 10000
## b0 0.4580 0.0522 0.356 0.423 0.4578 0.4930 0.5610 1 8200
## b1 0.8690 0.1225 0.608 0.791 0.8789 0.9543 1.0873 1 10000
## beta -13.8012 7.7310 -34.485 -16.860 -11.2973 -8.3339 -5.5394 1 10000
## logp1 3.8428 0.0737 3.745 3.783 3.8138 3.9135 3.9568 1 10000
## logp2 6.0942 0.0591 6.017 6.053 6.0921 6.1335 6.1742 1 4700
## mu.alpha -2.6338 2.0958 -8.313 -3.395 -2.0853 -1.2984 -0.0566 1 10000
## p1 46.7818 3.4596 42.297 43.928 45.3242 50.0724 52.2919 1 10000
## p2 444.0721 26.9430 410.422 425.408 442.3467 461.0686 480.1951 1 4800
## sigma.alpha 3.1361 2.3278 0.169 1.380 2.5340 4.4117 8.9674 1 3700
## sigma1 0.6877 0.0311 0.630 0.666 0.6868 0.7079 0.7513 1 2200
## sigma2 0.5817 0.0656 0.457 0.537 0.5807 0.6248 0.7158 1 10000
traplot(out.BLR ,c("a0","a1","b0","b1","mu.alpha","sigma.alpha","beta","logp1","logp2"))
denplot(out.BLR ,c("a0","a1","b0","b1","mu.alpha","sigma.alpha","beta","p1","p2", "sigma1", "sigma2"))
dKS.logp1 <- bkde(out.BLR$BUGSoutput$sims.list$logp1)
(logp1.modeKS <- dKS.logp1$x[which.max(dKS.logp1$y)]) # lower threshold mode on log scale
## [1] 3.79
dKS.logp2 <- bkde(out.BLR$BUGSoutput$sims.list$logp2)
(logp2.modeKS <- dKS.logp2$x[which.max(dKS.logp2$y)]) # upper threshold mode on log scale
## [1] 6.08
ylim.logp <- max(c(max(dKS.logp1$y), max(dKS.logp2$y)))
exp(logp1.modeKS) # exponentiate to get p1 mode
## [1] 44.3
exp(logp2.modeKS) # exponentiate to get p2 mode
## [1] 437
# Save posterior medians for thresholds
logp1med <- out.BLR$BUGSoutput$median$logp1
logp2med <- out.BLR$BUGSoutput$median$logp2
p1med <- out.BLR$BUGSoutput$median$p1
p2med <- out.BLR$BUGSoutput$median$p2
par(mar= c(5, 5, 2, 2), cex.lab=1.8, cex.axis=1.6)
plot(dKS.logp1, xlab="Threshold", las=1, lwd=3, xlim=c(0,6.5), main="", col="#009a9a", ylim=c(0,ylim.logp+1), type="l", ylab="Density")
lines(dKS.logp2, lwd=3, col="#9a0000")
abline(h=1/6.5, lwd=2, col="black")
abline(v=c(logp1.modeKS,logp2.modeKS), col="darkorchid", lty=3, lwd=3)
abline(v=c(logp1med,logp2med), col="dodgerblue", lty=3, lwd=3)
legend("topleft",c("Lower Threshold Posterior","Upper Threshold Posterior","Prior","Mode","Median"),lwd=3, col=c("#009a9a","#9a0000","black","darkorchid","dodgerblue"), bty='n', cex=1.3, lty=c(1,1,1,2,3,3))
par(mar= c(5, 5, 2, 2), cex.lab=1.7, cex.axis=1.6)
plot(density(out.BLR$BUGSoutput$sims.list$beta),xlim=c(-40,0),ylim=c(0,.4),las=1,xlab=expression(beta),main="",lwd=3, col="firebrick3")
lines(density(-1*rlnorm(10000,meanlog=.5,sdlog=1)), col="dodgerblue3",lwd=3)
legend("topleft",c("Prior","Posterior"),cex=1.3,lty=1,lwd=3,col=c("dodgerblue3","firebrick3"))
# posterior means for regression parameters
a0m <- out.BLR$BUGSoutput$mean$a0
a1m <- out.BLR$BUGSoutput$mean$a1
a1adjm <- out.BLR$BUGSoutput$mean$a1adj
b0m <- out.BLR$BUGSoutput$mean$b0
b1m <- out.BLR$BUGSoutput$mean$b1
b1adjm <- out.BLR$BUGSoutput$mean$b1adj
alpham <- out.BLR$BUGSoutput$mean$mu.alpha # posterior mean estimate for mean parameter of random logistic intercept distribution (i.e., value of alpha for a typical lake with a random effect of zero)
betam <- out.BLR$BUGSoutput$mean$beta
# posterior medians for regression parameters
a0med <- out.BLR$BUGSoutput$median$a0
a1med <- out.BLR$BUGSoutput$median$a1
a1adjmed <- out.BLR$BUGSoutput$median$a1adj
b0med <- out.BLR$BUGSoutput$median$b0
b1med <- out.BLR$BUGSoutput$median$b1
b1adjmed <- out.BLR$BUGSoutput$median$b1adj
alphamed <- out.BLR$BUGSoutput$median$mu.alpha # posterior median estimate for mean parameter of random logistic intercept distribution (i.e., value of alpha for a typical lake with a random effect of zero)
betamed <- out.BLR$BUGSoutput$median$beta
# 95% credible interval bounds for TP thresholds on log scale (medians were previously saved)
logp1.row <- which(row.names(out.BLR$BUGSoutput$summary)=="logp1")
logp1.lowbound <- out.BLR$BUGSoutput$summary[logp1.row,3] # 2.5
logp1.upbound <- out.BLR$BUGSoutput$summary[logp1.row,7] # 97.5
logp2.row <- which(row.names(out.BLR$BUGSoutput$summary)=="logp2")
logp2.lowbound <- out.BLR$BUGSoutput$summary[logp2.row,3] # 2.5
logp2.upbound <- out.BLR$BUGSoutput$summary[logp2.row,7] # 97.5
# Put parameter estimates in a dataframe
coefs <- data.frame(Int.Mean=c(a0m,a1m),Int.Med=c(a0med,a1med),Slope.Mean=c(b0m,b1m), Slope.Med=c(b0med,b1med),State=c("Clear","Turbid"), Bifur.LogMed=c(logp1med,logp2med),Bifur.LogMode=c(logp1.modeKS,logp2.modeKS), Bifur.Log.LowBound=c(logp1.lowbound,logp2.lowbound), Bifur.Log.UpBound=c(logp1.upbound,logp2.upbound))
coefs
## Int.Mean Int.Med Slope.Mean Slope.Med State Bifur.LogMed Bifur.LogMode Bifur.Log.LowBound Bifur.Log.UpBound
## 1 -0.0749 -0.0725 0.458 0.458 Clear 3.81 3.79 3.74 3.96
## 2 -0.3036 -0.3683 0.869 0.879 Turbid 6.09 6.08 6.02 6.17
# Classify Discrete State: 0 if mean(State)<.5 ; 1 if mean(State) > .5
Latent.State <- as.factor(as.numeric(out.BLR$BUGSoutput$mean$S > .5))
mcmc.l<-out.BLR$BUGSoutput$sims.list
Ep.PE <- Ep.LCL <- Ep.UCL <- rep(NA,length(SAV))
for(i in 1:length(SAV)){
jags.Ep.temp<-plogis(mcmc.l$mu.alpha-SAV.mean*mcmc.l$beta + mcmc.l$beta*SAV[i])
Ep.PE[i]<-mean(jags.Ep.temp)
Ep.LCL[i]<-quantile(jags.Ep.temp, prob=0.025)
Ep.UCL[i]<-quantile(jags.Ep.temp, prob=0.975)
}
state.num <- as.numeric(out.BLR$BUGSoutput$mean$S > .5)
ggplot(NULL, aes(x=SAV, y=state.num) ) + geom_point(size=3)+ geom_line(aes(x=SAV, y=Ep.PE)) + geom_ribbon(aes(x=SAV,ymin=Ep.LCL, ymax=Ep.UCL),alpha=0.3) +theme(axis.text=element_text(colour="black", size=15),axis.title=element_text(colour="black", size=16),legend.text=element_text(colour="black", size=15),legend.title=element_text(colour="black", size=15),axis.title.y=element_text(vjust=1.5),axis.title.x=element_text(vjust=0))+ coord_cartesian(ylim = c(-.05,1.05)) + ylab("P(turbid)") + xlab("SAV (kg)")
Black solid (dashed) lines represent average (2.5th, 97.5th quantiles) estimated steady state relationships across all MCMC samples. Steady state lines end at the TP threshold point estimates (posterior modes), and gray bands represent 95% credible intervals for TP thresholds. Triangular points represent lakes classified as clear (>50% of MCMC sampled states were clear), and circular points represent lakes classified as turbid (>50% of MCMC sampled states were turbid). The average MCMC sampled state for each lake is shown on a blue to green color gradient (0=clear, 1=turbid). Point size is proportional to submerged aquatic vegetation (SAV, units: average kg/sample).
Discrete.State <- as.numeric(Latent.State)-1
Discrete.State[Discrete.State==0] <- "Clear" ; Discrete.State[Discrete.State==1] <- "Turbid"
Continuous.State <- out.BLR$BUGSoutput$mean$S
log.TP <- lnP
log.Chla <- lnC
log.SAV <- lnSAV
cbPalette <- c("royalblue4","seagreen")
a0.samps <- out.BLR$BUGSoutput$sims.list$a0
a1.samps <- out.BLR$BUGSoutput$sims.list$a1
b0.samps <- out.BLR$BUGSoutput$sims.list$b0
b1.samps <- out.BLR$BUGSoutput$sims.list$b1
num.sims <- length(a0.samps)
logp2.modeKS <- round(logp2.modeKS, digits=2)
xclear <- seq(1.4,logp2.modeKS, .01)
xclear.pred <- matrix(rep(NA, length(xclear)*num.sims), nrow=length(xclear))
for (i in 1:length(xclear)){
x <- xclear[i]
xclear.pred[i,] <- a0.samps + b0.samps*x
}
clear.means <- apply(xclear.pred, 1, mean)
clear.lower <- apply(xclear.pred, 1, quantile, probs = 0.025)
clear.upper <- apply(xclear.pred, 1, quantile, probs = 0.975)
logp1.modeKS <- round(logp1.modeKS, digits=2)
xturbid <- seq(logp1.modeKS,7, .01)
xturbid.pred <- matrix(rep(NA, length(xturbid)*num.sims), nrow=length(xturbid))
for (i in 1:length(xturbid)){
x <- xturbid[i]
xturbid.pred[i,] <- a1.samps + b1.samps*x
}
turbid.means <- apply(xturbid.pred, 1, mean)
turbid.lower <- apply(xturbid.pred, 1, quantile, probs = 0.025)
turbid.upper <- apply(xturbid.pred, 1, quantile, probs = 0.975)
ggplot(NULL, aes(x=log.TP, y=log.Chla , color=Continuous.State )) +
annotate("rect", xmin=coefs$Bifur.Log.LowBound[1], xmax=coefs$Bifur.Log.UpBound[1], ymin=-Inf, ymax=Inf, alpha=0.15, fill="gray20") + annotate("rect", xmin=coefs$Bifur.Log.LowBound[2], xmax=coefs$Bifur.Log.UpBound[2], ymin=-Inf, ymax=Inf, alpha=0.15, fill="gray20") +
geom_point( aes(shape=Discrete.State, size=SAV)) +
geom_line(aes(x=xturbid, y=turbid.means), size=1.1, colour="gray20") +
geom_line(aes(x=xclear, y=clear.means), size=1.1, colour="gray20") +
geom_line(aes(x=xturbid, y=turbid.lower), size=1.1, colour="gray20", linetype=2) +
geom_line(aes(x=xturbid, y=turbid.upper), size=1.1, colour="gray20", linetype=2) +
geom_line(aes(x=xclear, y=clear.lower), size=1.1, colour="gray20", linetype=2) +
geom_line(aes(x=xclear, y=clear.upper), size=1.1, colour="gray20", linetype=2)+
scale_size(range=c(2,6), limits=range(SAV)) +
theme(legend.position="right", panel.border=element_rect(fill=NA, colour = "black") , panel.background = element_rect(fill = "white"), axis.text=element_text(colour="black", size=15),axis.title=element_text(colour="black", size=16),legend.text=element_text(colour="black", size=13),legend.title=element_text(colour="black", size=14),axis.title.y=element_text(vjust=1.5),axis.title.x=element_text(vjust=0), plot.title = element_text(size = rel(1.5), hjust = 0.5), legend.key.height=unit(1.2, 'lines')) +coord_cartesian(ylim = c(-1,6))+
coord_cartesian(ylim = c(-1,6)) +
ylab("log(Chla)") + xlab("log(TP)") +
scale_colour_gradient2( mid="royalblue3",high="seagreen3", limits=c(0,1)) +
guides(colour = guide_colourbar(order = 1, label.vjust=1, title.vjust=.85, barwidth=unit(1, 'lines'), barheight=unit(7, 'lines')), shape = guide_legend(order = 2), size = guide_legend(order = 3) )
sessionInfo()
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1
##
## locale:
## [1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] loo_1.0.0 jagstools_1.6.0.9000 devtools_1.12.0 gridExtra_2.2.1 KernSmooth_2.23-15 ggplot2_2.2.1
## [7] mcmcplots_0.4.2 R2WinBUGS_2.1-21 boot_1.3-18 R2jags_0.5-7 rjags_4-6 coda_0.19-1
##
## loaded via a namespace (and not attached):
## [1] denstrip_1.5.3 Rcpp_0.12.9 git2r_0.18.0 plyr_1.8.4 tools_3.3.2 digest_0.6.12 evaluate_0.10
## [8] memoise_1.0.0 tibble_1.3.3 gtable_0.2.0 lattice_0.20-34 rlang_0.1.1 curl_2.3 yaml_2.1.14
## [15] parallel_3.3.2 withr_1.0.2 stringr_1.2.0 httr_1.2.1 knitr_1.15.1 rprojroot_1.2 grid_3.3.2
## [22] R6_2.2.0 rmarkdown_1.3 magrittr_1.5 matrixStats_0.51.0 backports_1.0.5 scales_0.4.1 htmltools_0.3.5
## [29] sfsmisc_1.1-0 abind_1.4-5 colorspace_1.3-2 labeling_0.3 stringi_1.1.2 lazyeval_0.2.0 munsell_0.4.3