# Code to fit model of age-0 walleye length vs degree days as function of invasion status (spiny water flea and zebra mussels) in Minnesota's large lakes as described in Hansen et al. Biological Invasions. This file runs model and generates figures. Model can be called directly here or separately in WalleyeModel.R and read in RDS output #G Hansen #november 22, 2019 require(mvtnorm) require(data.table) library(doBy) library(ggplot2) library(RColorBrewer) library(dplyr) library(reshape2) library(gridExtra) library(grid) #for plotting myPal=c("black", "#F8766D", "#00BFC4", "#C77CFF") mylabels=c("Uninvaded", expression(italic("Bythotrephes")),"Zebra mussels", "Both") #If needed, run Bayesian model in Jags (73.29 mins to run on computer cluster) ################################################# #run walleye model code directly source("WalleyeModel.R") ####################################### #alternatively, read in MCMC samples of posterior distributions of parameter estimates here ################################################### # Read in MCMC output mcmcOut <- readRDS('Bugsout_new_swf_zm_wae.rds') ##################################################### # Read in fish data datf <- fread('wae_length_data.csv') head(datf) # Number of lakes n.lake <- datf[,length(unique(Lake))] # Numeric lake ID datf[,lakeID:=as.numeric(as.factor(Lake))] # Housekeeping datf[, Treatment:=as.factor(Treatment)] datf[,ZM.status:=as.factor(ZM.status)] datf[,SWF.status:=as.factor(SWF.status)] # Dummy variables for invasion status (1 = invaded, 0 otherwise) datf$ZM <- ifelse(datf$ZM.status=="Invaded",1,0) datf$SWF <- ifelse(datf$SWF.status =="Invaded",1,0) datf[,range(Year)] length(min(datf$Year):max(datf$Year)) # Number of obs per year sorted by year samp.sizes <- datf[,.N, by=list(Lake,Year)][order(Lake,-Year)] # write.csv(samp.sizes, "samp.sizes.csv") lake.year <- paste(datf$Lake, datf$Year, sep="") length(unique(datf$lake.year)) head(datf) # Standardize DD DD_z <- as.numeric(scale(datf$DD)) n.lake.year <- length(unique(lake.year)) n.years <- length(unique(datf$Year)) lake <- datf$lakeID year <- as.numeric(as.factor(datf$Year)) lake.year = as.numeric(as.factor(lake.year)) J<-length(unique(datf$lake.year)) # Invasive covariate dummy variables swf <- summaryBy(SWF ~ lakeID*Year, data=datf, FUN=mean) zm <- summaryBy(ZM ~ lakeID*Year, data=datf, FUN=mean) z1 <- as.numeric(swf$SWF.mean) z2 <- as.numeric(zm$ZM.mean) n.lake <- length(unique(datf$Lake)) lake <- as.numeric(as.factor(datf$Lake)) ################ # Begin predict code # Number of desired MCMC samples used for prediction (use a subset of all samples) nsim <- 10000 # Chain length from analysis chainLength <- length(mcmcOut$mu.alpha) # Select thinned steps in chain for posterior predictions to ensure we take values from length of posterior ID = seq( 1 , chainLength , floor(chainLength/nsim) ) # length(ID) ########## PREDICT Length for SWF, ZM, Both, and noninvaded along a DD gradient mu.alpha <- mcmcOut$mu.alpha[ID] mu.beta <- mcmcOut$mu.beta[ID] gamma.alpha1 <- mcmcOut$gamma.alpha[ID,1] gamma.alpha2 <- mcmcOut$gamma.alpha[ID,2] gamma.beta1 <- mcmcOut$gamma.beta[ID,1] gamma.beta2 <- mcmcOut$gamma.beta[ID,2] mu.sigma <- mcmcOut$Mu.Sigma[ID] gamma.sigma1 <- mcmcOut$gamma.sigma[ID,1] gamma.sigma2 <- mcmcOut$gamma.sigma[ID,2] # Degree days DD <- seq(min(scale(datf$DD)), max(scale(datf$DD)), length=60) pred.lengths.status <- array(NA, c(length(ID), length(DD), 4)) dim(pred.lengths.status) for(i in 1:length(ID)){ # number of iterations for(j in 1:length(DD)){ # SWF predictions pred.lengths.status[i,j,1] <- rnorm(1, (mu.alpha[i] + gamma.alpha1[i]) + (mu.beta[i] + gamma.beta1) * DD[j], (mu.sigma[i] + gamma.sigma1[i])) # ZM predictions pred.lengths.status[i,j,2] <- rnorm(1, (mu.alpha[i] + gamma.alpha2[i]) + (mu.beta[i] + gamma.beta2[i]) * DD[j], (mu.sigma[i] + gamma.sigma2[i])) # ZM & SWF predictions pred.lengths.status[i,j,3] <- rnorm(1, (mu.alpha[i] + gamma.alpha1[i] + gamma.alpha2[i]) + (mu.beta[i] + gamma.beta1[i] + gamma.beta2[i]) * DD[j], (mu.sigma[i] + gamma.sigma1[i] + gamma.sigma2[i])) # Noninvaded pred.lengths.status[i,j,4] <- rnorm(1, mu.alpha[i] + mu.beta[i] * DD[j], mu.sigma[i]) } } str(pred.lengths.status) status.means <- apply(pred.lengths.status, c(2,3), mean) status.CIs <- apply(pred.lengths.status, c(2,3), quantile, c(0.025, 0.975)) ################# PLOT ############################ Ymin <- min(status.CIs) Ymax <- max(status.CIs) plot.dat <- data.frame(status.means,DD,status.CIs[1,,1],status.CIs[2,,1],status.CIs[1,,2],status.CIs[2,,2],status.CIs[1,,3],status.CIs[2,,3],status.CIs[1,,4],status.CIs[2,,4]) colnames(plot.dat) <- c("mean.swf", 'mean.zm', 'mean.both','mean.non',"DD","lower.swf",'upper.swf','lower.zm','upper.zm','lower.both','upper.both','lower.non','upper.non') #better data structure for first set of plots plot.dat2=data.frame(status.means) colnames(plot.dat2)=c("SWF", "ZM", "Both", "None") plot.dat2$DD=DD plot.dat.long=melt(plot.dat2, id.vars="DD", variable.name = "status", value.name = "predicted.length") lcis=data.frame(cbind(status.CIs[1,,1],status.CIs[1,,2],status.CIs[1,,3],status.CIs[1,,4])) colnames(lcis)=c("SWF", "ZM", "Both", "None") lcis=melt(lcis, variable.name="status",value.name = "LCI") ucis=data.frame(cbind(status.CIs[2,,1],status.CIs[2,,2],status.CIs[2,,3],status.CIs[2,,4])) colnames(ucis)=c("SWF", "ZM", "Both", "None") ucis=melt(ucis, variable.name="status",value.name = "UCI") plot.dat.long=data.frame(cbind(plot.dat.long, "LCI"=lcis[,2], "UCI"=ucis[,2])) plot.dat.long$status=factor(plot.dat.long$status, levels=c("None", "SWF", "ZM", "Both")) #put on regular DD scale plot.dat.long$DD_z=plot.dat.long$DD plot.dat.long$DD=(plot.dat.long$DD_z*sd(datf$DD))+mean(datf$DD) windows() panel1=ggplot(plot.dat.long, aes(x=DD, y=predicted.length, group=status)) + #geom_ribbon( aes(ymax=UCI, ymin=LCI, fill=status), alpha=.15) + geom_line(aes(colour=status, lty=status), lwd=1) + theme_bw()+ theme(panel.grid = element_blank(), axis.title.y = element_text(size=11), axis.title.x = element_blank(),axis.text = element_text(size=11))+ ylab('Predicted walleye length\n(mm)')+scale_colour_manual("", values=myPal,labels=mylabels)+scale_fill_manual("", values=myPal,labels=mylabels)+scale_linetype_manual("", values=c(1,2,3,1),labels=mylabels)+geom_text(x=330, y=145, label="A")+ theme(legend.text.align = 0,legend.background = element_blank(), legend.text=element_text(size=9), legend.key.size = unit(0.1, 'lines'),legend.position =c(.80,.35)) print(panel1) #ggsave("WAE_predicted_length_by_status_DD_withCI.png", height=4, width=4, units="in") ################################################################# ############################################################# ######### Probability of length > noninvaded str(pred.lengths.status) probs.lt.noninvaded <- array(NA, c(length(DD), 3)) for(j in 1:length(DD)){ probs.lt.noninvaded[j,1] <- mean(pred.lengths.status[,j,1] < pred.lengths.status[,j,4]) # prob that swf invaded lake < noninvaded probs.lt.noninvaded[j,2] <- mean(pred.lengths.status[,j,2] < pred.lengths.status[,j,4]) # prob that zm invaded lake < noninvaded probs.lt.noninvaded[j,3] <- mean(pred.lengths.status[,j,3] < pred.lengths.status[,j,4]) # prob that both invaded lake < noninvaded } plot.probs <- data.frame(probs.lt.noninvaded,DD) colnames(plot.probs) <- c("SWF","ZM","Both","DD_z") # Covert to long format plot.probs <- melt(plot.probs, id.vars=c("DD_z")) colnames(plot.probs) <- c("DD_z","status","pred") plot.probs$DD=(plot.probs$DD_z*sd(datf$DD))+mean(datf$DD) panel2=ggplot(plot.probs, aes(x = DD, y = pred, color = factor(status, labels = c("SWF","ZM", "Both")), lty = factor(status, labels = c("SWF","ZM", "Both")))) + geom_line(lwd=1) + labs(color = "") + xlab(expression(Degree~days~(degree~C%.%days))) + theme_bw()+ theme(panel.grid = element_blank(), axis.title = element_text(size=11), axis.text = element_text(size=11), legend.position = "none")+ ylab('Probability walleye is\nsmaller in invaded lake') + scale_colour_manual(values=myPal[-1],labels=mylabels[2:4])+scale_linetype_manual("", values=c(2,3,1),labels=mylabels[2:4])+geom_text(x=330, y=.98, label="C", colour="black") print(panel2) #ggsave("WAE_probability_smaller_than_uninvaded_DD.png", height=4, width=4, units="in") #differences str(pred.lengths.status) diffs.lt.noninvaded <- array(NA, c(length(DD), 3)) for(j in 1:length(DD)){ diffs.lt.noninvaded[j,1] <- mean(pred.lengths.status[,j,1] - pred.lengths.status[,j,4]) # prob that swf invaded lake < noninvaded diffs.lt.noninvaded[j,2] <- mean(pred.lengths.status[,j,2] - pred.lengths.status[,j,4]) # prob that zm invaded lake < noninvaded diffs.lt.noninvaded[j,3] <- mean(pred.lengths.status[,j,3] - pred.lengths.status[,j,4]) # prob that both invaded lake < noninvaded } plot.diffs <- data.frame(diffs.lt.noninvaded,DD) colnames(plot.diffs) <- c("SWF","ZM","Both","DD") # Covert to long format plot.diffs <- melt(plot.diffs, id.vars=c("DD")) colnames(plot.diffs) <- c("DD","status","pred") #upper CI diffs.lt.noninvaded.ci <- array(NA, c(length(DD), 3)) for(j in 1:length(DD)){ diffs.lt.noninvaded.ci[j,1] <- quantile(pred.lengths.status[,j,1] - pred.lengths.status[,j,4], c(0.95)) # prob that swf invaded lake < noninvaded diffs.lt.noninvaded.ci[j,2] <- quantile(pred.lengths.status[,j,2] - pred.lengths.status[,j,4], c(0.95)) # prob that zm invaded lake < noninvaded diffs.lt.noninvaded.ci[j,3] <- quantile(pred.lengths.status[,j,3] - pred.lengths.status[,j,4], c(0.95) )# prob that both invaded lake < noninvaded } plot.diffs.ci <- data.frame(diffs.lt.noninvaded.ci,DD) colnames(plot.diffs.ci) <- c("SWF","ZM","Both","DD") # Covert to long format plot.diffs.ci <- melt(plot.diffs.ci, id.vars=c("DD")) colnames(plot.diffs.ci) <- c("DD","status","UCI") #lower CI diffs.lt.noninvaded.ci <- array(NA, c(length(DD), 3)) for(j in 1:length(DD)){ diffs.lt.noninvaded.ci[j,1] <- quantile(pred.lengths.status[,j,1] - pred.lengths.status[,j,4], c(0.025)) # prob that swf invaded lake < noninvaded diffs.lt.noninvaded.ci[j,2] <- quantile(pred.lengths.status[,j,2] - pred.lengths.status[,j,4], c(0.025)) # prob that zm invaded lake < noninvaded diffs.lt.noninvaded.ci[j,3] <- quantile(pred.lengths.status[,j,3] - pred.lengths.status[,j,4], c(0.025) )# prob that both invaded lake < noninvaded } plot.diffs2 <- data.frame(diffs.lt.noninvaded.ci,DD) colnames(plot.diffs2) <- c("SWF","ZM","Both","DD") # Covert to long format plot.diffs2 <- melt(plot.diffs2, id.vars=c("DD")) colnames(plot.diffs2) <- c("DD","status","LCI") plot.diffs.ci=merge(plot.diffs.ci, plot.diffs2, by=c("DD", "status")) #add real DD plot.diffs.ci$DD_z=plot.diffs.ci$DD plot.diffs.ci$DD=(plot.diffs.ci$DD_z*sd(datf$DD))+mean(datf$DD) plot.diffs$DD_z=plot.diffs$DD plot.diffs$DD=(plot.diffs$DD_z*sd(datf$DD))+mean(datf$DD) plot.diffs=merge(plot.diffs, plot.diffs.ci) ############## #original scale DD windows() panel3=ggplot(plot.diffs, aes(x = DD, y = pred, color = factor(status, labels = mylabels[2:4]), lty = factor(status, labels = mylabels[2:4]))) + # geom_ribbon( aes(ymax=UCI, ymin=LCI, fill = factor(status, labels = mylabels[2:4])), alpha=.15) + geom_line(lwd=1) + labs(color = "") + theme_bw()+ theme(panel.grid = element_blank(), axis.title.y = element_text(size=11), axis.title.x = element_blank(),axis.text = element_text(size=11), legend.position = "none")+ ylab('Length difference from\nuninvaded lake (mm)') + scale_colour_manual(values=myPal[-1])+geom_hline(yintercept=0, colour="grey20", lty=2)+scale_linetype_manual("", values=c(2,3,1))+geom_text(x=330, y=2.5, label="B", colour="black") print(panel3) #ggsave("WAE_length_difference_from_uninvaded_DD_withCI.png", height=4, width=4, units="in") ######################################################## #multipanel plot MatrixPlot<-arrangeGrob(panel1,panel3,panel2, ncol=1) grid.arrange(MatrixPlot) ggsave("Fig3_WAE_predicted_lengths_DD_multipanel.png", MatrixPlot,height=6, width=4, units="in") ########################################################## #median DD value across all lake years is 1400 #standardize end.dd=round((1400-mean(datf$DD))/sd(datf$DD),1) end.lengths=subset(plot.dat, round(DD,1)==end.dd) end.lengths.plot=data.frame("mean.length"=melt(end.lengths[1,c(1:4)]), status=c("SWF", "ZM", "Both", "None"), "UCI"=melt(end.lengths[1,c(7,9,11,13)]), "LCI"=melt(end.lengths[1,c(6,8,10,12)])) end.lengths.plot$status=factor(end.lengths.plot$status, levels=c("None", "SWF", "ZM", "Both")) grob1 <- grobTree(textGrob("A", x=0.02, y=0.95, hjust=0, gp=gpar(col="black", fontsize=10))) panel4=ggplot(end.lengths.plot)+geom_pointrange(aes(color = status, x=status, y=mean.length.value, ymax=UCI.value, ymin=LCI.value))+ xlab('Status') + theme_bw()+ theme(panel.grid = element_blank(), axis.title = element_text(size=11), axis.text = element_text(size=10), legend.position = c(.15, .18))+ylab('End of season length (mm)') + scale_colour_manual(values=myPal)+guides(colour=FALSE)+ annotation_custom(grob1)+scale_x_discrete(labels=c("Uninvaded", expression(italic("Bytho.")),"Zebra mussels", "Both")) #ggsave("WAE_length__end_of_season.png", height=4, width=4, units="in") #end of season length differences end.length=subset(plot.diffs, round(DD_z,1)==end.dd) grob <- grobTree(textGrob("B", x=0.02, y=0.95, hjust=0, gp=gpar(col="black", fontsize=10))) # Plot panel5=ggplot(end.length)+geom_pointrange(aes(color = status, x=status, y=pred, ymax=UCI, ymin=LCI))+ xlab('Status') + theme_bw()+ theme(panel.grid = element_blank(), axis.title = element_text(size=11), axis.text = element_text(size=10), legend.position = c(.15, .18))+ylab('Difference from uninvaded\nend of season length (mm)') + scale_colour_manual(values=myPal[-1])+geom_hline(yintercept=0, colour="grey20", lty=2)+guides(colour=FALSE)+ annotation_custom(grob)+scale_x_discrete(labels=c( expression(italic("Bytho.")),"Zebra mussels", "Both"))+scale_y_continuous(lim=c(-53,3)) #ggsave("WAE_length_difference_from_uninvaded_end_of_season.png", height=4, width=4, units="in") ######################################################## #multipanel plot MatrixPlot<-arrangeGrob(panel4,panel5, ncol=2) grid.arrange(MatrixPlot) ggsave("Fig4_WAE_predicted_end_of_season_multipanel.png", MatrixPlot,height=3, width=7, units="in") ##########################################################