# Code to fit model of age-0 yellow perch length vs degree days as function of invasion status (spiny water flea and zebra mussels) in Minnesota's large lakes as described in Hansen et al. Biological Invasions. This file runs model and generates figures. Model can be called directly here or separately in PerchModel.R and read in RDS output #G Hansen #november 22, 2019 require(mvtnorm) require(data.table) library(doBy) library(ggplot2) library(RColorBrewer) library(dplyr) library(reshape2) library(gridExtra) library(grid) #for plotting myPal=c("black", "#F8766D", "#00BFC4", "#C77CFF") mylabels=c("Uninvaded", expression(italic("Bythotrephes")),"Zebra mussels", "Both") #If needed, run Bayesian model in Jags (287.78 mins to run on computer cluster) ################################################# #run yellow perch model code directly source("PerchModel.R") ####################################### #alternatively, read in MCMC samples of posterior distributions of parameter estimates here ################################################### # Read in MCMC output mcmcOut <- readRDS('Bugsout_new_swf_zm_yep.rds') ############################################## # Read in fish data datf <- fread('yep_length_data.csv') head(datf) # Number of lakes n.lake <- datf[,length(unique(Lake))] # Numeric lake ID datf[,lakeID:=as.numeric(as.factor(Lake))] # Housekeeping datf[, Treatment:=as.factor(Treatment)] datf[,ZM.status:=as.factor(ZM.status)] datf[,SWF.status:=as.factor(SWF.status)] # Dummy variables for invasion status (1 = invaded, 0 otherwise) datf$ZM <- ifelse(datf$ZM.status=="Invaded",1,0) datf$SWF <- ifelse(datf$SWF.status =="Invaded",1,0) datf[,range(Year)] length(min(datf$Year):max(datf$Year)) # Number of obs per year sorted by year samp.sizes <- datf[,.N, by=list(Lake,Year)][order(Lake,-Year)] # write.csv(samp.sizes, "samp.sizes.csv") lake.year <- paste(datf$Lake, datf$Year, sep="") length(unique(datf$lake.year)) head(datf) # Standardize DD DD_z <- as.numeric(scale(datf$DD)) n.lake.year <- length(unique(lake.year)) n.years <- length(unique(datf$Year)) lake <- datf$lakeID year <- as.numeric(as.factor(datf$Year)) lake.year = as.numeric(as.factor(lake.year)) J<-length(unique(datf$lake.year)) # Invasive covariate dummy variables swf <- summaryBy(SWF ~ lakeID*Year, data=datf, FUN=mean) zm <- summaryBy(ZM ~ lakeID*Year, data=datf, FUN=mean) z1 <- as.numeric(swf$SWF.mean) z2 <- as.numeric(zm$ZM.mean) n.lake <- length(unique(datf$Lake)) lake <- as.numeric(as.factor(datf$Lake)) ################ # Begin predict code # Number of desired MCMC samples used for prediction (use a subset of all samples) nsim <- 10000 # Chain length from analysis chainLength <- length(mcmcOut$mu.alpha) # Select thinned steps in chain for posterior predictions to ensure we take values from length of posterior ID = seq( 1 , chainLength , floor(chainLength/nsim) ) # length(ID) ########## PREDICT Length for SWF, ZM, Both, and noninvaded along a DD gradient mu.alpha <- mcmcOut$mu.alpha[ID] mu.beta <- mcmcOut$mu.beta[ID] gamma.alpha1 <- mcmcOut$gamma.alpha[ID,1] gamma.alpha2 <- mcmcOut$gamma.alpha[ID,2] gamma.beta1 <- mcmcOut$gamma.beta[ID,1] gamma.beta2 <- mcmcOut$gamma.beta[ID,2] mu.sigma <- mcmcOut$Mu.Sigma[ID] gamma.sigma1 <- mcmcOut$gamma.sigma[ID,1] gamma.sigma2 <- mcmcOut$gamma.sigma[ID,2] # Degree days DD <- seq(min(scale(datf$DD)), max(scale(datf$DD)), length=60) pred.lengths.status <- array(NA, c(length(ID), length(DD), 4)) dim(pred.lengths.status) for(i in 1:length(ID)){ # number of iterations for(j in 1:length(DD)){ # SWF predictions pred.lengths.status[i,j,1] <- rnorm(1, (mu.alpha[i] + gamma.alpha1[i]) + (mu.beta[i] + gamma.beta1) * DD[j], (mu.sigma[i] + gamma.sigma1[i])) # ZM predictions pred.lengths.status[i,j,2] <- rnorm(1, (mu.alpha[i] + gamma.alpha2[i]) + (mu.beta[i] + gamma.beta2[i]) * DD[j], (mu.sigma[i] + gamma.sigma2[i])) # ZM & SWF predictions pred.lengths.status[i,j,3] <- rnorm(1, (mu.alpha[i] + gamma.alpha1[i] + gamma.alpha2[i]) + (mu.beta[i] + gamma.beta1[i] + gamma.beta2[i]) * DD[j], (mu.sigma[i] + gamma.sigma1[i] + gamma.sigma2[i])) # Noninvaded pred.lengths.status[i,j,4] <- rnorm(1, mu.alpha[i] + mu.beta[i] * DD[j], mu.sigma[i]) } } str(pred.lengths.status) status.means <- apply(pred.lengths.status, c(2,3), mean) status.CIs <- apply(pred.lengths.status, c(2,3), quantile, c(0.025, 0.975)) ################# PLOT ############################ Ymin <- min(status.CIs) Ymax <- max(status.CIs) plot.dat <- data.frame(status.means,DD,status.CIs[1,,1],status.CIs[2,,1],status.CIs[1,,2],status.CIs[2,,2],status.CIs[1,,3],status.CIs[2,,3],status.CIs[1,,4],status.CIs[2,,4]) colnames(plot.dat) <- c("mean.swf", 'mean.zm', 'mean.both','mean.non',"DD","lower.swf",'upper.swf','lower.zm','upper.zm','lower.both','upper.both','lower.non','upper.non') #better data structure for plotting predicted lengths plot.dat2=data.frame(status.means) colnames(plot.dat2)=c("SWF", "ZM", "Both", "None") plot.dat2$DD=DD plot.dat.long=melt(plot.dat2, id.vars="DD", variable.name = "status", value.name = "predicted.length") lcis=data.frame(cbind(status.CIs[1,,1],status.CIs[1,,2],status.CIs[1,,3],status.CIs[1,,4])) colnames(lcis)=c("SWF", "ZM", "Both", "None") lcis=melt(lcis, variable.name="status",value.name = "LCI") ucis=data.frame(cbind(status.CIs[2,,1],status.CIs[2,,2],status.CIs[2,,3],status.CIs[2,,4])) colnames(ucis)=c("SWF", "ZM", "Both", "None") ucis=melt(ucis, variable.name="status",value.name = "UCI") plot.dat.long=data.frame(cbind(plot.dat.long, "LCI"=lcis[,2], "UCI"=ucis[,2])) plot.dat.long$status=factor(plot.dat.long$status, levels=c("None", "SWF", "ZM", "Both")) #put on regular DD scale plot.dat.long$DD_z=plot.dat.long$DD plot.dat.long$DD=(plot.dat.long$DD_z*sd(datf$DD))+mean(datf$DD) panel1=ggplot(plot.dat.long, aes(x=DD, y=predicted.length, group=status)) + #geom_ribbon( aes(ymax=UCI, ymin=LCI, fill=status), alpha=.15) + geom_line(aes(colour=status, lty=status), lwd=1) + theme_bw()+ theme(panel.grid = element_blank(), axis.title.y = element_text(size=11), axis.title.x = element_blank(),axis.text = element_text(size=11))+ ylab('Predicted yellow perch\nlength (mm)')+scale_colour_manual("", values=myPal,labels=mylabels)+scale_fill_manual("", values=myPal,labels=mylabels)+scale_linetype_manual("", values=c(1,2,3,1),labels=mylabels)+geom_text(x=380, y=71, label="A")+ theme(legend.text.align = 0,legend.background = element_blank(), legend.text=element_text(size=9), legend.key.size = unit(0.1, 'lines'),legend.position =c(.80,.35)) print(panel1) #ggsave("YEP_predicted_length_by_status_DD.png", height=4, width=4, units="in") ################################################################# ############################################################# ######### Probability of length > noninvaded str(pred.lengths.status) probs.lt.noninvaded <- array(NA, c(length(DD), 3)) for(j in 1:length(DD)){ probs.lt.noninvaded[j,1] <- mean(pred.lengths.status[,j,1] < pred.lengths.status[,j,4]) # prob that swf invaded lake < noninvaded probs.lt.noninvaded[j,2] <- mean(pred.lengths.status[,j,2] < pred.lengths.status[,j,4]) # prob that zm invaded lake < noninvaded probs.lt.noninvaded[j,3] <- mean(pred.lengths.status[,j,3] < pred.lengths.status[,j,4]) # prob that both invaded lake < noninvaded } plot.probs <- data.frame(probs.lt.noninvaded,DD) colnames(plot.probs) <- c("SWF","ZM","Both","DD_z") # Covert to long format plot.probs <- melt(plot.probs, id.vars=c("DD_z")) colnames(plot.probs) <- c("DD_z","status","pred") plot.probs$DD=(plot.probs$DD_z*sd(datf$DD))+mean(datf$DD) panel2=ggplot(plot.probs, aes(x = DD, y = pred, color = factor(status, labels = c("SWF","ZM", "Both")), lty = factor(status, labels = c("SWF","ZM", "Both")))) + geom_line(lwd=1) + labs(color = "") + xlab(expression(Degree~days~(degree~C%.%days))) + theme_bw()+ theme(panel.grid = element_blank(), axis.title = element_text(size=11), axis.text = element_text(size=11), legend.position = "none")+ ylab('Probability yellow perch are\nsmaller in invaded lake') + scale_colour_manual(values=myPal[-1],labels=mylabels[2:4])+scale_linetype_manual("", values=c(2,3,1),labels=mylabels[2:4])+geom_text(x=380, y=.8, label="C", colour="black") print(panel2) #ggsave("YEP_probability_smaller_than_uninvaded_DD.png", height=4, width=4, units="in") #differences str(pred.lengths.status) diffs.lt.noninvaded <- array(NA, c(length(DD), 3)) for(j in 1:length(DD)){ diffs.lt.noninvaded[j,1] <- mean(pred.lengths.status[,j,1] - pred.lengths.status[,j,4]) # prob that swf invaded lake < noninvaded diffs.lt.noninvaded[j,2] <- mean(pred.lengths.status[,j,2] - pred.lengths.status[,j,4]) # prob that zm invaded lake < noninvaded diffs.lt.noninvaded[j,3] <- mean(pred.lengths.status[,j,3] - pred.lengths.status[,j,4]) # prob that both invaded lake < noninvaded } plot.diffs <- data.frame(diffs.lt.noninvaded,DD) colnames(plot.diffs) <- c("SWF","ZM","Both","DD") # Covert to long format plot.diffs <- melt(plot.diffs, id.vars=c("DD")) colnames(plot.diffs) <- c("DD","status","pred") #upper CI diffs.lt.noninvaded.ci <- array(NA, c(length(DD), 3)) for(j in 1:length(DD)){ diffs.lt.noninvaded.ci[j,1] <- quantile(pred.lengths.status[,j,1] - pred.lengths.status[,j,4], c(0.95)) # prob that swf invaded lake < noninvaded diffs.lt.noninvaded.ci[j,2] <- quantile(pred.lengths.status[,j,2] - pred.lengths.status[,j,4], c(0.95)) # prob that zm invaded lake < noninvaded diffs.lt.noninvaded.ci[j,3] <- quantile(pred.lengths.status[,j,3] - pred.lengths.status[,j,4], c(0.95) )# prob that both invaded lake < noninvaded } plot.diffs.ci <- data.frame(diffs.lt.noninvaded.ci,DD) colnames(plot.diffs.ci) <- c("SWF","ZM","Both","DD") # Covert to long format plot.diffs.ci <- melt(plot.diffs.ci, id.vars=c("DD")) colnames(plot.diffs.ci) <- c("DD","status","UCI") #lower CI diffs.lt.noninvaded.ci <- array(NA, c(length(DD), 3)) for(j in 1:length(DD)){ diffs.lt.noninvaded.ci[j,1] <- quantile(pred.lengths.status[,j,1] - pred.lengths.status[,j,4], c(0.025)) # prob that swf invaded lake < noninvaded diffs.lt.noninvaded.ci[j,2] <- quantile(pred.lengths.status[,j,2] - pred.lengths.status[,j,4], c(0.025)) # prob that zm invaded lake < noninvaded diffs.lt.noninvaded.ci[j,3] <- quantile(pred.lengths.status[,j,3] - pred.lengths.status[,j,4], c(0.025) )# prob that both invaded lake < noninvaded } plot.diffs2 <- data.frame(diffs.lt.noninvaded.ci,DD) colnames(plot.diffs2) <- c("SWF","ZM","Both","DD") # Covert to long format plot.diffs2 <- melt(plot.diffs2, id.vars=c("DD")) colnames(plot.diffs2) <- c("DD","status","LCI") plot.diffs.ci=merge(plot.diffs.ci, plot.diffs2, by=c("DD", "status")) #add real DD plot.diffs.ci$DD_z=plot.diffs.ci$DD plot.diffs.ci$DD=(plot.diffs.ci$DD_z*sd(datf$DD))+mean(datf$DD) plot.diffs$DD_z=plot.diffs$DD plot.diffs$DD=(plot.diffs$DD_z*sd(datf$DD))+mean(datf$DD) ############## #original scale DD panel3=ggplot(plot.diffs, aes(x = DD, y = pred, color = factor(status, labels = mylabels[2:4]), lty = factor(status, labels = mylabels[2:4]))) + geom_line(lwd=1) + labs(color = "") + theme_bw()+ theme(panel.grid = element_blank(), axis.title.y = element_text(size=11), axis.title.x = element_blank(),axis.text = element_text(size=11), legend.position = "none")+ ylab('Length difference from\nuninvaded lake (mm)') + scale_colour_manual(values=myPal[-1])+geom_hline(yintercept=0, colour="grey20", lty=2)+scale_linetype_manual("", values=c(2,3,1))+geom_text(x=350, y=2.7, label="B", colour="black") print(panel3) #ggsave("YEP_length_difference_from_uninvaded_DD.png", height=4, width=4, units="in") ######################################################## #multipanel plot MatrixPlot<-arrangeGrob(panel1,panel3,panel2, ncol=1) grid.arrange(MatrixPlot) ggsave("Fig5_YEP_predicted_lengths_DD_multipanel.png", MatrixPlot,height=6, width=4, units="in") ########################################################## #median DD value across all lake years is 1400 #standardize end.dd=round((1400-mean(datf$DD))/sd(datf$DD),1) end.lengths=subset(plot.dat, round(DD,1)==end.dd) end.lengths.plot=data.frame("mean.length"=melt(end.lengths[1,c(1:4)]), status=c("SWF", "ZM", "Both", "None"), "UCI"=melt(end.lengths[1,c(7,9,11,13)]), "LCI"=melt(end.lengths[1,c(6,8,10,12)])) end.lengths.plot$status=factor(end.lengths.plot$status, levels=c("None", "SWF", "ZM", "Both")) grob1 <- grobTree(textGrob("A", x=0.02, y=0.95, hjust=0, gp=gpar(col="black", fontsize=10))) panel4=ggplot(end.lengths.plot)+geom_pointrange(aes(color = status, x=status, y=mean.length.value, ymax=UCI.value, ymin=LCI.value))+ xlab('Status') + theme_bw()+ theme(panel.grid = element_blank(), axis.title = element_text(size=11), axis.text = element_text(size=10), legend.position = c(.15, .18))+ylab('End of season length (mm)') + scale_colour_manual(values=myPal)+guides(colour=FALSE)+ annotation_custom(grob1)+scale_x_discrete(labels=c("Uninvaded", expression(italic("Bytho.")),"Zebra mussels", "Both")) #ggsave("YEP_length__end_of_season.png", height=4, width=4, units="in") #end of season length differences end.length=subset(plot.diffs, round(DD_z,1)==end.dd) end.length.ci=subset(plot.diffs.ci, round(DD_z,1)==end.dd) end.length=merge(end.length, end.length.ci, by=c("DD", "status")) grob <- grobTree(textGrob("B", x=0.02, y=0.95, hjust=0, gp=gpar(col="black", fontsize=10))) # Plot panel5=ggplot(end.length)+geom_pointrange(aes(color = status, x=status, y=pred, ymax=UCI, ymin=LCI))+ xlab('Status') + theme_bw()+ theme(panel.grid = element_blank(), axis.title = element_text(size=11), axis.text = element_text(size=10), legend.position = c(.15, .18))+ylab('Difference from uninvaded\nend of season length (mm)') + scale_colour_manual(values=myPal[-1])+geom_hline(yintercept=0, colour="grey20", lty=2)+guides(colour=FALSE)+ annotation_custom(grob)+scale_x_discrete(labels=c( expression(italic("Bytho.")),"Zebra mussels", "Both")) ggsave("YEP_length_difference_from_uninvaded_end_of_season.png", height=4, width=4, units="in") ######################################################## #multipanel plot MatrixPlot<-arrangeGrob(panel4,panel5, ncol=2) grid.arrange(MatrixPlot) ggsave("Fig6_YEP_predicted_end_of_season_multipanel.png", MatrixPlot,height=3, width=7, units="in") ##########################################################