# Code to fit model of age-0 walleye length vs degree days as function of invasion status (spiny water flea and zebra mussels) in Minnesota's large lakes as described in Hansen et al. Biological Invasions. This file runs model and generates figures. Model can be called directly here or separately in WalleyeModel.R and read in RDS output 
#G Hansen
#november 22, 2019


require(mvtnorm)
require(data.table)
library(doBy)
library(ggplot2)
library(RColorBrewer)
library(dplyr)
library(reshape2)
library(gridExtra)
library(grid)
#for plotting
myPal=c("black", "#F8766D", "#00BFC4", "#C77CFF")
mylabels=c("Uninvaded",  expression(italic("Bythotrephes")),"Zebra mussels", "Both")


#If needed, run Bayesian model in Jags (73.29 mins to run on computer cluster)
#################################################
#run walleye model code directly 
source("WalleyeModel.R")
#######################################


#alternatively, read in MCMC samples of posterior distributions of parameter estimates here 
###################################################
# Read in MCMC output
mcmcOut <- readRDS('Bugsout_new_swf_zm_wae.rds')
#####################################################

# Read in fish data
datf <- fread('wae_length_data.csv')
head(datf)
# Number of lakes
n.lake <- datf[,length(unique(Lake))]

# Numeric lake ID
datf[,lakeID:=as.numeric(as.factor(Lake))]

# Housekeeping
datf[, Treatment:=as.factor(Treatment)]
datf[,ZM.status:=as.factor(ZM.status)]
datf[,SWF.status:=as.factor(SWF.status)]

# Dummy variables for invasion status (1 = invaded, 0 otherwise)
datf$ZM <- ifelse(datf$ZM.status=="Invaded",1,0)
datf$SWF <- ifelse(datf$SWF.status =="Invaded",1,0)

datf[,range(Year)]
length(min(datf$Year):max(datf$Year))

# Number of obs per year sorted by year
samp.sizes <- datf[,.N, by=list(Lake,Year)][order(Lake,-Year)]
# write.csv(samp.sizes, "samp.sizes.csv")

lake.year <- paste(datf$Lake, datf$Year, sep="")
length(unique(datf$lake.year))
head(datf)

# Standardize DD
DD_z <- as.numeric(scale(datf$DD))
n.lake.year <- length(unique(lake.year))
n.years <- length(unique(datf$Year))
lake <- datf$lakeID
year <- as.numeric(as.factor(datf$Year))

lake.year = as.numeric(as.factor(lake.year))
J<-length(unique(datf$lake.year))

# Invasive covariate dummy variables
swf <- summaryBy(SWF ~ lakeID*Year, data=datf, FUN=mean) 
zm <- summaryBy(ZM ~ lakeID*Year, data=datf, FUN=mean) 

z1 <- as.numeric(swf$SWF.mean)
z2 <- as.numeric(zm$ZM.mean)

n.lake <- length(unique(datf$Lake))
lake <- as.numeric(as.factor(datf$Lake))

################
# Begin predict code
# Number of desired MCMC samples used for prediction (use a subset of all samples)
nsim <- 10000
# Chain length from analysis
chainLength <- length(mcmcOut$mu.alpha)
# Select thinned steps in chain for posterior predictions to ensure we take values from length of posterior
ID = seq( 1 , chainLength , floor(chainLength/nsim) )
# length(ID)


########## PREDICT Length for SWF, ZM, Both, and noninvaded along a DD gradient
mu.alpha <- mcmcOut$mu.alpha[ID]
mu.beta <- mcmcOut$mu.beta[ID]
gamma.alpha1 <- mcmcOut$gamma.alpha[ID,1]
gamma.alpha2 <- mcmcOut$gamma.alpha[ID,2]
gamma.beta1 <- mcmcOut$gamma.beta[ID,1]
gamma.beta2 <- mcmcOut$gamma.beta[ID,2]
mu.sigma <- mcmcOut$Mu.Sigma[ID]
gamma.sigma1 <- mcmcOut$gamma.sigma[ID,1]
gamma.sigma2 <- mcmcOut$gamma.sigma[ID,2]

# Degree days
DD <- seq(min(scale(datf$DD)), max(scale(datf$DD)), length=60)
pred.lengths.status <- array(NA, c(length(ID), length(DD), 4))
dim(pred.lengths.status)

for(i in 1:length(ID)){ # number of iterations
  for(j in 1:length(DD)){
    # SWF predictions
      pred.lengths.status[i,j,1] <- rnorm(1, (mu.alpha[i] + gamma.alpha1[i]) + (mu.beta[i] + gamma.beta1) * DD[j], (mu.sigma[i] + gamma.sigma1[i]))
    # ZM predictions  
      pred.lengths.status[i,j,2] <- rnorm(1, (mu.alpha[i] + gamma.alpha2[i]) + (mu.beta[i] + gamma.beta2[i]) * DD[j], (mu.sigma[i] + gamma.sigma2[i]))
    # ZM & SWF predictions  
      pred.lengths.status[i,j,3] <- rnorm(1, (mu.alpha[i] + gamma.alpha1[i] + gamma.alpha2[i]) + (mu.beta[i] + gamma.beta1[i] + gamma.beta2[i]) * DD[j], (mu.sigma[i] + gamma.sigma1[i] + gamma.sigma2[i]))
    # Noninvaded
      pred.lengths.status[i,j,4] <- rnorm(1, mu.alpha[i] + mu.beta[i] * DD[j], mu.sigma[i])
  }
}

str(pred.lengths.status)

status.means <- apply(pred.lengths.status, c(2,3), mean)
status.CIs <- apply(pred.lengths.status, c(2,3), quantile, c(0.025, 0.975))

################# PLOT ############################
Ymin <- min(status.CIs)
Ymax <- max(status.CIs)

plot.dat <- data.frame(status.means,DD,status.CIs[1,,1],status.CIs[2,,1],status.CIs[1,,2],status.CIs[2,,2],status.CIs[1,,3],status.CIs[2,,3],status.CIs[1,,4],status.CIs[2,,4])
colnames(plot.dat) <- c("mean.swf", 'mean.zm', 'mean.both','mean.non',"DD","lower.swf",'upper.swf','lower.zm','upper.zm','lower.both','upper.both','lower.non','upper.non')


#better data structure for first set of plots
plot.dat2=data.frame(status.means)
colnames(plot.dat2)=c("SWF", "ZM", "Both", "None")
plot.dat2$DD=DD

plot.dat.long=melt(plot.dat2, id.vars="DD", variable.name = "status", value.name = "predicted.length")

lcis=data.frame(cbind(status.CIs[1,,1],status.CIs[1,,2],status.CIs[1,,3],status.CIs[1,,4]))
colnames(lcis)=c("SWF", "ZM", "Both", "None")
 lcis=melt(lcis, variable.name="status",value.name = "LCI")

 ucis=data.frame(cbind(status.CIs[2,,1],status.CIs[2,,2],status.CIs[2,,3],status.CIs[2,,4]))
 colnames(ucis)=c("SWF", "ZM", "Both", "None")
 ucis=melt(ucis, variable.name="status",value.name = "UCI")
 
plot.dat.long=data.frame(cbind(plot.dat.long, "LCI"=lcis[,2], "UCI"=ucis[,2]))
plot.dat.long$status=factor(plot.dat.long$status, levels=c("None", "SWF", "ZM", "Both")) 
 
 #put on regular DD scale
plot.dat.long$DD_z=plot.dat.long$DD
plot.dat.long$DD=(plot.dat.long$DD_z*sd(datf$DD))+mean(datf$DD)

windows()
panel1=ggplot(plot.dat.long, aes(x=DD, y=predicted.length, group=status)) + 
  #geom_ribbon( aes(ymax=UCI, ymin=LCI, fill=status),  alpha=.15) +
  geom_line(aes(colour=status, lty=status), lwd=1) + theme_bw()+ theme(panel.grid = element_blank(), axis.title.y = element_text(size=11), axis.title.x = element_blank(),axis.text = element_text(size=11))+  ylab('Predicted walleye length\n(mm)')+scale_colour_manual("", values=myPal,labels=mylabels)+scale_fill_manual("", values=myPal,labels=mylabels)+scale_linetype_manual("", values=c(1,2,3,1),labels=mylabels)+geom_text(x=330, y=145, label="A")+ theme(legend.text.align = 0,legend.background = element_blank(), legend.text=element_text(size=9), legend.key.size = unit(0.1, 'lines'),legend.position =c(.80,.35))
print(panel1)

#ggsave("WAE_predicted_length_by_status_DD_withCI.png", height=4, width=4, units="in")


 
 #################################################################
 #############################################################
 
######### Probability of length > noninvaded
str(pred.lengths.status)
probs.lt.noninvaded <- array(NA, c(length(DD), 3))
  for(j in 1:length(DD)){
    probs.lt.noninvaded[j,1] <- mean(pred.lengths.status[,j,1] < pred.lengths.status[,j,4]) # prob that swf invaded lake < noninvaded
    probs.lt.noninvaded[j,2] <- mean(pred.lengths.status[,j,2] < pred.lengths.status[,j,4]) # prob that zm invaded lake < noninvaded
    probs.lt.noninvaded[j,3] <- mean(pred.lengths.status[,j,3] < pred.lengths.status[,j,4]) # prob that both invaded lake < noninvaded
}

plot.probs <- data.frame(probs.lt.noninvaded,DD)
colnames(plot.probs) <- c("SWF","ZM","Both","DD_z")


# Covert to long format
plot.probs <- melt(plot.probs, id.vars=c("DD_z"))
colnames(plot.probs) <- c("DD_z","status","pred")
plot.probs$DD=(plot.probs$DD_z*sd(datf$DD))+mean(datf$DD)



panel2=ggplot(plot.probs, aes(x = DD, y = pred, color = factor(status, labels = c("SWF","ZM", "Both")), lty = factor(status, labels = c("SWF","ZM", "Both")))) + 
  geom_line(lwd=1) + labs(color = "") + xlab(expression(Degree~days~(degree~C%.%days))) + theme_bw()+ theme(panel.grid = element_blank(), axis.title = element_text(size=11), axis.text = element_text(size=11), legend.position = "none")+
  ylab('Probability walleye is\nsmaller in invaded lake') + scale_colour_manual(values=myPal[-1],labels=mylabels[2:4])+scale_linetype_manual("", values=c(2,3,1),labels=mylabels[2:4])+geom_text(x=330, y=.98, label="C", colour="black")
print(panel2)
#ggsave("WAE_probability_smaller_than_uninvaded_DD.png", height=4, width=4, units="in")




#differences

str(pred.lengths.status)
diffs.lt.noninvaded <- array(NA, c(length(DD), 3))
for(j in 1:length(DD)){
  diffs.lt.noninvaded[j,1] <- mean(pred.lengths.status[,j,1] - pred.lengths.status[,j,4]) # prob that swf invaded lake < noninvaded
  diffs.lt.noninvaded[j,2] <- mean(pred.lengths.status[,j,2] - pred.lengths.status[,j,4]) # prob that zm invaded lake < noninvaded
  diffs.lt.noninvaded[j,3] <- mean(pred.lengths.status[,j,3] - pred.lengths.status[,j,4]) # prob that both invaded lake < noninvaded
}

plot.diffs <- data.frame(diffs.lt.noninvaded,DD)
colnames(plot.diffs) <- c("SWF","ZM","Both","DD")

# Covert to long format
plot.diffs <- melt(plot.diffs, id.vars=c("DD"))
colnames(plot.diffs) <- c("DD","status","pred")

#upper CI

diffs.lt.noninvaded.ci <- array(NA, c(length(DD), 3))
for(j in 1:length(DD)){
  diffs.lt.noninvaded.ci[j,1] <- quantile(pred.lengths.status[,j,1] - pred.lengths.status[,j,4], c(0.95)) # prob that swf invaded lake < noninvaded
  diffs.lt.noninvaded.ci[j,2] <- quantile(pred.lengths.status[,j,2] - pred.lengths.status[,j,4], c(0.95)) # prob that zm invaded lake < noninvaded
  diffs.lt.noninvaded.ci[j,3] <- quantile(pred.lengths.status[,j,3] - pred.lengths.status[,j,4], c(0.95) )# prob that both invaded lake < noninvaded
}

plot.diffs.ci <- data.frame(diffs.lt.noninvaded.ci,DD)
colnames(plot.diffs.ci) <- c("SWF","ZM","Both","DD")

# Covert to long format
plot.diffs.ci <- melt(plot.diffs.ci, id.vars=c("DD"))
colnames(plot.diffs.ci) <- c("DD","status","UCI")

#lower CI
diffs.lt.noninvaded.ci <- array(NA, c(length(DD), 3))
for(j in 1:length(DD)){
  diffs.lt.noninvaded.ci[j,1] <- quantile(pred.lengths.status[,j,1] - pred.lengths.status[,j,4], c(0.025)) # prob that swf invaded lake < noninvaded
  diffs.lt.noninvaded.ci[j,2] <- quantile(pred.lengths.status[,j,2] - pred.lengths.status[,j,4], c(0.025)) # prob that zm invaded lake < noninvaded
  diffs.lt.noninvaded.ci[j,3] <- quantile(pred.lengths.status[,j,3] - pred.lengths.status[,j,4], c(0.025) )# prob that both invaded lake < noninvaded
}

plot.diffs2 <- data.frame(diffs.lt.noninvaded.ci,DD)
colnames(plot.diffs2) <- c("SWF","ZM","Both","DD")

# Covert to long format
plot.diffs2 <- melt(plot.diffs2, id.vars=c("DD"))
colnames(plot.diffs2) <- c("DD","status","LCI")

plot.diffs.ci=merge(plot.diffs.ci, plot.diffs2, by=c("DD", "status"))

#add real DD
plot.diffs.ci$DD_z=plot.diffs.ci$DD
plot.diffs.ci$DD=(plot.diffs.ci$DD_z*sd(datf$DD))+mean(datf$DD)


plot.diffs$DD_z=plot.diffs$DD
plot.diffs$DD=(plot.diffs$DD_z*sd(datf$DD))+mean(datf$DD)


plot.diffs=merge(plot.diffs, plot.diffs.ci)
##############



#original scale DD
windows()
panel3=ggplot(plot.diffs, aes(x = DD, y = pred, color = factor(status, labels = mylabels[2:4]), lty = factor(status, labels = mylabels[2:4]))) + 
 # geom_ribbon( aes(ymax=UCI, ymin=LCI, fill = factor(status, labels = mylabels[2:4])),  alpha=.15) +
  geom_line(lwd=1) + labs(color = "") +  theme_bw()+ theme(panel.grid = element_blank(), axis.title.y = element_text(size=11), axis.title.x = element_blank(),axis.text = element_text(size=11), legend.position = "none")+  ylab('Length difference from\nuninvaded lake (mm)') + scale_colour_manual(values=myPal[-1])+geom_hline(yintercept=0, colour="grey20", lty=2)+scale_linetype_manual("", values=c(2,3,1))+geom_text(x=330, y=2.5, label="B", colour="black")

print(panel3)
#ggsave("WAE_length_difference_from_uninvaded_DD_withCI.png", height=4, width=4, units="in")

########################################################
#multipanel plot
MatrixPlot<-arrangeGrob(panel1,panel3,panel2, ncol=1)
grid.arrange(MatrixPlot)
ggsave("Fig3_WAE_predicted_lengths_DD_multipanel.png", MatrixPlot,height=6, width=4, units="in")
##########################################################

#median DD value across all lake years is 1400
#standardize
end.dd=round((1400-mean(datf$DD))/sd(datf$DD),1)


end.lengths=subset(plot.dat, round(DD,1)==end.dd)
end.lengths.plot=data.frame("mean.length"=melt(end.lengths[1,c(1:4)]), status=c("SWF", "ZM", "Both", "None"), "UCI"=melt(end.lengths[1,c(7,9,11,13)]), "LCI"=melt(end.lengths[1,c(6,8,10,12)]))
end.lengths.plot$status=factor(end.lengths.plot$status, levels=c("None", "SWF", "ZM", "Both"))



grob1 <- grobTree(textGrob("A", x=0.02,  y=0.95, hjust=0, gp=gpar(col="black", fontsize=10)))

panel4=ggplot(end.lengths.plot)+geom_pointrange(aes(color = status, x=status, y=mean.length.value, ymax=UCI.value, ymin=LCI.value))+   xlab('Status') + theme_bw()+ theme(panel.grid = element_blank(), axis.title = element_text(size=11), axis.text = element_text(size=10), legend.position = c(.15, .18))+ylab('End of season length (mm)') + scale_colour_manual(values=myPal)+guides(colour=FALSE)+ annotation_custom(grob1)+scale_x_discrete(labels=c("Uninvaded",  expression(italic("Bytho.")),"Zebra mussels", "Both"))
#ggsave("WAE_length__end_of_season.png", height=4, width=4, units="in")



#end of season length differences
end.length=subset(plot.diffs, round(DD_z,1)==end.dd)


grob <- grobTree(textGrob("B", x=0.02,  y=0.95, hjust=0, gp=gpar(col="black", fontsize=10)))
# Plot
panel5=ggplot(end.length)+geom_pointrange(aes(color = status, x=status, y=pred, ymax=UCI, ymin=LCI))+   xlab('Status') + theme_bw()+ theme(panel.grid = element_blank(), axis.title = element_text(size=11), axis.text = element_text(size=10), legend.position = c(.15, .18))+ylab('Difference from uninvaded\nend of season length (mm)') + scale_colour_manual(values=myPal[-1])+geom_hline(yintercept=0, colour="grey20", lty=2)+guides(colour=FALSE)+ annotation_custom(grob)+scale_x_discrete(labels=c(  expression(italic("Bytho.")),"Zebra mussels", "Both"))+scale_y_continuous(lim=c(-53,3))
#ggsave("WAE_length_difference_from_uninvaded_end_of_season.png", height=4, width=4, units="in")


########################################################
#multipanel plot
MatrixPlot<-arrangeGrob(panel4,panel5, ncol=2)
grid.arrange(MatrixPlot)
ggsave("Fig4_WAE_predicted_end_of_season_multipanel.png", MatrixPlot,height=3, width=7, units="in")
##########################################################