# Code to fit model of age-0 walleye length vs. degree days as a function of
# invasion status (spiny water flea and zebra mussels) in Minnesota's large lakes,
# as described in Hansen et al., Biological Invasions.
# Can be run within the figure code or separately here.
# G Hansen
# 1/22/20

library(data.table)
library(lubridate)
library(jagsUI)
library(MCMCpack)
library(arm)
library(dplyr)
library(doBy)
library(monomvn)

dat <- fread('wae_length_data.csv')
dat[, .N]
head(dat)
summary(dat)

dat$Date <- as.Date(dat$Date, format="%m/%d/%Y")

# Number of lakes
n.lake <- dat[, length(unique(Lake))]

# Numeric lake ID
dat[, lakeID := as.numeric(as.factor(Lake))]

# Housekeeping
dat[, Treatment := as.factor(Treatment)]
dat[, ZM.status := as.factor(ZM.status)]
dat[, SWF.status := as.factor(SWF.status)]

# Dummy variables for invasion status (1 = invaded, 0 otherwise)
dat$ZM <- ifelse(dat$ZM.status == "Invaded", 1, 0)
dat$SWF <- ifelse(dat$SWF.status == "Invaded", 1, 0)

# Date
dat[, sampledate := Date]
dat[, day := lubridate::day(sampledate)]
dat[, month := lubridate::month(sampledate)]
head(dat)
summary(dat)
dat[, range(Year)]
length(min(dat$Year):max(dat$Year))

# Number of observations per lake and year, sorted by lake and descending year
samp.sizes <- dat[, .N, by=list(Lake, Year)][order(Lake, -Year)]
# write.csv(samp.sizes, "samp.sizes.csv")

dat$lake.year <- paste(dat$Lake, dat$Year, sep="")
length(unique(dat$lake.year))
head(dat)

# For plotting (1 = ZM, 2 = SWF, 3 = both, 0 otherwise)
dat$ZM_SWF <- ifelse(dat$ZM==1 & dat$SWF==1, 3,
              ifelse(dat$ZM==1 & dat$SWF==0, 1,
              ifelse(dat$ZM==0 & dat$SWF==0, 0,
              ifelse(dat$SWF==1 & dat$ZM==0, 2, 0))))

names <- unique(dat$Lake)

# If needed, run the Bayesian model in JAGS (73.29 mins to run on computer cluster).
# Alternatively, the MCMC samples of the posterior distributions can be read in
# below from the .rds file.

#################################################################
########## BUGS CODE ############################################
#################################################################
sink("model_2.txt")
cat("
model {

  # Likelihood:
  # Level-1 of the model
  for (i in 1:n){ # number of observations
    y[i] ~ dnorm(mu[i], tau.y[lake.year[i]])
    mu[i] <- alpha[lake.year[i]] + beta[lake.year[i]] * x[i] + u1[lake[i]]
  }

  for(i in 1:n.lake){ # number of lakes
    u1[i] ~ dnorm(0, tau.lake)
  }
  sigma.lake ~ dunif(0, 2)
  tau.lake <- pow(sigma.lake, -2)

  # Hierarchical variance model
  for(j in 1:n.lake.year) { # number of lake/years
    log.sigma[j] ~ dnorm(mu.sigma[j], inv.omega.sigma.squared)
    log(sigma[j]) <- log.sigma[j]
    tau.y[j] <- 1/pow(sigma[j], 2)
    mu.sigma[j] <- Mu.Sigma + gamma.sigma[1] * z1[j] + gamma.sigma[2] * z2[j]
    # med.sigma[j] <- exp(mu.sigma[j])
  }

  # Priors for variances
  Mu.Sigma ~ dnorm(0, 0.001)
  Med.Sigma <- exp(Mu.Sigma)
  omega.sigma ~ dunif(0, 50)
  inv.omega.sigma.squared <- 1/pow(omega.sigma, 2)

  for(i in 1:2){
    gamma.sigma[i] ~ dnorm(0, 0.0001)
  }

  # Level-2 of the model
  for(j in 1:n.lake.year){
    alpha[j] <- BB[j,1]
    beta[j] <- BB[j,2]
    BB[j,1:K] ~ dmnorm(BB.hat[j,], Tau.B[,])
    BB.hat[j,1] <- mu.alpha + gamma.alpha[1] * z1[j] + gamma.alpha[2] * z2[j] # z_x = 1, if invaded
    BB.hat[j,2] <- mu.beta + gamma.beta[1] * z1[j] + gamma.beta[2] * z2[j]
  }

  # Priors on slopes and intercepts
  mu.alpha ~ dnorm(0, 0.0001)
  mu.beta ~ dnorm(0, 0.0001)
  for(i in 1:2){
    gamma.alpha[i] ~ dnorm(0, 0.0001)
    gamma.beta[i] ~ dnorm(0, 0.0001)
  }

  ### Model variance-covariance
  Tau.B[1:K,1:K] ~ dwish(W[,], df)
  df <- K+1
  Sigma.B[1:K,1:K] <- inverse(Tau.B[,])
  for (k in 1:K){
    for (k.prime in 1:K){
      rho.B[k,k.prime] <- Sigma.B[k,k.prime]/sqrt(Sigma.B[k,k]*Sigma.B[k.prime,k.prime])
    }
    sigma.B[k] <- sqrt(Sigma.B[k,k])
  }
} # end model
", fill = TRUE)
sink()

head(dat)

###############################
# Standardize DD
DD_z <- as.numeric(scale(dat$DD))

n.lake.year <- length(unique(dat$lake.year))
n.years <- length(unique(dat$Year))
lake <- dat$lakeID
year <- as.numeric(as.factor(dat$Year))
dat$lake.year <- as.numeric(as.factor(dat$lake.year))
J <- length(unique(dat$lake.year))

# Invasive covariate dummy variables (one value per lake-year)
swf <- summaryBy(SWF ~ lakeID*Year, data=dat, FUN=mean)
zm <- summaryBy(ZM ~ lakeID*Year, data=dat, FUN=mean)
z1 <- as.numeric(swf$SWF.mean)
z2 <- as.numeric(zm$ZM.mean)

n.lake <- length(unique(dat$Lake))
lake <- as.numeric(as.factor(dat$Lake))

# Number of varying parameters (K) to estimate
K <- 2

# Create identity matrix for the Wishart distribution (dimension K)
W <- diag(K)

# Load data
data <- list(y = dat$Length, lake.year = dat$lake.year, n = dim(dat)[1],
             n.lake = n.lake, x = DD_z, n.year = n.years, K = K, W = W,
             year = year, z1 = z1, z2 = z2, n.lake.year = n.lake.year, lake = lake)

# Initial values
inits <- function(){
  list(mu.alpha = rnorm(1), mu.beta = rnorm(1),
       BB = matrix(rnorm(J*K), nrow=J, ncol=K),
       Tau.B = rwish(K+1, diag(K)),
       Mu.Sigma = rnorm(1), omega.sigma = runif(1, 0, 20))
}

# Parameters monitored
parameters <- c("mu.alpha", "mu.beta", "BB", "sigma", "Mu.Sigma", "omega.sigma",
                "gamma.alpha", "gamma.beta", "gamma.sigma", "u1", "sigma.lake")

# MCMC settings
ni <- 60000
nt <- 3
nb <- 30000
nc <- 3

start.time <- Sys.time() # Set timer (4 mins)
out <- jags(data = data, inits = inits, parameters.to.save = parameters,
            model.file = "model_2.txt", n.chains = nc, n.thin = nt, n.iter = ni,
            n.burnin = nb, DIC = FALSE, parallel = T, n.adapt = nb)
end.time <- Sys.time()
elapsed.time <- round(difftime(end.time, start.time, units='mins'), dig = 2)
cat('Posterior computed in ', elapsed.time, ' minutes\n\n', sep='') # Calculate computation time
print(elapsed.time)

# str(out)

# Summarize posteriors
# print(out, dig = 3)
# arm::traceplot(out)
# jagsUI::traceplot(out, parameters=c("gamma.beta"))
# jagsUI::traceplot(out, parameters=c("gamma.alpha"))
# jagsUI::traceplot(out, parameters=c("mu.alpha"))
# jagsUI::traceplot(out, parameters=c("sigma.lake.alpha"))
# jagsUI::traceplot(out, parameters=c("sigma.year"))

# Export model fit summary
BugsOut <- out$summary
#write.csv(BugsOut, "Bugsout_new_swf_zm_wae.csv")
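# Optional convergence screen (a suggested addition, not part of the original
# analysis). jagsUI stores Rhat and effective sample sizes in the summary matrix,
# so parameters that may not have converged can be listed directly. Assumes the
# model was just run above so that BugsOut exists.
rownames(BugsOut)[which(BugsOut[, "Rhat"] > 1.1)]  # ideally returns character(0)
range(BugsOut[, "n.eff"])                          # smallest and largest effective sample sizes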
# Export MCMC samples
mcmcOut <- out$sims.list
saveRDS(mcmcOut, "Bugsout_new_swf_zm_wae.rds")

#########################################################################
#########################################################################
# End Model fit #########################################################

# Read in model results file if needed
# mcmcOut <- readRDS("Bugsout_new_swf_zm_wae.rds")
str(mcmcOut)

# Intercept with no SWF and no ZM
mean(mcmcOut$mu.alpha)
# Intercept with SWF
mean(mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,1])
# Intercept with ZM
mean(mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2])

# Difference in intercepts with and without SWF (90% CI)
quantile(mcmcOut$mu.alpha - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,1]), c(0.05,0.95))
# Difference in intercepts with and without ZM (90% CI)
quantile(mcmcOut$mu.alpha - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2]), c(0.05,0.95))
# Difference in intercepts with and without BOTH SWF and ZM (90% CI)
quantile(mcmcOut$mu.alpha - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2] + mcmcOut$gamma.alpha[,1]), c(0.05,0.95))
# Difference in intercepts between SWF-only and ZM-only (90% CI)
quantile((mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,1]) - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2]), c(0.05,0.95))

# Slope with no SWF and no ZM
mean(mcmcOut$mu.beta)
# Slope with SWF
mean(mcmcOut$mu.beta + mcmcOut$gamma.beta[,1])
# Slope with ZM
mean(mcmcOut$mu.beta + mcmcOut$gamma.beta[,2])

# Difference in slopes with and without SWF (90% CI)
quantile(mcmcOut$mu.beta - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,1]), c(0.05,0.95))
# Difference in slopes with and without ZM (90% CI)
quantile(mcmcOut$mu.beta - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,2]), c(0.05,0.95))
# Difference in slopes with and without BOTH SWF and ZM (90% CI)
quantile(mcmcOut$mu.beta - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,2] + mcmcOut$gamma.beta[,1]), c(0.05,0.95))
# Difference in slopes between SWF-only and ZM-only (90% CI)
quantile((mcmcOut$mu.beta + mcmcOut$gamma.beta[,1]) - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,2]), c(0.05,0.95))

# SD with no SWF and no ZM (on the log scale)
mean(mcmcOut$Mu.Sigma)
# SD with SWF
mean(mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,1])
# SD with ZM
mean(mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2])

# Difference in SD with and without SWF (90% CI)
quantile(mcmcOut$Mu.Sigma - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,1]), c(0.05,0.95))
# Difference in SD with and without ZM (90% CI)
quantile(mcmcOut$Mu.Sigma - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2]), c(0.05,0.95))
# Difference in SD with and without BOTH SWF and ZM (90% CI)
quantile(mcmcOut$Mu.Sigma - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2] + mcmcOut$gamma.sigma[,1]), c(0.05,0.95))
# Difference in SD between SWF-only and ZM-only (90% CI)
quantile((mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,1]) - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2]), c(0.05,0.95))
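# The next block assembles the differences table by repeating the same
# mean/quantile pattern for every parameter and invasion category. The helper
# below is a hypothetical convenience function (not used by the original code)
# that shows the pattern once: uninvaded minus invaded, with a 90% credible interval.
diff_summary <- function(base, effect){
  d <- base - (base + effect)  # equivalent to -effect; kept explicit to mirror the code below
  c(mean = mean(d), quantile(d, c(0.05, 0.95)))
}
# Example (SWF effect on the intercept): diff_summary(mcmcOut$mu.alpha, mcmcOut$gamma.alpha[,1])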
# Save differences in parameters between invaded categories and uninvaded
differences <- data.frame(
  "Invasion.category" = rep(c("SWF", "ZM", "Both"), 3),
  "variable" = c(rep("intercept",3), rep("slope",3), rep("sd",3)),
  "difference" = c(
    mean(mcmcOut$mu.alpha - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,1])),
    mean(mcmcOut$mu.alpha - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2])),
    mean(mcmcOut$mu.alpha - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2] + mcmcOut$gamma.alpha[,1])),
    mean(mcmcOut$mu.beta - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,1])),
    mean(mcmcOut$mu.beta - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,2])),
    mean(mcmcOut$mu.beta - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,2] + mcmcOut$gamma.beta[,1])),
    mean(mcmcOut$Mu.Sigma - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,1])),
    mean(mcmcOut$Mu.Sigma - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2])),
    mean(mcmcOut$Mu.Sigma - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2] + mcmcOut$gamma.sigma[,1]))),
  "difference.LCI" = c(
    quantile(mcmcOut$mu.alpha - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,1]), 0.05),
    quantile(mcmcOut$mu.alpha - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2]), 0.05),
    quantile(mcmcOut$mu.alpha - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2] + mcmcOut$gamma.alpha[,1]), 0.05),
    quantile(mcmcOut$mu.beta - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,1]), 0.05),
    quantile(mcmcOut$mu.beta - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,2]), 0.05),
    quantile(mcmcOut$mu.beta - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,2] + mcmcOut$gamma.beta[,1]), 0.05),
    quantile(mcmcOut$Mu.Sigma - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,1]), 0.05),
    quantile(mcmcOut$Mu.Sigma - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2]), 0.05),
    quantile(mcmcOut$Mu.Sigma - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2] + mcmcOut$gamma.sigma[,1]), 0.05)),
  "difference.UCI" = c(
    quantile(mcmcOut$mu.alpha - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,1]), 0.95),
    quantile(mcmcOut$mu.alpha - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2]), 0.95),
    quantile(mcmcOut$mu.alpha - (mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2] + mcmcOut$gamma.alpha[,1]), 0.95),
    quantile(mcmcOut$mu.beta - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,1]), 0.95),
    quantile(mcmcOut$mu.beta - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,2]), 0.95),
    quantile(mcmcOut$mu.beta - (mcmcOut$mu.beta + mcmcOut$gamma.beta[,2] + mcmcOut$gamma.beta[,1]), 0.95),
    quantile(mcmcOut$Mu.Sigma - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,1]), 0.95),
    quantile(mcmcOut$Mu.Sigma - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2]), 0.95),
    quantile(mcmcOut$Mu.Sigma - (mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2] + mcmcOut$gamma.sigma[,1]), 0.95)))

# More intuitive if differences are invaded - uninvaded (this is how the figures
# are presented), so flip the sign of all numeric columns
differences <- differences %>% mutate_if(is.numeric, ~ . * -1)
write.csv(differences, "differences_CI_parameters_wae_newdata.csv", row.names=F)

#########################################################################
#########################################################################
# Save coefficients
mean.parameters <- data.frame(
  "Invasion.category" = rep(c("Uninvaded", "SWF", "ZM", "Both"), 3),
  "variable" = c(rep("intercept",4), rep("slope",4), rep("sd",4)),
  "mean" = c(
    mean(mcmcOut$mu.alpha),
    mean(mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,1]),
    mean(mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2]),
    mean(mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2] + mcmcOut$gamma.alpha[,1]),
    mean(mcmcOut$mu.beta),
    mean(mcmcOut$mu.beta + mcmcOut$gamma.beta[,1]),
    mean(mcmcOut$mu.beta + mcmcOut$gamma.beta[,2]),
    mean(mcmcOut$mu.beta + mcmcOut$gamma.beta[,2] + mcmcOut$gamma.beta[,1]),
    mean(mcmcOut$Mu.Sigma),
    mean(mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,1]),
    mean(mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2]),
    mean(mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2] + mcmcOut$gamma.sigma[,1])),
  "UCI" = c(
    quantile(mcmcOut$mu.alpha, 0.95),
    quantile(mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,1], 0.95),
    quantile(mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2], 0.95),
    quantile(mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2] + mcmcOut$gamma.alpha[,1], 0.95),
    quantile(mcmcOut$mu.beta, 0.95),
    quantile(mcmcOut$mu.beta + mcmcOut$gamma.beta[,1], 0.95),
    quantile(mcmcOut$mu.beta + mcmcOut$gamma.beta[,2], 0.95),
    quantile(mcmcOut$mu.beta + mcmcOut$gamma.beta[,2] + mcmcOut$gamma.beta[,1], 0.95),
    quantile(mcmcOut$Mu.Sigma, 0.95),
    quantile(mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,1], 0.95),
    quantile(mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2], 0.95),
    quantile(mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2] + mcmcOut$gamma.sigma[,1], 0.95)),
  "LCI" = c(
    quantile(mcmcOut$mu.alpha, 0.05),
    quantile(mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,1], 0.05),
    quantile(mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2], 0.05),
    quantile(mcmcOut$mu.alpha + mcmcOut$gamma.alpha[,2] + mcmcOut$gamma.alpha[,1], 0.05),
    quantile(mcmcOut$mu.beta, 0.05),
    quantile(mcmcOut$mu.beta + mcmcOut$gamma.beta[,1], 0.05),
    quantile(mcmcOut$mu.beta + mcmcOut$gamma.beta[,2], 0.05),
    quantile(mcmcOut$mu.beta + mcmcOut$gamma.beta[,2] + mcmcOut$gamma.beta[,1], 0.05),
    quantile(mcmcOut$Mu.Sigma, 0.05),
    quantile(mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,1], 0.05),
    quantile(mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2], 0.05),
    quantile(mcmcOut$Mu.Sigma + mcmcOut$gamma.sigma[,2] + mcmcOut$gamma.sigma[,1], 0.05)))
write.csv(mean.parameters, "mean_CI_parameters_wae_newdata.csv", row.names=F)

# Merge into one table
parameter.table <- merge(mean.parameters, differences, all.x=T)
write.csv(parameter.table, "mean_CI_parameters_and_differences_wae_newdata.csv", row.names=F)

################################################
### Export varying intercepts and slopes, and SDs
################################################
head(dat)
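# Optional dimension check (a suggested addition, not in the original script):
# the lake-year parameter array BB should have one column per lake-year group
# and K varying parameters. Assumes n.lake.year and K from the data-prep code
# above are still in the workspace.
stopifnot(dim(mcmcOut$BB)[2] == n.lake.year, dim(mcmcOut$BB)[3] == K)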
# Get unique years for each lake
#### Take within-year mean length by lake and year
lake.samp <- dat[, .(Length=mean(Length, na.rm=T)), by=.(Lake, Year)]
dim(lake.samp)
lake.samp <- lake.samp[order(lake.samp$Lake, lake.samp$Year)]

##### INTERCEPTS
# Grab parameter of interest - intercept
params <- mcmcOut$BB[,,1]
paramMeans <- apply(params, 2, mean)
length(paramMeans)
paramLCI <- apply(params, 2, quantile, 0.05)
paramUCI <- apply(params, 2, quantile, 0.95)

# Merge lake.samp info with estimates
paramsMerged <- data.frame(lake.samp, paramMeans, paramLCI, paramUCI)
head(paramsMerged)
dim(paramsMerged)

# Expand sites to include all years for each lake
datExp <- expand.grid(Lake = unique(lake.samp$Lake), Year = min(lake.samp$Year):max(lake.samp$Year))
dim(datExp)
datExp <- data.table(datExp)

# Sort by lake and year
datExp <- datExp[order(Lake, Year)]

# Merge in data from lake.samp - by lake and year
datFull <- plyr::join(datExp, paramsMerged, by=c("Lake","Year"), type='left', match='all')
dim(datFull)
head(datFull)

### Use this to export parameters and make plots in ggplot
write.csv(datFull, "wae_intercepts_new_model.csv", row.names=F)

##### SLOPES
# Grab parameter of interest - slope
params <- mcmcOut$BB[,,2]
paramMeans <- apply(params, 2, mean)
length(paramMeans)
paramLCI <- apply(params, 2, quantile, 0.05)
paramUCI <- apply(params, 2, quantile, 0.95)

# Merge lake.samp info with estimates
paramsMerged <- data.frame(lake.samp, paramMeans, paramLCI, paramUCI)
head(paramsMerged)
dim(paramsMerged)

# Expand sites to include all years for each lake
datExp <- expand.grid(Lake = unique(lake.samp$Lake), Year = min(lake.samp$Year):max(lake.samp$Year))
dim(datExp)
datExp <- data.table(datExp)

# Sort by lake and year
datExp <- datExp[order(Lake, Year)]

# Merge in data from lake.samp - by lake and year
datFull <- plyr::join(datExp, paramsMerged, by=c("Lake","Year"), type='left', match='all')
dim(datFull)
head(datFull)

### Use this to export parameters and make plots in ggplot
write.csv(datFull, "wae_slopes_new_model.csv", row.names=F)

##### SDs
# Grab parameter of interest - lake-year SD
params <- mcmcOut$sigma
paramMeans <- apply(params, 2, mean)
length(paramMeans)
paramLCI <- apply(params, 2, quantile, 0.05)
paramUCI <- apply(params, 2, quantile, 0.95)

# Merge lake.samp info with estimates
paramsMerged <- data.frame(lake.samp, paramMeans, paramLCI, paramUCI)
head(paramsMerged)
dim(paramsMerged)

# Expand sites to include all years for each lake
datExp <- expand.grid(Lake = unique(lake.samp$Lake), Year = min(lake.samp$Year):max(lake.samp$Year))
dim(datExp)
datExp <- data.table(datExp)

# Sort by lake and year
datExp <- datExp[order(Lake, Year)]

# Merge in data from lake.samp - by lake and year
datFull <- plyr::join(datExp, paramsMerged, by=c("Lake","Year"), type='left', match='all')
dim(datFull)
head(datFull)

### Use this to export parameters and make plots in ggplot
write.csv(datFull, "wae_sds_new_model.csv", row.names=F)

####################################
# Year and lake-specific model predictions
####################################
x.pred <- seq(min(DD_z), max(DD_z), length=50)
dat$DD_z <- scale(dat$DD)

# Grab number of years with observations for each lake
t1 <- as.matrix(table(dat$Lake, dat$Year))
t1 <- ifelse(t1==0, NA, t1)
fun1 <- function(x){sum(!is.na(x))}
lakeCount <- apply(t1, 1, fun1)
lakeCount <- as.numeric(lakeCount)
sum(lakeCount)

# Put each group's MCMC draws for the intercept and slope in its own list element
group.params <- list()
for(m in 1:J){
  group.params[[m]] <- mcmcOut$BB[,m,1:2]
}

# Container for predicted values
linPredGroup <- array(NA, c(dim(mcmcOut$BB)[1], length(x.pred), J))
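# Note: the nested loops below fill linPredGroup one draw at a time. An
# equivalent vectorized form (a sketch for reference only, commented out so the
# work is not done twice) uses a single matrix multiplication per group:
# for(p in 1:J){
#   linPredGroup[,,p] <- group.params[[p]] %*% rbind(1, x.pred)
# }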
dim(linPredGroup)

for(p in 1:J){ # loop over groups (J)
  for(i in 1:dim(mcmcOut$BB)[1]){
    for(t in 1:length(x.pred)){
      linPredGroup[i,t,p] <- group.params[[p]][i,1] + group.params[[p]][i,2]*x.pred[t]
    }
  }
}

# Means
meanProbGroup <- apply(linPredGroup, c(2,3), mean)

# 95% CIs for fitted values
upperCI.Group <- apply(linPredGroup, c(2,3), quantile, probs=c(0.975))
lowerCI.Group <- apply(linPredGroup, c(2,3), quantile, probs=c(0.025))
dim(meanProbGroup)

# Create data frame to export
lake.year.list <- unique(dplyr::select(dat, Lake, Year, lake.year))
lake.year.list <- lake.year.list[order(lake.year),]

mean.probs <- data.frame(meanProbGroup)
colnames(mean.probs) <- lake.year.list$lake.year
mean.probs$DD <- x.pred
mean.probs <- melt(mean.probs, id.vars="DD", variable.name="lake.year", value.name="length")

upperCI.length <- data.frame(upperCI.Group)
colnames(upperCI.length) <- lake.year.list$lake.year
upperCI.length <- melt(upperCI.length, variable.name="lake.year", value.name="UCI")

lowerCI.length <- data.frame(lowerCI.Group)
colnames(lowerCI.length) <- lake.year.list$lake.year
lowerCI.length <- melt(lowerCI.length, variable.name="lake.year", value.name="LCI")

predicted.values <- cbind(mean.probs, dplyr::select(upperCI.length, UCI), dplyr::select(lowerCI.length, LCI))

# Back-transform standardized degree days to the original scale
predicted.values$DD.z <- predicted.values$DD
predicted.values$DD <- (predicted.values$DD.z*sd(dat$DD)) + mean(dat$DD)

predicted.values <- merge(predicted.values, lake.year.list, by="lake.year")
write.csv(predicted.values, "all_lake_years_predicted_values_wae.csv", row.names=F)
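# Optional plotting sketch (ggplot2 is not loaded above, and the published
# figures are produced by separate figure code, so this is only a suggestion of
# how the exported predictions might be viewed):
# library(ggplot2)
# ggplot(predicted.values, aes(x = DD, y = length, group = lake.year)) +
#   geom_ribbon(aes(ymin = LCI, ymax = UCI), alpha = 0.2) +
#   geom_line() +
#   facet_wrap(~Lake) +
#   labs(x = "Degree days", y = "Predicted age-0 walleye length")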