# R code associated with Waananen et al. "The fitness effects of outcrossing # distance depend on parental flowering phenology in fragmented populations of a # tallgrass prairie forb" submitted to the New Phytologist. # This code is private-for-peer-review. # Load Libraries and Functions -------------------------------------------- library(aster) library(dplyr) library(tidyr) library(ggplot2) # library(plotly) # library(metR) library(patchwork) # Read in handy functions ------------------------------------------------- ### Custom ggplot theme theme_custom <- function(){ theme_bw() + theme( axis.text = element_text(size = 10, color = "black"), axis.title = element_text(size = 12, color = "black"), axis.line.x = element_line(color="black"), axis.line.y = element_line(color="black"), panel.border = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(), panel.grid.major.y = element_blank(), plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), units = "cm"), plot.title = element_text(size = 18, vjust = 1, hjust = 0), legend.text = element_text(size = 10), # legend.title = element_blank(), # legend.position = c(0.95, 0.15), legend.key = element_blank(), legend.background = element_rect(color = "black", fill = "transparent", linewidth = 2, linetype = "blank")) } ### Aster AIC function ## read in function to compute the AIC value of an aster model AIC.aster <- function(m){ dev <- m$deviance k <- length(m$coef) AIC <- dev + 2*k return(AIC) } ### AIC Table # want to take a list of model objects of variable length # and assign them in order to variables named m1-n AIC.table <- function(ll){ for(i in 1:length(ll)){ nam <- paste("m",i,sep = "") assign(nam, ll[[i]]) } ncomp <- length(ll) # print(ncomp) foo <- unlist(lapply(1:ncomp, FUN = function(j){ AIC.aster(eval(parse(text = paste("m", j , sep = "")))) })) # print(foo) noPar <-unlist(lapply(1:ncomp, FUN = function(j){ length(eval(parse(text = paste("m", j ,sep = "")))$coef)})) AIC <- foo ## AIC values compared to the selected model deltaAIC <- as.data.frame(round(foo - min(foo),4)) ## The term exp((AICmin - AICi)/2) is interpreted as ## being proportional to the probability that the ith ## model minimizes the (estimated) information loss. prAIC <- as.data.frame(round(exp((min(foo) - foo)/2), 4)) wAIC <- round(as.data.frame(prAIC/sum(prAIC)), digits = 3) ## put model names in a vector # models <- as.data.frame(paste("m", 1:ncomp, sep = "")) models <- sapply(ll, function(x) as.character(x$formula)[3]) models <- sapply(models, function(x) gsub(".*fit\\:\\((.+)\\).*", "\\1", x), USE.NAMES = FALSE) ## make a table that displays the AIC for each model tableAICs <- cbind(models, noPar, AIC, deltaAIC, prAIC, wAIC) # names(tableAICs) <- c('model','noPar', 'AIC', 'delta', 'prAIC', 'wAIC') tableAICs } # AIC.table(list(m1, m2)) # Read in Data ------------------------------------------------------------ # Fitness Predictors # predictors <- read.csv('fitness_predictors.csv') predictors <- read.csv('Data and Code/fitness_predictors.csv') # Fitness Data # data <- read.csv('offspring_fitness_data.csv') data <- read.csv('Data and Code/offspring_fitness_data.csv') # Data checks ------------------------------------------------------------- # Checks for perfect collinearity between data at each node # Making sure there is variation for each transition ld in one year -> ld in next year # table(data[data$ld2006 %in% 1, 'ld2007']) # table(data[data$ld2007 %in% 1, 'ld2008']) # table(data[data$ld2008 %in% 1, 'ld2009']) # table(data[data$ld2009 %in% 1, 'ld2010']) # table(data[data$ld2010 %in% 1, 'ld2011']) # table(data[data$ld2011 %in% 1, 'ld2012']) # table(data[data$ld2012 %in% 1, 'ld2013']) # table(data[data$ld2013 %in% 1, 'ld2014']) # table(data[data$ld2014 %in% 1, 'ld2015']) # table(data[data$ld2015 %in% 1, 'ld2016']) # table(data[data$ld2016 %in% 1, 'ld2017']) # # Making sure there's variation in flowering (i.e., not all individuals that were alive flowered/didn't flower) # table(data[data$ld2009 %in% 1, 'fl2009']) # only one individual flowered; remove from analysis? # table(data[data$ld2010 %in% 1, 'fl2010']) # 8 individuals flowered - did any have more than one head? # table(data[data$ld2011 %in% 1, 'fl2011']) # table(data[data$ld2012 %in% 1, 'fl2012']) # table(data[data$ld2013 %in% 1, 'fl2013']) # table(data[data$ld2014 %in% 1, 'fl2014']) # table(data[data$ld2015 %in% 1, 'fl2015']) # table(data[data$ld2016 %in% 1, 'fl2016']) # table(data[data$ld2017 %in% 1, 'fl2017']) # # # Of those that produced flowering heads, variation in head count? # table(data[data$fl2010 %in% 1, 'hdCt2010']) # all plants that flowered in 2010 had one head # table(data[data$fl2011 %in% 1, 'hdCt2011']) # table(data[data$fl2012 %in% 1, 'hdCt2012']) # table(data[data$fl2013 %in% 1, 'hdCt2013']) # table(data[data$fl2014 %in% 1, 'hdCt2014']) # table(data[data$fl2015 %in% 1, 'hdCt2015']) # table(data[data$fl2016 %in% 1, 'hdCt2016']) # table(data[data$fl2017 %in% 1, 'hdCt2017']) # # # check for resurrections (ld = 0 -> ld = 1 forbidden) data %>% filter( (ld2006 == 0 & ld2007 == 1) | (ld2007 == 0 & ld2008 == 1) | (ld2008 == 0 & ld2009 == 1) | (ld2009 == 0 & ld2010 == 1) | (ld2010 == 0 & ld2011 == 1) | (ld2011 == 0 & ld2012 == 1) | (ld2012 == 0 & ld2013 == 1) | (ld2013 == 0 & ld2014 == 1) | (ld2014 == 0 & ld2015 == 1) | (ld2015 == 0 & ld2016 == 1) | (ld2016 == 0 & ld2017 == 1) | (ld2017 == 0 & ld2019 == 1) | (ld2019 == 0 & ld2020 == 1) | (ld2020 == 0 & ld2021 == 1) | (ld2021 == 0 & ld2022 == 1) ) %>% dplyr::select(cgPlaId, grep('ld',colnames(data))) # # # # check for non-zero flowering hd presence when fl = 1 data %>% filter( (fl2011 == 0 & hdCt2011 != 0) | # (fl2012 == 0 & hdCt2012 != 0) | # (fl2013 == 0 & hdCt2013 != 0) | # (fl2014 == 0 & hdCt2014 != 0) | # (fl2015 == 0 & hdCt2015 != 0) | # (fl2016 == 0 & hdCt2016 != 0) | # (fl2017 == 0 & hdCt2017 != 0) | (fl2019 == 0 & hdCt2019 != 0) | (fl2020 == 0 & hdCt2020 != 0) | (fl2021 == 0 & hdCt2021 != 0) | (fl2022 == 0 & hdCt2022 != 0)) %>% dplyr::select(cgPlaId, grep('fl',colnames(data))) # Fix the resurrections manually: # Assume that when a plant was "can't find" in a year preceding a year that it # was found, that the plant was alive in the "can't find" year and not located. # This can occur for numerous reasons including herbivory resulting in the plant # having no aboveground tissue, dense litter making locating the plant # difficult, or human error. data[data$cgPlaId %in% 21004, "ld2017"] <- 0 data[data$cgPlaId %in% 22950, "ld2017"] <- 0 data[data$cgPlaId %in% 23006, "ld2013"] <- 0 data[data$cgPlaId %in% 23934, "ld2013"] <- 0 data[data$cgPlaId %in% 24505, c("ld2011", "ld2012")] <- 1 data[data$cgPlaId %in% 25118, "ld2017"] <- 0 data[data$cgPlaId %in% 25404, "ld2013"] <- 0 data[data$cgPlaId %in% 26101, "ld2017"] <- 0 data[data$cgPlaId %in% 27320, "ld2017"] <- 0 data[data$cgPlaId %in% 27507, c("ld2011", "ld2012")] <- 1 data[data$cgPlaId %in% 23138, c("ld2013", "ld2014", "ld2015","ld2016")] <- 1 data[data$cgPlaId %in% 23622, c("ld2013", "ld2014", "ld2015","ld2016")] <- 1 data[data$cgPlaId %in% 21004, c("ld2013", "ld2014", "ld2015","ld2016", "ld2017")] <- 1 data[data$cgPlaId %in% 25118, c("ld2013", "ld2014", "ld2015","ld2016", "ld2017")] <- 1 data[data$cgPlaId %in% 26101, c("ld2014", "ld2015","ld2016", "ld2017")] <- 1 data[data$cgPlaId %in% 27320, c("ld2013", "ld2014", "ld2015","ld2016", "ld2017")] <- 1 # Resolve one issue with data from 2022, can't find without 'good' record # Data indicates this was a "can't find" data <- data %>% mutate(ld2022 = case_when(cgPlaId == 26008 ~ 0,TRUE ~ as.numeric(ld2022)), hdCt2022 = case_when(cgPlaId == 26008 ~ 0, TRUE ~ as.numeric(hdCt2022)), fl2022 = case_when(cgPlaId == 26008 ~ 0, TRUE ~ as.numeric(fl2022))) # Change fl to 0 when plants didn't successfully produce a flowering head # Don't do this if we are including a fl level to the model. # data[data$fl2011 %in% 1 & data$hdCt2011 %in% 0, "fl2011"] <- 0 # data[data$fl2012 %in% 1 & data$hdCt2012 %in% 0, "fl2012"] <- 0 # data[data$fl2013 %in% 1 & data$hdCt2013 %in% 0, "fl2013"] <- 0 # data[data$fl2014 %in% 1 & data$hdCt2014 %in% 0, "fl2014"] <- 0 # data[data$fl2015 %in% 1 & data$hdCt2015 %in% 0, "fl2015"] <- 0 # data[data$fl2016 %in% 1 & data$hdCt2016 %in% 0, "fl2016"] <- 0 # data[data$fl2017 %in% 1 & data$hdCt2017 %in% 0, "fl2017"] <- 0 # data[data$fl2021 %in% 1 & data$hdCt2021 %in% 0, "fl2022"] <- 0 # data[data$fl2022 %in% 1 & data$hdCt2022 %in% 0, "fl2022"] <- 0 # length(unique(data$cgPlaId)) # Aster Graph Set Up and Data Preparation --------------------------------- # Defining the nodes in the graph # Note that I removed the nodes for fl2009 and fl2010. All of the plants that # flowered in those years only had one head, so fl2009/hdCt2009 fl2010/hdCt2010 # would be collinear. vars <- c( "ld2006", "ld2007", "ld2008", "ld2009", "ld2010", "ld2011", "ld2012", "ld2013", "ld2014", "ld2015", "ld2016", "ld2017", "ld2019", "ld2020", "ld2021","ld2022", "fl2011", "fl2014", "fl2015", "fl2016", "fl2017", "fl2019", "fl2020", "fl2021","fl2022", "hdCt2010", 'hdCt2011', 'hdCt2012', "hdCt2013", "hdCt2014", "hdCt2015", "hdCt2016", "hdCt2017", "hdCt2019", "hdCt2020", "hdCt2021", "hdCt2022" ) pred <- c( # ld nodes 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, # fl nodes 6, 9, 10, 11, 12, 13, 14, 15, 16, # hdCt nodes 5, 17, 7, 8, 18, 19, 20, 21, 22, 23, 24, 25 ) # Check to make sure this is correct. data.frame(succ = vars, pred = c(NA, vars[pred])) # Encoding families # These represent the family of the arrow # The successor is drawn from this distribution, the sum of draws equal to the # value of the predecessor. # 1 = Bernoulli # 2 = Poisson # 3 = zero-truncated Poisson # graph structure when dataset limited to confidence > 0.8 fam <- c( rep(1, 16), # all ld transitions are Bernoulli rep(1, 9), # all ld -> fl transitions are Bernoulli c(2, 2, 2, 2, rep(2,8)) # all fl -> hdCt transitions are Poisson (may flower but produce 0 heads), links that go from ld -> hdCt are Poisson ) # Check that this looks good data.frame(succ = vars, pred = c(NA, vars[pred]), fam = fam) #### Reformatting the data frame #### # Reshape the data data$id <- 1:nrow(data) # add id col for reshaping redata <- data %>% pivot_longer(names_to = "varb", values_to = "resp", -c(id, row, pos, cgPlaId)) # Merge fitness data and fitness predictors redata <- merge(redata, predictors, by = "cgPlaId", all.x = T) redata <- redata[!is.na(redata$parentDist), ] # only keep rows for plants with inter-parent distance # See how many are left length(unique(redata$cgPlaId)) # 436 offspring # # re-do a few checks for collinearity in the data now that the dataset has been filtered for plants IDed in ajb study table(redata[redata$varb %in% 'hdCt2009' & redata$resp > 0, 'resp']) # the only plant that flowered in 2009 has unknown parent distance table(redata[redata$varb %in% 'hdCt2010' & redata$resp > 0, 'resp']) # one plant flowered in 2010, remove hdCt 2010 table(redata[redata$varb %in% 'hdCt2011' & redata$resp > 0, 'resp']) # keep hdCt 2011 table(redata[redata$varb %in% 'hdCt2012' & redata$resp > 0, 'resp']) # all plants that flowered in 2012 had one head table(redata[redata$varb %in% 'hdCt2013' & redata$resp > 0, 'resp']) # # based on these checks, I needed to remove nodes for fl2010 and fl2012 (now this is done above). # Center interparent dist for to help model converge redata$parentDistC <- (redata$parentDist - mean(redata$parentDist)) / sd(redata$parentDist) # Use difference in midpoint date of flowering rather than 1-overlap redata$parentAsync <- redata$diffStartDir # Add root for the model redata$root <- 1 # model begins with sample of 1 (represents one individual, which lives or dies in subsequent node) # Change the variables to factors with levels in order defined in vars redata$varb <- as.factor(redata$varb) redata$varb <- factor(redata$varb, levels = vars) # Rearrange factors here - need to have ld, fl, hdct ordering of rows redata <- redata %>% arrange(varb) # table(redata$varb) # Instead of estimating for each node, we estimate for each type # of node I.e., one coefficient for hdct, one for fl, one for ld) redata$layer <- as.factor(gsub("\\d", "", as.character(redata$varb))) # table(redata$layer) # Remove observations that aren't included in varbs - not sure if this is necessary? redata <- redata[!is.na(redata$layer), ] # Add dummy variable to indicate nodes associated with fitness measure (here, head count) redata$fit <- as.numeric(redata$layer == "hdCt") # Now the data is ready for analysis. length(unique(redata$cgPlaId)) # 436 with all scores with >80% confidence # Model Selection with Random Effects ------------------------------------- # Received an error when attempting a model with a random effect for maternal # line. So, focusing on maternal source population instead. ## Asynchrony comparisons null <- reaster( resp ~ varb + layer:(row + pos), random = list(pop = ~0 + fit:popCd.dam), pred, fam, varb, id, root, data = redata ) h1_h2 <- reaster( resp ~ varb + layer:(row + pos) + fit:(I(parentAsync^2)), random = list(pop = ~ 0 + fit : popCd.dam), pred, fam, varb, id, root, data = redata ) h1_h2_abs <- reaster( resp ~ varb + layer:(row + pos) + fit:(abs(parentAsync)), random = list(pop = ~ 0 + fit : popCd.dam), pred, fam, varb, id, root, data = redata ) h3 <- reaster( resp ~ varb + layer:(row + pos) + fit:(abs(parentAsync)+ I(parentAsync^2)), random = list(pop = ~ 0 + fit : popCd.dam), pred, fam, varb, id, root, data = redata ) dir <- reaster( resp ~ varb + layer:(row + pos) + fit:(parentAsync), random = list(pop = ~ 0 + fit : popCd.dam), pred, fam, varb, id, root, data = redata ) AIC.table(list(null, h1_h2_abs, h3, dir)) ## Distance comparisons h1_h2_dist <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC), pred, fam, varb, id, root, data = redata ) h3_dist <- aster( resp ~ varb + layer:(row + pos) + fit:(I(parentDistC^2)), pred, fam, varb, id, root, data = redata ) AIC.table(list(null, h1_h2_dist, h3_dist)) summary(h1_h2_dist, info.tol = 1e-09) # dist coefficient is positive, supporting H1 # Compare nested models to assess overall contributions of asynchrony and distance interactive <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC * parentAsync), pred, fam, varb, id, root, data = redata ) additive <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC + parentAsync), pred, fam, varb, id, root, data = redata ) minusDist <- aster( resp ~ varb + layer:(row + pos) + fit:(parentAsync), pred, fam, varb, id, root, data = redata ) minusAsync <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC), pred, fam, varb, id, root, data = redata ) AIC.table(list(interactive, additive, minusAsync, minusDist)) summary(interactive, info.tol = 1e-09) # Make model summary table # Model Selection --------------------------------------------------------- ## Asynchrony comparisons null <- aster( resp ~ varb + layer:(row + pos), pred, fam, varb, id, root, data = redata ) h1_h2 <- aster( resp ~ varb + layer:(row + pos) + fit:(I(parentAsync^2)), pred, fam, varb, id, root, data = redata ) h1_h2_abs <- aster( resp ~ varb + layer:(row + pos) + fit:(abs(parentAsync)), pred, fam, varb, id, root, data = redata ) h3 <- aster( resp ~ varb + layer:(row + pos) + fit:(abs(parentAsync)+ I(parentAsync^2)), pred, fam, varb, id, root, data = redata ) dir <- aster( resp ~ varb + layer:(row + pos) + fit:(parentAsync), pred, fam, varb, id, root, data = redata ) AIC.table(list(null, h1_h2_abs, h3, dir)) ## Distance comparisons h1_h2_dist <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC), pred, fam, varb, id, root, data = redata ) h3_dist <- aster( resp ~ varb + layer:(row + pos) + fit:(I(parentDistC^2)), pred, fam, varb, id, root, data = redata ) AIC.table(list(null, h1_h2_dist, h3_dist)) summary(h1_h2_dist, info.tol = 1e-09) # dist coefficient is positive, supporting H1 # Compare nested models to assess overall contributions of asynchrony and distance interactive <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC * parentAsync), pred, fam, varb, id, root, data = redata ) additive <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC + parentAsync), pred, fam, varb, id, root, data = redata ) minusDist <- aster( resp ~ varb + layer:(row + pos) + fit:(parentAsync), pred, fam, varb, id, root, data = redata ) minusAsync <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC), pred, fam, varb, id, root, data = redata ) AIC.table(list(interactive, additive, minusAsync, minusDist)) summary(interactive, info.tol = 1e-09) # Make model summary table # Generate Model Predictions ---------------------------------------------- # first, create model that uses uncentered dist, for easier interpretation mod <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDist * parentAsync), pred, fam, varb, id, root, data = redata ) newdata <- expand.grid( # Make a new dataset for predictions parentDist = seq(min(redata$parentDist), max(redata$parentDist), length.out = 15), parentAsync = c(-10, 0, 10), # make three levels of async varb = vars, resp = 1, root = 1, row = 5, # hypothetical individuals at row 5, position 5 pos = 5 ) # Reformat newdata to match data for Aster model objects newdata <- merge(x = newdata, y = newdata %>% distinct(parentDist, parentAsync) %>% mutate(id = 1:nrow(.))) # give each unique combo of dist and sync the same id for all vars newdata$varb <- factor(newdata$varb, levels = vars) # make sure varb column is a factor with same levels according to vars newdata$fit <- ifelse(grepl("hdCt", newdata$varb), newdata[grepl("hdCt", newdata$varb), "resp"], 0) # add dummy variable fit corresponding to hdCt vars newdata$layer <- as.factor(gsub("\\d", "", as.character(newdata$varb))) # add layer for estimating effects specific to each layer, rather than each varb newdata <- newdata %>% arrange(varb, id) # arrange the dataset by varb and id # Create structure for storing standard errors nDistSyncLevels <- length(unique(paste(as.character(newdata$parentDist), as.character(newdata$parentAsync)))) nnode <- length(vars) amat <- array(0, c(nDistSyncLevels, nnode, nDistSyncLevels)) foo <- grepl("hdCt", vars) for (k in 1:nDistSyncLevels) { amat[k, foo, k] <- 1 } # Run the prediction functions pout <- predict(mod, newdata = newdata, varvar = varb, idvar = id, root = root, se.fit = TRUE, amat = amat, info.tol = 1e-13) # Add fitness estimates and standard errors to data for prediction to visualize newdata$fit <- pout$fit newdata$se <- pout$se.fit # Make a separate dataset that only includes headCount predictions pout.hdCt <- newdata %>% filter(grepl("hdCt", varb)) %>% mutate(parentAsync = as.factor(parentAsync)) # Summarize observed cumulative head count data headCountTotals <- redata %>% filter(layer == "hdCt") %>% group_by(cgPlaId) %>% summarize(parentDist = unique(parentDist), parentAsync = unique(parentAsync), popCd.dam = unique(popCd.dam), cgPlaId.dam = unique(cgPlaId.dam), fit = sum(resp), popCd.sire=unique(popCd.sire), cgPlaId.sire = unique(cgPlaId.sire)) hdCt_summary <- headCountTotals %>% group_by(parentDist, fit) %>% summarize(n = n()) # FIGURE 1A: PREDICTED HEAD COUNT VS. DISTANCE ----------------------------- # colors <- sp::bpy.colors(5)[2:4] colors <- gray.colors(3,start = 0, end = 0.7, rev = T) p11 <- ggplot(pout.hdCt, aes(x = parentDist)) + labs(x = "Interparent Distance (m)", y = "Cumulative Head Count", size = 'Number of Individuals', color = "Interparent Asynchrony (Days)", fill = "Interparent Asynchrony (Days)") + geom_point(data = hdCt_summary, aes(x = parentDist, y = fit, size = n), fill = 'NA', color = 'black', shape = 21, alpha = 0.3, inherit.aes = FALSE)+ geom_ribbon(aes(ymin = fit - se, ymax = fit + se, fill = as.factor(parentAsync)), alpha = 0.25, color = NA) + geom_line(aes(y = fit, color = as.factor(parentAsync)), linewidth = 1.25) + xlab('Interparent Distance (m)') + ylab('Cumulative Head Count')+ theme_custom()+ scale_color_manual(values= colors, labels = c('-10 Days', '0 Days', '10 Days'), guide = 'none') + scale_fill_manual(values= colors, labels = c('-10 Days', '0 Days', '10 Days'), guide = 'none') + theme(axis.text = element_text(size = 12, color = "black"), axis.title = element_text(size = 14, color = "black"), legend.position = c(0.35, 0.75))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.spacing.x = unit(1,"mm"))+ scale_size_continuous(limits = range(c(hdCt_summary$n)), range = c(2,10), breaks = c(1, 5, 10, 40)) p11 # FIGURE 1B: FITNESS SURFACE FOR DIST & ASYNC ------------------------------ # Repeat prediction procedure with newdata to generate predictions across a surface of distance and asynchrony newdata <- expand.grid( # Make a new dataset for predictions parentDist = seq(min(redata$parentDist), max(redata$parentDist), length.out = 50), parentAsync = seq(-15, 15, length.out = 50), varb = vars, resp = 1, root = 1, row = 5, # hypothetical individuals at row 5, position 5 pos = 5 ) # Reformat newdata to match data for model objects newdata <- merge(x = newdata, y = newdata %>% distinct(parentDist, parentAsync) %>% mutate(id = 1:nrow(.))) # give each unique combo of dist and sync the same id for all vars newdata$varb <- factor(newdata$varb, levels = vars) # make sure varb column is a factor with same levels according to vars newdata$fit <- ifelse(grepl("hdCt", newdata$varb), newdata[grepl("hdCt", newdata$varb), "resp"], 0) # add dummy variable fit corresponding to hdCt vars newdata$layer <- as.factor(gsub("\\d", "", as.character(newdata$varb))) # add layer for estimating effects specific to each layer, rather than each varb newdata <- newdata %>% arrange(varb, id) # arrange the dataset by varb and id #Make a prediction with standard errors nDistSyncLevels <- length(unique(paste(as.character(newdata$parentDist), as.character(newdata$parentAsync)))) nnode <- length(vars) amat <- array(0, c(nDistSyncLevels, nnode, nDistSyncLevels)) foo <- grepl("hdCt", vars) for (k in 1:nDistSyncLevels) { amat[k, foo, k] <- 1 } # Run the prediction functions pout <- predict(mod, newdata = newdata, varvar = varb, idvar = id, root = root, se.fit = TRUE, amat = amat, info.tol = 1e-13) # Add fitness estimates and standard errors to data for prediction to visualize newdata$fit <- pout$fit newdata$se <- pout$se.fit # Make a separate dataset that only includes headCount pout.hdCt <- newdata %>% filter(grepl("hdCt", varb)) headCountTotals <- redata %>% filter(layer == "hdCt") %>% group_by(cgPlaId) %>% summarize(parentDist = unique(parentDist), parentAsync = unique(parentAsync), popCd.dam = unique(popCd.dam), cgPlaId.dam = unique(cgPlaId.dam), fit = sum(resp), popCd.sire=unique(popCd.sire), cgPlaId.sire = unique(cgPlaId.sire)) pout.hdCt <- pout.hdCt %>% mutate(fit_binned = cut(fit, breaks = c(0, 5, 7, 9, 11, 25))) p33 <- ggplot(pout.hdCt, aes(x = parentAsync, y = parentDist)) + labs(x = "Interparent Asynchrony (Days)", y = "Interparent Distance (m)", fill = "Cumulative\nHead Count") + geom_tile(aes(fill = fit))+ scale_fill_viridis_b(n.breaks = 8, option = 'B')+ geom_contour(aes(z = fit), color = 'white',binwidth = 1)+ theme_custom()+ theme( axis.text = element_text(size = 12, color = "black"), axis.title = element_text(size = 14, color = "black"), )+ geom_rug(data = headCountTotals, mapping = aes(x = parentAsync, y = parentDist), alpha = 1/2, position = 'jitter', inherit.aes = FALSE) p11 + p33 + plot_annotation(tag_levels = "A") # Make predictions for examples in results section ------------------------ # Repeat steps for making predictions from the section above newdata <- expand.grid( # Make a new dataset for predictions parentDist = c(0, 5000, 9000), parentAsync = c(-10, 0, 10), # make three levels of async varb = vars, resp = 1, root = 1, row = 5, # hypothetical individuals at row 5, position 5 pos = 5 ) # Reformat newdata to match data for Aster model objects newdata <- merge(x = newdata, y = newdata %>% distinct(parentDist, parentAsync) %>% mutate(id = 1:nrow(.))) # give each unique combo of dist and sync the same id for all vars newdata$varb <- factor(newdata$varb, levels = vars) # make sure varb column is a factor with same levels according to vars newdata$fit <- ifelse(grepl("hdCt", newdata$varb), newdata[grepl("hdCt", newdata$varb), "resp"], 0) # add dummy variable fit corresponding to hdCt vars newdata$layer <- as.factor(gsub("\\d", "", as.character(newdata$varb))) # add layer for estimating effects specific to each layer, rather than each varb newdata <- newdata %>% arrange(varb, id) # arrange the dataset by varb and id nDistSyncLevels <- length(unique(paste(as.character(newdata$parentDist), as.character(newdata$parentAsync)))) nnode <- length(vars) amat <- array(0, c(nDistSyncLevels, nnode, nDistSyncLevels)) foo <- grepl("hdCt", vars) for (k in 1:nDistSyncLevels) { amat[k, foo, k] <- 1 } # Run the prediction functions pout <- predict(mod, newdata = newdata, varvar = varb, idvar = id, root = root, se.fit = TRUE, amat = amat, info.tol = 1e-13) # Add fitness estimates and standard errors to data for prediction to visualize newdata$fit <- pout$fit newdata$se <- pout$se.fit # Make a separate dataset that only includes headCount pout.hdCt <- newdata %>% filter(grepl("hdCt2022", varb)) %>% mutate(parentAsync = as.factor(parentAsync)) # Alternative visualization of cumulative head count predictions by distance: ggplot(pout.hdCt, aes(x = as.factor(parentDist), y = fit, group = parentAsync, color = parentAsync))+ geom_linerange(aes(ymin = fit - se, ymax = fit + se), position = position_dodge(width = .25))+ geom_point(size = 3, position = position_dodge(width = .25))+ xlab('Interparent Distance (m)') + ylab('Cumulative Head Count')+ theme_custom()+ scale_color_manual(values= colors, labels = c('-10 Days', '0 Days', '10 Days')) + guides(color = guide_legend("Interparent Asynchrony (Days)"))+ theme(axis.text = element_text(size = 12, color = "black"), axis.title = element_text(size = 12, color = "black"))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.spacing.x = unit(1,"mm"), legend.position = c(0.25, 0.8)) # Async = 0, dist = 5km vs dist = 0km pout.hdCt[pout.hdCt$parentDist == 5000 & pout.hdCt$parentAsync %in% "0", c("fit","se")] pout.hdCt[pout.hdCt$parentDist == 0 & pout.hdCt$parentAsync %in% "0", c("fit","se")] (pout.hdCt[pout.hdCt$parentDist == 5000 & pout.hdCt$parentAsync %in% "0", c("fit")] - pout.hdCt[pout.hdCt$parentDist == 0 & pout.hdCt$parentAsync %in% "0", c("fit")]) / pout.hdCt[pout.hdCt$parentDist == 0 & pout.hdCt$parentAsync %in% "0", c("fit")] # Async = -10, dist = 9km vs dist = 0km pout.hdCt[pout.hdCt$parentDist == 9000 & pout.hdCt$parentAsync %in% "-10", c("fit","se")] pout.hdCt[pout.hdCt$parentDist == 0 & pout.hdCt$parentAsync %in% "-10", c("fit","se")] (pout.hdCt[pout.hdCt$parentDist == 9000 & pout.hdCt$parentAsync %in% "-10", c("fit")] - pout.hdCt[pout.hdCt$parentDist == 0 & pout.hdCt$parentAsync %in% "0", c("fit")]) / pout.hdCt[pout.hdCt$parentDist == 0 & pout.hdCt$parentAsync %in% "0", c("fit")] # Async = +10, dist = 9km pout.hdCt[pout.hdCt$parentDist == 9000 & pout.hdCt$parentAsync %in% "10", c("fit","se")] # Async = +10, dist = 0km pout.hdCt[pout.hdCt$parentDist == 0 & pout.hdCt$parentAsync %in% "10", c("fit","se")] # Async = -10, dist = 9km pout.hdCt[pout.hdCt$parentDist == 9000 & pout.hdCt$parentAsync %in% "-10", c("fit","se")] pout.hdCt[pout.hdCt$parentDist == 0 & pout.hdCt$parentAsync %in% "0", c("fit","se")] (pout.hdCt[pout.hdCt$parentDist == 5000 & pout.hdCt$parentAsync %in% "0", c("fit")] - pout.hdCt[pout.hdCt$parentDist == 0 & pout.hdCt$parentAsync %in% "0", c("fit")]) / pout.hdCt[pout.hdCt$parentDist == 0 & pout.hdCt$parentAsync %in% "0", c("fit")] # Post-Hoc Model Comparisons ---------------------------------------------- # 1. Is the effect of asynchrony attributable to achene position? # Create ordinal values for achene position categories -- model did not converge with positions as factors. redata$ach_pos_rank <- as.numeric(as.factor(redata$ach_pos)) redata[redata$ach_pos_rank %in% 4, "ach_pos_rank"] <- 3.5 # First, assess correlation between asynchrony and achene position: # extract distinct individuals from data used in model dd <- redata %>% distinct(id, ach_pos_rank, parentAsync) # correlation test cor.test(dd$ach_pos_rank, dd$parentAsync,method = "kendall") # yes, they are correlated # Perform model comparisons to assess if achene position improves upon interactive model: achpos <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC * parentAsync + ach_pos_rank), pred, fam, varb, id, root, data = redata ) achpos_noasync <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC * ach_pos_rank), pred, fam, varb, id, root, data = redata ) summary(achpos, info.tol = 1e-09) anovaAsterOrReasterList(list(interactive, achpos), tolerance = 1e-09) AIC.table(list(achpos, achpos_noasync, interactive)) # No strong indication that including the achene position significantly improves # the model. # SUPPLMENTAL FIGURE S5: ACHENE POSITION & START DATE ---------------------- dd <- redata %>% distinct(id, ach_pos_rank, parentAsync, start.dam) ggplot(dd, aes(y = as.factor(ach_pos_rank), x = parentAsync, group = ach_pos_rank))+ geom_boxplot(width = 0.5)+ ylab('Fruit Position')+ xlab('Interparent Asynhrony (Days)')+ scale_y_discrete(name ="Achene Position", breaks =c(1, 2, 3, 3.5), labels = c("Bottom 30","Middle","Top 11-30", "Top 10"))+ geom_jitter(height = 0.1, width = 0.1, alpha = 0.25)+ theme_custom()+ coord_flip() # 2. Is directional asynchrony related to start date of flowering? dd <- predictors %>% filter(cgPlaId %in% redata$cgPlaId) %>% distinct(cgPlaId, diffStartDir, start.dam, start.sire) %>% mutate(start.dam = as.numeric(as.character(format(as.Date(start.dam), "%j"))), start.sire = as.numeric(as.character(format(as.Date(start.sire), "%j"))), start.dam.date = as.Date(start.dam), start.sire.date = as.Date(start.sire)) # Does start date of flowering explain offspring fitness as well or better than # interparent asynchrony? # Reformat date from character to Julian date (numeric) redata <- redata %>% mutate(start.dam = as.numeric(as.character(format(as.Date(start.dam), "%j"))), start.sire = as.numeric(as.character(format(as.Date(start.sire), "%j")))) # Make model with dist*asynchrony + maternal start date add_mat_sd <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC * parentAsync + start.dam), pred, fam, varb, id, root, data = redata ) # Make model with dist*maternal start date replace_mat_sd <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC * start.dam), pred, fam, varb, id, root, data = redata ) # Dist + maternal start date replace_mat_sd_additive <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC + start.dam), pred, fam, varb, id, root, data = redata ) # Dist*asynchrony + paternal start date add_pat_sd <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC * parentAsync + start.sire), pred, fam, varb, id, root, data = redata ) # Dist*paternal start date replace_pat_sd <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC * start.sire), pred, fam, varb, id, root, data = redata ) # Dist + paternal start date replace_pat_sd_additive <- aster( resp ~ varb + layer:(row + pos) + fit:(parentDistC + start.sire), pred, fam, varb, id, root, data = redata ) # Compare models anova(interactive, add_mat_sd) # no difference when adding start date anova(interactive, add_pat_sd) # no difference when adding start date anova(interactive, add_mat_sd) # no difference when adding start date anova(interactive, add_pat_sd) # no difference when adding start date AIC.table(list(interactive, add_mat_sd, replace_mat_sd, replace_mat_sd_additive)) AIC.table(list(interactive, add_pat_sd, replace_pat_sd, replace_pat_sd_additive)) AIC.table(list(interactive, add_pat_sd, replace_pat_sd, replace_pat_sd_additive, add_mat_sd, replace_mat_sd, replace_mat_sd_additive)) AIC.table(list(interactive, replace_mat_sd)) AIC.table(list(interactive, replace_pat_sd)) # FIGURE 3: START DATE VS. ASYNCHRONY ------------------------------------- pd <- position_dodge2(0.9) colors <- sp::bpy.colors(5)[2:4] colors5 <- sp::bpy.colors(5) ggplot(dd, aes(x = diffStartDir))+ geom_jitter(aes(y = start.dam.date), color = "maroon", alpha = 0.35, size = 2)+ geom_jitter(aes(y = start.sire.date), color = "gold2", alpha = 0.35, size = 2)+ stat_smooth(aes(y = start.dam.date), method = "gam",formula = y ~ s(x, bs = "cs"), color = "maroon", fill = "maroon")+ stat_smooth(aes(y = start.sire.date), method = "gam",formula = y ~ s(x, bs = "cs"), color = "gold", fill = "gold")+ # stat_smooth(aes(y = start.dam), method = "gam",formula = y ~ s(x, bs = "cs"), color = colors[1], fill = colors[1])+ # stat_smooth(aes(y = start.sire), method = "gam",formula = y ~ s(x, bs = "cs"), color = colors[3], fill = colors[3])+ theme_custom()+ theme(legend.position = "right")+ ylab('Start Date of Flowering')+ xlab('Interparent Asynchrony')+ scale_y_date(date_labels = "%b %d") # SUPPLEMENTAL FIGURE S3: SUMMARIES BY POPULATION ---------------------------- dd.dam.start <- predictors %>% filter(cgPlaId %in% redata$cgPlaId) %>% distinct(cgPlaId.dam, start.dam, popCd.dam) %>% mutate(start.dam = as.numeric(as.character(format(as.Date(start.dam), "%j"))),) dd.sire.start <- predictors %>% filter(cgPlaId %in% redata$cgPlaId) %>% distinct(cgPlaId.sire, start.sire, popCd.sire) %>% mutate(start.sire = as.numeric(as.character(format(as.Date(start.sire), "%j")))) dd.dam.async <- predictors %>% filter(cgPlaId %in% redata$cgPlaId) %>% distinct(cgPlaId.dam, start.dam, popCd.dam, diffStartDir) %>% mutate(start.dam = as.numeric(as.character(format(as.Date(start.dam), "%j")))) dd.sire.async <- predictors %>% filter(cgPlaId %in% redata$cgPlaId) %>% distinct(cgPlaId.sire, start.sire, popCd.sire, diffStartDir) %>% mutate(start.sire = as.numeric(as.character(format(as.Date(start.sire), "%j")))) a <- ggplot(dd.dam.start, aes(x = popCd.dam, y = start.dam))+ ylim(175, 196)+ xlab('Source Population')+ ylab('Maternal Start Date of Flowering')+ geom_dotplot(binaxis='y', stackdir='center', binwidth = 1, dotsize = 0.75)+ theme_custom()+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), legend.position = "none", panel.grid.major.y = element_line()) a <- a + annotate("text", x=0, y = Inf, label = "Maternal Plants", vjust=1, hjust=-0.1, size = 3) a <- a + coord_flip() a b <- ggplot(dd.sire.start, aes(x = popCd.sire, y = start.sire))+ geom_violin(fill = NA, color = 'gray77')+ geom_dotplot(binaxis='y', stackdir='center', dotsize = 0.35, binwidth = 1)+ ylim(175, 196)+ xlab('Source Population')+ ylab('Paternal Start Date of Flowering')+ theme_custom()+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), panel.grid.major.y = element_line(), legend.position = 'none') b <- b + coord_flip() + annotate("text", x=Inf, y = Inf, label = "Paternal Plants", vjust=1, hjust=-0.1, size = 3) b c <- ggplot(dd.dam.async, aes(x = popCd.dam, y = diffStartDir))+ geom_boxplot()+ ylim(-13, 13)+ xlab('Maternal Source Population')+ ylab('Interparent Asynchrony (Days)')+ geom_dotplot(binaxis='y', stackdir='center', binwidth = 1, dotsize = 0.4)+ # geom_jitter(height = 0, width = 0.1, alpha = 0.25)+ theme_custom()+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) c d <- ggplot(dd.sire.async, aes(x = popCd.sire, y = diffStartDir))+ geom_boxplot()+ ylim(-13, 13)+ ylab('Interparent Asynchrony (Days)')+ xlab('Paternal Source Population')+ # geom_jitter(height = 0, width = 0.1, alpha = 0.25)+ geom_dotplot(binaxis='y', stackdir='center', binwidth = 1, dotsize = 0.4)+ theme_custom()+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) d a + b + c + d + plot_layout(ncol = 2) + plot_annotation(tag_levels = "A") # FIGURE S3: FITNESS SUMMARIES -------------------------------------------- # FIG. S3a : CUMULATIVE HEAD COUNT HISTOGRAM p1 <- ggplot(headCountTotals, aes(x = fit))+ geom_histogram(bins = 12, color = 'black')+ theme_custom()+ xlab('Cumulative Head Count')+ ylab('Number of Individuals')+ theme(axis.text = element_text(size = 10, color = "black"), axis.title = element_text(size = 12, color = "black"), legend.position = "none")+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) p1 # FIG. S3b : NUMBER OF FLOWERING BOUTS HISTOGRAM flBoutTotals <- redata %>% filter(layer == "fl") %>% group_by(cgPlaId) %>% summarize(parentDist = unique(parentDist), parentAsync = unique(parentAsync), popCd.dam = unique(popCd.dam), cgPlaId.dam = unique(cgPlaId.dam), fit = sum(resp), popCd.sire=unique(popCd.sire), cgPlaId.sire = unique(cgPlaId.sire)) p2 <- ggplot(flBoutTotals, aes(x = fit))+ geom_histogram(bins = 7, color = 'black')+ theme_custom()+ xlab('Number of Flowering Bouts')+ ylab('Number of Individuals')+ theme(axis.text = element_text(size = 10, color = "black"), axis.title = element_text(size = 12, color = "black"), legend.position = "none")+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) p2 # FIG. S3c : DISTANCE HISTOGRAM p4 <- ggplot(headCountTotals, aes(x = parentDist))+ geom_histogram(bins = 9, color = 'black')+ theme_custom()+ xlab('Interparent Distance (m)')+ ylab('Number of Individuals')+ theme(axis.text = element_text(size = 10, color = "black"), axis.title = element_text(size = 12, color = "black"), legend.position = "none")+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), plot.margin = margin(c(13,13,13,13))) p4 # FIG. S3d : ASYNCHRONY HISTOGRAM p5 <- ggplot(headCountTotals, aes(x = parentAsync))+ geom_histogram(bins = 9, color = 'black')+ theme_custom()+ xlab('Interparent Asynchrony (Days)')+ ylab('Number of Individuals')+ theme(axis.text = element_text(size = 10, color = "black"), axis.title = element_text(size = 12, color = "black"), legend.position = "none")+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), plot.margin = margin(c(13,13,13,13))) p5 # FIG. S3e : CORRELATION BETWEEN DISTANCE AND ASYNCHRONY tt2 <- headCountTotals %>% group_by(parentDist, parentAsync) %>% summarize(n = n()) p6 <- ggplot(headCountTotals, aes(x = parentDist, y = parentAsync))+ geom_jitter(alpha = 0.25,width = 70, height = 1, size = 2)+ # geom_point(data = tt2, aes(size = n), shape = 21, fill = "gray", alpha = 0.25)+ stat_smooth(method = 'lm')+ theme_custom()+ xlab('Interparent Distance (m)')+ ylab('Interparent Asynchrony (Days)')+ scale_size_area(name = 'Number of Individuals', breaks = c(1, 5, 15, 100))+ theme(axis.text = element_text(size = 10, color = "black"), axis.title = element_text(size = 12, color = "black"))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = 'bottom', plot.margin = margin(c(13,13,13,13))) p6 layout <- " AABB CCDD EE## " p1 + p2 + p4 + p5 + p6 + plot_annotation(tag_levels = 'A') + plot_layout(ncol = 2, design = layout) # Data summaries for results ---------------------------------------------- # Min and max of key variables min(redata$parentAsync) max(redata$parentAsync) min(redata$parentDist) max(redata$parentDist) # How many died before flowering? deadplas <- redata %>% filter(layer %in% 'ld' & resp == 0) %>% distinct(cgPlaId) headCountTotals %>% filter(cgPlaId %in% deadplas$cgPlaId) %>% count(fit) 236/length(unique(redata$cgPlaId)) # 54% died without producing any flowering heads # Maximum number of flowering bouts flBouts <- data %>% filter(cgPlaId %in% redata$cgPlaId) %>% select(starts_with("fl")) %>% rowSums(.) df <- data.frame(cgPlaId = data[data$cgPlaId %in% redata$cgPlaId, 'cgPlaId'], flBouts = flBouts) # Number of heads produced max(headCountTotals$fit) redata %>% filter(layer %in% c('fl','hdCt')) %>% filter(cgPlaId == 21243) # Correlation between inter-parent distance and inter-parent asynchrony cor(headCountTotals$parentDist, headCountTotals$parentAsync) cor.test(headCountTotals$parentDist, headCountTotals$parentAsync) # FIGURE S1: STUDY AREA MAP ------------------------------------------------ # Our study sites are located in areas that are easy to access, and E. # angustifolia is a desirable plant that is relatively hard to find # commercially. Unfortunately, we've experienced poaching -- people digging up # plants from the roots -- in the past. To avoid harm to these populations, we # have omitted the code we used to create our study maps, which uses the # population coordinates, here. We will make these data available for research # purposes upon request.