# This script will perform the aster analysis of annual maternal reproductive fitness
# based on the survival of seedlings produced in one reproductive bout over 8 years.


# Load packages -----------------------------------------------------------

# For data manipulation:
library(dplyr)
library(tidyr)
library(tidyverse)
library(reshape2)

# For Aster analysis:
library(aster)

# For visualizations:
library(ggplot2)
library(extrafont)
library(patchwork)
library(RColorBrewer)

# Load useful functions ---------------------------------------------------

### AIC Aster
# Function to calculate AIC (Akaike Information Criterion) from Aster model output

AIC.aster <- function(m){
  dev <- m$deviance
  k <- length(m$coef)
  AIC <- dev + 2*k
  return(AIC)
}

### AIC Table
# Make a table of AIC values for a list of model objects

AIC.table <- function(ll){
  
  for(i in 1:length(ll)){
    nam <- paste("m",i,sep = "")
    assign(nam, ll[[i]])
  }
  
  ncomp <- length(ll)
  
  foo <- unlist(lapply(1:ncomp, FUN = function(j){
    AIC.aster(eval(parse(text = paste("m", j ,
                                      sep = ""))))
  }))
  
  noPar <-unlist(lapply(1:ncomp, FUN = function(j){
    length(eval(parse(text = paste("m", j ,sep = "")))$coef)}))
  
  AIC <- foo
  
  ## AIC values compared to the selected model
  deltaAIC <- as.data.frame(round(foo - min(foo),4))
  
  ## The term exp((AICmin - AICi)/2) is interpreted as
  ## being proportional to the probability that the ith
  ## model minimizes the (estimated) information loss.
  prAIC <- as.data.frame(round(exp((min(foo) - foo)/2), 4))
  
  ## put model names in a vector
  models <- as.data.frame(paste("m", 1:ncomp, sep = ""))
  
  ## make a table that displays the AIC for each model
  tableAICs <- cbind(models, noPar, AIC, deltaAIC, prAIC) #
  names(tableAICs) <- c('model','noPar', 'AIC', 'delta', 'prAIC')
  
  tableAICs
}

### Custom ggplot theme
# This function makes a nice ggplot theme

theme_custom <- function(){
  theme_bw() +
    theme(
      axis.text = element_text(size = 8, color = "black"),
      axis.title = element_text(size = 12, color = "black"),
      axis.line.x = element_line(color="black"),
      axis.line.y = element_line(color="black"),
      panel.border = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.grid.minor.x = element_blank(),
      panel.grid.minor.y = element_blank(),
      panel.grid.major.y = element_blank(),
      plot.margin = unit(c(0.4, 0.4, 0.4, 0.4), units = "cm"),
      plot.title = element_text(size = 18, vjust = 1, hjust = 0),
      legend.text = element_text(size = 10),
      legend.key = element_blank(),
      legend.background = element_rect(color = "black",
                                       fill = "transparent",
                                       linewidth = 2, linetype = "blank"))
}


# LOAD DATA ---------------------------------------------------------------

# 1. Sling fitness data
dat <- read.csv('seedling_demography.csv')

# 2. Mating potential data
matingPotential <- read.csv('mating_potential.csv')

# MERGE DATASETS ----------------------------------------------------------

# update radius to sling search area
matingPotential$searchArea <- matingPotential$radius

# Produce summaries based on search area:
radius32 <- matingPotential[matingPotential$radius %in% 32,]
radius50 <- matingPotential[matingPotential$radius %in% 50,]
radius80 <- matingPotential[matingPotential$radius %in% 80,]
table(radius32$site, radius32$year)
table(radius50$site, radius50$year)
rm(radius32, radius50, radius80)

# merge sling data and mating potential data by focalCd
sling.merged <- merge(dat, matingPotential[,c('focalCd','hdCt.n','hdCt.o','searchArea','matingPotential1','matingPotential2')], by = 'focalCd', all.x = T, all.y = T)

# identify focalCds without mating potential
noMatPot <- unique(sling.merged[is.na(sling.merged$matingPotential1),'focalCd'])
# no mating potential for the following focal cds:
# c("ness-08-32", "randt-10-31", "sgc-13-31")

# all the circles missing mating potential data are random points
# remove them from mating potential analysis
sling.merged <- sling.merged[!sling.merged$focalCd %in% c("ness-08-32", "randt-10-31", "sgc-13-31"),]

# done with these, remove dat and matingPotential
rm(matingPotential, dat, noMatPot)

# Make a column for site, extracted from focalCd
sling.merged <- sling.merged %>% 
  mutate(focalCd = as.character(focalCd))

sling.merged$site <- unlist(lapply(strsplit(sling.merged$focalCd , split='-', fixed=TRUE), `[`, 1))
# table(sling.merged$site)
# colnames(sling.merged)

# Standardize a few site names:
sling.merged$site[sling.merged$site %in% 'lih'] = 'ngc'
sling.merged$site[sling.merged$site %in% 'nnwlf'] = 'nwlf'
sling.merged$site[sling.merged$site %in% 'randt'] = 'rndt'

# Add year to sling.merged for circles without slings
sling.merged$slingYr <- as.numeric(paste(20,sling.merged$focalCd %>% strsplit( "-" ) %>% sapply( "[", 2 ), sep = ''))
sling.merged[is.na(sling.merged$year),'year'] <- sling.merged[is.na(sling.merged$year),'slingYr']

# Add living-during status of 0 to circles without slings
sling.merged[is.na(sling.merged$newLD),'newLD'] <- 0

# Check data for completeness:
sling.merged %>%
  distinct(slingCd, .keep_all = TRUE) %>%
  select(hdCt.n, matingPotential1) %>%
  apply(2, function(x) sum(is.na(x)))
# no NAs in hdCt.n or matingPotential


# SET UP THE ASTER GRAPH --------------------------------------------------

### 1. Define a max age to use in the model
max.age = 8 # for now set to 8 (age 0 (spring), age 0 (fall), 1, 2, ...)

### 2. Define variables for the model
vars = as.character(c('anySlings','slingCt0spring','slingCt0fall', paste0('slingCt', 1:max.age)))

### 3. Define the graphical structure (predecessor)
pred = 0:(max.age+2)

# Check to make sure this works
rbind(succ = vars, pred = c("initial", vars)[pred + 1])

### 4. Define families
# Initial -> age0 (# slings at age 0) likely has greater than Poisson variance
# Every living-during transition should be Bernoulli

fam = c(1, 3, rep(1, (length(vars)) - 2))
fam

# check again
rbind(succ = vars, pred = c("initial", vars)[pred + 1], fam = fam)

### 5. Reshape the data:

# First, add initial sling counts, the number of Offspring found in spring searches
initial_finds <- sling.merged[sling.merged$year == sling.merged$slingYr &! is.na(sling.merged$slingCd),]
initial_finds$age <- '0spring' #  adjust year so that it is age 0 in next step
initial_finds[,'newLD'] <-  1 # set living-during code  to 1 for all plants found (assume all were alive when found initially)

# add age to sling.merged dataset to distirnguish from initial finds
sling.merged$age <- as.character(sling.merged$year - sling.merged$slingYr)
sling.merged <- rbind(sling.merged, initial_finds)
sling.merged[sling.merged$age %in% '0','age'] <- '0fall'
sling.merged$age <- factor(sling.merged$age, levels = c('0spring','0fall','1','2','3','4','5','6','7','8','9','10','11','12', '13', '14'))

# adjust age for circles without slings to be '0spring' (a count from the initial search)
sling.merged[is.na(sling.merged$slingCd),'age'] <- '0spring'

head(sling.merged)
table(sling.merged$age)

t2.aster <- sling.merged %>%
  # mutate(age = year - slingYr) %>%
  group_by(focalCd, age) %>%
  summarize(slingCt = sum(newLD),
            # including mating potential from maternal mating bout as a predictor
            matingPotential = unique(matingPotential1),
            # include number of normal heads in mating year as a predictor
            hdCt = unique(hdCt.n),
            # include age as a predictor
            age = unique(age),
            # keep sling year (year that search occurred) as possible predictor
            slingYr = unique(slingYr),
            # include site as a predictor
            site = unique(site),
            searchArea = unique(searchArea)) %>% 
  # copied and modified from code for q2
  # Gather all lds ad lfCt into 'resp' column
  gather(key = varb, value = resp, -c(focalCd, slingYr, site, hdCt, searchArea, matingPotential, age)) %>%
  # Change varb for ld years to `ld.ageX`
  # (if the entry in the  `varb` column is just 'slingCt', add "age" in after it)
  mutate(varb = ifelse(grepl('slingCt', varb), paste0(varb, age), varb))

# Need to fill out t2 for circles without any seedlings:

# Find circles without seedlings
circles_noslings <- t2.aster[t2.aster$age %in% '0spring' & t2.aster$resp == 0,] # 972 circles without slings
# Create a dataframe with each node and ID of plants without seedlings:
add <- expand.grid(circles_noslings$focalCd, vars[-1]) # expand grid for all varbs except for anySlings node, which we add
colnames(add) <- c('focalCd','varb')
test <- merge(circles_noslings, add, by = c('focalCd','varb'), all = T)

# Format no-seedling-circle data to add to seedling demography data:
test <- test %>% 
  group_by(focalCd) %>% 
  mutate(matingPotential = max(matingPotential, na.rm = T),
         hdCt = max(hdCt, na.rm = T),
         slingYr = max(slingYr, na.rm = T),
         site = site[!is.na(site)],
         searchArea = max(searchArea, na.rm = T),
         resp = 0,
         age = gsub('slingCt','',varb)) %>% 
  filter(!varb %in% 'slingCt0spring')

t2.aster <- rbind(t2.aster,test)

# Create rows for 0/1 if any seedlings produced node

anySlings <- t2.aster %>% 
  filter(varb %in% 'slingCt0spring') %>% 
  mutate(varb = "anySlings",
         resp = case_when(resp == 0 ~ 0,
                          resp > 0 ~ 1),
         age = NA)
t2.aster <- rbind(t2.aster, anySlings)

# Check if there are any non-declining sling cts
checkSlingsIDs <- sling.merged %>%
  group_by(slingCd) %>%
  arrange(slingCd, age)%>%
  mutate(slingDiff = newLD - lag(newLD, n = 1)) %>%
  select(focalCd, age, slingDiff, slingCd) %>%
  filter(slingDiff > 0)

## two issues to fix manually:
t2.aster[t2.aster$focalCd %in% 'kj-09-14',]
t2.aster[t2.aster$focalCd %in% 'kj-09-14' & t2.aster$age %in% 10,'resp'] <- 2
t2.aster[t2.aster$focalCd %in% 'spp-13-17' & t2.aster$age %in% 6,'resp'] <- 1

rm(checkSlingsIDs)

vars <- factor(vars, levels = vars)

# Now add other columns for aster:

t2.aster = merge(
  x = t2.aster,
  y = data.frame(varb = vars, pred = pred, fam = fam),
  by = 'varb') %>%
  
  # Re-sort rows
  arrange(focalCd, varb) %>%
  
  # Add a column for the root (initial sample)
  mutate(root = 1)

# slingYr should be a factor
t2.aster = t2.aster %>%
  mutate(slingYr = factor(slingYr),
         site = as.factor(site))

# Re-order t2.aster by varb and focalCd (aster need this to work)
t2.aster$varb <- factor(t2.aster$varb,levels = vars)
t2.aster <- t2.aster %>%
  arrange(varb, focalCd) %>%
  mutate(slingYr = factor(slingYr))

# add 'layer' column
t2.aster$layer <- as.factor(ifelse(t2.aster$varb %in% 'slingCt0spring','emergence','survival'))

# Add `fit` columns of interest:

# First is for initial number of slings (slingCt0spring)
# t2.aster$initial_fit = as.numeric(t2.aster$varb %in% 'slingCt0spring')
# try version where initial_fit includes binomial and poisson components of emergence
t2.aster$initial_fit = as.numeric(t2.aster$varb %in% 'slingCt0spring')
t2.aster$initial_fit_ZP = as.numeric(t2.aster$varb %in% 'anySlings')

# Alternatively, final fitness node could be is for total number of slings at age6
t2.aster$final_fit = as.numeric(t2.aster$varb %in% 'slingCt8')

# BASIC DATA SUMMARIES ----------------------------------------------------

# Is it appropriate to use Zero-truncated Poisson distribution after accounting for 0/1 if
# any slings were produced?

t2.aster %>% 
  filter(age %in% '0spring' & resp > 0) %>% 
  ggplot(., aes(x = resp))+
  geom_histogram()

sum(t2.aster[t2.aster$age %in% "0spring","resp"]) # 914 seedlings found
sum(t2.aster[t2.aster$age %in% 7,"resp"]) # 117 by age 7
sum(t2.aster[t2.aster$age %in% 8,"resp"]) # 98 by age 8
nrow(t2.aster[t2.aster$age %in% "0spring" & t2.aster$resp == 0,]) # 972 circles with 0 seedling in initial search
nrow(t2.aster[t2.aster$age %in% "0spring",]) # out of 1278 circles to begin with
head(sort(t2.aster[t2.aster$age %in% "0spring","resp"], decreasing = T)) # circles with the most seedlings
t2.aster[t2.aster$resp %in% 45,] # find which ID had the most seedlings
t2.aster[t2.aster$focalCd %in% "ness-08-18",] # by age 8, this maternal plant had 3 surviving progeny from this mating bout

# MAKE ASTER MODELS -------------------------------------------------------

sub = aster(
  resp ~ varb + initial_fit:(searchArea) + layer:(hdCt + slingYr + site),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

initial_count = aster(
  resp ~ varb + initial_fit:(searchArea) + layer:(hdCt + slingYr + site) + initial_fit:(matingPotential),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

final_count = aster(
  resp ~ varb + initial_fit:(searchArea) + layer:(hdCt + slingYr + site) + final_fit:(matingPotential),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

super = aster(
  resp ~ varb + initial_fit:(searchArea) + layer:(hdCt + slingYr + site) + initial_fit:(matingPotential) + final_fit:(matingPotential),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# MODEL COMPARISONS -------------------------------------------------------

# Compare sub model to final count model
anova(sub, initial_count, super) 
anova(sub, final_count, super)

summary(sub, info.tol = 1e-7)
summary(initial_count, info.tol = 1e-8)
summary(final_count, info.tol = 1e-7)
summary(super, info.tol = 1e-8)

AIC.table(list(sub, initial_count, final_count, super))

# ASSESS RELATIVE EFFECTS OF MODEL COVARIATES -----------------------------

#### Model selection for mating potential variables ####

# Full Model
m1 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(matingPotential + hdCt + slingYr + site),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Head Count
m2 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(matingPotential         + slingYr + site),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Cohort
m3 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(matingPotential + hdCt            + site),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Site
m4 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(matingPotential + hdCt + slingYr        ),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Mating Potential
m5 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(                  hdCt + slingYr + site),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Head Count + Cohort
m6 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(matingPotential +                + site),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Head Count + Site

m7 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(matingPotential +        slingYr ),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Head Count + Mating Potential

m8 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(                       + slingYr + site),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Cohort + Site
m9 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(matingPotential + hdCt                 ),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Cohort + Mating Potential

m10 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(                  hdCt          + site),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Site + Mating Potential

m11 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(                  hdCt + slingYr      ),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Head Count + Cohort + Site

m12 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(matingPotential                      ),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Head Count + Cohort + Mating Potential
m13 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(                                 site),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Cohort + Site + Mating Potential
m14 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:(                  hdCt               ),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Ddrop Head Count + Mating Potential + Site
m15 = aster(
  resp ~ varb + initial_fit:(searchArea) + initial_fit:( slingYr),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# Drop Head Count + Site + Cohort + Mating Potential

m16 = aster(
  resp ~ varb + initial_fit:(searchArea),
  pred = pred, fam = fam, varvar = varb, idvar = focalCd, root = root, data = t2.aster)

# source('functions.R')
AIC.table(list(m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15, m16))
# best model includes matingPotential, hdCt, and slingYr, and cohort
# second best drops mating potential

rm(m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15, m16)

# Generate model predictions ----------------------------------------------

# Make a dataset for which to generate predictions
newdata = expand.grid(
  matingPotential = seq(from = min(sling.merged$matingPotential1), to = max(sling.merged$matingPotential1), length.out = 300),
  slingYr = 2012,
  searchArea = 41,
  site = 'sap',
  hdCt = 1,
  varb = vars,
  resp = 1,
  root = 1
)

newdata = merge(
  x = newdata,
  y = newdata %>% distinct(matingPotential, slingYr, hdCt, site, searchArea) %>% mutate(id = 1:nrow(.))
) %>%
  mutate(initial_fit = as.numeric(varb %in% 'slingCt0spring'),
         final_fit = as.numeric(varb %in% 'slingCt8'),
         slingYr = as.factor(slingYr)) %>%
  arrange(varb, id)
newdata$layer <- as.factor(ifelse(newdata$varb %in% 'slingCt0spring','emergence','survival'))

# Next, make predictions for slingCt0spring and slingCt8 with super model ####

# make an array corresponding to all level combinations of the predictors in the dataset and the nodes in the dataset
nLevels <- length(unique(paste(as.character(newdata$matingPotential), as.character(newdata$slingYr), as.character(newdata$hdCt))))
nnode <- length(vars)
amat <- array(0, c(nLevels, nnode, nLevels))


all_predictions <- data.frame()

for(i in 1:length(vars)){
  vari <- vars[i]
  
  # identify the indices of this array that correspond to "fitness", in this case slingCt at age0
  # if multiple nodes correspond to fitness, these will sum across level-combinations in the array (i think)
  foo <- grepl(vari, vars)
  for (k in 1:nLevels) {
    amat[k, foo, k] <- 1
  }
  
  # Run the prediction functions
  pout <- predict(super,
                  newdata = newdata,
                  varvar = varb, 
                  idvar = id, 
                  root = root, 
                  se.fit = TRUE, 
                  amat = amat)
  
  # Add to a dataframe with the predictions, standard error for each predictor level combo
  
  predictions <- newdata %>% 
    select(matingPotential, slingYr) %>% 
    distinct() %>% 
    mutate(pred = pout$fit,
           se = pout$se.fit,
           slingYr = as.factor(slingYr),
           varb = vari) 
  
  all_predictions <- rbind(all_predictions, predictions)
}

all_predictions <- all_predictions %>% 
  filter(!varb %in% 'anySlings')

yearCols <- brewer.pal(8, "Accent")



# FIGURE 2 ----------------------------------------------------------------

p3 <- all_predictions %>%
  ggplot(aes(x = matingPotential, y = pred, group = varb, color = varb, fill = varb)) +
  geom_ribbon(aes(ymin = pred - se, ymax = pred + se), alpha = 0.1, color = NA) +
  geom_line(aes(linewidth = varb),lineend = "round") +
  ylab('Predicted Offspring Count') +
  xlab('Mating Potential')+
  theme_custom()+
  theme(axis.text = element_text(size = 10, color = "black"),
        axis.title = element_text(size = 12, color = "black"),
        legend.position = "none")+
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  scale_color_manual(values=c('#70be44', rep("lightgray",8) , '#c3a3cd'))+
  scale_fill_manual(values=c('#70be44', rep("lightgray",8), '#c3a3cd'))+
  scale_linewidth_manual(values = c(1, rep(0.5,8),1))

p3

#### Make a plot with unconditional estimates by cohort year ####

# Make a dataset of hypothetical individuals to generate predictions
newdata = expand.grid( matingPotential = mean(sling.merged$matingPotential1, na.rm = T), slingYr = unique(t2.aster$slingYr),
  searchArea = 41, site = 'sap', hdCt = 1, varb = vars, resp = 1, root = 1 )

newdata = merge(
  x = newdata,y = newdata %>% distinct(matingPotential, slingYr, hdCt, site, searchArea) %>% mutate(id = 1:nrow(.))) %>%
  mutate(initial_fit = as.numeric(varb %in% 'slingCt0spring'),
         final_fit = as.numeric(varb %in% 'slingCt8'),
         slingYr = as.factor(slingYr)) %>%
  arrange(varb, id)

newdata$layer <- as.factor(ifelse(newdata$varb %in% 'slingCt0spring','emergence','survival'))

pout.noAmat <- predict(initial_count,
                       newdata = newdata,
                       varvar = varb, 
                       idvar = id, 
                       root = root, 
                       se.fit = TRUE)

# rows by cohort, columns represent each node
# values are estimated sling count for each cohort/node
fit.bynode <- matrix(pout.noAmat$fit, ncol = nnode)
se.bynode <- matrix(pout.noAmat$se.fit, ncol = nnode)

# reshape these matrices for plotting
row.names(fit.bynode) <- unique(t2.aster$slingYr)
colnames(fit.bynode) <- vars
fit.bynode.long <- melt(fit.bynode)
colnames(fit.bynode.long) <- c('Cohort','Node','resp')


# Add SE to fitness dataframe
se.bynode.long <- melt(se.bynode)
fit.bynode.long$SE <- se.bynode.long$value

# Remove anySlings node for plotting
fit.bynode.long <- fit.bynode.long %>% 
  filter(!Node %in% 'anySlings')

# Update Node Names
fit.bynode.long$Node <- sub('slingCt','',fit.bynode.long$Node) # remove "slingCt" prefix from node names
table(fit.bynode.long$Node)

fit.bynode.long$age <- factor(fit.bynode.long$Node, levels = c('0spring','0fall',1, 2, 3, 4, 5, 6, 7, 8))
fit.bynode.long$age1 <- as.numeric(fit.bynode.long$age)
fit.bynode.long$Cohort <- as.factor(fit.bynode.long$Cohort)

fit.bynode.long_cohort <- fit.bynode.long

t2.aster$age <- factor(t2.aster$age, levels = c('0spring','0fall',1, 2, 3, 4, 5, 6, 7, 8))
t2.aster$age1 <- as.numeric(t2.aster$age)

n <- length(unique(fit.bynode.long$Cohort))

## Version with only emergence and year 8, Main Text Figure
cohort1 <- ggplot(data = fit.bynode.long_cohort[fit.bynode.long_cohort$Node %in% c('0spring','8'),])+
  geom_ribbon(aes(x = age, ymin = resp - SE, ymax = resp + SE,group = Cohort, fill = Cohort), alpha = 0.1, color = NA) +
  geom_path(aes(x = age, y = resp, group = Cohort, color = Cohort), alpha = 0.8)+
  geom_point(aes(x = age, y = resp, group = Cohort, color = Cohort), size = 2, alpha = 0.9)+
  scale_color_manual(
    values = c(rcartocolor::carto_pal(n = n, name = "Bold")[1:n-1], "grey50")
  )+
  scale_fill_manual(
    values = c(rcartocolor::carto_pal(n = n, name = "Bold")[1:n-1], "grey50")
  )+
  theme_custom()+
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text = element_text(size = 12, color = "black"),
        axis.title = element_text(size = 12, color = "black"),
        legend.position = c(0.82, 0.75),
        legend.text = element_text(size = 8),
        legend.title = element_blank())+
  scale_x_discrete(labels = c("Emergence", "Year 8"),expand = c(0.15, 0.15)) +
  xlab('')+
  ylim(-0.05, 1.5)+
  ylab('Predicted Offspring Count')
cohort1

# Version with each node (Supplemental Figure)
ggplot(data = fit.bynode.long_cohort)+
  geom_ribbon(aes(x = age, ymin = resp - SE, ymax = resp + SE,group = Cohort, fill = Cohort), alpha = 0.1, color = NA) +
  geom_line(aes(x = age, y = resp, group = Cohort, color = Cohort), alpha = 0.8)+
  geom_point(aes(x = age, y = resp, group = Cohort, color = Cohort))+
  scale_color_manual(
    values = c(rcartocolor::carto_pal(n = n, name = "Bold")[1:n-1], "grey50")
  )+
  scale_fill_manual(
    values = c(rcartocolor::carto_pal(n = n, name = "Bold")[1:n-1], "grey50")
  )+
  theme_custom()+
  theme(legend.position = c(0.85, 0.65))+
  scale_x_discrete(labels = c("Emergence", "Yr0", "Yr1", "Yr2", "Yr3", "Yr4", 'Yr5', "Yr6", "Yr7", 'Yr8')) +
  xlab('')+
  ylab('Offspring Count')

#### Make a plot with unconditional estimates by site ####

# Make a dataset of hypothetical individuals to generate predictions
newdata = expand.grid(
  matingPotential = mean(sling.merged$matingPotential1, na.rm = T),
  slingYr = 2012,
  hdCt = 1,
  site = unique(t2.aster$site),
  searchArea = 41,
  varb = vars,
  resp = 1,
  root = 1
)

newdata = merge(
  x = newdata,
  y = newdata %>% distinct(matingPotential, slingYr, hdCt, site, searchArea) %>% mutate(id = 1:nrow(.))
) %>%
  mutate(initial_fit = as.numeric(varb %in% 'slingCt0spring'),
         final_fit = as.numeric(varb %in% 'slingCt8'),
         slingYr = as.factor(slingYr),
         site = as.factor(site)) %>%
  arrange(varb, id)
newdata$layer <- as.factor(ifelse(newdata$varb %in% 'slingCt0spring','emergence','survival'))


pout.noAmat <- predict(initial_count,
                       newdata = newdata,
                       varvar = varb, 
                       idvar = id, 
                       root = root, 
                       se.fit = TRUE)

# rows by site, columns represent each node
# values are estimated sling count for each site/node
fit.bynode <- matrix(pout.noAmat$fit, ncol = nnode)
se.bynode <- matrix(pout.noAmat$se.fit, ncol = nnode)

# reshape these matrices for plotting in ggplot2
row.names(fit.bynode) <- unique(t2.aster$site)
colnames(fit.bynode) <- vars
fit.bynode

fit.bynode.long <- melt(fit.bynode)
colnames(fit.bynode.long) <- c('Site','Node','resp')

# Add SE to fit dataframe
se.bynode.long <- melt(se.bynode)
fit.bynode.long$SE <- se.bynode.long$value

# Remove anySlings node for plotting
fit.bynode.long <- fit.bynode.long %>% 
  filter(!Node %in% 'anySlings')

fit.bynode.long$Node <- sub('slingCt','',fit.bynode.long$Node)
table(fit.bynode.long$Node)

fit.bynode.long$age <- factor(fit.bynode.long$Node, levels = c('0spring','0fall',1, 2, 3, 4, 5, 6, 7, 8))
fit.bynode.long$age1 <- as.numeric(fit.bynode.long$age)
fit.bynode.long$Site <- as.factor(fit.bynode.long$Site)

t2.aster$age <- factor(t2.aster$age, levels = c('0spring','0fall',1, 2, 3, 4, 5, 6, 7, 8))
t2.aster$age1 <- as.numeric(t2.aster$age)

fit.bynode.long_site <- fit.bynode.long %>% 
  filter(!Node %in% 'anySlings')

n <- length(unique(fit.bynode.long_site$Site))

siteCols <- c('#4b3561','#426291','#3596ba','#48d7b3','#5fea7f','#b0ff89','#e5ffb4',
              '#312031','#6d2d4a','#be2e30', '#e96045','#fea352',
              '#ffe15d','#f9fe71')

## Version with only emergence and year 8, GxE style plot ####
site1 <- ggplot(data = fit.bynode.long_site[fit.bynode.long_site$Node %in% c('0spring','8'),])+
  geom_ribbon(aes(x = age, ymin = resp - SE, ymax = resp + SE,group = Site, fill = Site), alpha = 0.1, color = NA) +
  geom_path(aes(x = age, y = resp, group = Site, color = Site), alpha = 0.8)+
  geom_point(aes(x = age, y = resp, group = Site, color = Site), size = 2, alpha = 0.9)+
  scale_color_manual(
    values =  siteCols)+
  scale_fill_manual(
    values =  siteCols)+
  theme_custom()+
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text = element_text(size = 12, color = "black"),
        axis.title = element_text(size = 12, color = "black"),
        legend.position = c(0.82, 0.75),
        legend.text = element_text(size = 8),
        legend.title = element_blank())+
  guides(color = guide_legend(ncol = 2))+ 
  ylim(-0.05, 1.5)+
  scale_x_discrete(labels = c("Emergence", "Year 8"),expand = c(0.15, 0.15)) +
  xlab('')+
  ylab('Predicted Offspring Count')
site1

# Supplemental Figure:
ggplot(data = fit.bynode.long_site)+
  geom_ribbon(aes(x = age, ymin = resp - SE, ymax = resp + SE,group = Site, fill = Site), alpha = 0.1, color = NA) +
  geom_line(aes(x = age, y = resp, group = Site, color = Site), alpha = 0.75)+
  geom_point(aes(x = age, y = resp, group = Site, color = Site), alpha = 0.75)+
  scale_color_manual(
    values =  siteCols)+
  scale_fill_manual(
    values =  siteCols)+
  theme_custom()+
  theme(legend.position = c(0.8, 0.65),
        legend.text = element_text(size = 8),
        legend.title = element_blank())+
  scale_x_discrete(labels = c("Emergence", "Yr0", "Yr1", "Yr2", "Yr3", "Yr4", 'Yr5', "Yr6", "Yr7", 'Yr8')) +
  xlab('')+
  ylab('Offspring Count')+
  guides(color = guide_legend(ncol = 2))


# FIGURE 3 ----------------------------------------------------------------

cohort1 + site1 + plot_annotation(tag_levels = 'A')

# OTHER DATA SUMMARIES ----------------------------------------------------

# Initial number of seedlings
t2.aster %>% 
  filter(varb == 'slingCt0spring') %>% 
  summarize(mean = mean(resp),
            median = median(resp),
            max = max(resp),
            min = min(resp),
            sd = sd(resp),
            q20 = quantile(resp, .80),
            q80 = quantile(resp, .95))

# Distribution of initial seedling counts
t2.aster %>% 
  filter(varb == 'slingCt0spring') %>% 
  group_by(resp) %>% 
  count()

# How many plants with at least 9 seedlings?
t2.aster %>% 
  filter(varb == 'slingCt0spring') %>%
  filter(resp > 8) %>% 
  count()

# Distribution of seedling counts by year 0 in the fall
t2.aster %>% 
  filter(varb == 'slingCt0fall') %>% 
  group_by(resp) %>% 
  count()

# Distribution of seedling counts by year 8
t2.aster %>% 
  filter(varb == 'slingCt8') %>% 
  group_by(resp) %>% 
  count()

# How many distinct site-years?
t2.aster %>% 
  distinct(site, slingYr) %>% 
  summarize(n())

# MAKE A FOREST PLOT FOR MODEL COEFFICIENTS -------------------------------

# Extract coefficients from model object:
object <- initial_count
info.tol = sqrt(.Machine$double.eps)
infomat <- object$fisher
fred <- eigen(infomat, symmetric = TRUE)
sally <- fred$values < max(fred$values) * info.tol

if (any(sally)) {
  cat("apparent null eigenvectors of information matrix\n")
  cat("directions of recession or constancy of log likelihood\n")
  print(zapsmall(fred$vectors[, sally]))
  stop("cannot compute standard errors")
}
foo <- object$coefficients
foo <- cbind(foo, sqrt(diag(solve(infomat))))
foo <- cbind(foo, foo[, 1]/foo[, 2])
foo <- cbind(foo, 2 * pnorm(-abs(foo[, 3])))
dimnames(foo) <- list(names(object$coefficients), c("Estimate", 
                                                    "StdError", "z value", "Pr(>|z|)"))
object$coefficients <- foo

model_summary <- as.data.frame(foo)
model_summary$Coefficient <- rownames(foo)

conf.level <- 0.95
crit <- qnorm((1 + conf.level) / 2)
fred <- summary(initial_count)

CI <- matrix(nrow = nrow(fred$coef), ncol = 2)
for(i in 1:nrow(fred$coef)){
  CI[i,] <- fred$coef[i, "Estimate"] +
    + c(-1, 1) * crit * fred$coef[i, "Std. Error"]
}
CI

model_summary$LLCI <- CI[,1] 
model_summary$ULCI <- CI[,2]


model_summary$names <- c("Intercept", "Year 0 (Spring)","Year 0 (Fall)", "Year 1",
                         "Year 2", "Year 3", "Year 4", "Year 5",
                         "Year 6", "Year 7", "Year 8", "Search Area",
                         "Head Count (Emergence)", "Head Count (Survival)",
                         "2008 (Emergence)", "2008 (Survival)",
                         "2009 (Emergence)", "2009 (Survival)",
                         "2010 (Emergence)", "2010 (Survival)",
                         "2011 (Emergence)", "2011 (Survival)",
                         "2012 (Emergence)", "2012 (Survival)",
                         "2013 (Emergence)", "2013 (Survival)",
                         "ERI (Emergence)", "ERI (Survival)",
                         "ETH (Emergence)", "ETH (Survival)",
                         "KJ (Emergence)", "KJ (Survival)",
                         "LC (Emergence)", "LC (Survival)",
                         "LF (Emergence)", "LF (Survival)",
                         "NESS (Emergence)", "NESS (Survival)",
                         "NGC (Emergence)", "NGC (Survival)",
                         "NWLF (Emergence)", "NWLF (Survival)",
                         "RI (Emergence)", "RI (Survival)",
                         "RNDT (Emergence)", "RNDT (Survival)",
                         "SAP (Emergence)", "SAP (Survival)",
                         "SGC (Emergence)", "SGC (Survival)",
                         "SPP (Emergence)", "SPP (Survival)",
                         "Mating Potential (Emergence)")
# "Mating Potential (Year 8)") # include if visualizing "super" model

model_summary$coef_type <- factor(c(rep("Graph Nodes",11), "Search Area", "Head Count", "Head Count",
                                    rep("Emergence Year", 12), rep("Site", 26), "Mating Potential"), levels = c("Graph Nodes", "Search Area", "Head Count", "Emergence Year", "Site", "Mating Potential"))

model_summary <- model_summary %>% 
  arrange(desc(coef_type), Estimate)

model_summary$names <- fct_inorder(model_summary$names)


# Make the forest plot
ggplot(model_summary[!model_summary$coef_type %in% 'Graph Nodes',])+
  geom_vline(xintercept = 0)+
  geom_point(aes(x = Estimate, y = names, shape = coef_type))+
  geom_linerange(aes(y = names, xmin = LLCI, xmax = ULCI))+
  ylab("")+
  xlab("Effect Size")+
  theme_classic()+
  theme(
    axis.text = element_text(size = 10, color = 'black'),
    # axis.text.y = element_text(size = 10, color = model_summary[!model_summary$coef_type %in% 'Graph Nodes','coef_type']),
    legend.justification = "top"
  )+
  labs(shape = "Coefficient Type")+
  # scale_color_manual(values = c("#4c5b5c","#ff715b","#f9cb40","#bced09","#2f52e0"))+
  scale_shape_manual(values=c(1, 2, 4, 21, 8))

# ASSESS OVERALL CORRELATION BETWEEN EMERGENCE AND FINAL COUNTS -----------

em <- t2.aster %>% 
  filter(t2.aster$age %in% '0spring') %>% 
  select(focalCd, resp) %>% 
  rename(seedlingCt = resp)
df <- t2.aster %>% 
  filter(t2.aster$age %in% '8') %>%
  rename(finalCt = resp) %>% 
  left_join(em,by = 'focalCd')

cor.test(df$seedlingCt, df$finalCt, method = 'kendall')

# Assess correlation excluding zero-counts:
cor.test(df[df$seedlingCt > 0, 'seedlingCt'], df[df$seedlingCt > 0, 'finalCt'], method = 'kendall')
df_nozeros <- df[df$seedlingCt > 0,]
ggplot(df_nozeros, aes(x = seedlingCt, y = finalCt))+
  geom_jitter(width = 0.1,height = 0.1, size = 2, shape = 21, color = 'black', fill = 'gray', alpha = 0.7)+
  theme_custom()+
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text = element_text(size = 12, color = "black"),
        axis.title = element_text(size = 12, color = "black"))+
  xlab('Number of Seedlings')+
  ylab('Number of Offspring at 8 Years')

# STOCHASTIC LOTTERY SIMULATION -------------------------------------------

set.seed(100)

# First, visualize expectations based on binomial model:

dd <- data.frame(expand.grid(chance = seq(from = 0, to = 0.5, length.out = 100), high = seq(from = 0, to = 25, length.out = 100)))

heatmap_2 <- ggplot(dd, aes(x = chance, y = high))+
  geom_tile(aes(fill = chance*high), color = NA)+
  geom_function(fun = function(x) 1/x, inherit.aes = F)+
  # geom_segment(aes(x = 0.11, xend = 0.11, y = 0, yend = 9), color = 'black', linetype = 'dashed', arrow = arrow(length = unit(0.1, "inches")))+
  # geom_segment(aes(x = 0.11, xend = 0, y = 9, yend = 9), color = 'black', linetype = 'dashed', arrow = arrow(length = unit(0.1, "inches") ))+
  geom_segment(aes(x = 0.11, xend = 0.11, y = 0, yend = 9), color = 'black', linetype = 'dashed')+
  geom_segment(aes(x = 0.11, xend = 0, y = 9, yend = 9), color = 'black', linetype = 'dashed')+
  theme_custom()+
  theme(legend.position = "bottom", legend.key.width = unit(1, "cm"),
        axis.text = element_text(size = 10, color = "black"),
        axis.title = element_text(size = 12, color = "black"))+
  scale_x_continuous(limits = c(0,0.5), expand = c(0, 0)) +
  scale_y_continuous(limits = c(0,25), expand = c(0, 0))+
  scale_fill_viridis_c(direction = -1)+
  labs(fill = "Advantage \nMaintained")+
  ylab('Advantage at Emergence')+
  xlab('Chance of Juvenile Survival')

# Next, simulate the process:

# chance you survive to age 8
c <- seq(from = 1/100, to = 1, length.out = 20)

# number of seedlings for a high-emergence maternal plant
# high <- seq(from = 1, to = 100, length.out = 20)
high <- seq(from = 0, to = 50, length.out = 20)

# number of seedlings for a low-emergence maternal plant
low <- 1

# simulate X times whether or not the seedlings from each maternal plant survive
niter <- 100

out <- data.frame(chance = numeric(), high = numeric(), high_survivors = numeric(), low_survivors = numeric(), niter = numeric())

for(j in 1:length(c)){ # loop through vector of chances of survival
  cj <- c[j] # chance of surival for iteration
  for(k in 1:length(high)){ # loop through vector of seedlings of high emergence maternal plant
    highk <- high[k] # number of offspring for a high emergence plant
    for(i in 1:niter){
      # Calculate number of survivors for high and low plants
      high_survivors <- sum(sample(c(1,0),size = highk, prob = c(cj, 1-cj), replace = T))
      low_survivors <- sum(sample(c(1,0),size = low, prob = c(cj, 1-cj), replace = T))
      out <- rbind(out, data.frame(chance = cj, high = highk, high_survivors = high_survivors, low_survivors = low_survivors, niter = i))
    }
  }
}

# see whether there is a difference between high and low emergence plants 
out$relative_fitness <- (out$high_survivors+1)/(out$low_survivors+1)
out_summary <- out %>% 
  group_by(chance, high) %>% 
  summarise(high_final = high_survivors,mean_w = mean(relative_fitness), var_w = var(relative_fitness))

# Visualize mean fitness advantage of high emergence maternal plants based ####
# on emergence advantage and chance of juvenile survival 

# Full range of data
ggplot(out_summary, aes(x = chance, y = high))+
  geom_tile(aes(fill = mean_w), color = 'black')+
  geom_line(data = data.frame(chance = c, ZNFI = 1/c), aes(x = chance, y= ZNFI), linewidth = 1)+
  geom_point(data = data.frame(chance = c, ZNFI = 1/c), aes(x = chance, y= ZNFI), size = 3)+
  theme_bw(base_size = 14)+
  scale_fill_viridis_b(direction = -1)+
  labs(fill = "Relative # Surviving\n Offspring (High/Low)")+
  ylab('Emergence Advantage')+
  xlab('Chance of Juvenile Survival')

# Zoom in on lower emergence advantages
heatmap <- ggplot(out_summary, aes(x = chance, y = high))+
  geom_tile(aes(fill = high_final), color = 'gray')+
  geom_function(fun = function(x) 1/x)+
  scale_x_continuous(limits = c(0,1), expand = c(0, 0)) +
  scale_y_continuous(limits = c(0,50), expand = c(0, 0))+
  theme_custom()+
  theme(legend.position = "bottom", legend.key.width = unit(1, "cm"),
        axis.text = element_text(size = 10, color = "black"),
        axis.title = element_text(size = 12, color = "black"))+
  geom_segment(aes(x = 0.11, xend = 0.11, y = 0, yend = 9), linetype = "dashed", color = 'black')+
  geom_segment(aes(x = 0, xend = 0.11, y = 9, yend = 9), linetype = "dashed", color = 'black')+
  scale_fill_viridis_c(direction = -1, breaks = c(1, 5 , 10, 15, 20, 40))+
  labs(fill = "Final Offspring\nCount")+
  ylab('Emergence Advantage')+
  xlab('Chance of Juvenile Survival')

heatmap

# Visualize variance in mean fitness advantage of high emergence maternal plants based ####
# on emergence advantage and chance of juvenile survival 

ggplot(out_summary, aes(x = chance, y = high))+
  geom_tile(aes(fill = var_w), color = 'black')+
  scale_fill_viridis_b(direction = -1)+
  ylab('Emergence Advantage')+
  xlab('Chance of Juvenile Survival')+
  theme_custom()+
  theme(legend.position = "right",
        axis.text = element_text(size = 10, color = "black"),
        axis.title = element_text(size = 12, color = "black"))

# Zoom in on variance in range close to what is observed observed in the data
out_summary %>% 
  filter(chance < 0.25 & high < 10) %>% 
  ggplot(., aes(x = chance, y = high))+
  geom_tile(aes(fill = var_w), color = 'black')+
  scale_fill_viridis_b(direction = -1)+
  ylab('Emergence Advantage')+
  xlab('Chance of Juvenile Survival')+
  theme_custom()+
  theme(legend.position = "right",
        axis.text = element_text(size = 10, color = "black"),
        axis.title = element_text(size = 12, color = "black"))

# Make a dataframe of all survival chances and # of offspring from maternal plant
all_combos <- expand.grid(c, high)
all_combos$prob <- all_combos$Var1 * all_combos$Var2
all_combos$yn_offspring <- ifelse(all_combos$prob >= 1, 1, 0) 

# Visualize emergence advantage multiplied by chance of survival
ggplot(all_combos, aes(y = Var2, x = Var1))+
  geom_tile(aes(fill = as.factor(yn_offspring)), color = 'black')+
  ylab('Emergence Advantage')+
  xlab('Chance of Juvenile Survival')+
  labs(fill = "Emergence Advantage x \nChance of Survival > 1")+
  theme_bw(base_size = 14)

#### Plot S = 0.11 and E = 1-10 with range of possible final offspring counts ####

# chance you survive to age 8
c <- 0.11

# number of seedlings for a high-emergence maternal plant
high <- 0:10

# number of seedlings for a low-emergence maternal plant
low <- 1

# simulate X times whether or not the seedlings from each maternal plant survive
niter <- 100

out11 <- data.frame(chance = numeric(), high = numeric(), high_survivors = numeric(), low_survivors = numeric(), niter = numeric())

for(j in 1:length(c)){ # loop through vector of chances of survival
  cj <- c[j] # chance of surival for iteration
  for(k in 1:length(high)){ # loop through vector of seedlings of high emergence maternal plan
    highk <- high[k] # number of offspring for a high emergence plant
    for(i in 1:niter){
      # Calculate number of survivors for high and low plants
      high_survivors <- sum(sample(c(1,0),size = highk, prob = c(cj, 1-cj), replace = T))
      low_survivors <- sum(sample(c(1,0),size = low, prob = c(cj, 1-cj), replace = T))
      out11 <- rbind(out11, data.frame(chance = cj, high = highk, high_survivors = high_survivors, low_survivors = low_survivors, niter = i))
    }
  }
}

table(out11$chance, out11$high)

scatter <- ggplot(out11[out11$high < 7,], aes(x = as.factor(high), y = high_survivors))+
  geom_hline(yintercept = 0.85, linetype = 'dashed')+
  xlab('Emergence Advantage')+
  ylab('Final Offspring Count')+
  geom_jitter(height = 0.1, width = 0.25, alpha = 0.2, size = 1)+
  theme_custom()+
  theme(axis.text = element_text(size = 10, color = "black"),
        axis.title = element_text(size = 12, color = "black"))

scatter

# FIGURE 4 ----------------------------------------------------------------
# Make a plot with the expectations heatmap and scatter plot from simulations
heatmap_2 + scatter + plot_annotation(tag_levels = 'A')