knitr::opts_chunk$set(echo = TRUE)
library(dplyr, ggplot2) # for data wrangling and plotting
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse) # for data wrangling
## Warning: package 'tidyverse' was built under R version 4.4.2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'tidyr' was built under R version 4.4.2
## Warning: package 'readr' was built under R version 4.4.2
## Warning: package 'purrr' was built under R version 4.4.2
## Warning: package 'stringr' was built under R version 4.4.2
## Warning: package 'forcats' was built under R version 4.4.2
## Warning: package 'lubridate' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here) # for file management
## Warning: package 'here' was built under R version 4.4.2
## here() starts at C:/Users/EU01224884/Desktop/Oviposition Paper Materials/Data
library(boot) # for bootstrapping
library(kableExtra) # for formatting tables
## Warning: package 'kableExtra' was built under R version 4.4.2
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(coxed)
## Warning: package 'coxed' was built under R version 4.4.2
## Loading required package: rms
## Loading required package: Hmisc
## Warning: package 'Hmisc' was built under R version 4.4.2
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## Loading required package: survival
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:boot':
## 
##     aml
## 
## Loading required package: mgcv
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
## 
## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.4.2
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(gridtext)
## Warning: package 'gridtext' was built under R version 4.4.2
library(grid)

ovi.data<- read.csv("Oviposition Data 2023.csv")
sa.data <- read.csv("Oviposition SA 2023.csv")

#Setting seed so simulated output remains constant
set.seed(100)

Beginning with Dakota skipper analysis for all substrates

Cleaning data frame

heda.ovi.data <- filter(ovi.data, Spp == "HEDA")

#Using the group_by function to group dataset by ID and Substrate, then calculating total number of eggs and days for each
heda.ovi.data <- heda.ovi.data |>
  group_by(ID, Substrate) |>
  mutate(
    Total.eggs = sum(Eggs),
    N.days = n_distinct(Date)
  )

#Getting rid of date and eggs columns, no longer needed
heda.ovi.data <- subset(heda.ovi.data, select = -c(Date, Eggs))

#Removing duplicate data
heda.ovi.data <- heda.ovi.data[!duplicated(heda.ovi.data),]
heda.ovi.data <- arrange(heda.ovi.data, ID)

#Eliminating observation of egg laid in a vial (row #19, after arranging dataset based on ID)
heda.ovi.data <- heda.ovi.data[-c(19), ]

Creating data frame for HEDA SR’s

#Calculating total surface area
sa.total <- sum(sa.data$SA)

#Calculating SA proportions and putting them into new dataframe
prop.SA <- as.data.frame(sa.data[,2]/sa.total)
prop.SA <- cbind.data.frame(sa.data$Substrate, prop.SA)

names(prop.SA)[1] <- "Substrate"
names(prop.SA)[2] <- "Prop.SA"

#Observed total egg values for each individual
heda.ind.total <- aggregate(Total.eggs ~ ID, data=heda.ovi.data, sum)

#Merging total eggs dataframe w/ proportion dataframe to be able to calculate expected eggs
heda.exp.total <- merge(heda.ind.total, prop.SA)

#Multiplying total eggs by proportion to get expected eggs for each substrate
heda.exp.total$Exp.eggs <- (heda.exp.total$Total.eggs * heda.exp.total$Prop.SA)

#Removing the columns no longer needed
heda.exp.total <- subset(heda.exp.total, select = -c(Total.eggs, Prop.SA))
heda.exp.total <- arrange(heda.exp.total, ID)

#Summarizing observed eggs by individual and substrate
heda.obs.total <- subset(heda.ovi.data, select = -c(Spp, N.days))
heda.obs.total <- arrange(heda.obs.total, ID)

#Using a right join to combine the observed and expected dataframes
heda.obs.v.exp <- right_join(heda.obs.total, heda.exp.total)
## Joining with `by = join_by(ID, Substrate)`
#Replacing NA's with 0's
heda.obs.v.exp[is.na(heda.obs.v.exp)] <- 0

#Arranging dataframe based on Substrate
heda.obs.v.exp <- arrange(heda.obs.v.exp, Substrate)

Binning non-graminoid substrates together

#Making new dataframe with only non-graminoid substrates
heda.non.gram.sub <- heda.obs.v.exp[c(1:10, 31:40,51:60, 71:80),]

#Grouping by ID to sum total eggs and expected eggs across non-graminoid substrates
heda.non.gram.sub <- heda.non.gram.sub |>
  group_by(ID) |>
  mutate(y=sum(Total.eggs)) |>
  mutate(z=sum(Exp.eggs))

heda.non.gram.sub <- heda.non.gram.sub |> ungroup()

#Cleaning up the dataframe
heda.non.gram.sub <- subset(heda.non.gram.sub, select = -c(Total.eggs, Exp.eggs))
colnames(heda.non.gram.sub)[3] <- "Total.eggs"
colnames(heda.non.gram.sub)[4] <- "Exp.eggs"

heda.non.gram.sub <- subset(heda.non.gram.sub, select = -c(Substrate))
heda.non.gram.sub <- heda.non.gram.sub[!duplicated(heda.non.gram.sub),]
heda.non.gram.sub$Substrate <- c("ABIOTIC")

#Now making data frame with only the three graminoid substrates and flower
heda.gram.sub <- heda.obs.v.exp[c(11:30, 41:50, 61:70),]

#Combining the two dataframes
heda.bin.obs.v.exp <- rbind(heda.non.gram.sub, heda.gram.sub)

#Adding a column for selection ratios for each individual for each substrate (observed eggs per substrate / expected eggs per substrate)
heda.bin.obs.v.exp$SR <- (heda.bin.obs.v.exp$Total.eggs/heda.bin.obs.v.exp$Exp.eggs)

Converting binned data frame to wide format

w.heda.bin.obs.v.exp <- subset(heda.bin.obs.v.exp, select = -c(Total.eggs, Exp.eggs))
w.heda.bin.obs.v.exp <- w.heda.bin.obs.v.exp %>% pivot_wider(names_from = Substrate, values_from = SR)

w.heda.bin.obs.v.exp <- w.heda.bin.obs.v.exp |> ungroup()

Calculating the simple mean

heda.si.mean <- w.heda.bin.obs.v.exp %>%
  dplyr::summarize(across(2:6, mean, na.rm=TRUE))
## Warning: There was 1 warning in `dplyr::summarize()`.
## ℹ In argument: `across(2:6, mean, na.rm = TRUE)`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))
heda.si.mean <- heda.si.mean %>% pivot_longer(cols=c('ABIOTIC', 'FLOWER', 'PDS', 'SMBR', 'TS'),
                                    names_to='Substrate',
                                    values_to='Mean')

Creating function to use for bootstrapping

mean.fun <- function(data, inds){
  av <- data[inds,] |>
    
 dplyr::summarize(
   result = across(2:6, mean, na.rm=TRUE))
  return(as.numeric(av$result))
}

Bootstrapping

heda.bootreps <- boot(data = w.heda.bin.obs.v.exp, statistic = mean.fun, R = 9999)

heda.bootstats <- data.frame(heda.bootreps$t) # contains the bootstrap statistics as columns
names(heda.bootstats) <- t(heda.si.mean[,1]) # Assign names to the different columns
heda.bootstats <- heda.bootstats |> pivot_longer(
  cols = everything(),
  values_to ="Bootstrap.Statistic",
  names_to = "Substrate"
  )

Graphing the bootstrap distributions

substrate.names <- c('ABIOTIC' = "Abiotic",
                     'FLOWER' = "Flower",
                     'PDS' = "Prairie dropseed",
                     'SMBR' = "Smooth brome",
                     'TS' = "Tussock sedge")

ggplot(heda.bootstats, aes(x= Bootstrap.Statistic)) +
  geom_histogram(fill = "skyblue", color = "black", alpha = 0.7) +   
  facet_wrap(~ Substrate, labeller = as_labeller(substrate.names), scales = "free") +
  geom_vline(aes(xintercept = Mean, col = "red"), heda.si.mean, size = 1) +
  labs(color = "Sample Statistic") +
  scale_color_manual(labels = c("Mean"), values = c("red")) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank())
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Calculating 95% BCa CI’s

heda.cidata <- NULL
for(i in c(1:5)){
  # CI for ith selection ratio
  temp <- boot.ci(heda.bootreps, index = i, type = c("bca"))
  heda.cidata <- rbind(heda.cidata, data.frame(Substrate = heda.si.mean$Substrate[i],
                                     LCL.bca = temp$bca[1,4],
                                     UCL.bca = temp$bca[1,5]))
}  
heda.cidata[,2:3] <-round(heda.cidata[,2:3], 2)
colnames(heda.cidata)[2:3] <- c("95% LCL (bca)", 
                           "95% UCL (bca)") 
rownames(heda.cidata) <- c("ABIOTIC", "FLOWER", "PDS", "SMBR", "TS")
heda.cidata <- rownames_to_column(heda.cidata, "Sub")
heda.cidata <- subset(heda.cidata, select = -c(Substrate))
colnames(heda.cidata)[1] <- "Substrate"

kable(heda.cidata, caption = "Dakota skipper 95% BCa Confidence Intervals")
Dakota skipper 95% BCa Confidence Intervals
Substrate 95% LCL (bca) 95% UCL (bca)
ABIOTIC 0.43 0.68
FLOWER 0.35 10.37
PDS 9.16 21.41
SMBR 3.13 9.84
TS 3.99 7.66

Plotting the CI’s

#Need to add the SR's to the cidata data frame first, to plot
rownames(heda.si.mean) <- c("ABIOTIC", "FLOWER", "PDS", "SMBR", "TS")
## Warning: Setting row names on a tibble is deprecated.
heda.si.mean <- rownames_to_column(heda.si.mean, "Sub")
heda.si.mean <- subset(heda.si.mean, select = -c(Substrate))
colnames(heda.si.mean)[1] <- "Substrate"
colnames(heda.si.mean)[2] <- "Estimate"

heda.cidata <- inner_join(heda.si.mean, heda.cidata, by = "Substrate")
colnames(heda.cidata)[3] <- "lower"
colnames(heda.cidata)[4] <- "upper"

#Now attempting to make plot
ggplot(heda.cidata, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  ylim(0.3, 22) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size=15, face="bold"))

Now calculating the population-level SR’s

#Calculating the population-level SR dataframe
heda.all.egg <- sum(heda.bin.obs.v.exp$Total.eggs)

heda.egg.by.sub <- heda.bin.obs.v.exp |>
  group_by(Substrate) |>
  dplyr::summarize(
    sum(Total.eggs)
  )

heda.prop.egg <- as.data.frame(heda.egg.by.sub[,2]/heda.all.egg)
heda.prop.egg <- cbind.data.frame(heda.egg.by.sub$Substrate, heda.prop.egg)
names(heda.prop.egg)[1] <- "Substrate"
names(heda.prop.egg)[2] <- "Prop.Egg"

#Need to bin abiotic substrates together in prop.SA
non.gram.SA <- prop.SA[c(5:8),]
non.gram.SA2 <- data.frame(Substrate = c("ABIOTIC"),
                           Prop.SA = c(sum(non.gram.SA$Prop.SA)))
prop.SA2 <- prop.SA[c(1:4),]
prop.SA <- rbind(non.gram.SA2, prop.SA2)

#Continuing to calculate population-level SR's
heda.prop.egg.SA <- left_join(heda.prop.egg, prop.SA, by="Substrate")

heda.pop.lvl.SR <- as.data.frame((heda.prop.egg.SA$Prop.Egg)/(heda.prop.egg.SA$Prop.SA))
heda.pop.lvl.SR <- cbind.data.frame(heda.prop.egg.SA$Substrate, heda.pop.lvl.SR)
names(heda.pop.lvl.SR)[1] <- "Substrate"
names(heda.pop.lvl.SR)[2] <- "SR"

Creating a function for bootstrapping

heda.pop.fun <- function(heda.bootdat, prop.SA=prop.SA){
  # Now repeat all the same calculations (using bootdat instead of obs.v.exp)
  # I'll append a ".b" to all the names to make it clear that these are done
  # with the bootstrap data set
  heda.all.egg.b <-sum(heda.bootdat$Total.eggs)  
  heda.egg.by.sub.b <- heda.bootdat |>
  group_by(Substrate) |> # Group just by substrate
  dplyr::summarize(
    sum(Total.eggs)
  )
  heda.prop.egg.b <- as.data.frame(heda.egg.by.sub.b[,2]/heda.all.egg.b)
  heda.prop.egg.b <- cbind.data.frame(heda.egg.by.sub.b$Substrate, heda.prop.egg.b)
  names(heda.prop.egg.b)[1] <- "Substrate"  
  names(heda.prop.egg.b)[2] <- "Prop.Egg"
  heda.prop.egg.SA.b <- right_join(heda.prop.egg.b, prop.SA, by="Substrate")

  heda.pop.lvl.SR.b <- as.numeric((heda.prop.egg.SA.b$Prop.Egg)/(heda.prop.egg.SA.b$Prop.SA))
  return(heda.pop.lvl.SR.b)
}

Testing the above function (output should be the same)

print(heda.pop.lvl.SR)
##   Substrate         SR
## 1   ABIOTIC  0.5113668
## 2    FLOWER  1.2205286
## 3       PDS 15.7374268
## 4      SMBR  6.5540937
## 5        TS  7.0040625
heda.pop.fun(heda.bin.obs.v.exp, prop.SA = prop.SA)
## [1]  0.5113668  1.2205286 15.7374268  6.5540937  7.0040625

Bootstrapping and calculating BCa CI’s

heda.IDS<-unique(heda.bin.obs.v.exp$ID)
heda.pop.lvl.bootreps<-matrix(NA, 9999, 5)
for(i in 1:9999){
  # Sample IDs with replacement
  heda.bootIDs <- data.frame(ID = sample(heda.IDS, replace=TRUE))
  
  # Create bootstrap data set from the wide version
  heda.bootdat <- left_join(heda.bootIDs, heda.bin.obs.v.exp, relationship = "many-to-many", by = "ID") 
  heda.pop.lvl.bootreps[i,] <- heda.pop.fun(heda.bootdat, prop.SA = prop.SA) # calculate the bootstrap ratios and store them in the ith row of bootstats
}


## BCa intervals
heda.cis.bca <- matrix(NA, 2, 5)
for(i in 1:5){
  heda.cis.bca[,i] <- t(bca(heda.pop.lvl.bootreps[,i])) # transpose results so we can store using 2 rows, one for the upper CL and one for the lower
}
colnames(heda.cis.bca)<-colnames(heda.pop.lvl.SR$Substrate)

heda.cis.bca <- as.data.frame(heda.cis.bca)
rownames(heda.cis.bca) <- c("95% LCL (bca)", "95% UCL (bca)")
colnames(heda.cis.bca) <- c("ABIOTIC", "FLOWER", "PDS", "SMBR", "TS")
kable(heda.cis.bca, caption = "Dakota skipper 95% BCa Confidence Intervals")
Dakota skipper 95% BCa Confidence Intervals
ABIOTIC FLOWER PDS SMBR TS
95% LCL (bca) 0.4340844 0.4444657 9.920977 4.707788 4.745685
95% UCL (bca) 0.5863133 3.5447975 20.163617 10.444100 8.713582

Putting bootstrap stats into a data frame and graphing

#Putting bootstrap stats into dataframe
heda.pop.lvl.bootstats <- data.frame(heda.pop.lvl.bootreps) # contains the bootstrap statistics as columns
names(heda.pop.lvl.bootstats) <- t(heda.pop.lvl.SR[,1]) # Assign names to the different columns
heda.pop.lvl.bootstats <- heda.pop.lvl.bootstats |> pivot_longer(
  cols = everything(),
  values_to ="Bootstrap.Statistic",
  names_to = "Substrate"
  )

#Graphing
ggplot(heda.pop.lvl.bootstats, aes(x= Bootstrap.Statistic)) +
  geom_histogram(fill = "skyblue", color = "black", alpha = 0.7) +   
  facet_wrap(~ Substrate, labeller = as_labeller(substrate.names), scales = "free") +
  geom_vline(aes(xintercept = SR, col = "red"), data=heda.pop.lvl.SR, size=1) +
  labs(color = "Sample Statistic") +
  scale_color_manual(labels = c("Mean"), values = c("red")) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Graphing the CI’s

#Need to add the actual SR's to the cidata data frame first, to plot
heda.cis.bca <- heda.cis.bca |> pivot_longer(cols = c("ABIOTIC", "FLOWER", "PDS", "SMBR", "TS"),
                                   names_to = "Substrate",
                                   values_to = "lower")
heda.cis.bca <- heda.cis.bca |>
  group_by(Substrate) |>
  mutate(
    upper = max(lower)
  ) |>
  ungroup()


heda.cis.bca <- heda.cis.bca[-c(6:10),]

rownames(heda.pop.lvl.SR) <- c("ABIOTIC", "FLOWER", "PDS", "SMBR", "TS")
heda.pop.lvl.SR <- rownames_to_column(heda.pop.lvl.SR, "Sub")
heda.pop.lvl.SR <- subset(heda.pop.lvl.SR, select = -c(Substrate))
colnames(heda.pop.lvl.SR)[1] <- "Substrate"
colnames(heda.pop.lvl.SR)[2] <- "Estimate"

heda.cis.bca <- inner_join(heda.cis.bca, heda.pop.lvl.SR, by = "Substrate")

#Now attempting to make plot
ggplot(heda.cis.bca, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size=15, face="bold")) +
  ylim(0.4, 21)

Moving on to complete the Dakota skipper analysis with only the three graminoid substrates

Creating a data frame with only the graminoid substrates

#Subsetting the SA data to only include the three graminoid substrates and calculating total SA for only graminoids
pl.sa.data <- sa.data[c(1:3),]
pl.sa.total <- sum(pl.sa.data$SA)

#Calculating new SA proportions and putting them into data frame
pl.prop.sa <- as.data.frame(pl.sa.data[,2]/pl.sa.total)
pl.prop.sa <- cbind.data.frame(pl.sa.data$Substrate, pl.prop.sa)

names(pl.prop.sa)[1] <- "Substrate"
names(pl.prop.sa)[2] <- "Prop.SA"

#Finding observed eggs on the graminoid substrates
heda.pl.ovi.dat <- heda.ovi.data
heda.pl.ovi.dat$exclude = 0
heda.pl.ovi.dat$exclude[heda.pl.ovi.dat$Substrate == 'FLOWER'] = 1
heda.pl.ovi.dat$exclude[heda.pl.ovi.dat$Substrate == 'CAGE'] = 1
heda.pl.ovi.dat$exclude[heda.pl.ovi.dat$Substrate == 'TUBE'] = 1
heda.pl.ovi.dat$exclude[heda.pl.ovi.dat$Substrate == 'POT'] = 1
heda.pl.ovi.dat$exclude[heda.pl.ovi.dat$Substrate == 'SOIL'] = 1
heda.pl.ovi.dat$exclude[heda.pl.ovi.dat$Substrate == 'VIAL'] = 1
heda.pl.ovi.dat = subset(heda.pl.ovi.dat, exclude == 0)

heda.pl.ovi.dat <- heda.pl.ovi.dat[-c(6)]
heda.pl.ovi.dat <- arrange(heda.pl.ovi.dat, Substrate)

#Observed individual total for only graminoids
heda.pl.ind.total <- aggregate(Total.eggs ~ ID, data=heda.pl.ovi.dat, sum)

#Merging total eggs dataframe w/ proportion dataframe to be able to calculate expected eggs
heda.pl.exp.total <- merge(heda.pl.ind.total, pl.prop.sa)

#Multiplying total eggs by proportion to get expected eggs for each substrate
heda.pl.exp.total$Exp.eggs <- (heda.pl.exp.total$Total.eggs * heda.pl.exp.total$Prop.SA)

#Removing the columns no longer needed
heda.pl.exp.total <- subset(heda.pl.exp.total, select = -c(Total.eggs, Prop.SA))
heda.pl.exp.total <- arrange(heda.pl.exp.total, ID)

#Summarizing observed eggs by individual and substrate
heda.pl.obs.total <- subset(heda.pl.ovi.dat, select = -c(Spp, N.days))
heda.pl.obs.total <- arrange(heda.pl.obs.total, ID)

Combining the observed and expected data frames

#Using a right join to combine the observed and expected dataframes
heda.pl.obs.v.exp <- right_join(heda.pl.obs.total, heda.pl.exp.total)
## Joining with `by = join_by(ID, Substrate)`
#Replacing NA's with 0's
heda.pl.obs.v.exp[is.na(heda.pl.obs.v.exp)] <- 0

#Adding a column for selection ratios for each individual for each substrate (observed eggs per substrate / expected eggs per substrate)
heda.pl.obs.v.exp$SR <- (heda.pl.obs.v.exp$Total.eggs/heda.pl.obs.v.exp$Exp.eggs)

#Arranging dataframe based on Substrate
heda.pl.obs.v.exp <- arrange(heda.pl.obs.v.exp, Substrate)

Converting data to wide format and only including information needed for bootstrapping

w.heda.pl.obs.v.exp <- subset(heda.pl.obs.v.exp, select = -c(Total.eggs, Exp.eggs))
w.heda.pl.obs.v.exp <- w.heda.pl.obs.v.exp %>% pivot_wider(names_from = Substrate, values_from = SR)

w.heda.pl.obs.v.exp <- w.heda.pl.obs.v.exp |> ungroup()

Calculating the simple mean and saving as new object

heda.pl.si.mean <- w.heda.pl.obs.v.exp %>%
  dplyr::summarize(across(2:4, mean, na.rm=TRUE))
heda.pl.si.mean <- heda.pl.si.mean %>% pivot_longer(cols=c('PDS','SMBR','TS'),
                                    names_to='Substrate',
                                    values_to='Mean')

Creating a function to calculate mean

pl.mean.fun <- function(data, inds){
  av <- data[inds,] |>
    
 dplyr::summarize(
   result = across(2:4, mean, na.rm=TRUE))
  return(as.numeric(av$result))
}

#Bootstrapping
heda.pl.bootreps <- boot(data = w.heda.pl.obs.v.exp, statistic = pl.mean.fun, R = 9999)

Creating data frame with bootstrap statistics

heda.pl.bootstats <- data.frame(heda.pl.bootreps$t) # contains the bootstrap statistics as columns
names(heda.pl.bootstats) <- t(heda.pl.si.mean[,1]) # Assign names to the different columns
heda.pl.bootstats <- heda.pl.bootstats |> pivot_longer(
  cols = everything(),
  values_to ="Bootstrap.Statistic",
  names_to = "Substrate"
  )

Graphing the bootstrap statistics

pl.substrate.names <- c('PDS' = "Prairie dropseed",
                     'SMBR' = "Smooth brome",
                     'TS' = "Tussock sedge")

ggplot(heda.pl.bootstats, aes(x= Bootstrap.Statistic)) +
  geom_histogram(fill = "skyblue", color = "black", alpha = 0.7) +   
  facet_wrap(~ Substrate, labeller = as_labeller(pl.substrate.names), scales = "free") +
  geom_vline(aes(xintercept = Mean, col = "red"), heda.pl.si.mean, size=1) +
  labs(color = "Sample Statistic") +
  scale_color_manual(labels = c("Mean"), values = c("red")) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Calculating CI’s

heda.pl.cidata <- NULL
for(i in c(1:3)){
  # CI for ith selection ratio
  temp <- boot.ci(heda.pl.bootreps, index = i, type = c("bca"))
  heda.pl.cidata <- rbind(heda.pl.cidata, data.frame(Substrate = heda.pl.si.mean$Substrate[i],
                                     LCL.bca = temp$bca[1,4],
                                     UCL.bca = temp$bca[1,5]))
}  
heda.pl.cidata[,2:3] <-round(heda.pl.cidata[,2:3], 2)
colnames(heda.pl.cidata)[2:3] <- c("95% LCL (bca)", 
                           "95% UCL (bca)") 
rownames(heda.pl.cidata) <- c("PDS", "SMBR", "TS")
heda.pl.cidata <- rownames_to_column(heda.pl.cidata, "Sub")
heda.pl.cidata <- subset(heda.pl.cidata, select=-c(Substrate))
colnames(heda.pl.cidata)[1] <- "Substrate"
kable(heda.pl.cidata, caption = "Dakota skipper 95% BCa Confidence Intervals (graminoids)")
Dakota skipper 95% BCa Confidence Intervals (graminoids)
Substrate 95% LCL (bca) 95% UCL (bca)
PDS 1.06 2.64
SMBR 0.35 0.97
TS 0.58 1.76

Plotting CI’s

#Need to add the actual SR's to the cidata data frame first, to plot
rownames(heda.pl.si.mean) <- c("PDS", "SMBR", "TS")
## Warning: Setting row names on a tibble is deprecated.
heda.pl.si.mean <- rownames_to_column(heda.pl.si.mean, "Sub")
heda.pl.si.mean <- subset(heda.pl.si.mean, select = -c(Substrate))
colnames(heda.pl.si.mean)[1] <- "Substrate"
colnames(heda.pl.si.mean)[2] <- "Estimate"

heda.pl.cidata <- inner_join(heda.pl.si.mean, heda.pl.cidata, by = "Substrate")
colnames(heda.pl.cidata)[3] <- "lower"
colnames(heda.pl.cidata)[4] <- "upper"

#Now attempting to make plot
ggplot(heda.pl.cidata, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size = 15, face = "bold")) +
  ylim(0.2, 2.8)

Repeating the above steps but for population-level SR’s

#Calculating the population-level SR dataframe
heda.pl.all.egg <- sum(heda.pl.obs.v.exp$Total.eggs)

heda.pl.egg.by.sub <- heda.pl.obs.v.exp |>
  group_by(Substrate) |> # Group just by substrate
  dplyr::summarize(
    sum(Total.eggs)
  )

heda.pl.prop.egg <- as.data.frame(heda.pl.egg.by.sub[,2]/heda.pl.all.egg)
heda.pl.prop.egg <- cbind.data.frame(heda.pl.egg.by.sub$Substrate, heda.pl.prop.egg)
names(heda.pl.prop.egg)[1] <- "Substrate"
names(heda.pl.prop.egg)[2] <- "Prop.Egg"

heda.pl.prop.egg.SA <- left_join(heda.pl.prop.egg, pl.prop.sa, by="Substrate")

heda.pl.pop.lvl.SR <- as.data.frame((heda.pl.prop.egg.SA$Prop.Egg)/(heda.pl.prop.egg.SA$Prop.SA))
heda.pl.pop.lvl.SR <- cbind.data.frame(heda.pl.prop.egg.SA$Substrate, heda.pl.pop.lvl.SR)
names(heda.pl.pop.lvl.SR)[1] <- "Substrate"
names(heda.pl.pop.lvl.SR)[2] <- "SR"

Creating function to calculate population-level SR’s to use in a bootstrap

heda.pl.pop.fun <- function(heda.pl.bootdat, pl.prop.sa=pl.prop.sa){
  # Now repeat all the same calculations (using bootdat instead of obs.v.exp)
  # I'll append a ".b" to all the names to make it clear that these are done
  # with the bootstrap data set
  heda.pl.all.egg.b <-sum(heda.pl.bootdat$Total.eggs)  
  heda.pl.egg.by.sub.b <- heda.pl.bootdat |>
  group_by(Substrate) |> # Group just by substrate
  dplyr::summarize(
    sum(Total.eggs)
  )
  heda.pl.prop.egg.b <- as.data.frame(heda.pl.egg.by.sub.b[,2]/heda.pl.all.egg.b)
  heda.pl.prop.egg.b <- cbind.data.frame(heda.pl.egg.by.sub.b$Substrate, heda.pl.prop.egg.b)
  names(heda.pl.prop.egg.b)[1] <- "Substrate"  
  names(heda.pl.prop.egg.b)[2] <- "Prop.Egg"
  heda.pl.prop.egg.SA.b <- right_join(heda.pl.prop.egg.b, pl.prop.sa, by="Substrate")

  heda.pl.pop.lvl.SR.b <- as.numeric((heda.pl.prop.egg.SA.b$Prop.Egg)/(heda.pl.prop.egg.SA.b$Prop.SA))
  return(heda.pl.pop.lvl.SR.b)
}

Testing to make sure above function works (output should be the same)

print(heda.pl.pop.lvl.SR)
##   Substrate        SR
## 1       PDS 1.7930086
## 2      SMBR 0.7467261
## 3        TS 0.7979922
heda.pl.pop.fun(heda.pl.obs.v.exp, pl.prop.sa = pl.prop.sa)
## [1] 1.7930086 0.7467261 0.7979922

Bootstrapping and calculating CI’s

heda.pl.IDS<-unique(heda.pl.obs.v.exp$ID)
heda.pl.pop.lvl.bootreps<-matrix(NA, 9999, 3)
for(i in 1:9999){
  # Sample IDs with replacement
  heda.pl.bootIDs <- data.frame(ID = sample(heda.pl.IDS, replace=TRUE))
  
  # Create bootstrap data set from the wide version
  heda.pl.bootdat <- left_join(heda.pl.bootIDs, heda.pl.obs.v.exp, relationship = "many-to-many", by = "ID") 
  heda.pl.pop.lvl.bootreps[i,] <- heda.pl.pop.fun(heda.pl.bootdat, pl.prop.sa = pl.prop.sa) # calculate the bootstrap ratios and store them in the ith row of bootstats
}


## Bca intervals
heda.pl.cis.bca <- matrix(NA, 2, 3)
for(i in 1:3){
  heda.pl.cis.bca[,i] <- t(bca(heda.pl.pop.lvl.bootreps[,i])) # transpose results so we can store using 2 rows, one for the upper CL and one for the lower
}
colnames(heda.pl.cis.bca)<- heda.pl.pop.lvl.SR$Substrate

heda.pl.cis.bca <- as.data.frame(heda.pl.cis.bca)
rownames(heda.pl.cis.bca) <- c("95% LCL (bca)", "95% UCL (bca)")
colnames(heda.pl.cis.bca) <- c("PDS", "SMBR", "TS")
kable(heda.pl.cis.bca, caption = "Dakota skipper 95% BCa Confidence Intervals (graminoids)")
Dakota skipper 95% BCa Confidence Intervals (graminoids)
PDS SMBR TS
95% LCL (bca) 1.241018 0.5513416 0.518685
95% UCL (bca) 2.255583 1.0490077 1.129907

Putting bootstrap statistics into a data frame, then graphing

#Putting bootstrap stats into dataframe
heda.pl.pop.lvl.bootstats <- data.frame(heda.pl.pop.lvl.bootreps) # contains the bootstrap statistics as columns
names(heda.pl.pop.lvl.bootstats) <- t(heda.pl.pop.lvl.SR[,1]) # Assign names to the different columns
heda.pl.pop.lvl.bootstats <- heda.pl.pop.lvl.bootstats |> pivot_longer(
  cols = everything(),
  values_to ="Bootstrap.Statistic",
  names_to = "Substrate"
  )

#Graphing
ggplot(heda.pl.pop.lvl.bootstats, aes(x= Bootstrap.Statistic)) +
  geom_histogram(fill = "skyblue", color = "black", alpha = 0.7) +   
  facet_wrap(~ Substrate, labeller = as_labeller(pl.substrate.names), scales = "free") +
  geom_vline(aes(xintercept = SR, col = "red"), data=heda.pl.pop.lvl.SR, size=1) +
  labs(color = "Sample Statistic") +
  scale_color_manual(labels = c("Mean"), values = c("red")) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Plotting CI’s

#Need to add the actual SR's to the cidata data frame first, to plot
heda.pl.cis.bca <- heda.pl.cis.bca |> pivot_longer(cols = c("PDS", "SMBR", "TS"),
                                   names_to = "Substrate",
                                   values_to = "lower")
heda.pl.cis.bca <- heda.pl.cis.bca |>
  group_by(Substrate) |>
  mutate(
    upper = max(lower)
  ) |>
  ungroup()


heda.pl.cis.bca <- heda.pl.cis.bca[-c(4:6),]

rownames(heda.pl.pop.lvl.SR) <- c("PDS", "SMBR", "TS")
heda.pl.pop.lvl.SR <- rownames_to_column(heda.pl.pop.lvl.SR, "Sub")
heda.pl.pop.lvl.SR <- subset(heda.pl.pop.lvl.SR, select = -c(Substrate))
colnames(heda.pl.pop.lvl.SR)[1] <- "Substrate"
colnames(heda.pl.pop.lvl.SR)[2] <- "Estimate"

heda.pl.cis.bca <- inner_join(heda.pl.cis.bca, heda.pl.pop.lvl.SR, by = "Substrate")

#Now attempting to make plot
ggplot(heda.pl.cis.bca, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size = 15, face = "bold")) +
  ylim(0.4, 2.3)

This marks the end of the Dakota skipper analysis. Moving on to conduct the same analysis with Poweshiek skipperling

oapo.ovi.data <- filter(ovi.data, Spp == "OAPO")

#Using the group_by function to group dataset by ID and Substrate, then calculating total number of eggs and days for each
oapo.ovi.data <- oapo.ovi.data |>
  group_by(ID, Substrate) |>
  mutate(
    Total.eggs = sum(Eggs),
    N.days = n_distinct(Date)
  )

#Getting rid of date and eggs columns, no longer needed
oapo.ovi.data <- subset(oapo.ovi.data, select = -c(Date, Eggs))

#Removing duplicate data
oapo.ovi.data <- oapo.ovi.data[!duplicated(oapo.ovi.data),]
oapo.ovi.data <- arrange(oapo.ovi.data, ID)

Creating data frame for OAPO SR’s

#Calculating SA proportions and putting them into new dataframe
prop.SA.b <- as.data.frame(sa.data[,2]/sa.total)
prop.SA.b <- cbind.data.frame(sa.data$Substrate, prop.SA.b)

names(prop.SA.b)[1] <- "Substrate"
names(prop.SA.b)[2] <- "Prop.SA"

#Observed total egg values for each individual
oapo.ind.total <- aggregate(Total.eggs ~ ID, data=oapo.ovi.data, sum)

#Merging total eggs dataframe w/ proportion dataframe to be able to calculate expected eggs
oapo.exp.total <- merge(oapo.ind.total, prop.SA.b)

#Multiplying total eggs by proportion to get expected eggs for each substrate
oapo.exp.total$Exp.eggs <- (oapo.exp.total$Total.eggs * oapo.exp.total$Prop.SA)

#Removing the columns no longer needed
oapo.exp.total <- subset(oapo.exp.total, select = -c(Total.eggs, Prop.SA))
oapo.exp.total <- arrange(oapo.exp.total, ID)

#Summarizing observed eggs by individual and substrate
oapo.obs.total <- subset(oapo.ovi.data, select = -c(Spp, N.days))
oapo.obs.total <- arrange(oapo.obs.total, ID)

#Using a right join to combine the observed and expected dataframes
oapo.obs.v.exp <- right_join(oapo.obs.total, oapo.exp.total)
## Joining with `by = join_by(ID, Substrate)`
#Replacing NA's with 0's
oapo.obs.v.exp[is.na(oapo.obs.v.exp)] <- 0

#Arranging dataframe based on Substrate
oapo.obs.v.exp <- arrange(oapo.obs.v.exp, Substrate)

Binning non-graminoid substrates together

#Making new dataframe with only non-graminoid substrates
oapo.non.gram.sub <- oapo.obs.v.exp[c(1:13, 40:52, 66:78, 92:104),]

#Grouping by ID to sum total eggs and expected eggs across non-graminoid substrates
oapo.non.gram.sub <- oapo.non.gram.sub |>
  group_by(ID) |>
  mutate(y=sum(Total.eggs)) |>
  mutate(z=sum(Exp.eggs))

oapo.non.gram.sub <- oapo.non.gram.sub |> ungroup()

#Cleaning up the dataframe
oapo.non.gram.sub <- subset(oapo.non.gram.sub, select = -c(Total.eggs, Exp.eggs))
colnames(oapo.non.gram.sub)[3] <- "Total.eggs"
colnames(oapo.non.gram.sub)[4] <- "Exp.eggs"

oapo.non.gram.sub <- subset(oapo.non.gram.sub, select = -c(Substrate))
oapo.non.gram.sub <- oapo.non.gram.sub[!duplicated(oapo.non.gram.sub),]
oapo.non.gram.sub$Substrate <- c("ABIOTIC")

#Now making data frame with only the three graminoid substrates and flower
oapo.gram.sub <- oapo.obs.v.exp[c(14:39, 53:65, 79:91),]

#Combining the two dataframes
oapo.bin.obs.v.exp <- rbind(oapo.non.gram.sub, oapo.gram.sub)

#Adding a column for selection ratios for each individual for each substrate (observed eggs per substrate / expected eggs per substrate)
oapo.bin.obs.v.exp$SR <- (oapo.bin.obs.v.exp$Total.eggs/oapo.bin.obs.v.exp$Exp.eggs)

Converting binned data frame into wide format

w.oapo.bin.obs.v.exp <- subset(oapo.bin.obs.v.exp, select = -c(Total.eggs, Exp.eggs))
w.oapo.bin.obs.v.exp <- w.oapo.bin.obs.v.exp %>% pivot_wider(names_from = Substrate, values_from = SR)

w.oapo.bin.obs.v.exp <- w.oapo.bin.obs.v.exp |> ungroup()

Calculating simple mean

oapo.si.mean <- w.oapo.bin.obs.v.exp %>%
  dplyr::summarize(across(2:6, mean, na.rm=TRUE))
oapo.si.mean <- oapo.si.mean %>% pivot_longer(cols=c('ABIOTIC', 'FLOWER', 'PDS', 'SMBR', 'TS'),
                                    names_to='Substrate',
                                    values_to='Mean')

Bootstrapping using the mean.fun function created earlier

oapo.bootreps <- boot(data = w.oapo.bin.obs.v.exp, statistic = mean.fun, R = 9999)

oapo.bootstats <- data.frame(oapo.bootreps$t) # contains the bootstrap statistics as columns
names(oapo.bootstats) <- t(oapo.si.mean[,1]) # Assign names to the different columns
oapo.bootstats <- oapo.bootstats |> pivot_longer(
  cols = everything(),
  values_to ="Bootstrap.Statistic",
  names_to = "Substrate"
  )

Graphing the bootstrap distributions

ggplot(oapo.bootstats, aes(x= Bootstrap.Statistic)) +
  geom_histogram(fill = "skyblue", color = "black", alpha = 0.7) +   
  facet_wrap(~ Substrate, labeller = as_labeller(substrate.names), scales = "free") +
  geom_vline(aes(xintercept = Mean, col = "red"), oapo.si.mean, size = 1) +
  labs(color = "Sample Statistic") +
  scale_color_manual(labels = c("Mean"), values = c("red")) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Calculating 95% CI’s

oapo.cidata <- NULL
for(i in c(1:5)){
  # CI for ith selection ratio
  temp <- boot.ci(oapo.bootreps, index = i, type = c("bca"))
  oapo.cidata <- rbind(oapo.cidata, data.frame(Substrate = oapo.si.mean$Substrate[i],
                                     LCL.bca = temp$bca[1,4],
                                     UCL.bca = temp$bca[1,5]))
}  
oapo.cidata[,2:3] <-round(oapo.cidata[,2:3], 2)
colnames(oapo.cidata)[2:3] <- c("95% LCL (bca)", 
                           "95% UCL (bca)") 
rownames(oapo.cidata) <- c("ABIOTIC", "FLOWER", "PDS", "SMBR", "TS")
oapo.cidata <- rownames_to_column(oapo.cidata, "Sub")
oapo.cidata <- subset(oapo.cidata, select = -c(Substrate))
colnames(oapo.cidata)[1] <- "Substrate"

kable(oapo.cidata, caption = "Poweshiek skipperling 95% BCa Confidence Intervals")
Poweshiek skipperling 95% BCa Confidence Intervals
Substrate 95% LCL (bca) 95% UCL (bca)
ABIOTIC 0.52 0.84
FLOWER 0.57 2.86
PDS 6.19 17.31
SMBR 3.21 7.66
TS 2.27 6.51

Plotting the CI’s

#Need to add the SR's to the cidata data frame first, to plot
rownames(oapo.si.mean) <- c("ABIOTIC", "FLOWER", "PDS", "SMBR", "TS")
## Warning: Setting row names on a tibble is deprecated.
oapo.si.mean <- rownames_to_column(oapo.si.mean, "Sub")
oapo.si.mean <- subset(oapo.si.mean, select = -c(Substrate))
colnames(oapo.si.mean)[1] <- "Substrate"
colnames(oapo.si.mean)[2] <- "Estimate"

oapo.cidata <- inner_join(oapo.si.mean, oapo.cidata, by = "Substrate")
colnames(oapo.cidata)[3] <- "lower"
colnames(oapo.cidata)[4] <- "upper"

#Now attempting to make plot
ggplot(oapo.cidata, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  ylim(0.3, 22) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size=15, face="bold"))

Now calculating the population-level SR’s

#Calculating the population-level SR dataframe
oapo.all.egg <- sum(oapo.bin.obs.v.exp$Total.eggs)

oapo.egg.by.sub <- oapo.bin.obs.v.exp |>
  group_by(Substrate) |>
  dplyr::summarize(
    sum(Total.eggs)
  )

oapo.prop.egg <- as.data.frame(oapo.egg.by.sub[,2]/oapo.all.egg)
oapo.prop.egg <- cbind.data.frame(oapo.egg.by.sub$Substrate, oapo.prop.egg)
names(oapo.prop.egg)[1] <- "Substrate"
names(oapo.prop.egg)[2] <- "Prop.Egg"

oapo.prop.egg.SA <- left_join(oapo.prop.egg, prop.SA, by="Substrate")

oapo.pop.lvl.SR <- as.data.frame((oapo.prop.egg.SA$Prop.Egg)/(oapo.prop.egg.SA$Prop.SA))
oapo.pop.lvl.SR <- cbind.data.frame(oapo.prop.egg.SA$Substrate, oapo.pop.lvl.SR)
names(oapo.pop.lvl.SR)[1] <- "Substrate"
names(oapo.pop.lvl.SR)[2] <- "SR"

Creating a function for bootstrapping

oapo.pop.fun <- function(oapo.bootdat, prop.SA=prop.SA){
  # Now repeat all the same calculations (using bootdat instead of obs.v.exp)
  # I'll append a ".b" to all the names to make it clear that these are done
  # with the bootstrap data set
  oapo.all.egg.b <-sum(oapo.bootdat$Total.eggs)  
  oapo.egg.by.sub.b <- oapo.bootdat |>
  group_by(Substrate) |> # Group just by substrate
  dplyr::summarize(
    sum(Total.eggs)
  )
  oapo.prop.egg.b <- as.data.frame(oapo.egg.by.sub.b[,2]/oapo.all.egg.b)
  oapo.prop.egg.b <- cbind.data.frame(oapo.egg.by.sub.b$Substrate, oapo.prop.egg.b)
  names(oapo.prop.egg.b)[1] <- "Substrate"  
  names(oapo.prop.egg.b)[2] <- "Prop.Egg"
  oapo.prop.egg.SA.b <- right_join(oapo.prop.egg.b, prop.SA, by="Substrate")

  oapo.pop.lvl.SR.b <- as.numeric((oapo.prop.egg.SA.b$Prop.Egg)/(oapo.prop.egg.SA.b$Prop.SA))
  return(oapo.pop.lvl.SR.b)
}

Testing above function

print(oapo.pop.lvl.SR)
##   Substrate         SR
## 1   ABIOTIC  0.5756233
## 2    FLOWER  1.0621716
## 3       PDS 13.6429045
## 4      SMBR  6.8300755
## 5        TS  4.9887873
oapo.pop.fun(oapo.bin.obs.v.exp, prop.SA = prop.SA)
## [1]  0.5756233  1.0621716 13.6429045  6.8300755  4.9887873

Bootstrapping and calculating CI’s

oapo.IDS<-unique(oapo.bin.obs.v.exp$ID)
oapo.pop.lvl.bootreps<-matrix(NA, 9999, 5)
for(i in 1:9999){
  # Sample IDs with replacement
  oapo.bootIDs <- data.frame(ID = sample(oapo.IDS, replace=TRUE))
  
  # Create bootstrap data set from the wide version
  oapo.bootdat <- left_join(oapo.bootIDs, oapo.bin.obs.v.exp, relationship = "many-to-many", by = "ID") 
  oapo.pop.lvl.bootreps[i,] <- oapo.pop.fun(oapo.bootdat, prop.SA = prop.SA) # calculate the bootstrap ratios and store them in the ith row of bootstats
}


## BCa intervals
oapo.cis.bca <- matrix(NA, 2, 5)
for(i in 1:5){
  oapo.cis.bca[,i] <- t(bca(oapo.pop.lvl.bootreps[,i])) # transpose results so we can store using 2 rows, one for the upper CL and one for the lower
}
colnames(oapo.cis.bca)<-colnames(oapo.pop.lvl.SR$Substrate)

oapo.cis.bca <- as.data.frame(oapo.cis.bca)
rownames(oapo.cis.bca) <- c("95% LCL (bca)", "95% UCL (bca)")
colnames(oapo.cis.bca) <- c("ABIOTIC", "FLOWER", "PDS", "SMBR", "TS")
kable(oapo.cis.bca, caption = "Poweshiek skipperling 95% BCa Confidence Intervals")
Poweshiek skipperling 95% BCa Confidence Intervals
ABIOTIC FLOWER PDS SMBR TS
95% LCL (bca) 0.4324908 0.5313276 8.152304 4.582132 3.139106
95% UCL (bca) 0.7446029 1.7054636 20.636942 8.692303 7.653885

Putting bootstrap stats into data frame and graphing

#Putting bootstrap stats into dataframe
oapo.pop.lvl.bootstats <- data.frame(oapo.pop.lvl.bootreps) # contains the bootstrap statistics as columns
names(oapo.pop.lvl.bootstats) <- t(oapo.pop.lvl.SR[,1]) # Assign names to the different columns
oapo.pop.lvl.bootstats <- oapo.pop.lvl.bootstats |> pivot_longer(
  cols = everything(),
  values_to ="Bootstrap.Statistic",
  names_to = "Substrate"
  )

#Graphing
ggplot(oapo.pop.lvl.bootstats, aes(x= Bootstrap.Statistic)) +
  geom_histogram(fill = "skyblue", color = "black", alpha = 0.7) +   
  facet_wrap(~ Substrate, labeller = as_labeller(substrate.names), scales = "free") +
  geom_vline(aes(xintercept = SR, col = "red"), data=oapo.pop.lvl.SR, size=1) +
  labs(color = "Sample Statistic") +
  scale_color_manual(labels = c("Mean"), values = c("red")) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Plotting CI’s

#Need to add the actual SR's to the cidata data frame first, to plot
oapo.cis.bca <- oapo.cis.bca |> pivot_longer(cols = c("ABIOTIC", "FLOWER", "PDS", "SMBR", "TS"),
                                   names_to = "Substrate",
                                   values_to = "lower")
oapo.cis.bca <- oapo.cis.bca |>
  group_by(Substrate) |>
  mutate(
    upper = max(lower)
  ) |>
  ungroup()


oapo.cis.bca <- oapo.cis.bca[-c(6:10),]

rownames(oapo.pop.lvl.SR) <- c("ABIOTIC", "FLOWER", "PDS", "SMBR", "TS")
oapo.pop.lvl.SR <- rownames_to_column(oapo.pop.lvl.SR, "Sub")
oapo.pop.lvl.SR <- subset(oapo.pop.lvl.SR, select = -c(Substrate))
colnames(oapo.pop.lvl.SR)[1] <- "Substrate"
colnames(oapo.pop.lvl.SR)[2] <- "Estimate"

oapo.cis.bca <- inner_join(oapo.cis.bca, oapo.pop.lvl.SR, by = "Substrate")

#Now attempting to make plot
ggplot(oapo.cis.bca, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size=15, face="bold")) +
  ylim(0.4, 21)

Moving on to Poweshiek skipperling analysis with only the three graminoid substrates

Creating data frame with only the graminoid substrates

#Finding observed eggs on the graminoid substrates
oapo.pl.ovi.dat <- oapo.ovi.data
oapo.pl.ovi.dat$exclude = 0
oapo.pl.ovi.dat$exclude[oapo.pl.ovi.dat$Substrate == 'FLOWER'] = 1
oapo.pl.ovi.dat$exclude[oapo.pl.ovi.dat$Substrate == 'CAGE'] = 1
oapo.pl.ovi.dat$exclude[oapo.pl.ovi.dat$Substrate == 'TUBE'] = 1
oapo.pl.ovi.dat$exclude[oapo.pl.ovi.dat$Substrate == 'POT'] = 1
oapo.pl.ovi.dat$exclude[oapo.pl.ovi.dat$Substrate == 'SOIL'] = 1
oapo.pl.ovi.dat$exclude[oapo.pl.ovi.dat$Substrate == 'VIAL'] = 1
oapo.pl.ovi.dat = subset(oapo.pl.ovi.dat, exclude == 0)

oapo.pl.ovi.dat <- oapo.pl.ovi.dat[-c(6)]
oapo.pl.ovi.dat <- arrange(oapo.pl.ovi.dat, Substrate)

#Observed individual total for only graminoids
oapo.pl.ind.total <- aggregate(Total.eggs ~ ID, data=oapo.pl.ovi.dat, sum)

#Merging total eggs dataframe w/ proportion dataframe to be able to calculate expected eggs
oapo.pl.exp.total <- merge(oapo.pl.ind.total, pl.prop.sa)

#Multiplying total eggs by proportion to get expected eggs for each substrate
oapo.pl.exp.total$Exp.eggs <- (oapo.pl.exp.total$Total.eggs * oapo.pl.exp.total$Prop.SA)

#Removing the columns no longer needed
oapo.pl.exp.total <- subset(oapo.pl.exp.total, select = -c(Total.eggs, Prop.SA))
oapo.pl.exp.total <- arrange(oapo.pl.exp.total, ID)

#Summarizing observed eggs by individual and substrate
oapo.pl.obs.total <- subset(oapo.pl.ovi.dat, select = -c(Spp, N.days))
oapo.pl.obs.total <- arrange(oapo.pl.obs.total, ID)

Combining observed and expected data frames

#Using a right join to combine the observed and expected dataframes
oapo.pl.obs.v.exp <- right_join(oapo.pl.obs.total, oapo.pl.exp.total)
## Joining with `by = join_by(ID, Substrate)`
#Replacing NA's with 0's
oapo.pl.obs.v.exp[is.na(oapo.pl.obs.v.exp)] <- 0

#Adding a column for selection ratios for each individual for each substrate (observed eggs per substrate / expected eggs per substrate)
oapo.pl.obs.v.exp$SR <- (oapo.pl.obs.v.exp$Total.eggs/oapo.pl.obs.v.exp$Exp.eggs)

#Arranging dataframe based on Substrate
oapo.pl.obs.v.exp <- arrange(oapo.pl.obs.v.exp, Substrate)

Converting to wide format

w.oapo.pl.obs.v.exp <- subset(oapo.pl.obs.v.exp, select = -c(Total.eggs, Exp.eggs))
w.oapo.pl.obs.v.exp <- w.oapo.pl.obs.v.exp %>% pivot_wider(names_from = Substrate, values_from = SR)

w.oapo.pl.obs.v.exp <- w.oapo.pl.obs.v.exp |> ungroup()

Calculating the simple mean and saving as new object

oapo.pl.si.mean <- w.oapo.pl.obs.v.exp %>%
  dplyr::summarize(across(2:4, mean, na.rm=TRUE))
oapo.pl.si.mean <- oapo.pl.si.mean %>% pivot_longer(cols=c('PDS','SMBR','TS'),
                                    names_to='Substrate',
                                    values_to='Mean')

Bootstrapping using the pl.mean.fun function created earlier

oapo.pl.bootreps <- boot(data = w.oapo.pl.obs.v.exp, statistic = pl.mean.fun, R = 9999)

Creating data frame with bootstrap statistics

oapo.pl.bootstats <- data.frame(oapo.pl.bootreps$t) # contains the bootstrap statistics as columns
names(oapo.pl.bootstats) <- t(oapo.pl.si.mean[,1]) # Assign names to the different columns
oapo.pl.bootstats <- oapo.pl.bootstats |> pivot_longer(
  cols = everything(),
  values_to ="Bootstrap.Statistic",
  names_to = "Substrate"
  )

Graphing the bootstrap statistics

ggplot(oapo.pl.bootstats, aes(x= Bootstrap.Statistic)) +
  geom_histogram(fill = "skyblue", color = "black", alpha = 0.7) +   
  facet_wrap(~ Substrate, labeller = as_labeller(pl.substrate.names), scales = "free") +
  geom_vline(aes(xintercept = Mean, col = "red"), oapo.pl.si.mean, size=1) +
  labs(color = "Sample Statistic") +
  scale_color_manual(labels = c("Mean"), values = c("red")) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Calculating CI’s

oapo.pl.cidata <- NULL
for(i in c(1:3)){
  # CI for ith selection ratio
  temp <- boot.ci(oapo.pl.bootreps, index = i, type = c("bca"))
  oapo.pl.cidata <- rbind(oapo.pl.cidata, data.frame(Substrate = oapo.pl.si.mean$Substrate[i],
                                     LCL.bca = temp$bca[1,4],
                                     UCL.bca = temp$bca[1,5]))
}  
oapo.pl.cidata[,2:3] <-round(oapo.pl.cidata[,2:3], 2)
colnames(oapo.pl.cidata)[2:3] <- c("95% LCL (bca)", 
                           "95% UCL (bca)") 
rownames(oapo.pl.cidata) <- c("PDS", "SMBR", "TS")
oapo.pl.cidata <- rownames_to_column(oapo.pl.cidata, "Sub")
oapo.pl.cidata <- subset(oapo.pl.cidata, select=-c(Substrate))
colnames(oapo.pl.cidata)[1] <- "Substrate"
kable(oapo.pl.cidata, caption = "Poweshiek skipperling 95% BCa Confidence Intervals (graminoids)")
Poweshiek skipperling 95% BCa Confidence Intervals (graminoids)
Substrate 95% LCL (bca) 95% UCL (bca)
PDS 1.48 2.05
SMBR 0.72 1.01
TS 0.50 0.89

Plotting CI’s

#Need to add the actual SR's to the cidata data frame first, to plot
rownames(oapo.pl.si.mean) <- c("PDS", "SMBR", "TS")
## Warning: Setting row names on a tibble is deprecated.
oapo.pl.si.mean <- rownames_to_column(oapo.pl.si.mean, "Sub")
oapo.pl.si.mean <- subset(oapo.pl.si.mean, select = -c(Substrate))
colnames(oapo.pl.si.mean)[1] <- "Substrate"
colnames(oapo.pl.si.mean)[2] <- "Estimate"

oapo.pl.cidata <- inner_join(oapo.pl.si.mean, oapo.pl.cidata, by = "Substrate")
colnames(oapo.pl.cidata)[3] <- "lower"
colnames(oapo.pl.cidata)[4] <- "upper"

#Now attempting to make plot
ggplot(oapo.pl.cidata, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size = 15, face = "bold")) +
  ylim(0.2, 2.8)

Repeating the above steps but for population-level SR’s

#Calculating the population-level SR dataframe
oapo.pl.all.egg <- sum(oapo.pl.obs.v.exp$Total.eggs)

oapo.pl.egg.by.sub <- oapo.pl.obs.v.exp |>
  group_by(Substrate) |> # Group just by substrate
  dplyr::summarize(
    sum(Total.eggs)
  )

oapo.pl.prop.egg <- as.data.frame(oapo.pl.egg.by.sub[,2]/oapo.pl.all.egg)
oapo.pl.prop.egg <- cbind.data.frame(oapo.pl.egg.by.sub$Substrate, oapo.pl.prop.egg)
names(oapo.pl.prop.egg)[1] <- "Substrate"
names(oapo.pl.prop.egg)[2] <- "Prop.Egg"

oapo.pl.prop.egg.SA <- left_join(oapo.pl.prop.egg, pl.prop.sa, by="Substrate")

oapo.pl.pop.lvl.SR <- as.data.frame((oapo.pl.prop.egg.SA$Prop.Egg)/(oapo.pl.prop.egg.SA$Prop.SA))
oapo.pl.pop.lvl.SR <- cbind.data.frame(oapo.pl.prop.egg.SA$Substrate, oapo.pl.pop.lvl.SR)
names(oapo.pl.pop.lvl.SR)[1] <- "Substrate"
names(oapo.pl.pop.lvl.SR)[2] <- "SR"

Creating function to calculate population-level SR’s to use in a bootstrap

oapo.pl.pop.fun <- function(oapo.pl.bootdat, pl.prop.sa=pl.prop.sa){
  # Now repeat all the same calculations (using bootdat instead of obs.v.exp)
  # I'll append a ".b" to all the names to make it clear that these are done
  # with the bootstrap data set
  oapo.pl.all.egg.b <-sum(oapo.pl.bootdat$Total.eggs)  
  oapo.pl.egg.by.sub.b <- oapo.pl.bootdat |>
  group_by(Substrate) |> # Group just by substrate
  dplyr::summarize(
    sum(Total.eggs)
  )
  oapo.pl.prop.egg.b <- as.data.frame(oapo.pl.egg.by.sub.b[,2]/oapo.pl.all.egg.b)
  oapo.pl.prop.egg.b <- cbind.data.frame(oapo.pl.egg.by.sub.b$Substrate, oapo.pl.prop.egg.b)
  names(oapo.pl.prop.egg.b)[1] <- "Substrate"  
  names(oapo.pl.prop.egg.b)[2] <- "Prop.Egg"
  oapo.pl.prop.egg.SA.b <- right_join(oapo.pl.prop.egg.b, pl.prop.sa, by="Substrate")

  oapo.pl.pop.lvl.SR.b <- as.numeric((oapo.pl.prop.egg.SA.b$Prop.Egg)/(oapo.pl.prop.egg.SA.b$Prop.SA))
  return(oapo.pl.pop.lvl.SR.b)
}

Testing to make sure above function works (output should be the same)

print(oapo.pl.pop.lvl.SR)
##   Substrate        SR
## 1       PDS 1.7519088
## 2      SMBR 0.8770617
## 3        TS 0.6406188
oapo.pl.pop.fun(oapo.pl.obs.v.exp, pl.prop.sa = pl.prop.sa)
## [1] 1.7519088 0.8770617 0.6406188

Bootstrapping and calculating CI’s

oapo.pl.IDS<-unique(oapo.pl.obs.v.exp$ID)
oapo.pl.pop.lvl.bootreps<-matrix(NA, 9999, 3)
for(i in 1:9999){
  # Sample IDs with replacement
  oapo.pl.bootIDs <- data.frame(ID = sample(oapo.pl.IDS, replace=TRUE))
  
  # Create bootstrap data set from the wide version
  oapo.pl.bootdat <- left_join(oapo.pl.bootIDs, oapo.pl.obs.v.exp, relationship = "many-to-many", by = "ID") 
  oapo.pl.pop.lvl.bootreps[i,] <- oapo.pl.pop.fun(oapo.pl.bootdat, pl.prop.sa = pl.prop.sa) # calculate the bootstrap ratios and store them in the ith row of bootstats
}


## Bca intervals
oapo.pl.cis.bca <- matrix(NA, 2, 3)
for(i in 1:3){
  oapo.pl.cis.bca[,i] <- t(bca(oapo.pl.pop.lvl.bootreps[,i])) # transpose results so we can store using 2 rows, one for the upper CL and one for the lower
}
colnames(oapo.pl.cis.bca)<- oapo.pl.pop.lvl.SR$Substrate

oapo.pl.cis.bca <- as.data.frame(oapo.pl.cis.bca)
rownames(oapo.pl.cis.bca) <- c("95% LCL (bca)", "95% UCL (bca)")
colnames(oapo.pl.cis.bca) <- c("PDS", "SMBR", "TS")
kable(oapo.pl.cis.bca, caption = "Poweshiek skipperling 95% BCa Confidence Intervals (graminoids)")
Poweshiek skipperling 95% BCa Confidence Intervals (graminoids)
PDS SMBR TS
95% LCL (bca) 1.370710 0.7354446 0.4451808
95% UCL (bca) 2.217676 1.0141101 0.8803450

Putting bootstrap statistics into a data frame, then graphing

#Putting bootstrap stats into dataframe
oapo.pl.pop.lvl.bootstats <- data.frame(oapo.pl.pop.lvl.bootreps) # contains the bootstrap statistics as columns
names(oapo.pl.pop.lvl.bootstats) <- t(oapo.pl.pop.lvl.SR[,1]) # Assign names to the different columns
oapo.pl.pop.lvl.bootstats <- oapo.pl.pop.lvl.bootstats |> pivot_longer(
  cols = everything(),
  values_to ="Bootstrap.Statistic",
  names_to = "Substrate"
  )

#Graphing
ggplot(oapo.pl.pop.lvl.bootstats, aes(x= Bootstrap.Statistic)) +
  geom_histogram(fill = "skyblue", color = "black", alpha = 0.7) +   
  facet_wrap(~ Substrate, labeller = as_labeller(pl.substrate.names), scales = "free") +
  geom_vline(aes(xintercept = SR, col = "red"), data=oapo.pl.pop.lvl.SR, size=1) +
  labs(color = "Sample Statistic") +
  scale_color_manual(labels = c("Mean"), values = c("red")) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Plotting CI’s

#Need to add the actual SR's to the cidata data frame first, to plot
oapo.pl.cis.bca <- oapo.pl.cis.bca |> pivot_longer(cols = c("PDS", "SMBR", "TS"),
                                   names_to = "Substrate",
                                   values_to = "lower")
oapo.pl.cis.bca <- oapo.pl.cis.bca |>
  group_by(Substrate) |>
  mutate(
    upper = max(lower)
  ) |>
  ungroup()


oapo.pl.cis.bca <- oapo.pl.cis.bca[-c(4:6),]

rownames(oapo.pl.pop.lvl.SR) <- c("PDS", "SMBR", "TS")
oapo.pl.pop.lvl.SR <- rownames_to_column(oapo.pl.pop.lvl.SR, "Sub")
oapo.pl.pop.lvl.SR <- subset(oapo.pl.pop.lvl.SR, select = -c(Substrate))
colnames(oapo.pl.pop.lvl.SR)[1] <- "Substrate"
colnames(oapo.pl.pop.lvl.SR)[2] <- "Estimate"

oapo.pl.cis.bca <- inner_join(oapo.pl.cis.bca, oapo.pl.pop.lvl.SR, by = "Substrate")

#Now attempting to make plot
ggplot(oapo.pl.cis.bca, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size = 15, face = "bold")) +
  ylim(0.4, 2.3)

Plots with population-level CI’s

pl.heda.1 <- ggplot(heda.cis.bca, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size=13), title = element_text(size = 18, face = "bold")) +
  ggtitle("Dakota skipper") +
  annotate("text", x=5.25, y=21, label="A", size=10) +
  ylim(0.4, 21)

pl.oapo.1 <- ggplot(oapo.cis.bca, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size=13), title = element_text(size = 18, face = "bold")) +
  ggtitle("Poweshiek skipperling") +
  annotate("text", x=5.25, y=21, label="B", size=10) +
  ylim(0.4, 21)

pl.heda.2 <- ggplot(heda.pl.cis.bca, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size = 13)) +
  annotate("text", x=3.35, y=2.3, label="C", size=10) +
  ylim(0.4, 2.3)

pl.oapo.2 <- ggplot(oapo.pl.cis.bca, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size = 13)) +
  annotate("text", x=3.35, y=2.3, label="D", size=10) +
  ylim(0.4, 2.3)


grid.arrange(pl.heda.1, pl.oapo.1, pl.heda.2, pl.oapo.2, nrow = 2, 
             left = textGrob("Selection Ratio", gp=gpar(fontsize = 17), rot = 90),
             bottom = textGrob("Substrate", gp=gpar(fontsize = 17)))

Plots with individual-level CI’s

pl.heda.3 <- ggplot(heda.cidata, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  ylim(0.3, 22) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size=13), title = element_text(size = 18, face = "bold")) +
  ggtitle("Dakota skipper") +
  annotate("text", x=5.25, y=22, label="A", size=10)

pl.oapo.3 <- ggplot(oapo.cidata, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  ylim(0.3, 22) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size=13), title = element_text(size = 18, face = "bold")) +
  ggtitle("Poweshiek skipperling") +
  annotate("text", x=5.25, y=22, label="B", size=10)

pl.heda.4 <- ggplot(heda.pl.cidata, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size = 13)) +
  ylim(0.2, 2.8) +
  annotate("text", x=3.35, y=2.8, label="C", size=10)

pl.oapo.4 <- ggplot(oapo.pl.cidata, aes(Substrate, Estimate)) + geom_point(size = 2) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.5, size = 0.75, alpha = 0.5) +
  geom_abline(aes(intercept = 1, slope = 0), lty = 2, color = "red") +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text = element_text(size = 13)) +
  ylim(0.2, 2.8) +
  annotate("text", x=3.35, y=2.8, label="D", size=10)


grid.arrange(pl.heda.3, pl.oapo.3, pl.heda.4, pl.oapo.4, nrow=2,
            left = textGrob("Selection Ratio", gp=gpar(fontsize = 17), rot = 90),
            bottom = textGrob("Substrate", gp=gpar(fontsize = 17)))

sessionInfo()
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Chicago
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] gridtext_0.1.5   gridExtra_2.3    coxed_0.3.3      mgcv_1.9-1      
##  [5] nlme_3.1-164     survival_3.6-4   rms_6.9-0        Hmisc_5.2-1     
##  [9] kableExtra_1.4.0 boot_1.3-30      here_1.0.1       lubridate_1.9.3 
## [13] forcats_1.0.0    stringr_1.5.1    purrr_1.0.2      readr_2.1.5     
## [17] tidyr_1.3.1      tibble_3.2.1     ggplot2_3.5.1    tidyverse_2.0.0 
## [21] dplyr_1.1.4     
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1   viridisLite_0.4.2  farver_2.1.2       fastmap_1.2.0     
##  [5] TH.data_1.1-2      digest_0.6.37      rpart_4.1.23       timechange_0.3.0  
##  [9] lifecycle_1.0.4    cluster_2.1.6      magrittr_2.0.3     compiler_4.4.1    
## [13] rlang_1.1.4        sass_0.4.9         tools_4.4.1        utf8_1.2.4        
## [17] yaml_2.3.10        data.table_1.16.2  knitr_1.49         labeling_0.4.3    
## [21] htmlwidgets_1.6.4  xml2_1.3.6         multcomp_1.4-26    polspline_1.1.25  
## [25] withr_3.0.2        foreign_0.8-86     nnet_7.3-19        fansi_1.0.6       
## [29] colorspace_2.1-1   scales_1.3.0       MASS_7.3-60.2      cli_3.6.3         
## [33] mvtnorm_1.3-2      crayon_1.5.3       rmarkdown_2.29     generics_0.1.3    
## [37] rstudioapi_0.17.1  tzdb_0.4.0         cachem_1.1.0       splines_4.4.1     
## [41] base64enc_0.1-3    vctrs_0.6.5        Matrix_1.7-0       sandwich_3.1-1    
## [45] jsonlite_1.8.9     SparseM_1.84-2     hms_1.1.3          Formula_1.2-5     
## [49] htmlTable_2.4.3    systemfonts_1.1.0  jquerylib_0.1.4    glue_1.8.0        
## [53] codetools_0.2-20   stringi_1.8.4      gtable_0.3.6       munsell_0.5.1     
## [57] pillar_1.9.0       htmltools_0.5.8.1  quantreg_5.99.1    R6_2.5.1          
## [61] rprojroot_2.0.4    evaluate_1.0.1     lattice_0.22-6     backports_1.5.0   
## [65] bslib_0.8.0        MatrixModels_0.5-3 Rcpp_1.0.13-1      svglite_2.1.3     
## [69] checkmate_2.3.2    xfun_0.49          zoo_1.8-12         pkgconfig_2.0.3