# header ------------------------------------------------------------------
This code will pull in the datasets and analyze results of experiments to evaluate the viability and germination of seeds from four species of aquatic plants. The seeds were collected for use in a seed mix to be use for seeding a set of experimental plots. These experiments aim to understand the number of sprouts we might expect to get from the seeds and how we might boost that number using pre-treatments.
Associated article:
Verhoeven, M.R., Bacon, J.A., and D.J. Larkin. Seed traits and germination treatments for dormancy break of four aquatic plant species. Target Journal: Aquatic Botany.
To-do: Calculate “corrected germination” results Table 1 update clean up libraries drop the time to germ analysis
# load libraries - needs check for necessity
library(ggplot2)
library(data.table)
## Warning: package 'data.table' was built under R version 4.3.2
library(rstanarm)
## Warning: package 'rstanarm' was built under R version 4.3.1
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.3.1
## This is rstanarm version 2.21.4
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
library(shinystan)
## Warning: package 'shinystan' was built under R version 4.3.1
## Loading required package: shiny
## Warning: package 'shiny' was built under R version 4.3.1
##
## This is shinystan version 2.6.0
library(bayesplot)
## Warning: package 'bayesplot' was built under R version 4.3.1
## This is bayesplot version 1.10.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
library(sjPlot)
## Warning: package 'sjPlot' was built under R version 4.3.1
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.3.1
library(ggridges)
## Warning: package 'ggridges' was built under R version 4.3.1
# load in data ------------------------------------------------------------
#load germination dataset
germ <- fread(file = "scripts&data/data/input/Seeds_germination.csv", nrows = 640) # uses nrows to make sure extraneous blank rows aren't loaded
#load viability dataset
viab <- fread(file = "scripts&data/data/input/Seeds_viability.csv")
# explore & clean up germination ------------------------------------------
This dataset includes germination data for 640 seeds that were part of Jonah’s lab experiments working to break dormancy of four species of seed. The same covariates as in the viability dataset show up here, too (massed and photographed to get those covariates)
names(germ)
## [1] "Seed_ID" "Photo_ID(SeedID_###.CR2)"
## [3] "Species" "Treatment"
## [5] "Date_GA3" "Time_GA3"
## [7] "Date_chamber" "Time_chamber"
## [9] "Tray" "Group"
## [11] "Number" "Date_germinated"
## [13] "Germ_Photo_ID(Germ_###.CR2)" "Date_planted"
## [15] "Plant_Photo_ID(Plant_###.CR2)" "Jar_col"
## [17] "Jar_row" "Mass_grams"
## [19] "Area" "Major_axis"
## [21] "Minor_axis" "Blue_mean"
## [23] "Blue_StdDev" "Blue_Mode"
## [25] "Green_mean" "Green_StdDev"
## [27] "Green_Mode" "Red_mean"
## [29] "Red_StdDev" "Red_Mode"
## [31] "Notes"
str(germ)
## Classes 'data.table' and 'data.frame': 640 obs. of 31 variables:
## $ Seed_ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Photo_ID(SeedID_###.CR2) : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Species : chr "Nup_var" "Nup_var" "Nup_var" "Nup_var" ...
## $ Treatment : chr "SC+GA" "SC+GA" "SC+GA" "SC+GA" ...
## $ Date_GA3 : chr "12-May-2020" "12-May-2020" "12-May-2020" "12-May-2020" ...
## $ Time_GA3 : chr "13:00" "13:00" "13:00" "13:00" ...
## $ Date_chamber : chr "13-May-2020" "13-May-2020" "13-May-2020" "13-May-2020" ...
## $ Time_chamber : chr "10:00" "10:00" "10:00" "10:00" ...
## $ Tray : chr "Right" "Right" "Right" "Right" ...
## $ Group : chr "A" "A" "A" "A" ...
## $ Number : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Date_germinated : chr "" "" "" "" ...
## $ Germ_Photo_ID(Germ_###.CR2) : chr "" "" "" "" ...
## $ Date_planted : chr "" "" "" "" ...
## $ Plant_Photo_ID(Plant_###.CR2): chr "" "" "" "" ...
## $ Jar_col : chr "" "" "" "" ...
## $ Jar_row : int NA NA NA NA NA 11 NA NA NA NA ...
## $ Mass_grams : num 0.05 0.044 0.05 0.038 0.04 0.052 0.051 0.051 0.043 0.04 ...
## $ Area : num 2485012 2395857 2429253 1998438 2247414 ...
## $ Major_axis : num 2027 1967 1864 1913 2066 ...
## $ Minor_axis : num 1561 1550 1660 1330 1385 ...
## $ Blue_mean : num 34.3 38.1 37.9 37 41.9 ...
## $ Blue_StdDev : num 22 23.9 22.4 23.8 22.6 ...
## $ Blue_Mode : int 20 24 26 22 33 22 29 31 23 30 ...
## $ Green_mean : num 60.9 67.9 67 72.6 76.1 ...
## $ Green_StdDev : num 31.9 27.1 29.5 29.3 27.7 ...
## $ Green_Mode : int 37 55 50 57 63 43 61 59 39 66 ...
## $ Red_mean : num 112 134 137 137 143 ...
## $ Red_StdDev : num 37.1 30.4 30.4 26.1 27.7 ...
## $ Red_Mode : int 84 123 127 122 136 124 138 138 135 139 ...
## $ Notes : chr "" "" "" "" ...
## - attr(*, ".internal.selfref")=<externalptr>
#change date formats:
germ[ ,Date_chamber := as.IDate(Date_chamber, format = "%d-%b-%Y"), ]
germ[ ,Date_germinated := as.IDate(Date_germinated, format = "%d-%b-%Y"), ]
#make Treatment column values factors
germ$Treatment <- as.factor(germ$Treatment)
#subtract dates to get time to germ
germ[ ,N_day_germ := Date_germinated - Date_chamber] #New column with days-to-germination
#and add a column for yes or no germinated
germ[ ,Germ_yn := as.logical(N_day_germ), ]
#replace NAs with "FALSE"
germ$Germ_yn <- replace(germ$Germ_yn, is.na(germ$Germ_yn), FALSE)
# species pot_amp is actually pot_ill (based on Mike Verhoeven ID of grown-out adult of "B" seed's plant)
germ[ , .N, Species]
## Species N
## <char> <int>
## 1: Nup_var 160
## 2: Pot_amp 160
## 3: Pot_nat 160
## 4: Bra_sch 160
germ[Species == "Pot_amp", Species := "Pot_ill"]
germ[ ,Species := as.factor(Species), ]
germ[ ,N_day_germ := as.integer(N_day_germ) ,]
# clean up treatment data - needs to be two sep columns for GA and SC
germ[ , .N , .(Species, Treatment)]
## Species Treatment N
## <fctr> <fctr> <int>
## 1: Nup_var SC+GA 40
## 2: Pot_ill SC+GA 40
## 3: Pot_nat SC+GA 40
## 4: Bra_sch SC+GA 40
## 5: Nup_var GA 40
## 6: Pot_ill GA 40
## 7: Pot_nat GA 40
## 8: Bra_sch GA 40
## 9: Nup_var CT 40
## 10: Pot_ill CT 40
## 11: Pot_nat CT 40
## 12: Bra_sch CT 40
## 13: Nup_var SC 40
## 14: Pot_ill SC 40
## 15: Pot_nat SC 40
## 16: Bra_sch SC 40
germ[Treatment == "SC+GA" | Treatment == "SC", scarify := TRUE , ]
germ[ is.na(scarify) , scarify := FALSE , ]
germ[Treatment == "SC+GA" | Treatment == "GA", gibberellic := TRUE , ]
germ[ is.na(gibberellic) , gibberellic := FALSE , ]
#check
germ[ , .N , .(Treatment, gibberellic, scarify)]
## Treatment gibberellic scarify N
## <fctr> <lgcl> <lgcl> <int>
## 1: SC+GA TRUE TRUE 160
## 2: GA TRUE FALSE 160
## 3: CT FALSE FALSE 160
## 4: SC FALSE TRUE 160
#l:w ratio
germ[ ,lw_ratio := Major_axis/Minor_axis , ]
#center and scale size pars within each species (why within and not across? Because we'll use these parameters for indiv species models--we do not analyze the whole gamut, instead we analyze each species independently)
germ[Species == "Pot_ill" , sc_lw := scale(lw_ratio)]
germ[Species == "Pot_nat" , sc_lw := scale(lw_ratio)]
germ[Species == "Nup_var" , sc_lw := scale(lw_ratio)]
germ[Species == "Bra_sch" , sc_lw := scale(lw_ratio)]
germ[Species == "Pot_ill" , sc_mass := scale(Mass_grams)]
germ[Species == "Pot_nat" , sc_mass := scale(Mass_grams)]
germ[Species == "Nup_var" , sc_mass := scale(Mass_grams)]
germ[Species == "Bra_sch" , sc_mass := scale(Mass_grams)]
# explore & clean up viability ------------------------------------------
This dataset includes one record for each of 168 seeds that were massed and photographed before being cut open and stained with tetrazolium to test for viability. Jonah scored viability based on red-ness of the embryo. A set of negative controls were autoclaved before staining, and we determined that based on those neg controls any staining (score < 6 ) meant that the embryo was viable (these seeds are not in the dataset). In the dataset we have two “positive controls” which were harvested in August the day before the viability assay. We determined that these were bad positive controls because they had immature embryos.
names(viab)
## [1] "Seed_ID" "PreTZ_Photo_ID(Viability_###.CR2)"
## [3] "PostTZ_Photo_ID(TZ_###.CR2)" "Species"
## [5] "Tray" "Row"
## [7] "Column" "Embryo_stained"
## [9] "Mass_grams" "Area"
## [11] "Major_axis" "Minor_axis"
## [13] "Blue_mean" "Blue_StdDev"
## [15] "Blue_Mode" "Green_mean"
## [17] "Green_StdDev" "Green_Mode"
## [19] "Red_mean" "Red_StdDev"
## [21] "Red_Mode" "Notes"
str(viab)
## Classes 'data.table' and 'data.frame': 168 obs. of 22 variables:
## $ Seed_ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ PreTZ_Photo_ID(Viability_###.CR2): int 1 2 3 4 5 6 7 8 9 10 ...
## $ PostTZ_Photo_ID(TZ_###.CR2) : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Species : chr "Bra_sch" "Bra_sch" "Bra_sch" "Bra_sch" ...
## $ Tray : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Row : chr "A" "A" "A" "A" ...
## $ Column : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Embryo_stained : int 1 6 6 6 6 5 6 6 3 4 ...
## $ Mass_grams : num 0.033 0.023 0.025 0.021 0.021 0.03 0.029 0.021 0.018 0.026 ...
## $ Area : num 1897129 1361338 1492694 1435508 1591342 ...
## $ Major_axis : num 1743 1674 1522 1557 1564 ...
## $ Minor_axis : num 1386 1035 1249 1174 1295 ...
## $ Blue_mean : num 18.1 17 17.9 19.5 18.2 ...
## $ Blue_StdDev : num 14.2 13.2 13.4 14.6 13.3 ...
## $ Blue_Mode : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Green_mean : num 32.6 34.5 31 38.1 39 ...
## $ Green_StdDev : num 16.3 15.9 15.4 21.9 21.4 ...
## $ Green_Mode : int 24 27 24 25 27 23 31 28 36 26 ...
## $ Red_mean : num 97.7 112 89.2 110 111.1 ...
## $ Red_StdDev : num 25.7 24 29 24.1 30.8 ...
## $ Red_Mode : int 92 113 77 103 111 77 121 111 128 95 ...
## $ Notes : chr "" "" "" "" ...
## - attr(*, ".internal.selfref")=<externalptr>
# add a column for viable yes or no
viab[ ,Viab_yn := as.logical(Embryo_stained < 6), ]
# species pot_amp is actually pot_ill (based on Mike V ID of grown-out "B" seed's plant)
viab[ , .N, Species]
## Species N
## <char> <int>
## 1: Bra_sch 30
## 2: Nup_var 18
## 3: Pot_amp 30
## 4: Pot_nat 30
## 5: Nup_var_cont 30
## 6: pot_amp_cont 30
viab[Species == "Pot_amp", Species := "Pot_ill"]
viab[Species == "pot_amp_cont", Species := "Pot_ill_cont"]
viab[ ,Species := as.factor(Species), ]
#l:w ratio
viab[ ,lw_ratio := Major_axis/Minor_axis , ]
#center and scale size pars (again done by species to fit with species by species modeling):
viab[Species == "Pot_ill" , sc_lw := scale(lw_ratio)]
viab[Species == "Pot_nat" , sc_lw := scale(lw_ratio)]
viab[Species == "Nup_var" , sc_lw := scale(lw_ratio)]
viab[Species == "Bra_sch" , sc_lw := scale(lw_ratio)]
viab[Species == "Pot_ill" , sc_mass := scale(Mass_grams)]
viab[Species == "Pot_nat" , sc_mass := scale(Mass_grams)]
viab[Species == "Nup_var" , sc_mass := scale(Mass_grams)]
viab[Species == "Bra_sch" , sc_mass := scale(Mass_grams)]
#drop the data for the visual pos controls (these were used to set the scale for our TZ stain, but are not to be used for analysis):
viab <- viab[!Species %in% c('Pot_ill_cont', 'Nup_var_cont')]
viab[ , Species := droplevels(Species)]
# viability ---------------------------------------------------------------
#prep fields for figure:
viab[is.na(Viab_yn) , viab_words := "No embryo" ]
viab[Viab_yn == "TRUE" , viab_words := "Viable" ]
viab[Viab_yn == "FALSE" , viab_words := "Non-viable" ]
viab[, summary(viab_words)]
## Length Class Mode
## 108 character character
#set ggplot theme
theme_set(theme_bw(base_size = 14))
#write plot
viabilityplot <- ggplot(viab, aes(x = Species, fill = viab_words))+
geom_bar(position = "stack") +
coord_flip()+
scale_fill_manual(values=c("#D55E00", "#E69F00", "#009E73")) +
#geom_text() +
geom_text(aes(label = after_stat(count)), stat = "count", size = 5, position = position_stack(vjust = 0.5), colour = "black")+
labs(fill = NULL) +
scale_y_continuous(expand = c(0,0)) +
ylab("Number of Seeds") +
xlab("")+
theme_bw() +
scale_x_discrete(position = "top", limits=c("Bra_sch", "Nup_var", "Pot_ill", "Pot_nat" ),
labels = c(" Brasenia schreberi", " Nuphar variegata", " Potamogeton illinoiensis", " Potamogeton natans"))+
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
text = element_text(size = 14, color = "black"),
axis.text.y = element_text(size = 12, face = "italic", hjust = 1),
legend.position = c(0.85,0.4),
legend.background = element_blank())
#view plot
viabilityplot
# bayesian viability ------------------------------------------------------
Bayesian estimation of posteriors for viability (resp ~ 0 + pred will suppress intercepts, resp ~ pred - 1 gives means coded model matrix) using binomial error distributions. Results are log-odds, and use back-transformation (plogis) to get back to the % viable scale:
#viability in bayesian framework:
#reorder species list
viab[, Species := factor(Species, levels = c('Pot_nat', 'Pot_ill', 'Nup_var', 'Bra_sch'))]
#write model
bayes_species_viab <- stan_glm(Viab_yn ~ Species - 1 , data = viab[Species %in% c('Pot_ill','Bra_sch','Nup_var','Pot_nat'), ,], family = binomial(), set.seed(4913))
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 3.1e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.31 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.098 seconds (Warm-up)
## Chain 1: 0.105 seconds (Sampling)
## Chain 1: 0.203 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 9e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.061 seconds (Warm-up)
## Chain 2: 0.063 seconds (Sampling)
## Chain 2: 0.124 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 1e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.097 seconds (Warm-up)
## Chain 3: 0.095 seconds (Sampling)
## Chain 3: 0.192 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 1.3e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.13 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.102 seconds (Warm-up)
## Chain 4: 0.104 seconds (Sampling)
## Chain 4: 0.206 seconds (Total)
## Chain 4:
summary(bayes_species_viab)
##
## Model Info:
## function: stan_glm
## family: binomial [logit]
## formula: Viab_yn ~ Species - 1
## algorithm: sampling
## sample: 4000 (posterior sample size)
## priors: see help('prior_summary')
## observations: 102
## predictors: 4
##
## Estimates:
## mean sd 10% 50% 90%
## SpeciesPot_nat 1.4 0.5 0.8 1.3 2.0
## SpeciesPot_ill -0.3 0.4 -0.8 -0.3 0.2
## SpeciesNup_var 2.3 0.8 1.3 2.2 3.3
## SpeciesBra_sch 0.1 0.4 -0.3 0.1 0.6
##
## Fit Diagnostics:
## mean sd 10% 50% 90%
## mean_PPD 0.6 0.1 0.6 0.6 0.7
##
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
##
## MCMC diagnostics
## mcse Rhat n_eff
## SpeciesPot_nat 0.0 1.0 4631
## SpeciesPot_ill 0.0 1.0 4724
## SpeciesNup_var 0.0 1.0 3150
## SpeciesBra_sch 0.0 1.0 4550
## mean_PPD 0.0 1.0 4769
## log-posterior 0.0 1.0 1533
##
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
#results in fig
viabmodelfig <- plot_model(bayes_species_viab,
bpe = "median",
prob.inner = 0.5,
prob.outer = 0.95,
transform = 'plogis',
show.values = T,
title = "", axis.title = c("Proportion viable", NULL),
axis.lim = c(0,1),
axis.labels = c(" ", " ", " "," "),
colors = "bw",
text = element_text(size = 14, color = "black"))
#view figure
viabmodelfig
#print model results to table:
ci95 <- plogis(posterior_interval(bayes_species_viab, prob = 0.95))
bayes_species_viab_ci_tab <- data.table(round(ci95, 2))
bayes_species_viab_ci_tab[ , species := c("Potamogeton natans","Potamogeton illinoensis", "Nuphar variegata", "Brasenia schreberi" ) , ]
bayes_species_viab_ci_tab[ , mean := round(plogis(bayes_species_viab$coefficients),2) , ]
bayes_species_viab_ci_tab[, maxgerm := round(c(11/40, 14/40, 1/40, 0/40),2)] #GA and Scarif rates
bayes_species_viab_ci_tab[, germ_of_viab := round(maxgerm/mean, 2)]
#view simple results output
bayes_species_viab_ci_tab
## 2.5% 97.5% species mean maxgerm germ_of_viab
## <num> <num> <char> <num> <num> <num>
## 1: 0.62 0.91 Potamogeton natans 0.79 0.28 0.35
## 2: 0.24 0.62 Potamogeton illinoensis 0.42 0.35 0.83
## 3: 0.72 0.98 Nuphar variegata 0.90 0.03 0.03
## 4: 0.36 0.71 Brasenia schreberi 0.53 0.00 0.00
# Write viability results to table
#fwrite(bayes_species_viab_ci_tab, file = "viability_table.csv")
# Viability Viz: ---------------------------------------------------------------
#model diagnostics:
We follow https://mc-stan.org/users/documentation/case-studies/rstan_workflow.html for diagnostics:
# launch_shinystan(bayes_species_viab)
Plot designed following: https://cran.r-project.org/web/packages/sjPlot/vignettes/plot_model_estimates.html We can see that all species had non-zero viability. We can use these median value for each species to discount our germ in the germination studies. We will call this a “realized” germination. Basically, because only 53% of the B screberi seeds were viable, we only really tested germination in 53% of the 40 seeds because 47% were nonviable anyway and could thus NEVER germinate.
#two plot components:
viabmodelfig
viabilityplot
Viabviz <- ggarrange( viabilityplot, ggplot() + theme_void(), viabmodelfig,
widths = c(2,-0.06, 1),
labels = c("A", "", "B"),
label.x = c(0.01,0,0.05),
label.y = c(0.99,0,0.99),
font.label = list(size = 18, color = "black", face = "bold", family = NULL),
nrow = 1)
Viabviz
# write to file:
# # Customizing the output
# tiff(filename = "Figure_1.tiff", # File name
# width = 11, height = 4, units = "in", # Width and height in inches
# # bg = "white", # Background color
# # colormodel = "cmyk", # Color model (cmyk is required for most publications)
# # paper = "A4"
# # pointsize = 12,
# # compression = c("none", "rle", "lzw", "jpeg", "zip", "lzw+p", "zip+p"),
# res = 300, family = "", restoreConsole = TRUE,
# type = "cairo"
# )
#
# # Creating a plot
# Figure1
#
# # Closing the graphical device
# dev.off()
# analysis of germ --------------------------------------------------------
NOt corrected for viability
# Code for stacked barplot
germ[, Species := factor(Species, levels = c('Pot_nat', 'Pot_ill', 'Nup_var', 'Bra_sch'))]
species.names <- c("P. natans","P. illinoensis","N. variegata","B. schreberi" )
names(species.names) <- c("Pot_nat","Pot_ill","Nup_var","Bra_sch" )
#write out plot
uncorr_germ_viz <- ggplot(germ[ , .N, .(Species, Germ_yn, Treatment)] , aes(x = Treatment, y = N , fill = Germ_yn, label = N)) +
geom_bar(stat = "identity") +
scale_fill_manual(values=c("#D55E00", "#009E73"), labels = c("Ungerminated", "Germinated")) +
facet_grid(vars(group = Species), labeller = labeller(group = species.names)) +
geom_text(size = 5, position = position_stack(vjust = 0.85)) +
labs(fill = "Germinated?") +
scale_x_discrete( breaks=c("CT","GA","SC","SC+GA"), labels=c("Control", "GA", "Scarified", "Scarified+ \n GA")) +
ylab("Number of Seeds") +
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
strip.text.y = element_text(size = 12, face = "bold.italic"),
panel.spacing = unit(1, "lines"),
text = element_text(size = 12, face = "bold", color = "black"),
legend.title = element_blank()
)
#view plot
uncorr_germ_viz
# # Write fig to file
# # Customizing the output
# tiff(filename = "Figure_2.tiff", # File name
# width = 5.2, height = 6.5, units = "in", # Width and height in inches
# # bg = "white", # Background color
# # colormodel = "cmyk", # Color model (cmyk is required for most publications)
# # paper = "A4"
# # pointsize = 12,
# # compression = c("none", "rle", "lzw", "jpeg", "zip", "lzw+p", "zip+p"),
# res = 300, family = "", restoreConsole = TRUE,
# type = "cairo"
# )
#
# # Creating a plot
#
# # Closing the graphical device
# dev.off()
We can see two important things from this figure that we want to put stat values on:
Thus, for each species, we can evaluate these treatment hypotheses using a model for germination. IN our reporting we will use viability to generate corrected germination.
Results for binomial GLMs in the frequentist OLS pumped out odd results. Digging into the seed germ analysis lit suggests that the high occurrence of 0% germ in our data (termed “complete separation”) means that there’s no variance in those categories for a model to fit to… And the minimum sample size of n*p > 5 for a binomial estimator tells us that for a 1% germination rate we should have used a sample size of 500. Because we saw at best 1/1185 Pot nat sprouting (sprouting experiment with 0 sprouts), we should have run a sample size of at least 5925 (5/(1/1185)) for the Pot nat controls (no GA or Scarif)….
So, with all of that in consideration, the hyp testing and the model selection is prob inappropriate with the data that we collected being evaluated with a binomial glm in a frequentist framework.See:
Gianinetti, A. (2020). Basic features of the analysis of germination data with generalized linear mixed models. Data, 5(1). https://doi.org/10.3390/data5010006
McNair, J. N., Sunkara, A., & Frobish, D. (2012). How to analyse seed germination data using statistical time-to-event analysis: Non-parametric and semi-parametric methods. Seed Science Research, Vol. 22, pp. 77–95. https://doi.org/10.1017/S0960258511000547
The issue at hand is that we have categories that produced true zeroes. While this wasn’t entirely unexpected, it does result in a model that has no real variance estimate for the zero categories (SE estimates of Infinity for those categories).
One way for us to overcome this is to avoid the issue of calculating the variance using frequentist approaches and switch to estimating the models via a bayesian implementation of a glm.
More info: https://stats.stackexchange.com/questions/381915/glm-for-count-data-with-all-zeroes-in-one-category
rstanrm vignette: http://mc-stan.org/rstanarm/articles/rstanarm.html
rstanrm user guide: http://dx.doi.org/10.20982/tqmp.14.2.p099
# hypothesis testing by species ---------------------------------------------
#Pot nat:
#model
bayes_pnat <- stan_glm(data = germ[Species == "Pot_nat"], Germ_yn ~ gibberellic * scarify + sc_mass + sc_lw, family = binomial(link = 'logit'), set.seed(4913))
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 2.3e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.376 seconds (Warm-up)
## Chain 1: 0.378 seconds (Sampling)
## Chain 1: 0.754 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 1.7e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.37 seconds (Warm-up)
## Chain 2: 0.396 seconds (Sampling)
## Chain 2: 0.766 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 1.5e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.322 seconds (Warm-up)
## Chain 3: 0.405 seconds (Sampling)
## Chain 3: 0.727 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 1.6e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.16 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.391 seconds (Warm-up)
## Chain 4: 0.365 seconds (Sampling)
## Chain 4: 0.756 seconds (Total)
## Chain 4:
#output
summary(bayes_pnat)
##
## Model Info:
## function: stan_glm
## family: binomial [logit]
## formula: Germ_yn ~ gibberellic * scarify + sc_mass + sc_lw
## algorithm: sampling
## sample: 4000 (posterior sample size)
## priors: see help('prior_summary')
## observations: 160
## predictors: 6
##
## Estimates:
## mean sd 10% 50% 90%
## (Intercept) -5.8 1.7 -8.1 -5.6 -3.8
## gibberellicTRUE 1.5 1.9 -0.9 1.4 4.0
## scarifyTRUE 4.3 1.8 2.3 4.1 6.7
## sc_mass 0.5 0.3 0.1 0.5 0.8
## sc_lw 0.3 0.2 0.0 0.3 0.6
## gibberellicTRUE:scarifyTRUE -1.1 2.0 -3.6 -1.1 1.4
##
## Fit Diagnostics:
## mean sd 10% 50% 90%
## mean_PPD 0.1 0.0 0.1 0.1 0.2
##
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
##
## MCMC diagnostics
## mcse Rhat n_eff
## (Intercept) 0.1 1.0 985
## gibberellicTRUE 0.1 1.0 1034
## scarifyTRUE 0.1 1.0 1012
## sc_mass 0.0 1.0 3085
## sc_lw 0.0 1.0 3215
## gibberellicTRUE:scarifyTRUE 0.1 1.0 1058
## mean_PPD 0.0 1.0 4099
## log-posterior 0.1 1.0 1331
##
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
#visualize results on data scale
plot_model(bayes_pnat, prob = 0.5, prob_outer = 0.95, transform = 'plogis', axis.lim = c(0,1), title = "Potamogeton natans")
#visualize on model scale (log-odds)
plot_model(bayes_pnat, prob = 0.5, prob_outer = 0.95, title = "Potamogeton natans", colors = "black", transform = NULL,
axis.labels = c("GA3 + Scarification", "Length:Width", "Mass", "Scarify", "GA3"))
#print model results to table:
ci.table <- cbind(posterior_interval(bayes_pnat, prob = 0.50),posterior_interval(bayes_pnat, prob = 0.95))
bayes_germ_ci_tab <- data.table(species = "Potamogeton natans",
parameter = rownames(ci.table),
est = bayes_pnat$coefficients,
ci.table
)
bayes_germ_ci_tab_master <- bayes_germ_ci_tab
#Pot ill
bayes_pill <- stan_glm(data = germ[Species == "Pot_ill"], Germ_yn ~ gibberellic * scarify + sc_mass + sc_lw , family = binomial(link = 'logit'), set.seed(4913))
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 1.6e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.16 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.392 seconds (Warm-up)
## Chain 1: 0.341 seconds (Sampling)
## Chain 1: 0.733 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 1.2e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.12 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.411 seconds (Warm-up)
## Chain 2: 0.384 seconds (Sampling)
## Chain 2: 0.795 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 1.3e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.13 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.391 seconds (Warm-up)
## Chain 3: 0.39 seconds (Sampling)
## Chain 3: 0.781 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 1.2e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.12 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.346 seconds (Warm-up)
## Chain 4: 0.38 seconds (Sampling)
## Chain 4: 0.726 seconds (Total)
## Chain 4:
summary(bayes_pill)
##
## Model Info:
## function: stan_glm
## family: binomial [logit]
## formula: Germ_yn ~ gibberellic * scarify + sc_mass + sc_lw
## algorithm: sampling
## sample: 4000 (posterior sample size)
## priors: see help('prior_summary')
## observations: 160
## predictors: 6
##
## Estimates:
## mean sd 10% 50% 90%
## (Intercept) -6.5 2.0 -9.2 -6.3 -4.1
## gibberellicTRUE -0.4 2.7 -3.9 -0.5 2.9
## scarifyTRUE 4.4 2.1 1.9 4.2 7.2
## sc_mass 0.4 0.4 0.0 0.4 0.9
## sc_lw 0.1 0.3 -0.4 0.1 0.5
## gibberellicTRUE:scarifyTRUE 1.9 2.7 -1.5 1.9 5.4
##
## Fit Diagnostics:
## mean sd 10% 50% 90%
## mean_PPD 0.1 0.0 0.1 0.1 0.2
##
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
##
## MCMC diagnostics
## mcse Rhat n_eff
## (Intercept) 0.1 1.0 1358
## gibberellicTRUE 0.1 1.0 1283
## scarifyTRUE 0.1 1.0 1382
## sc_mass 0.0 1.0 2253
## sc_lw 0.0 1.0 2155
## gibberellicTRUE:scarifyTRUE 0.1 1.0 1316
## mean_PPD 0.0 1.0 4239
## log-posterior 0.1 1.0 1391
##
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
#visualize model results
plot_model(bayes_pill, title = "Potamogeton illinoensis", colors = "black", transform = NULL)
plot_model(bayes_pill, title = "Potamogeton illinoensis", colors = "black", transform = "exp", ci.style = "bar")
plot_model(bayes_pill, prob = 0.5, prob_outer = 0.95, transform = 'plogis', axis.lim = c(0,1), title = "Potamogeton illinoensis",vline = 0.5, vline.color = "black")
#print model results to table:
ci.table <- cbind(posterior_interval(bayes_pill, prob = 0.50),posterior_interval(bayes_pill, prob = 0.95))
bayes_germ_ci_tab <- data.table(species = "Potamogeton illinoensis",
parameter = rownames(ci.table),
est = bayes_pill$coefficients,
ci.table
)
bayes_germ_ci_tab_master <- rbind(bayes_germ_ci_tab_master, bayes_germ_ci_tab)
we have decided that while we can estimate the effects for N var, our lack of overall germ should be used to indicate that it’s not a real good idea to use these data to estimate trt or trait effects..
#nup var
#we have decided that while we can estimate the efects for N var, our lack of overall germ should be
# bayes_nvar <- stan_glm(Germ_yn ~ gibberellic * scarify + sc_mass + sc_lw, data = germ[Species == "Nup_var"], family = binomial(link = 'logit'), set.seed(4913))
# summary(bayes_nvar) # no sig trt effects
# plot(bayes_nvar)
# plot_model(bayes_nvar, type = 'est', prob = 0.5, prob_outer = 0.5,
# title = "Nuphar variegata",
# colors = "black", transform = NULL)
# plot_model(bayes_nvar, prob = 0.5, prob_outer = 0.95, transform = 'plogis', axis.lim = c(0,1), title = "Nuphar variegata")
#
#
# #print model results to table:
# ci.table <- cbind(posterior_interval(bayes_nvar, prob = 0.50),posterior_interval(bayes_nvar, prob = 0.95))
# bayes_germ_ci_tab <- data.table(species = "Nuphar variegata",
# parameter = rownames(ci95),
# est = bayes_nvar$coefficients,
# ci.table
# )
#
# bayes_germ_ci_tab_master <- rbind(bayes_germ_ci_tab_master, bayes_germ_ci_tab)
Because Brasenia sample includes no germ in any treatment or set of traits, this model will not run. There’s nothing to estimate on.
# bra schr
# Brasenia sample includes no germ--this model will not run
# bayes_bsch <- stan_glm(Germ_yn ~ gibberellic * scarify + sc_mass + sc_lw , data = germ[Species == "Bra_sch"], family = binomial(link = 'logit'), set.seed(4913)) # all zeros = not gonna work
# Write CI table to file
# write.csv(bayes_germ_ci_tab_master, file = "bayes_germ_ci_table.csv")
# Figure 2: Predictors of germination ---------------------------------------------------------
bayes_germ_ci_tab_master[ , parameter := factor(parameter, levels = c(rev(unique(bayes_germ_ci_tab_master$parameter)))) ,]
bayes_germ_ci_tab_master[ , species := factor(species, levels = c(unique(bayes_germ_ci_tab_master$species))) ,]
bayes_germ_ci_tab_master[ parameter == "gibberellicTRUE:scarifyTRUE", letter := c(" A", "B") ]
Figure2 <- ggplot(bayes_germ_ci_tab_master[parameter != "(Intercept)"], aes( parameter, est ))+
geom_crossbar(aes(ymax = bayes_germ_ci_tab_master[parameter != "(Intercept)"]$'75%',
ymin = bayes_germ_ci_tab_master[parameter != "(Intercept)"]$'25%'),
fill = "gray",
fatten = 1.5,
width = .5)+
geom_linerange(aes(ymax = bayes_germ_ci_tab_master[parameter != "(Intercept)"]$'97.5%',
ymin = bayes_germ_ci_tab_master[parameter != "(Intercept)"]$'2.5%'), orientation = "x")+
facet_wrap(~species)+
geom_vline(xintercept = 0)+
geom_hline(yintercept = 0)+
ylab("Log-Odds")+
xlab(NULL)+
scale_x_discrete(labels=c("GA3:Scarification", "Length/Width Ratio", "Mass", "Scarify", "GA3", "Intercept"))+
theme(axis.text.x = element_text())+
theme(
strip.text.x = element_text(
size = 12, face = "italic"
))+
theme(
strip.background = element_rect(
color= "black", fill= "white", size=.5, linetype="solid"
)
)+
ylim(c(-13,10))+
# geom_text(aes(label = letter), vjust = -0.1, hjust = 3, size = 15)+
coord_flip()
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## â„ą Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Figure2
# # # Customizing the output
# tiff(filename = "Figure_2.tiff", # File name
# width = 7, height = 3.25, units = "in", # Width and height in inches
# # bg = "white", # Background color
# # colormodel = "cmyk", # Color model (cmyk is required for most publications)
# # paper = "A4"
# # pointsize = 12,
# compression = c("lzw"),
# res = 300, family = "", restoreConsole = TRUE,
# type = "cairo"
# )
#
# # Creating a plot
# Figure2
#
# # Closing the graphical device
# dev.off()
# bayesian model diagnostics ----------------------------------------------
We follow https://mc-stan.org/users/documentation/case-studies/rstan_workflow.html for diagnostics:
# launch_shinystan(bayes_pnat)
# launch_shinystan(bayes_pill)
# launch_shinystan(bayes_nvar)
# Figure 1: trait distributions -----------------------------------------------------
Explore distribution of traits and plot density plots for Fig 1:
#visualize some trait sets
ggplot(germ, aes(x = Mass_grams, y = Species)) + geom_density_ridges()
## Picking joint bandwidth of 0.00153
#outlier excluded viz...
ggplot(germ[Area < 4000000], aes(x = Area, y = Species)) + geom_density_ridges()
## Picking joint bandwidth of 79900
ggplot(germ, aes(x = lw_ratio, y = Species)) + geom_density_ridges()
## Picking joint bandwidth of 0.0296
ggplot(germ, aes(x = Red_mean, y = Species)) + geom_density_ridges()
## Picking joint bandwidth of 4.51
# subset, rearrange, rename and plot
names(germ)[c(18:22,25,28,36)]
## [1] "Mass_grams" "Area" "Major_axis" "Minor_axis" "Blue_mean"
## [6] "Green_mean" "Red_mean" "lw_ratio"
plot_dat <- melt(germ[Area< 4000000 & Green_mean < 120 , ,], id.vars = "Species", measure.vars = names(germ)[c(18:22,25,28,36)], variable.name = "trait", value.name = "value")
plot_dat <- plot_dat[trait %in% c("Mass_grams", "lw_ratio", "Major_axis", "Minor_axis")]
plot_dat[, trait := factor(trait, levels = c("Mass_grams", "lw_ratio", "Major_axis", "Minor_axis")) ]
plot_dat[trait == "Mass_grams" , trait := "Mass (g)"]
plot_dat[trait == "lw_ratio" , trait := "Length/Width Ratio"]
plot_dat[trait %in% c("Major_axis", "Minor_axis"), value := value/1000]
plot_dat[trait == "Major_axis" , trait := "Length (mm)"]
plot_dat[trait == "Minor_axis" , trait := "Width (mm)"]
Figure1 <- ggplot(plot_dat[], aes(value, Species))+
geom_density_ridges(aes( fill = Species))+
facet_wrap(~trait, scales = "free" , ncol = 2)+
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.ticks.y=element_blank(),
axis.text.y = element_blank(),
legend.text=element_text(face = "italic"))+
xlab("")+
ylab("")+
scale_fill_discrete(position = "top", limits=c("Bra_sch", "Nup_var", "Pot_ill", "Pot_nat" ),
labels = c("Brasenia schreberi", "Nuphar variegata", "Potamogeton illinoiensis", "Potamogeton natans"))
Figure1
## Picking joint bandwidth of 0.00154
## Picking joint bandwidth of 0.0295
## Picking joint bandwidth of 0.0443
## Picking joint bandwidth of 0.033
# # # Customizing the output
# tiff(filename = "Figure_1.tiff", # File name
# width = 6, height = 5, units = "in", # Width and height in inches
# # bg = "white", # Background color
# # colormodel = "cmyk", # Color model (cmyk is required for most publications)
# # paper = "A4"
# # pointsize = 12,
# compression = c("lzw"),
# res = 300, family = "", restoreConsole = TRUE,
# type = "cairo"
# )
#
# # Creating a plot
# Figure1
#
# # Closing the graphical device
# dev.off()
# Table 1: Corrected germinability -----------------------------------------
# extract viability
bayes_species_viab_ci_tab
## 2.5% 97.5% species mean maxgerm germ_of_viab
## <num> <num> <char> <num> <num> <num>
## 1: 0.62 0.91 Potamogeton natans 0.79 0.28 0.35
## 2: 0.24 0.62 Potamogeton illinoensis 0.42 0.35 0.83
## 3: 0.72 0.98 Nuphar variegata 0.90 0.03 0.03
## 4: 0.36 0.71 Brasenia schreberi 0.53 0.00 0.00
# extract germinability cis on transformed scales
#here it is important to remember that we are using the model to estimate a credible interval and the point estimate here because we're after the partial effect (after accounting for the trait effects). The bayesian point estimate is less useful to us, as we can simply use the
bayes_germ_ci_tab_master
## species parameter est 25%
## <fctr> <fctr> <num> <num>
## 1: Potamogeton natans (Intercept) -5.60835307 -6.8217758
## 2: Potamogeton natans gibberellicTRUE 1.44321383 0.2749200
## 3: Potamogeton natans scarifyTRUE 4.12759675 3.0812064
## 4: Potamogeton natans sc_mass 0.45339293 0.2725358
## 5: Potamogeton natans sc_lw 0.28550657 0.1146235
## 6: Potamogeton natans gibberellicTRUE:scarifyTRUE -1.05606005 -2.3378816
## 7: Potamogeton illinoensis (Intercept) -6.29788332 -7.7302957
## 8: Potamogeton illinoensis gibberellicTRUE -0.45109814 -2.0785839
## 9: Potamogeton illinoensis scarifyTRUE 4.17454144 2.8432399
## 10: Potamogeton illinoensis sc_mass 0.39427984 0.1638485
## 11: Potamogeton illinoensis sc_lw 0.07394702 -0.1572737
## 12: Potamogeton illinoensis gibberellicTRUE:scarifyTRUE 1.89233212 0.1058621
## 75% 2.5% 97.5% letter
## <num> <num> <num> <char>
## 1: -4.5775722 -9.83783775 -3.1794283 <NA>
## 2: 2.7227467 -1.99650836 5.4744456 <NA>
## 3: 5.3593130 1.53659670 8.3991912 <NA>
## 4: 0.6318741 -0.06959904 0.9791939 <NA>
## 5: 0.4464154 -0.17967632 0.7508092 <NA>
## 6: 0.2293743 -5.17508339 2.6070748 A
## 7: -4.9729974 -11.14469168 -3.2790112 <NA>
## 8: 1.3527608 -6.01306991 4.8557251 <NA>
## 9: 5.6293201 0.96720276 9.0511867 <NA>
## 10: 0.6458118 -0.31634603 1.1133816 <NA>
## 11: 0.3022512 -0.61914175 0.7155767 <NA>
## 12: 3.6211811 -3.28180423 7.4331443 B
bayes_germ_ci_tab_master <- cbind(bayes_germ_ci_tab_master[], exp(bayes_germ_ci_tab_master[, 3:7,]))
setnames(bayes_germ_ci_tab_master, old = c(9:13), new = c("est_exp", "25%_exp", "75%_exp", "2.5%_exp", "97.5%_exp") )
#germ - means & credible interval estimates
#GA3 mean
bayes_species_viab_ci_tab[ ,
GA3 := c( plogis(bayes_germ_ci_tab_master[ parameter == "(Intercept)" ,est]+
bayes_germ_ci_tab_master[ parameter == "gibberellicTRUE" ,est]),
0, 0) ,]
#GA3 2.5%
bayes_species_viab_ci_tab[ ,
GA3_2.5 := c( plogis(bayes_germ_ci_tab_master[ parameter == "(Intercept)" ,est]+
bayes_germ_ci_tab_master[ parameter == "gibberellicTRUE" ,`2.5%`]),
0, 0) ,]
#GA3 97.5%
bayes_species_viab_ci_tab[ ,
GA3_97.5 := c( plogis(bayes_germ_ci_tab_master[ parameter == "(Intercept)" ,est]+
bayes_germ_ci_tab_master[ parameter == "gibberellicTRUE" ,`97.5%`]),
0, 0) ,]
#SCARIFY mean
bayes_species_viab_ci_tab[ ,
SCARIFY := c( plogis(bayes_germ_ci_tab_master[ parameter == "(Intercept)" ,est]+
bayes_germ_ci_tab_master[ parameter == "scarifyTRUE" ,est]),
0, 0) ,]
#SCARIFY 2.5%
bayes_species_viab_ci_tab[ ,
SCARIFY_2.5 := c( plogis(bayes_germ_ci_tab_master[ parameter == "(Intercept)" ,est]+
bayes_germ_ci_tab_master[ parameter == "scarifyTRUE" ,`2.5%`]),
0, 0) ,]
#SCARIFY 97.5%
bayes_species_viab_ci_tab[ ,
SCARIFY_97.5 := c( plogis(bayes_germ_ci_tab_master[ parameter == "(Intercept)" ,est]+
bayes_germ_ci_tab_master[ parameter == "scarifyTRUE" ,`97.5%`]),
0, 0) ,]
#GA3:SCARIFY mean
bayes_species_viab_ci_tab[ ,
GA3_SC := c( plogis(bayes_germ_ci_tab_master[ parameter == "(Intercept)" ,est]+
bayes_germ_ci_tab_master[ parameter == "gibberellicTRUE" ,est]+
bayes_germ_ci_tab_master[ parameter == "gibberellicTRUE:scarifyTRUE" ,est]),
0, 0) ,]
#GA3:SCARIFY 2.5%
bayes_species_viab_ci_tab[ ,
GA3_SC_2.5 := c( plogis(bayes_germ_ci_tab_master[ parameter == "(Intercept)" ,est]+
bayes_germ_ci_tab_master[ parameter == "gibberellicTRUE" , est]+
bayes_germ_ci_tab_master[ parameter == "gibberellicTRUE:scarifyTRUE" ,`2.5%`]),
0, 0) ,]
#GA3:SCARIFY 97.5%
bayes_species_viab_ci_tab[ ,
GA3_SC_97.5 := c( plogis(bayes_germ_ci_tab_master[ parameter == "(Intercept)" ,est]+
bayes_germ_ci_tab_master[ parameter == "gibberellicTRUE" , est]+
bayes_germ_ci_tab_master[ parameter == "gibberellicTRUE:scarifyTRUE", `97.5%`]),
0, 0) ,]
#write to table:
# fwrite(bayes_species_viab_ci_tab, file = "scripts&data/data/output/results_cis.csv")
# calc corrected germination
#viability in bayesian framework:
#reorder species list
germ[ , .N , Species]
## Species N
## <fctr> <int>
## 1: Nup_var 160
## 2: Pot_ill 160
## 3: Pot_nat 160
## 4: Bra_sch 160
germ[, Species := factor(Species, levels = c('Pot_nat', 'Pot_ill', 'Nup_var', 'Bra_sch'))]
#write model
bayes_species_germ <- stan_glm(Germ_yn ~ Species - 1 , data = germ[Species %in% c('Pot_ill','Pot_nat') & Treatment == "GA", ,], family = binomial(), set.seed(4913))
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 1.1e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.087 seconds (Warm-up)
## Chain 1: 0.166 seconds (Sampling)
## Chain 1: 0.253 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 9e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.105 seconds (Warm-up)
## Chain 2: 0.15 seconds (Sampling)
## Chain 2: 0.255 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 8e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.085 seconds (Warm-up)
## Chain 3: 0.077 seconds (Sampling)
## Chain 3: 0.162 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 9e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.079 seconds (Warm-up)
## Chain 4: 0.061 seconds (Sampling)
## Chain 4: 0.14 seconds (Total)
## Chain 4:
summary(bayes_species_germ)
##
## Model Info:
## function: stan_glm
## family: binomial [logit]
## formula: Germ_yn ~ Species - 1
## algorithm: sampling
## sample: 4000 (posterior sample size)
## priors: see help('prior_summary')
## observations: 80
## predictors: 2
##
## Estimates:
## mean sd 10% 50% 90%
## SpeciesPot_nat -3.9 1.1 -5.3 -3.8 -2.6
## SpeciesPot_ill -6.8 2.6 -10.4 -6.3 -3.9
##
## Fit Diagnostics:
## mean sd 10% 50% 90%
## mean_PPD 0.0 0.0 0.0 0.0 0.0
##
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
##
## MCMC diagnostics
## mcse Rhat n_eff
## SpeciesPot_nat 0.0 1.0 1360
## SpeciesPot_ill 0.1 1.0 1017
## mean_PPD 0.0 1.0 2748
## log-posterior 0.0 1.0 1167
##
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
#results in fig
plot_model(bayes_species_germ,
bpe = "median",
prob.inner = 0.5,
prob.outer = 0.95,
transform = 'plogis',
show.values = T,
title = "", axis.title = c("Proportion germ", NULL),
axis.lim = c(0,1),
# axis.labels = c(" ", " ", " "," "),
colors = "bw",
text = element_text(size = 14, color = "black"))
#print model results to table:
ci95 <- plogis(posterior_interval(bayes_species_germ, prob = 0.95))
bayes_2species_germ_ci_tab <- data.table(round(ci95, 2))
bayes_2species_germ_ci_tab[ , species := c("Potamogeton natans","Potamogeton illinoensis") , ]
bayes_2species_germ_ci_tab[ , mean_v := round(plogis(bayes_species_viab$coefficients),2)[1:2] , ]
bayes_2species_germ_ci_tab[, maxgerm := round(c(1/40, 0/40),2)] #GA and Scarif rates
bayes_2species_germ_ci_tab[, germ_of_viab := round(maxgerm/mean_v, 2)]
#view simple results output
bayes_species_viab_ci_tab
## 2.5% 97.5% species mean maxgerm germ_of_viab GA3
## <num> <num> <char> <num> <num> <num> <num>
## 1: 0.62 0.91 Potamogeton natans 0.79 0.28 0.35 0.015290134
## 2: 0.24 0.62 Potamogeton illinoensis 0.42 0.35 0.83 0.001170701
## 3: 0.72 0.98 Nuphar variegata 0.90 0.03 0.03 0.000000000
## 4: 0.36 0.71 Brasenia schreberi 0.53 0.00 0.00 0.000000000
## GA3_2.5 GA3_97.5 SCARIFY SCARIFY_2.5 SCARIFY_97.5 GA3_SC
## <num> <num> <num> <num> <num> <num>
## 1: 4.977765e-04 0.4665731 0.1853132 0.016761677 0.9421787 0.005371836
## 2: 4.502140e-06 0.1912114 0.1068487 0.004817454 0.9400996 0.007716489
## 3: 0.000000e+00 0.0000000 0.0000000 0.000000000 0.0000000 0.000000000
## 4: 0.000000e+00 0.0000000 0.0000000 0.000000000 0.0000000 0.000000000
## GA3_SC_2.5 GA3_SC_97.5
## <num> <num>
## 1: 8.781217e-05 0.1739246
## 2: 4.402162e-05 0.6646672
## 3: 0.000000e+00 0.0000000
## 4: 0.000000e+00 0.0000000
# footer ------------------------------------------------------------------