Final Breeding Behavior Analysis Amaya Thomas, John Fieberg 2025-11-10 Cleaning the data #Excluding data from 7/1 -- didn't follow same protocols bb.dat <- filter(bb.dat, Date != '7/1/2023') #Excluding data from cage 3 -- contained 2 males bb.dat <- filter(bb.dat, Cage.ID != 3) #Excluding data from 7/7 in cage 2 after 11:13AM -- contained 1 female with 2 males bb.dat <- bb.dat %>% filter(Date != '7/7/2023' | Cage.ID != 2 | Time < 0.4673611) Restructuring the data to work with a count-based regression model #Coding breeding behaviors as 1 and non-breeding behaviors as 0 i = c('J', 'WF', 'M', 'V') for (x in 1:length(i)) { bb.dat$BehaviorNum[bb.dat$Behavior == i[x]] = 1 } i = c('perched', 'feeding', 'flying', 'nectaring', 'ovo') for (x in 1:length(i)) { bb.dat$BehaviorNum[bb.dat$Behavior == i[x]] = 0 } #Removing the covariates not needed new.bb.dat <- subset(bb.dat, select = -c(Light.outside, Temp.outside, Temp.cage)) #Converting time column from character to time new.bb.dat$Time <- as_hms(new.bb.dat$Time) str(new.bb.dat) ## 'data.frame': 5132 obs. of 11 variables: ## $ Date : chr "7/2/2023" "7/2/2023" "7/2/2023" "7/2/2023" ... ## $ Cage.ID : chr "1" "1" "1" "1" ... ## $ Individual.ID : chr "21.3.10.338" "21.3.10.338" "21.3.10.338" "21.3.10.338" ... ## $ Sex : chr "M" "M" "M" "M" ... ## $ Behavior : chr "perched" "J" "nectaring" "nectaring" ... ## $ Time : 'hms' num 10:40:00 10:41:00 10:45:00 10:50:00 ... ## ..- attr(*, "units")= chr "secs" ## $ Light.cage : int 55890 56074 55562 49644 39219 37468 38482 39485 42025 43930 ... ## $ Temp.wunderground: num 30.3 30.3 29.6 29.4 29.1 ... ## $ Eclosion.Date : chr "7/1/2023" "7/1/2023" "7/1/2023" "7/1/2023" ... ## $ Notes : chr "" "" "" "" ... ## $ BehaviorNum : num 0 1 0 0 0 0 0 0 0 0 ... #Rounding every time to nearest five minutes new.bb.dat <- new.bb.dat |> mutate(five_min = trunc_hms(Time, 300)) #Adding columns for the average SI and temperature for each five minute interval new.bb.dat <- new.bb.dat |> group_by(Individual.ID, five_min, Date) |> mutate(av.si = mean(Light.cage), av.temp = mean(Temp.wunderground)) #Adding a column for the total number of breeding behaviors observed in each five minute interval for each individual new.bb.dat <- new.bb.dat |> group_by(Individual.ID, five_min, Date) |> mutate(total.bb = sum(BehaviorNum)) |> ungroup() #Taking only the columns needed from new.bb.dat for analysis to simplify the data frame new.bb.dat2 <- subset(new.bb.dat, select = -c(Cage.ID, Behavior, Light.cage, Temp.wunderground, Notes, BehaviorNum)) Fitting a negative binomial and poisson model. AIC strongly favors the negative binomial model. The overdispersion parameter (theta) for the negative binomial model is also small, which suggests the data are overdispersed since the variance = mean + mean^2/theta (when theta is large, the negative binomial model will look more like the Poisson) # Note: I divided av.si by 1000 to re-scale this variable to improve numerical # stability (alternatively, you could center and scale it before fitting). #Negative binomial mod1 <- glmmTMB(total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1|Individual.ID), data = new.bb.dat2, family=nbinom2) summary(mod1) ## Family: nbinom2 ( log ) ## Formula: ## total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1 | Individual.ID) ## Data: new.bb.dat2 ## ## AIC BIC logLik deviance df.resid ## 1132.1 1171.3 -560.0 1120.1 5126 ## ## Random effects: ## ## Conditional model: ## Groups Name Variance Std.Dev. ## Individual.ID (Intercept) 2.466 1.57 ## Number of obs: 5132, groups: Individual.ID, 25 ## ## Dispersion parameter for nbinom2 family (): 0.405 ## ## Conditional model: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) -4.839408 0.419043 -11.549 < 2e-16 *** ## I(av.si/1000) 0.027430 0.006093 4.502 6.73e-06 *** ## poly(av.temp, 2)1 -26.912980 11.429931 -2.355 0.0185 * ## poly(av.temp, 2)2 -5.691185 9.349284 -0.609 0.5427 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #Poisson mod2 <- glmmTMB(total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1|Individual.ID), data=new.bb.dat2, family=poisson) summary(mod2) ## Family: poisson ( log ) ## Formula: ## total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1 | Individual.ID) ## Data: new.bb.dat2 ## ## AIC BIC logLik deviance df.resid ## 1144.7 1177.4 -567.3 1134.7 5127 ## ## Random effects: ## ## Conditional model: ## Groups Name Variance Std.Dev. ## Individual.ID (Intercept) 2.639 1.624 ## Number of obs: 5132, groups: Individual.ID, 25 ## ## Conditional model: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) -4.807210 0.418045 -11.499 < 2e-16 *** ## I(av.si/1000) 0.026797 0.005572 4.809 1.52e-06 *** ## poly(av.temp, 2)1 -28.251473 10.973112 -2.575 0.010 * ## poly(av.temp, 2)2 -5.085035 8.830653 -0.576 0.565 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # Compare the models using AIC AIC(mod1, mod2) ## df AIC ## mod1 6 1132.075 ## mod2 5 1144.695 Breeding behaviors differ for males and females, so we separated the sexes into their own models. #Negative binomial mod1.m <- glmmTMB(total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1|Individual.ID), data = subset(new.bb.dat2, Sex == "M"), family=nbinom2) summary(mod1.m) ## Family: nbinom2 ( log ) ## Formula: ## total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1 | Individual.ID) ## Data: subset(new.bb.dat2, Sex == "M") ## ## AIC BIC logLik deviance df.resid ## 902.3 937.5 -445.2 890.3 2580 ## ## Random effects: ## ## Conditional model: ## Groups Name Variance Std.Dev. ## Individual.ID (Intercept) 1.039 1.019 ## Number of obs: 2586, groups: Individual.ID, 14 ## ## Dispersion parameter for nbinom2 family (): 0.409 ## ## Conditional model: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) -4.18541 0.39816 -10.512 < 2e-16 *** ## I(av.si/1000) 0.02691 0.00657 4.095 4.21e-05 *** ## poly(av.temp, 2)1 -19.14483 8.63883 -2.216 0.0267 * ## poly(av.temp, 2)2 -4.95592 7.07763 -0.700 0.4838 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 mod1.f <- glmmTMB(total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1|Individual.ID), data = subset(new.bb.dat2, Sex == "F"), family=nbinom2) summary(mod1.f) ## Family: nbinom2 ( log ) ## Formula: ## total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1 | Individual.ID) ## Data: subset(new.bb.dat2, Sex == "F") ## ## AIC BIC logLik deviance df.resid ## 235.6 270.7 -111.8 223.6 2540 ## ## Random effects: ## ## Conditional model: ## Groups Name Variance Std.Dev. ## Individual.ID (Intercept) 3.249 1.803 ## Number of obs: 2546, groups: Individual.ID, 11 ## ## Dispersion parameter for nbinom2 family (): 0.307 ## ## Conditional model: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) -5.80375 0.85687 -6.773 1.26e-11 *** ## I(av.si/1000) 0.03200 0.01609 1.989 0.0468 * ## poly(av.temp, 2)1 -11.21627 17.40897 -0.644 0.5194 ## poly(av.temp, 2)2 7.60105 16.00893 0.475 0.6349 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #Model diagnostic checking check_model(mod1.m) ## `check_outliers()` does not yet support models of class `glmmTMB`. check_model(mod1.f) ## `check_outliers()` does not yet support models of class `glmmTMB`. #Model diagnostic checking with the DHARMa package res.m <- simulate_residuals(mod1.m) plotResiduals(res.m) plotQQunif(res.m) res.f <- simulate_residuals(mod1.f) plotResiduals(res.f) plotQQunif(res.f) ## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details #Creating plots for the models library(ggeffects) ## Warning: package 'ggeffects' was built under R version 4.4.2 #Male plots m_plot <- plot(ggeffect(mod1.m), colors = c("blue"), show_title = FALSE) ## You are calculating adjusted predictions on the population-level (i.e. ## `type = "fixed"`) for a *generalized* linear mixed model. ## This may produce biased estimates due to Jensen's inequality. Consider ## setting `bias_correction = TRUE` to correct for this bias. ## See also the documentation of the `bias_correction` argument. m_plot_si <- m_plot[[1]] +labs( x = "Average Sunlight Intensity (lux)", y = "Breeding Behavior Frequency" ) + ggtitle("Males") + scale_y_continuous(limits = c(0,0.25)) ## Scale for y is already present. ## Adding another scale for y, which will replace the existing scale. m_plot_temp <- m_plot[[2]] + labs( x = "Average Temperature (C)", y = "Breeding Behavior Frequency" ) + scale_y_continuous(limits = c(0,0.4)) ## Scale for y is already present. ## Adding another scale for y, which will replace the existing scale. #Female plots f_plot <- plot(ggeffect(mod1.f), colors = c("red"), show_title = FALSE) f_plot_si <- f_plot[[1]] + labs( x = "Average Sunlight Intensity (lux)", y = "" ) + ggtitle("Females") + scale_y_continuous(limits = c(0,0.25)) ## Scale for y is already present. ## Adding another scale for y, which will replace the existing scale. f_plot_temp <- f_plot[[2]] + labs( x = "Average Temperature (C)", y = "" ) + coord_cartesian(ylim=c(0,0.4)) grid.arrange(m_plot_si, f_plot_si, m_plot_temp, f_plot_temp) Hypothesis testing library(car) ## Warning: package 'car' was built under R version 4.4.2 ## ## Attaching package: 'car' ## The following object is masked from 'package:dplyr': ## ## recode Anova(mod1.m) ## Analysis of Deviance Table (Type II Wald chisquare tests) ## ## Response: total.bb ## Chisq Df Pr(>Chisq) ## I(av.si/1000) 16.7731 1 4.213e-05 *** ## poly(av.temp, 2) 5.0793 2 0.07889 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Anova(mod1.f) ## Analysis of Deviance Table (Type II Wald chisquare tests) ## ## Response: total.bb ## Chisq Df Pr(>Chisq) ## I(av.si/1000) 3.9542 1 0.04675 * ## poly(av.temp, 2) 1.0777 2 0.58342 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Age analyses #Need to first add age column to data frame new.bb.dat3 <- new.bb.dat2 new.bb.dat3$Date <- as.Date(new.bb.dat3$Date, format="%m/%d/%Y") new.bb.dat3$Eclosion.Date <- as.Date(new.bb.dat3$Eclosion.Date, format="%m/%d/%Y") new.bb.dat3$Date <- format(new.bb.dat3$Date, "%j") new.bb.dat3$Eclosion.Date <- format(new.bb.dat3$Eclosion.Date, "%j") new.bb.dat3$Date <- as.numeric(new.bb.dat3$Date) new.bb.dat3$Eclosion.Date <- as.numeric(new.bb.dat3$Eclosion.Date) new.bb.dat3$Age <- (new.bb.dat3$Date - new.bb.dat3$Eclosion.Date) Calculating and plotting the proportion of individuals (of each age and sex category) that exhibit at least 1 breeding behavior library(binom) ## Warning: package 'binom' was built under R version 4.4.3 agesum <- new.bb.dat3 %>% group_by(Sex, Age) %>% summarise( n = n(), # proportion with >=1 behavior prop_with_behavior = mean(total.bb > 0), se_prop = sqrt(prop_with_behavior * (1 - prop_with_behavior) / n), # Wilson confidence interval for proportions (these are better than the # simple Normal-based confidence intervals from Biometry) ci = list(binom.confint(x = sum(total.bb > 0), n = n(), methods = "wilson")), # mean number of behaviors mean_behaviors = mean(total.bb), se_mean = sd(total.bb) / sqrt(n), t_crit = qt(0.975, df = n - 1), ci_low_mean = mean_behaviors - t_crit * se_mean, ci_high_mean = mean_behaviors + t_crit * se_mean, .groups = "drop" ) %>% tidyr::unnest_wider(ci, names_sep = "_") %>% select(Sex, Age, n, prop_with_behavior, ci_lower, ci_upper, mean_behaviors, ci_low_mean, ci_high_mean) # Means plot sex_labels <- c( "F" = "Females", "M" = "Males" ) ggplot(agesum, aes(x = Age, y = mean_behaviors)) + geom_point(size = 3) + geom_errorbar(aes(ymin = ci_low_mean, ymax = ci_high_mean), width = 0.2) + facet_wrap(~Sex, labeller = labeller(Sex = sex_labels)) + labs( y = "Mean number of breeding behaviors", x = "Age (days since eclosion)" ) + theme_minimal(base_size = 14) + scale_x_continuous(breaks = seq(0,9)) sessionInfo() ## R version 4.4.1 (2024-06-14 ucrt) ## Platform: x86_64-w64-mingw32/x64 ## Running under: Windows 11 x64 (build 22631) ## ## Matrix products: default ## ## ## locale: ## [1] LC_COLLATE=English_United States.utf8 ## [2] LC_CTYPE=English_United States.utf8 ## [3] LC_MONETARY=English_United States.utf8 ## [4] LC_NUMERIC=C ## [5] LC_TIME=English_United States.utf8 ## ## time zone: America/Chicago ## tzcode source: internal ## ## attached base packages: ## [1] parallel stats graphics grDevices utils datasets methods ## [8] base ## ## other attached packages: ## [1] binom_1.1-1.1 car_3.1-3 ggeffects_2.2.0 glmmTMB_1.1.10 ## [5] hms_1.1.3 lubridate_1.9.3 effects_4.2-2 carData_3.0-5 ## [9] gridExtra_2.3 performance_0.13.0 DHARMa_0.4.7 ggplot2_3.5.1 ## [13] dplyr_1.1.4 here_1.0.1 lme4_1.1-35.5 glmm_1.4.5 ## [17] doParallel_1.0.17 iterators_1.0.14 foreach_1.5.2 Matrix_1.7-0 ## [21] mvtnorm_1.3-2 trust_0.1-8 ## ## loaded via a namespace (and not attached): ## [1] tidyselect_1.2.1 farver_2.1.2 fastmap_1.2.0 ## [4] bayestestR_0.15.1 digest_0.6.37 timechange_0.3.0 ## [7] estimability_1.5.1 lifecycle_1.0.4 survival_3.6-4 ## [10] magrittr_2.0.3 compiler_4.4.1 rlang_1.1.4 ## [13] sass_0.4.9 tools_4.4.1 utf8_1.2.4 ## [16] yaml_2.3.10 knitr_1.49 labeling_0.4.3 ## [19] gap.datasets_0.0.6 abind_1.4-8 purrr_1.0.2 ## [22] withr_3.0.2 numDeriv_2016.8-1.1 itertools_0.1-3 ## [25] nnet_7.3-19 grid_4.4.1 datawizard_1.0.0 ## [28] fansi_1.0.6 colorspace_2.1-1 scales_1.3.0 ## [31] MASS_7.3-60.2 insight_1.0.1 cli_3.6.3 ## [34] survey_4.4-2 rmarkdown_2.29 reformulas_0.4.0 ## [37] generics_0.1.3 rstudioapi_0.17.1 minqa_1.2.8 ## [40] DBI_1.2.3 cachem_1.1.0 splines_4.4.1 ## [43] mitools_2.4 vctrs_0.6.5 boot_1.3-30 ## [46] jsonlite_1.8.9 patchwork_1.3.0 Formula_1.2-5 ## [49] see_0.10.0 tidyr_1.3.1 jquerylib_0.1.4 ## [52] gap_1.6 glue_1.8.0 nloptr_2.1.1 ## [55] codetools_0.2-20 gtable_0.3.6 munsell_0.5.1 ## [58] tibble_3.2.1 pillar_1.9.0 htmltools_0.5.8.1 ## [61] R6_2.5.1 TMB_1.9.17 Rdpack_2.6.2 ## [64] rprojroot_2.0.4 evaluate_1.0.1 lattice_0.22-6 ## [67] rbibutils_2.3 bslib_0.8.0 Rcpp_1.0.13-1 ## [70] nlme_3.1-164 mgcv_1.9-1 xfun_0.49 ## [73] pkgconfig_2.0.3