Final Breeding Behavior Analysis

Amaya Thomas, John Fieberg

2025-11-10


Cleaning the data

#Excluding data from 7/1 -- didn't follow same protocols
bb.dat <- filter(bb.dat, Date != '7/1/2023')

#Excluding data from cage 3 -- contained 2 males
bb.dat <- filter(bb.dat, Cage.ID != 3)

#Excluding data from 7/7 in cage 2 after 11:13AM -- contained 1 female with 2 males
bb.dat <- bb.dat %>%
  filter(Date != '7/7/2023' | Cage.ID != 2 | Time < 0.4673611)

Restructuring the data to work with a count-based regression
model

#Coding breeding behaviors as 1 and non-breeding behaviors as 0
i = c('J',  'WF', 'M', 'V')

for (x in 1:length(i)) {
bb.dat$BehaviorNum[bb.dat$Behavior == i[x]] = 1
}

i = c('perched', 'feeding', 'flying', 'nectaring', 'ovo')

for (x in 1:length(i)) {
bb.dat$BehaviorNum[bb.dat$Behavior == i[x]] = 0 
}

#Removing the covariates not needed
new.bb.dat <- subset(bb.dat, select = -c(Light.outside, Temp.outside, Temp.cage))

#Converting time column from character to time
new.bb.dat$Time <- as_hms(new.bb.dat$Time)
str(new.bb.dat)

## 'data.frame':    5132 obs. of  11 variables:
##  $ Date             : chr  "7/2/2023" "7/2/2023" "7/2/2023" "7/2/2023" ...
##  $ Cage.ID          : chr  "1" "1" "1" "1" ...
##  $ Individual.ID    : chr  "21.3.10.338" "21.3.10.338" "21.3.10.338" "21.3.10.338" ...
##  $ Sex              : chr  "M" "M" "M" "M" ...
##  $ Behavior         : chr  "perched" "J" "nectaring" "nectaring" ...
##  $ Time             : 'hms' num  10:40:00 10:41:00 10:45:00 10:50:00 ...
##   ..- attr(*, "units")= chr "secs"
##  $ Light.cage       : int  55890 56074 55562 49644 39219 37468 38482 39485 42025 43930 ...
##  $ Temp.wunderground: num  30.3 30.3 29.6 29.4 29.1 ...
##  $ Eclosion.Date    : chr  "7/1/2023" "7/1/2023" "7/1/2023" "7/1/2023" ...
##  $ Notes            : chr  "" "" "" "" ...
##  $ BehaviorNum      : num  0 1 0 0 0 0 0 0 0 0 ...

#Rounding every time to nearest five minutes
new.bb.dat <- new.bb.dat |>
  mutate(five_min = trunc_hms(Time, 300))

#Adding columns for the average SI and temperature for each five minute interval
new.bb.dat <- new.bb.dat |>
  group_by(Individual.ID, five_min, Date) |>
  mutate(av.si = mean(Light.cage),
         av.temp = mean(Temp.wunderground))

#Adding a column for the total number of breeding behaviors observed in each five minute interval for each individual
new.bb.dat <- new.bb.dat |>
  group_by(Individual.ID, five_min, Date) |>
  mutate(total.bb = sum(BehaviorNum)) |>
  ungroup()

#Taking only the columns needed from new.bb.dat for analysis to simplify the data frame
new.bb.dat2 <- subset(new.bb.dat, select = -c(Cage.ID, Behavior, Light.cage, Temp.wunderground, Notes, BehaviorNum))

Fitting a negative binomial and poisson model. AIC strongly favors
the negative binomial model. The overdispersion parameter (theta) for
the negative binomial model is also small, which suggests the data are
overdispersed since the variance = mean + mean^2/theta (when theta is
large, the negative binomial model will look more like the Poisson)

# Note:  I divided av.si by 1000 to re-scale this variable to improve numerical
# stability (alternatively, you could center and scale it before fitting). 

#Negative binomial
mod1 <- glmmTMB(total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1|Individual.ID), data = new.bb.dat2, family=nbinom2)
summary(mod1)

##  Family: nbinom2  ( log )
## Formula:          
## total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1 | Individual.ID)
## Data: new.bb.dat2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1132.1   1171.3   -560.0   1120.1     5126 
## 
## Random effects:
## 
## Conditional model:
##  Groups        Name        Variance Std.Dev.
##  Individual.ID (Intercept) 2.466    1.57    
## Number of obs: 5132, groups:  Individual.ID, 25
## 
## Dispersion parameter for nbinom2 family (): 0.405 
## 
## Conditional model:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -4.839408   0.419043 -11.549  < 2e-16 ***
## I(av.si/1000)       0.027430   0.006093   4.502 6.73e-06 ***
## poly(av.temp, 2)1 -26.912980  11.429931  -2.355   0.0185 *  
## poly(av.temp, 2)2  -5.691185   9.349284  -0.609   0.5427    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Poisson
mod2 <- glmmTMB(total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1|Individual.ID), data=new.bb.dat2, family=poisson)
summary(mod2)

##  Family: poisson  ( log )
## Formula:          
## total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1 | Individual.ID)
## Data: new.bb.dat2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1144.7   1177.4   -567.3   1134.7     5127 
## 
## Random effects:
## 
## Conditional model:
##  Groups        Name        Variance Std.Dev.
##  Individual.ID (Intercept) 2.639    1.624   
## Number of obs: 5132, groups:  Individual.ID, 25
## 
## Conditional model:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -4.807210   0.418045 -11.499  < 2e-16 ***
## I(av.si/1000)       0.026797   0.005572   4.809 1.52e-06 ***
## poly(av.temp, 2)1 -28.251473  10.973112  -2.575    0.010 *  
## poly(av.temp, 2)2  -5.085035   8.830653  -0.576    0.565    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Compare the models using AIC
AIC(mod1, mod2)

##      df      AIC
## mod1  6 1132.075
## mod2  5 1144.695

Breeding behaviors differ for males and females, so we separated the
sexes into their own models.

#Negative binomial
mod1.m <- glmmTMB(total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1|Individual.ID), data = subset(new.bb.dat2, Sex == "M"), family=nbinom2)
summary(mod1.m)

##  Family: nbinom2  ( log )
## Formula:          
## total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1 | Individual.ID)
## Data: subset(new.bb.dat2, Sex == "M")
## 
##      AIC      BIC   logLik deviance df.resid 
##    902.3    937.5   -445.2    890.3     2580 
## 
## Random effects:
## 
## Conditional model:
##  Groups        Name        Variance Std.Dev.
##  Individual.ID (Intercept) 1.039    1.019   
## Number of obs: 2586, groups:  Individual.ID, 14
## 
## Dispersion parameter for nbinom2 family (): 0.409 
## 
## Conditional model:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -4.18541    0.39816 -10.512  < 2e-16 ***
## I(av.si/1000)       0.02691    0.00657   4.095 4.21e-05 ***
## poly(av.temp, 2)1 -19.14483    8.63883  -2.216   0.0267 *  
## poly(av.temp, 2)2  -4.95592    7.07763  -0.700   0.4838    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

mod1.f <- glmmTMB(total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1|Individual.ID), data = subset(new.bb.dat2, Sex == "F"), family=nbinom2)
summary(mod1.f)

##  Family: nbinom2  ( log )
## Formula:          
## total.bb ~ I(av.si/1000) + poly(av.temp, 2) + (1 | Individual.ID)
## Data: subset(new.bb.dat2, Sex == "F")
## 
##      AIC      BIC   logLik deviance df.resid 
##    235.6    270.7   -111.8    223.6     2540 
## 
## Random effects:
## 
## Conditional model:
##  Groups        Name        Variance Std.Dev.
##  Individual.ID (Intercept) 3.249    1.803   
## Number of obs: 2546, groups:  Individual.ID, 11
## 
## Dispersion parameter for nbinom2 family (): 0.307 
## 
## Conditional model:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -5.80375    0.85687  -6.773 1.26e-11 ***
## I(av.si/1000)       0.03200    0.01609   1.989   0.0468 *  
## poly(av.temp, 2)1 -11.21627   17.40897  -0.644   0.5194    
## poly(av.temp, 2)2   7.60105   16.00893   0.475   0.6349    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Model diagnostic checking
check_model(mod1.m)

## `check_outliers()` does not yet support models of class `glmmTMB`.


check_model(mod1.f)

## `check_outliers()` does not yet support models of class `glmmTMB`.


#Model diagnostic checking with the DHARMa package
res.m <- simulate_residuals(mod1.m)
plotResiduals(res.m)


plotQQunif(res.m)


res.f <- simulate_residuals(mod1.f)
plotResiduals(res.f)


plotQQunif(res.f)

## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details


#Creating plots for the models

library(ggeffects)

## Warning: package 'ggeffects' was built under R version 4.4.2

#Male plots
m_plot <- plot(ggeffect(mod1.m),
               colors = c("blue"), show_title = FALSE)

## You are calculating adjusted predictions on the population-level (i.e.
##   `type = "fixed"`) for a *generalized* linear mixed model.
##   This may produce biased estimates due to Jensen's inequality. Consider
##   setting `bias_correction = TRUE` to correct for this bias.
##   See also the documentation of the `bias_correction` argument.

m_plot_si <- m_plot[[1]] +labs(
  x = "Average Sunlight Intensity (lux)",
  y = "Breeding Behavior Frequency"
) +
  ggtitle("Males") +
  scale_y_continuous(limits = c(0,0.25))

## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

m_plot_temp <- m_plot[[2]] + labs(
  x = "Average Temperature (C)",
  y = "Breeding Behavior Frequency"
) +
  scale_y_continuous(limits = c(0,0.4))

## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

#Female plots
f_plot <- plot(ggeffect(mod1.f),
               colors = c("red"), show_title = FALSE)
f_plot_si <- f_plot[[1]] + labs(
  x = "Average Sunlight Intensity (lux)",
  y = ""
) +
  ggtitle("Females") +
  scale_y_continuous(limits = c(0,0.25))

## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

f_plot_temp <- f_plot[[2]] + labs(
  x = "Average Temperature (C)",
  y = ""
) +
  coord_cartesian(ylim=c(0,0.4))

grid.arrange(m_plot_si, f_plot_si, m_plot_temp, f_plot_temp)


Hypothesis testing

library(car)

## Warning: package 'car' was built under R version 4.4.2

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

Anova(mod1.m)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: total.bb
##                    Chisq Df Pr(>Chisq)    
## I(av.si/1000)    16.7731  1  4.213e-05 ***
## poly(av.temp, 2)  5.0793  2    0.07889 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova(mod1.f)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: total.bb
##                   Chisq Df Pr(>Chisq)  
## I(av.si/1000)    3.9542  1    0.04675 *
## poly(av.temp, 2) 1.0777  2    0.58342  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Age analyses

#Need to first add age column to data frame
new.bb.dat3 <- new.bb.dat2
new.bb.dat3$Date <- as.Date(new.bb.dat3$Date, format="%m/%d/%Y")
new.bb.dat3$Eclosion.Date <- as.Date(new.bb.dat3$Eclosion.Date, format="%m/%d/%Y")
new.bb.dat3$Date <- format(new.bb.dat3$Date, "%j")
new.bb.dat3$Eclosion.Date <- format(new.bb.dat3$Eclosion.Date, "%j")

new.bb.dat3$Date <- as.numeric(new.bb.dat3$Date)
new.bb.dat3$Eclosion.Date <- as.numeric(new.bb.dat3$Eclosion.Date)

new.bb.dat3$Age <- (new.bb.dat3$Date - new.bb.dat3$Eclosion.Date)

Calculating and plotting the proportion of individuals (of each age
and sex category) that exhibit at least 1 breeding behavior

library(binom)

## Warning: package 'binom' was built under R version 4.4.3

agesum <- new.bb.dat3 %>%
  group_by(Sex, Age) %>%
  summarise(
    n = n(),
    
    # proportion with >=1 behavior
    prop_with_behavior = mean(total.bb > 0),
    se_prop = sqrt(prop_with_behavior * (1 - prop_with_behavior) / n),
   
     # Wilson confidence interval for proportions (these are better than the 
     # simple Normal-based confidence intervals from Biometry)
    ci = list(binom.confint(x = sum(total.bb > 0),
                            n = n(),
                            methods = "wilson")),
    
    # mean number of behaviors
    mean_behaviors = mean(total.bb),
    se_mean = sd(total.bb) / sqrt(n),
    t_crit = qt(0.975, df = n - 1),
    ci_low_mean = mean_behaviors - t_crit * se_mean,
    ci_high_mean = mean_behaviors + t_crit * se_mean,
    
    .groups = "drop"
  ) %>%
  tidyr::unnest_wider(ci, names_sep = "_") %>%
  select(Sex, Age, n,
         prop_with_behavior, ci_lower, ci_upper,
         mean_behaviors, ci_low_mean, ci_high_mean)

# Means plot
sex_labels <- c(
  "F" = "Females",
  "M" = "Males"
)

ggplot(agesum, aes(x = Age, y = mean_behaviors)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = ci_low_mean, ymax = ci_high_mean), width = 0.2) +
  facet_wrap(~Sex, labeller = labeller(Sex = sex_labels)) +
  labs(
    y = "Mean number of breeding behaviors",
    x = "Age (days since eclosion)"
  ) +
  theme_minimal(base_size = 14) +
  scale_x_continuous(breaks = seq(0,9))


sessionInfo()

## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Chicago
## tzcode source: internal
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] binom_1.1-1.1      car_3.1-3          ggeffects_2.2.0    glmmTMB_1.1.10    
##  [5] hms_1.1.3          lubridate_1.9.3    effects_4.2-2      carData_3.0-5     
##  [9] gridExtra_2.3      performance_0.13.0 DHARMa_0.4.7       ggplot2_3.5.1     
## [13] dplyr_1.1.4        here_1.0.1         lme4_1.1-35.5      glmm_1.4.5        
## [17] doParallel_1.0.17  iterators_1.0.14   foreach_1.5.2      Matrix_1.7-0      
## [21] mvtnorm_1.3-2      trust_0.1-8       
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1    farver_2.1.2        fastmap_1.2.0      
##  [4] bayestestR_0.15.1   digest_0.6.37       timechange_0.3.0   
##  [7] estimability_1.5.1  lifecycle_1.0.4     survival_3.6-4     
## [10] magrittr_2.0.3      compiler_4.4.1      rlang_1.1.4        
## [13] sass_0.4.9          tools_4.4.1         utf8_1.2.4         
## [16] yaml_2.3.10         knitr_1.49          labeling_0.4.3     
## [19] gap.datasets_0.0.6  abind_1.4-8         purrr_1.0.2        
## [22] withr_3.0.2         numDeriv_2016.8-1.1 itertools_0.1-3    
## [25] nnet_7.3-19         grid_4.4.1          datawizard_1.0.0   
## [28] fansi_1.0.6         colorspace_2.1-1    scales_1.3.0       
## [31] MASS_7.3-60.2       insight_1.0.1       cli_3.6.3          
## [34] survey_4.4-2        rmarkdown_2.29      reformulas_0.4.0   
## [37] generics_0.1.3      rstudioapi_0.17.1   minqa_1.2.8        
## [40] DBI_1.2.3           cachem_1.1.0        splines_4.4.1      
## [43] mitools_2.4         vctrs_0.6.5         boot_1.3-30        
## [46] jsonlite_1.8.9      patchwork_1.3.0     Formula_1.2-5      
## [49] see_0.10.0          tidyr_1.3.1         jquerylib_0.1.4    
## [52] gap_1.6             glue_1.8.0          nloptr_2.1.1       
## [55] codetools_0.2-20    gtable_0.3.6        munsell_0.5.1      
## [58] tibble_3.2.1        pillar_1.9.0        htmltools_0.5.8.1  
## [61] R6_2.5.1            TMB_1.9.17          Rdpack_2.6.2       
## [64] rprojroot_2.0.4     evaluate_1.0.1      lattice_0.22-6     
## [67] rbibutils_2.3       bslib_0.8.0         Rcpp_1.0.13-1      
## [70] nlme_3.1-164        mgcv_1.9-1          xfun_0.49          
## [73] pkgconfig_2.0.3