• Preparation
    • Data Sets
    • Long Format
  • Statistical Analyses in the Paper
    • Transcriptions
    • Listener Ratings
    • Acoustic Analysis - Automated Measures
    • Relationship between automated and manual acoustic measures
    • Relationship between acoustic measures and narrow transcriptions
    • Relationship between acoustic analysis and VAS listener ratings
    • Robustness of Contrast
  • Session Info

Preparation

Load packages

library(tidyverse)
library(cowplot)
library(ggpubr)
library(ggdist)
library(lme4)
library(MuMIn)
library(lmerTest)
library(car)

Data Sets

Read in the two main dataframes:
1. Automated measurements from the formant triple tracker for all 117 children 2. Hand-coded manual measurements for a subset of 14 children

# 1) read in the rw_auto_all.csv file
rw_auto_all <- read.csv(file.choose())

# 2) read in the rw_manual_subset.csv file
rw_manual_subset <- read.csv(file.choose())

Add two variables to the rw_manual_subset file which will later be used in modelling

rw_auto_all <- rw_auto_all %>%
  mutate(
    transc.acc = case_when(
      transc %in% c("r","rw") & target=="r" ~ 1,
      transc %in% c("w","wr") & target=="w" ~ 1,
      transc %in% c("r","rw") & target=="w" ~ 0,
      transc %in% c("w","wr") & target=="r" ~ 0
    )
  )

rw_manual_subset <- rw_manual_subset %>%
  mutate(
    R.Indicator=1*(transc %in% c("r", "rw")),
    gcf3f2 = gcf3 - gcf2, 
    transc.acc = case_when(
      transc %in% c("r","rw") & target=="r" ~ 1,
      transc %in% c("w","wr") & target=="w" ~ 1,
      transc %in% c("r","rw") & target=="w" ~ 0,
      transc %in% c("w","wr") & target=="r" ~ 0
    )
  )

Create a subset of the rw_auto_all dataframe to only include each unique word token for each child speaker (i.e. remove the individual listener ratings data, which would duplicate each word spoken by each child)

#removes duplicate files for when you aren't using the listener ratings
rw_auto_unique <- distinct(rw_auto_all, Filename, .keep_all = TRUE)
rw_manual_subset_uniq <- distinct(rw_manual_subset, Filename, .keep_all = TRUE)

# filter rw_auto_unique to only include those 14 children
child_subest <- unique(rw_manual_subset$subject)

rw_auto_subset <- rw_auto_unique %>% 
  dplyr::filter(subject %in% child_subest)

Long Format

Convert the automated measurements dataframe into long format so frequency is one column and measurement (min/onset) is another column

# auto measurements of all children
rw_auto_unique_long <- rw_auto_unique %>% 
  select(c("subject", "transc", "target", "f3f2onset", "f3f2min")) %>% 
  pivot_longer(cols = c("f3f2onset", "f3f2min"),
               names_to = "measurement", 
               values_to = "frequency") %>% 
  mutate(transc_labels = factor(transc, levels = c("r", "rw", "wr", "w"),
                         labels = c("[\u0279]", "[\u0279]:[w]", "[w]:[\u0279]", "[w]")),
         measurement_facet = factor(measurement, 
                                    levels = c("f3f2onset", "f3f2min"),
                                    labels = c("Onset", "Minimum F3")))

# filter rw_auto_unique_long to only include those 14 children
subest <- unique(rw_manual_subset$subject)

rw_auto_subset_long <- rw_auto_unique_long %>% 
  dplyr::filter(subject %in% subest)

Convert the manually coded measurements dataframe into long format so frequency is one column and measurement (min/onset) is another column

# hand-coded measurements for subset of 14 children
rw_manual_subset_uniq_long <- rw_manual_subset_uniq %>% 
   select(c("subject", "transc", "target", "f3f2onset", "f3f2min", "gcf3f2")) %>% 
  pivot_longer(cols = c("f3f2onset", "f3f2min", "gcf3f2"),
               names_to = "measurement", 
               values_to = "frequency") %>% 
  mutate(transc_labels = factor(transc, levels = c("r", "rw", "wr", "w"),
                         labels = c("[\u0279]", "[\u0279]:[w]", "[w]:[\u0279]", "[w]")),
         measurement_facet = factor(measurement, 
                                    levels = c("f3f2onset", "f3f2min", "gcf3f2"),
                                    labels = c("Onset", "Minimum F3", "Minimum F3 \nVisually Determined")))

Statistical Analyses in the Paper

The following R code is organized by sub-heading of the results section.

Transcriptions

Frequencies table of professional transcriber characterizations

table(rw_auto_unique$transc)
## 
##    r   rw    w   wr 
##  423  201 1245  409
rw_auto_unique %>% filter(target=="r") %>% 
  summarize(mean(transc=="r"))
ABCDEFGHIJ0123456789
mean(transc == "r")
<dbl>
0.3687445
rw_auto_unique %>% filter(target=="r") %>% 
  summarize(mean(transc %in% c("r", "rw")))
ABCDEFGHIJ0123456789
mean(transc %in% c("r", "rw"))
<dbl>
0.5223881
rw_auto_unique %>% filter(target=="w") %>% 
  summarize(mean(transc=="w"))
ABCDEFGHIJ0123456789
mean(transc == "w")
<dbl>
0.8235294
rw_auto_unique %>% filter(target=="w") %>% 
  summarize(mean(transc %in% c("w", "wr")))
ABCDEFGHIJ0123456789
mean(transc %in% c("w", "wr"))
<dbl>
0.9745391

Listener Ratings

Average VAS Rations by transcription

rw_auto_unique %>% 
  group_by(transc) %>% 
  summarize(mean(rating))
ABCDEFGHIJ0123456789
transc
<chr>
mean(rating)
<dbl>
r0.2246196
rw0.3368821
w0.7082958
wr0.5512385

Acoustic Analysis - Automated Measures

Correlation between lowest F3 and onset F3 with manually-coded F3

cor(rw_manual_subset_uniq$gcf3, rw_manual_subset_uniq$f3minmed) -> r1
cor(rw_manual_subset_uniq$gcf3, rw_manual_subset_uniq$f3_onset_med) -> r2
df = dim(rw_manual_subset_uniq)[1]-1

r1;r2;df
## [1] 0.5864477
## [1] 0.4731004
## [1] 267

Fisher’s Z transformation

z1 = .5*(log(1+r1)-log(1-r1))
z2 = .5*(log(1+r2)-log(1-r2))
s = sqrt(2 / (df))

z_d = (z2 - z1)/s
p = pnorm(z_d, lower.tail=TRUE)
p
## [1] 0.03380382
table(rw_auto_unique$f3mincolshort)
## 
##   f3_endpt_med   f3_midpt_med   f3_onset_med    f3_qrtr_med f3_thrqrtr_med 
##            381            461            649            476            311
table(rw_auto_unique$f3mincolshort) / dim(rw_auto_unique)[1]
## 
##   f3_endpt_med   f3_midpt_med   f3_onset_med    f3_qrtr_med f3_thrqrtr_med 
##      0.1672520      0.2023705      0.2848990      0.2089552      0.1365233

Relationship between automated and manual acoustic measures

Correlation between automated F3-F2 difference and manually coded F3-F2 difference

cor(rw_manual_subset_uniq$gcf3f2, rw_manual_subset_uniq$f3f2min, method="pearson")
## [1] 0.4604875
cor.test(rw_manual_subset_uniq$gcf3f2, rw_manual_subset_uniq$f3f2min, method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  rw_manual_subset_uniq$gcf3f2 and rw_manual_subset_uniq$f3f2min
## t = 8.4608, df = 266, p-value = 1.797e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3605607 0.5499639
## sample estimates:
##       cor 
## 0.4604875

Relationship between acoustic measures and narrow transcriptions

Model fitting F3-F2 differences and narrow transcriptions

rw_auto_all <- rw_auto_all %>% 
  mutate(R.Indicator=1*(transc %in% c("r", "rw")))

mdl1 <- glmer(R.Indicator ~ f3f2min + (1|ChildID), 
  data = rw_auto_all,
  family = poisson(link="log")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0918093 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?
r.squaredLR(mdl1)
## [1] 0.2304827
## attr(,"adj.r.squared")
## [1] 0.3179875

Model with automated F3-F2 differences for the subset of 14 children

mdl2 <- glmer(R.Indicator ~ f3f2min + (1|ChildID), 
  data = rw_manual_subset_uniq,
  family = poisson(link="log")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0600304 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?
r.squaredLR(mdl2)
## [1] 0.1127922
## attr(,"adj.r.squared")
## [1] 0.1456392

Model with manually coded F3-F2 differences for the subset of 14 children

mdl3 <- glmer(R.Indicator ~ gcf3f2 + (1|ChildID), 
  data = rw_manual_subset_uniq,
  family = poisson(link="log")
)
## boundary (singular) fit: see help('isSingular')
r.squaredLR(mdl3)
## [1] 0.2824487
## attr(,"adj.r.squared")
## [1] 0.3647025

Relationship between acoustic analysis and VAS listener ratings

Model fitting automated F3-F2 differences and listener ratings

mdl4 <- lmer(data = rw_auto_all, 
             formula = "rating ~ f3f2min + (1|ChildID)")
r.squaredLR(mdl4)
## [1] 0.1544851
## attr(,"adj.r.squared")
## [1] 0.3691238
Anova(mdl4)
ABCDEFGHIJ0123456789
 
 
Chisq
<dbl>
Df
<dbl>
Pr(>Chisq)
<dbl>
f3f2min1989.82110

Model fitting automated F3-F2 differences for the subset of 14 children

mdl5 <- lmer(data = rw_manual_subset_uniq, formula = "rating ~ f3f2min + (1|ChildID)")
r.squaredLR(mdl5)
## [1] 0.08437719
## attr(,"adj.r.squared")
## [1] 0.190504
Anova(mdl5)
ABCDEFGHIJ0123456789
 
 
Chisq
<dbl>
Df
<dbl>
Pr(>Chisq)
<dbl>
f3f2min24.6143917.002703e-07

Model fitting manually coded F3-F2 differences for the subset of 14 children

mdl6 <- lmer(data = rw_manual_subset_uniq, formula = "rating ~ gcf3f2 + (1|ChildID)")
r.squaredLR(mdl6)
## [1] 0.3792751
## attr(,"adj.r.squared")
## [1] 0.8563147
Anova(mdl6)
ABCDEFGHIJ0123456789
 
 
Chisq
<dbl>
Df
<dbl>
Pr(>Chisq)
<dbl>
gcf3f2163.914911.578862e-37

Robustness of Contrast

For the correlations between ROC and other factors, see the ROC_Curves_Share.RMD file

Session Info

sessionInfo()
## R version 4.2.1 (2022-06-23 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] car_3.1-2       carData_3.0-5   lmerTest_3.1-3  MuMIn_1.47.1   
##  [5] lme4_1.1-31     Matrix_1.5-3    ggdist_3.2.1    ggpubr_0.5.0   
##  [9] cowplot_1.1.1   forcats_1.0.0   stringr_1.5.0   dplyr_1.0.10   
## [13] purrr_0.3.5     readr_2.1.3     tidyr_1.2.1     tibble_3.1.8   
## [17] ggplot2_3.4.0   tidyverse_1.3.2
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4           sass_0.4.4           jsonlite_1.8.4      
##  [4] splines_4.2.1        modelr_0.1.10        bslib_0.4.1         
##  [7] assertthat_0.2.1     distributional_0.3.2 stats4_4.2.1        
## [10] googlesheets4_1.0.1  cellranger_1.1.0     yaml_2.3.6          
## [13] numDeriv_2016.8-1.1  pillar_1.8.1         backports_1.4.1     
## [16] lattice_0.20-45      glue_1.6.2           digest_0.6.29       
## [19] ggsignif_0.6.4       minqa_1.2.5          rvest_1.0.3         
## [22] colorspace_2.0-3     htmltools_0.5.3      pkgconfig_2.0.3     
## [25] broom_1.0.1          haven_2.5.1          scales_1.2.1        
## [28] tzdb_0.3.0           timechange_0.1.1     googledrive_2.0.0   
## [31] generics_0.1.3       farver_2.1.1         ellipsis_0.3.2      
## [34] cachem_1.0.6         withr_2.5.0          cli_3.4.0           
## [37] magrittr_2.0.3       crayon_1.5.2         readxl_1.4.1        
## [40] evaluate_0.18        fs_1.5.2             fansi_1.0.3         
## [43] nlme_3.1-157         MASS_7.3-57          rstatix_0.7.1       
## [46] xml2_1.3.3           tools_4.2.1          hms_1.1.2           
## [49] gargle_1.2.1         lifecycle_1.0.3      munsell_0.5.0       
## [52] reprex_2.0.2         compiler_4.2.1       jquerylib_0.1.4     
## [55] rlang_1.0.6          nloptr_2.0.3         grid_4.2.1          
## [58] rstudioapi_0.14      rmarkdown_2.18       boot_1.3-28         
## [61] gtable_0.3.1         abind_1.4-5          DBI_1.1.3           
## [64] R6_2.5.1             lubridate_1.9.0      knitr_1.41          
## [67] fastmap_1.1.0        utf8_1.2.2           stringi_1.7.8       
## [70] Rcpp_1.0.9           vctrs_0.5.0          dbplyr_2.2.1        
## [73] tidyselect_1.2.0     xfun_0.35