RW Models

Preparation

Data Sets
Long Format

Statistical Analyses in the Paper

Transcriptions
Listener Ratings
Acoustic Analysis - Automated Measures
Relationship between automated and manual acoustic measures
Relationship between acoustic measures and narrow transcriptions
Relationship between acoustic analysis and VAS listener ratings
Robustness of Contrast

Session Info

Preparation

Load packages

library(tidyverse)
library(cowplot)
library(ggpubr)
library(ggdist)
library(lme4)
library(MuMIn)
library(lmerTest)
library(car)

Data Sets

Read in the two main dataframes:
1. Automated measurements from the formant triple tracker for all 117 children 2. Hand-coded manual measurements for a subset of 14 children

# 1) read in the rw_auto_all.csv file
rw_auto_all <- read.csv(file.choose())

# 2) read in the rw_manual_subset.csv file
rw_manual_subset <- read.csv(file.choose())

Add two variables to the rw_manual_subset file which will later be used in modelling

rw_auto_all <- rw_auto_all %>%
  mutate(
    transc.acc = case_when(
      transc %in% c("r","rw") & target=="r" ~ 1,
      transc %in% c("w","wr") & target=="w" ~ 1,
      transc %in% c("r","rw") & target=="w" ~ 0,
      transc %in% c("w","wr") & target=="r" ~ 0
    )
  )

rw_manual_subset <- rw_manual_subset %>%
  mutate(
    R.Indicator=1*(transc %in% c("r", "rw")),
    gcf3f2 = gcf3 - gcf2, 
    transc.acc = case_when(
      transc %in% c("r","rw") & target=="r" ~ 1,
      transc %in% c("w","wr") & target=="w" ~ 1,
      transc %in% c("r","rw") & target=="w" ~ 0,
      transc %in% c("w","wr") & target=="r" ~ 0
    )
  )

Create a subset of the rw_auto_all dataframe to only include each unique word token for each child speaker (i.e. remove the individual listener ratings data, which would duplicate each word spoken by each child)

#removes duplicate files for when you aren't using the listener ratings
rw_auto_unique <- distinct(rw_auto_all, Filename, .keep_all = TRUE)
rw_manual_subset_uniq <- distinct(rw_manual_subset, Filename, .keep_all = TRUE)

# filter rw_auto_unique to only include those 14 children
child_subest <- unique(rw_manual_subset$subject)

rw_auto_subset <- rw_auto_unique %>% 
  dplyr::filter(subject %in% child_subest)

Long Format

Convert the automated measurements dataframe into long format so frequency is one column and measurement (min/onset) is another column

# auto measurements of all children
rw_auto_unique_long <- rw_auto_unique %>% 
  select(c("subject", "transc", "target", "f3f2onset", "f3f2min")) %>% 
  pivot_longer(cols = c("f3f2onset", "f3f2min"),
               names_to = "measurement", 
               values_to = "frequency") %>% 
  mutate(transc_labels = factor(transc, levels = c("r", "rw", "wr", "w"),
                         labels = c("[\u0279]", "[\u0279]:[w]", "[w]:[\u0279]", "[w]")),
         measurement_facet = factor(measurement, 
                                    levels = c("f3f2onset", "f3f2min"),
                                    labels = c("Onset", "Minimum F3")))

# filter rw_auto_unique_long to only include those 14 children
subest <- unique(rw_manual_subset$subject)

rw_auto_subset_long <- rw_auto_unique_long %>% 
  dplyr::filter(subject %in% subest)

Convert the manually coded measurements dataframe into long format so frequency is one column and measurement (min/onset) is another column

# hand-coded measurements for subset of 14 children
rw_manual_subset_uniq_long <- rw_manual_subset_uniq %>% 
   select(c("subject", "transc", "target", "f3f2onset", "f3f2min", "gcf3f2")) %>% 
  pivot_longer(cols = c("f3f2onset", "f3f2min", "gcf3f2"),
               names_to = "measurement", 
               values_to = "frequency") %>% 
  mutate(transc_labels = factor(transc, levels = c("r", "rw", "wr", "w"),
                         labels = c("[\u0279]", "[\u0279]:[w]", "[w]:[\u0279]", "[w]")),
         measurement_facet = factor(measurement, 
                                    levels = c("f3f2onset", "f3f2min", "gcf3f2"),
                                    labels = c("Onset", "Minimum F3", "Minimum F3 \nVisually Determined")))

Statistical Analyses in the Paper

The following R code is organized by sub-heading of the results section.

Transcriptions

Frequencies table of professional transcriber characterizations

table(rw_auto_unique$transc)

## 
##    r   rw    w   wr 
##  423  201 1245  409

rw_auto_unique %>% filter(target=="r") %>% 
  summarize(mean(transc=="r"))

ABCDEFGHIJ0123456789

mean(transc == "r") <dbl>
0.3687445

rw_auto_unique %>% filter(target=="r") %>% 
  summarize(mean(transc %in% c("r", "rw")))

ABCDEFGHIJ0123456789

mean(transc %in% c("r", "rw")) <dbl>
0.5223881

rw_auto_unique %>% filter(target=="w") %>% 
  summarize(mean(transc=="w"))

ABCDEFGHIJ0123456789

mean(transc == "w") <dbl>
0.8235294

rw_auto_unique %>% filter(target=="w") %>% 
  summarize(mean(transc %in% c("w", "wr")))

ABCDEFGHIJ0123456789

mean(transc %in% c("w", "wr")) <dbl>
0.9745391

Listener Ratings

Average VAS Rations by transcription

rw_auto_unique %>% 
  group_by(transc) %>% 
  summarize(mean(rating))

ABCDEFGHIJ0123456789

transc <chr>	mean(rating) <dbl>
r	0.2246196
rw	0.3368821
w	0.7082958
wr	0.5512385

Acoustic Analysis - Automated Measures

Correlation between lowest F3 and onset F3 with manually-coded F3

cor(rw_manual_subset_uniq$gcf3, rw_manual_subset_uniq$f3minmed) -> r1
cor(rw_manual_subset_uniq$gcf3, rw_manual_subset_uniq$f3_onset_med) -> r2
df = dim(rw_manual_subset_uniq)[1]-1

r1;r2;df

## [1] 0.5864477

## [1] 0.4731004

## [1] 267

Fisher’s Z transformation

z1 = .5*(log(1+r1)-log(1-r1))
z2 = .5*(log(1+r2)-log(1-r2))
s = sqrt(2 / (df))

z_d = (z2 - z1)/s
p = pnorm(z_d, lower.tail=TRUE)
p

## [1] 0.03380382

table(rw_auto_unique$f3mincolshort)

## 
##   f3_endpt_med   f3_midpt_med   f3_onset_med    f3_qrtr_med f3_thrqrtr_med 
##            381            461            649            476            311

table(rw_auto_unique$f3mincolshort) / dim(rw_auto_unique)[1]

## 
##   f3_endpt_med   f3_midpt_med   f3_onset_med    f3_qrtr_med f3_thrqrtr_med 
##      0.1672520      0.2023705      0.2848990      0.2089552      0.1365233

Relationship between automated and manual acoustic measures

Correlation between automated F3-F2 difference and manually coded F3-F2 difference

cor(rw_manual_subset_uniq$gcf3f2, rw_manual_subset_uniq$f3f2min, method="pearson")

## [1] 0.4604875

cor.test(rw_manual_subset_uniq$gcf3f2, rw_manual_subset_uniq$f3f2min, method="pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  rw_manual_subset_uniq$gcf3f2 and rw_manual_subset_uniq$f3f2min
## t = 8.4608, df = 266, p-value = 1.797e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3605607 0.5499639
## sample estimates:
##       cor 
## 0.4604875

Relationship between acoustic measures and narrow transcriptions

Model fitting F3-F2 differences and narrow transcriptions

rw_auto_all <- rw_auto_all %>% 
  mutate(R.Indicator=1*(transc %in% c("r", "rw")))

mdl1 <- glmer(R.Indicator ~ f3f2min + (1|ChildID), 
  data = rw_auto_all,
  family = poisson(link="log")
)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0918093 (tol = 0.002, component 1)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?

r.squaredLR(mdl1)

## [1] 0.2304827
## attr(,"adj.r.squared")
## [1] 0.3179875

Model with automated F3-F2 differences for the subset of 14 children

mdl2 <- glmer(R.Indicator ~ f3f2min + (1|ChildID), 
  data = rw_manual_subset_uniq,
  family = poisson(link="log")
)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0600304 (tol = 0.002, component 1)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?

r.squaredLR(mdl2)

## [1] 0.1127922
## attr(,"adj.r.squared")
## [1] 0.1456392

Model with manually coded F3-F2 differences for the subset of 14 children

mdl3 <- glmer(R.Indicator ~ gcf3f2 + (1|ChildID), 
  data = rw_manual_subset_uniq,
  family = poisson(link="log")
)

## boundary (singular) fit: see help('isSingular')

r.squaredLR(mdl3)

## [1] 0.2824487
## attr(,"adj.r.squared")
## [1] 0.3647025

Relationship between acoustic analysis and VAS listener ratings

Model fitting automated F3-F2 differences and listener ratings

mdl4 <- lmer(data = rw_auto_all, 
             formula = "rating ~ f3f2min + (1|ChildID)")
r.squaredLR(mdl4)

## [1] 0.1544851
## attr(,"adj.r.squared")
## [1] 0.3691238

Anova(mdl4)

ABCDEFGHIJ0123456789

	Chisq <dbl>	Df <dbl>	Pr(>Chisq) <dbl>
f3f2min	1989.821	1	0

Model fitting automated F3-F2 differences for the subset of 14 children

mdl5 <- lmer(data = rw_manual_subset_uniq, formula = "rating ~ f3f2min + (1|ChildID)")
r.squaredLR(mdl5)

## [1] 0.08437719
## attr(,"adj.r.squared")
## [1] 0.190504

Anova(mdl5)

ABCDEFGHIJ0123456789

	Chisq <dbl>	Df <dbl>	Pr(>Chisq) <dbl>
f3f2min	24.61439	1	7.002703e-07

Model fitting manually coded F3-F2 differences for the subset of 14 children

mdl6 <- lmer(data = rw_manual_subset_uniq, formula = "rating ~ gcf3f2 + (1|ChildID)")
r.squaredLR(mdl6)

## [1] 0.3792751
## attr(,"adj.r.squared")
## [1] 0.8563147

Anova(mdl6)

ABCDEFGHIJ0123456789

	Chisq <dbl>	Df <dbl>	Pr(>Chisq) <dbl>
gcf3f2	163.9149	1	1.578862e-37

Robustness of Contrast

For the correlations between ROC and other factors, see the ROC_Curves_Share.RMD file

Session Info

sessionInfo()

## R version 4.2.1 (2022-06-23 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] car_3.1-2       carData_3.0-5   lmerTest_3.1-3  MuMIn_1.47.1   
##  [5] lme4_1.1-31     Matrix_1.5-3    ggdist_3.2.1    ggpubr_0.5.0   
##  [9] cowplot_1.1.1   forcats_1.0.0   stringr_1.5.0   dplyr_1.0.10   
## [13] purrr_0.3.5     readr_2.1.3     tidyr_1.2.1     tibble_3.1.8   
## [17] ggplot2_3.4.0   tidyverse_1.3.2
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4           sass_0.4.4           jsonlite_1.8.4      
##  [4] splines_4.2.1        modelr_0.1.10        bslib_0.4.1         
##  [7] assertthat_0.2.1     distributional_0.3.2 stats4_4.2.1        
## [10] googlesheets4_1.0.1  cellranger_1.1.0     yaml_2.3.6          
## [13] numDeriv_2016.8-1.1  pillar_1.8.1         backports_1.4.1     
## [16] lattice_0.20-45      glue_1.6.2           digest_0.6.29       
## [19] ggsignif_0.6.4       minqa_1.2.5          rvest_1.0.3         
## [22] colorspace_2.0-3     htmltools_0.5.3      pkgconfig_2.0.3     
## [25] broom_1.0.1          haven_2.5.1          scales_1.2.1        
## [28] tzdb_0.3.0           timechange_0.1.1     googledrive_2.0.0   
## [31] generics_0.1.3       farver_2.1.1         ellipsis_0.3.2      
## [34] cachem_1.0.6         withr_2.5.0          cli_3.4.0           
## [37] magrittr_2.0.3       crayon_1.5.2         readxl_1.4.1        
## [40] evaluate_0.18        fs_1.5.2             fansi_1.0.3         
## [43] nlme_3.1-157         MASS_7.3-57          rstatix_0.7.1       
## [46] xml2_1.3.3           tools_4.2.1          hms_1.1.2           
## [49] gargle_1.2.1         lifecycle_1.0.3      munsell_0.5.0       
## [52] reprex_2.0.2         compiler_4.2.1       jquerylib_0.1.4     
## [55] rlang_1.0.6          nloptr_2.0.3         grid_4.2.1          
## [58] rstudioapi_0.14      rmarkdown_2.18       boot_1.3-28         
## [61] gtable_0.3.1         abind_1.4-5          DBI_1.1.3           
## [64] R6_2.5.1             lubridate_1.9.0      knitr_1.41          
## [67] fastmap_1.1.0        utf8_1.2.2           stringi_1.7.8       
## [70] Rcpp_1.0.9           vctrs_0.5.0          dbplyr_2.2.1        
## [73] tidyselect_1.2.0     xfun_0.35

RW Models

Elizabeth E. Ancel¹, Michael L. Smith¹, V.N. Vimal Rao², & Benjamin Munson¹

3/28/2023

Preparation

Data Sets

Long Format

Statistical Analyses in the Paper

Transcriptions

Listener Ratings

Acoustic Analysis - Automated Measures

Relationship between automated and manual acoustic measures

Relationship between acoustic measures and narrow transcriptions

Relationship between acoustic analysis and VAS listener ratings

Robustness of Contrast

Session Info

RW Models

Elizabeth E. Ancel1, Michael L. Smith1, V.N. Vimal Rao2, & Benjamin Munson1

3/28/2023

Preparation

Data Sets

Long Format

Statistical Analyses in the Paper

Transcriptions

Listener Ratings

Acoustic Analysis - Automated Measures

Relationship between automated and manual acoustic measures

Relationship between acoustic measures and narrow transcriptions

Relationship between acoustic analysis and VAS listener ratings

Robustness of Contrast

Session Info

Elizabeth E. Ancel¹, Michael L. Smith¹, V.N. Vimal Rao², & Benjamin Munson¹