• Preparation
    • Data Sets
    • Long Format
  • Paper Figures
    • Figure 1
    • Figure 2
    • Figure 3
    • Figure 4
  • Appendix
    • Supplemental Figure 2
    • Supplemental Figure 3
  • Session Info

Preparation

Load packages

library(tidyverse)
library(cowplot)
library(ggpubr)
library(ggdist)

Data Sets

Read in the two main dataframes:
1. Automated measurements from the formant triple tracker for all 117 children 2. Hand-coded manual measurements for a subset of 14 children

# 1) read in the rw_auto_all.csv file
rw_auto_all <- read.csv(file.choose())

# 2) read in the rw_manual_subset.csv file
rw_manual_subset <- read.csv(file.choose())

Create a subset of the rw_auto_all dataframe to only include each unique word token for each child speaker (i.e. remove the individual listener ratings data, which would duplicate each word spoken by each child)

#removes duplicate files for when you aren't using the listener ratings
rw_auto_unique <- distinct(rw_auto_all, Filename, .keep_all = TRUE)
rw_manual_subset_uniq <- distinct(rw_manual_subset, Filename, .keep_all = TRUE)

# filter rw_auto_unique to only include those 14 children
child_subest <- unique(rw_manual_subset$subject)

rw_auto_subset <- rw_auto_unique %>% 
  dplyr::filter(subject %in% child_subest)

Add a column calculating the F3-F2 difference to the rw_manual_subset and rw_manual_subset_uniq dataframes

#adds a column calculating the F3-F2 difference
rw_manual_subset <- rw_manual_subset %>% mutate(gcf3f2 = gcf3 - gcf2)
rw_manual_subset_uniq <- rw_manual_subset_uniq %>% mutate(gcf3f2 = gcf3 - gcf2)

Long Format

Convert the automated measurements dataframe into long format so frequency is one column and measurement (min/onset) is another column

# auto measurements of all children
rw_auto_unique_long <- rw_auto_unique %>% 
  select(c("subject", "transc", "target", "f3f2onset", "f3f2min")) %>% 
  pivot_longer(cols = c("f3f2onset", "f3f2min"),
               names_to = "measurement", 
               values_to = "frequency") %>% 
  mutate(transc_labels = factor(transc, levels = c("r", "rw", "wr", "w"),
                         labels = c("[\u0279]", "[\u0279]:[w]", "[w]:[\u0279]", "[w]")),
         measurement_facet = factor(measurement, 
                                    levels = c("f3f2onset", "f3f2min"),
                                    labels = c("Onset", "Minimum F3")))

# filter rw_auto_unique_long to only include those 14 children
subest <- unique(rw_manual_subset$subject)

rw_auto_subset_long <- rw_auto_unique_long %>% 
  dplyr::filter(subject %in% subest)

Convert the manually coded measurements dataframe into long format so frequency is one column and measurement (min/onset) is another column

# hand-coded measurements for subset of 14 children
rw_manual_subset_uniq_long <- rw_manual_subset_uniq %>% 
   select(c("subject", "transc", "target", "f3f2onset", "f3f2min", "gcf3f2")) %>% 
  pivot_longer(cols = c("f3f2onset", "f3f2min", "gcf3f2"),
               names_to = "measurement", 
               values_to = "frequency") %>% 
  mutate(transc_labels = factor(transc, levels = c("r", "rw", "wr", "w"),
                         labels = c("[\u0279]", "[\u0279]:[w]", "[w]:[\u0279]", "[w]")),
         measurement_facet = factor(measurement, 
                                    levels = c("f3f2onset", "f3f2min", "gcf3f2"),
                                    labels = c("Onset", "Minimum F3", "Minimum F3 \nVisually Determined")))

Paper Figures

Create a y-axis scale that includes the /ɹ/-/w/ labels

# create y-axis labels to include phonetic transcription of /ɹ/ and /w/
y_axis_rw <- scale_y_continuous(breaks = c(0,0.25,0.50,0.75,1), labels = c("/\u0279/ 0", 0.25,0.5,0.75, "/w/ 1"))

Figure 1

px_auto_onset_min_all <- ggplot(rw_auto_unique_long) +
  aes(x = transc_labels, y = frequency, fill = transc_labels) +
  ggdist::stat_dots(aes(color = transc_labels),
                    side = "top", dotsize = 5, alpha = 0.6) +
  geom_boxplot(fatten = 3, width = 0.15, outlier.shape = NA, alpha = 0.6,
               position = position_nudge(x = -0.1, y = 0)) +
  stat_summary(geom = "point", fun = "mean", size = 2, color = "white", 
               position = position_nudge(x = -0.1, y = 0)) +
  stat_summary(geom = "point", fun = "mean", size = 1.25, color = "black",
               position = position_nudge(x = -0.1, y = 0)) +
  stat_summary(aes(label = round(..y.., 0)), geom = "label", fun = "mean", size = 2, fill = "white",
               position = position_nudge(x = -0.32, y = 0)) +
  coord_flip(ylim = c(0,4000)) +
  labs(title = "Automated Measures for All Children (n = 117)", 
       x = "Transcription", y = "F3-F2 (Hz)", fill = "Acoustic \nMeasurement") +
  scale_fill_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
  scale_color_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
  guides(color = "none", fill = "none") +
  facet_wrap(~measurement_facet) +
  theme_bw() +
  theme(plot.title = element_text(hjust = .5, size = 12),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12))
px_auto_onset_min_all
## Warning: The dot-dot notation (`..y..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(y)` instead.

# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave(px_auto_onset_min_all, file = "YOUR FILE PATH AND FIGURE NAME HERE",
#        dpi = 300, height = 6.1, width = 6.1)

Now look at the auto measures for the subset of 14 children who were hand coded

# auto measurements
px_auto_subset <- ggplot(rw_auto_subset_long) +
  aes(x = transc_labels, y = frequency, fill = transc_labels) +
  ggdist::stat_dots(aes(color = transc_labels),
                    side = "top", dotsize = 1, alpha = 0.6) +
  geom_boxplot(fatten = 3, width = 0.15, outlier.shape = NA, alpha = 0.6,
               position = position_nudge(x = -0.1, y = 0)) +
  stat_summary(geom = "point", fun = "mean", size = 2, color = "white", 
               position = position_nudge(x = -0.1, y = 0)) +
  stat_summary(geom = "point", fun = "mean", size = 1.25, color = "black",
               position = position_nudge(x = -0.1, y = 0)) +
  stat_summary(aes(label = round(..y.., 0)), geom = "label", fun = "mean", size = 2, fill = "white",
               position = position_nudge(x = -0.32, y = 0)) +
  coord_flip(ylim = c(0,4000)) +
  labs(title = "Automated Subset of Children (n = 14)", x = "Transcription", y = "F3-F2 (Hz)", fill = "Acoustic \nMeasurement") +
  scale_fill_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
  scale_color_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
  guides(color = "none", fill = "none") +
  facet_wrap(~measurement_facet) +
  theme_bw() +
  theme(plot.title = element_text(hjust = .5, size = 12),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12))
px_auto_subset

# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave(px_auto_subset, file = "YOUR FILE PATH AND FIGURE NAME HERE",
#        dpi = 300, height = 6.1, width = 6.1)

Plot the manually coded f3-f2

px_hand_subset <- rw_manual_subset_uniq_long %>% 
  dplyr::filter(measurement == "gcf3f2") %>% 
  ggplot() +
  aes(x = transc_labels, y = frequency, fill = transc_labels) +
  ggdist::stat_dots(aes(color = transc_labels),
                    side = "top", dotsize = 1, alpha = 0.6) +
  geom_boxplot(fatten = 3, width = 0.15, outlier.shape = NA, alpha = 0.6,
               position = position_nudge(x = -0.1, y = 0)) +
  stat_summary(geom = "point", fun = "mean", size = 2, color = "white", 
               position = position_nudge(x = -0.1, y = 0)) +
  stat_summary(geom = "point", fun = "mean", size = 1.25, color = "black",
               position = position_nudge(x = -0.1, y = 0)) +
  stat_summary(aes(label = round(..y.., 0)), geom = "label", fun = "mean", size = 2, fill = "white",
               position = position_nudge(x = -0.32, y = 0)) +
  coord_flip(ylim = c(0,4000)) +
  labs(title = "Manually Coded Subset of Children (n = 14)", x = "Transcription", y = "F3-F2 (Hz)", fill = "Acoustic \nMeasurement",
       subtitle = "Minimum F3 Visually Determined") +
  scale_fill_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
  scale_color_viridis_d(option = "magma", begin = 0.2, end = 0.7) + 
  guides(color = "none", fill = "none") +
  theme_bw() +
  theme(plot.title = element_text(hjust = .5, size = 12),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12),
        plot.subtitle = element_text(hjust = .5))
px_hand_subset

# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave(px_hand_subset, file = "YOUR FILE PATH AND FIGURE NAME HERE",
#        dpi = 300, height = 6.1, width = 6.1)

Combine the plots using the cowplot package to create Figure 1

# create a list of plots that will be the bottom row of the combined figure
# warning messages are referring to the [ɹ] character and can be igonred
bottom_row <- cowplot::plot_grid(px_auto_subset, px_hand_subset, labels = c("B", "C"), rel_widths = c(2,1.5), ncol = 2)

figure_1 <- plot_grid(px_auto_onset_min_all, bottom_row, labels = c("A",""), ncol = 1)
figure_1

# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave(figure_1, file = "YOUR FILE PATH AND FIGURE NAME HERE",
#        dpi = 300, height = 7.2, width = 9.3)

Figure 2

Plot the average untrained listener ratings for a given child as a function of automated F3-F2 difference

fig2A <- rw_manual_subset %>% 
  group_by(subject, ChildWordKey, f3f2min, target) %>% 
  summarise(avg_rating = mean(rating)) %>% 
  mutate(target_phon = factor(target, levels = c("r", "w"), labels = c("/\u0279/", "/w/"))) %>% 
  ggplot() +
  aes(x = f3f2min, y = avg_rating) +
  geom_smooth(method = "loess", se = FALSE, color = "grey50") +
  geom_point(aes(color = target_phon), alpha = 0.75, size = 2) +
  y_axis_rw +
  labs(x = "Automated F3-F2 Difference (Hz)", y = "Average Untrained \nListener Rating", color = "Target\nPhoneme") +
  coord_cartesian(ylim = c(0,1), xlim = c(100,4000)) + 
  scale_color_viridis_d(option = "magma", begin = 0.2, end = 0.7) + 
  theme_bw()  +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = .5),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12))
fig2A

Plot the average untrained listener ratings for a given child as a function of manually coded F3-F2 difference

fig2B <- rw_manual_subset %>% 
  group_by(subject, ChildWordKey, gcf3f2, target) %>% 
  summarise(avg_rating = mean(rating)) %>% 
  mutate(target_phon = factor(target, levels = c("r", "w"), labels = c("/\u0279/", "/w/"))) %>% 
  ggplot() +
  aes(x = gcf3f2, y = avg_rating) +
  geom_smooth(method = "loess", se = FALSE, color = "grey50") +
  geom_point(aes(color = target_phon), alpha = 0.75, size = 2) +
  y_axis_rw +
  labs(x = "Manually Coded F3-F2 Difference (Hz)", y = "Average Untrained \nListener Rating", color = "Target\nPhoneme") +
  coord_cartesian(ylim = c(0,1), xlim = c(100,4000)) + 
  guides(color = "none") +
  scale_color_viridis_d(option = "magma", begin = 0.2, end = 0.7) + 
  theme_bw()  +
  theme(plot.title = element_text(hjust = .5),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12))
fig2B

Combine the plots using the cowplot package to create Figure 2

figure_2 <- cowplot::plot_grid(fig2A, fig2B, labels = "AUTO", ncol = 1)
figure_2

# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave2(figure_2, file = "YOUR FILE PATH AND FIGURE NAME HERE",
#         dpi = 300, height = 7.2, width = 6.1)

Figure 3

Correlating listener ratings based from minimum F3-F2 distance from the automated measures for all children

figure_3 <- rw_auto_all %>% 
  group_by(subject, ChildWordKey, f3f2min, target) %>% 
  summarise(avg_rating = mean(rating)) %>% 
  mutate(target_phon = factor(target, levels = c("r", "w"), labels = c("/\u0279/", "/w/"))) %>% 
  ggplot() +
  aes(x = f3f2min, y = avg_rating) +
  geom_smooth(method = "loess", se = FALSE, color = "grey50") +
  geom_point(aes(color = target_phon), alpha = 0.3, size = 2) +
  y_axis_rw +
  labs(x = "Automated F3-F2 Difference (Hz)", y = "Average Untrained \nListener Rating",  color = "Target\nPhoneme") +
  coord_cartesian(ylim = c(0,1), xlim = c(100,4000)) + 
  scale_color_viridis_d(option = "magma", begin = 0.2, end = 0.7) + 
  theme_bw()  +
  theme(plot.title = element_text(hjust = .5),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12))

figure_3

# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave2(figure_3, file = "YOUR FILE PATH AND FIGURE NAME HERE",
#         dpi = 300, height = 6.1, width = 6.1)

Figure 4

The code for ROC curves is in a separate file called “ROC_Curves_Share.RMD”

Appendix

The code used to create the supplemental figures. Supplemental Figure 1 is a screenshot of the VAS interface and thus no code is provided here.

Supplemental Figure 2

Correlation between manually coded F3 value and the automated F3 minimum.

Supp_Fig2 <- ggplot(rw_manual_subset_uniq, aes(x=gcf3, y=f3minmed)) +
  geom_smooth(method = "lm", se = FALSE, color = "red3") +
  geom_abline(size = 0.75, slope = 1, linetype = "dashed") +
  geom_point(size = 2, alpha = 0.5) +
  ggpubr::stat_cor(aes(label = ..r.label..), label.x = 1550, label.y = 4750) +
  coord_cartesian(xlim = c(1500, 5000), ylim = c(1500,5000)) +
  labs(x = "Manually Coded F3 (Hz)", y = "Automated F3 Minimum (Hz)") +
  theme_bw() +
  theme(plot.title = element_text(hjust = .5),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
Supp_Fig2

# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave2(Supp_Fig2, file = "YOUR FILE PATH AND FIGURE NAME HERE", 
#         dpi = 300, height = 6.3, width = 6.1)

Supplemental Figure 3

Correlation between manually coded F3 value and the automated F3 minimum

Supp_Fig3 <- ggplot(rw_manual_subset_uniq, aes(x=gcf3, y=f3_onset_med)) + 
  geom_smooth(method = "lm", se = FALSE, color = "red3") +
  geom_abline(size = 0.75, slope = 1, linetype = "dashed") +
  geom_point(size = 2, alpha = 0.5) +
  ggpubr::stat_cor(aes(label = ..r.label..), label.x = 1550, label.y = 4750) +
  coord_cartesian(xlim = c(1500, 5000), ylim = c(1500,5000)) +
  labs(x = "Manually Coded F3 (Hz)", y = "Automated F3 Onset (Hz)") +
  theme_bw() +
  theme(plot.title = element_text(hjust = .5),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14))
Supp_Fig3

# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave2(Supp_Fig3, file = "YOUR FILE PATH AND FIGURE NAME HERE", 
#         dpi = 300, height = 6.3, width = 6.1)

Session Info

sessionInfo()
## R version 4.2.1 (2022-06-23 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggdist_3.2.1    ggpubr_0.5.0    cowplot_1.1.1   forcats_1.0.0  
##  [5] stringr_1.5.0   dplyr_1.0.10    purrr_0.3.5     readr_2.1.3    
##  [9] tidyr_1.2.1     tibble_3.1.8    ggplot2_3.4.0   tidyverse_1.3.2
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4           sass_0.4.4           splines_4.2.1       
##  [4] jsonlite_1.8.4       viridisLite_0.4.1    carData_3.0-5       
##  [7] modelr_0.1.10        bslib_0.4.1          assertthat_0.2.1    
## [10] distributional_0.3.2 highr_0.9            googlesheets4_1.0.1 
## [13] cellranger_1.1.0     yaml_2.3.6           lattice_0.20-45     
## [16] pillar_1.8.1         backports_1.4.1      glue_1.6.2          
## [19] quadprog_1.5-8       digest_0.6.29        ggsignif_0.6.4      
## [22] rvest_1.0.3          colorspace_2.0-3     Matrix_1.5-3        
## [25] htmltools_0.5.3      pkgconfig_2.0.3      broom_1.0.1         
## [28] haven_2.5.1          scales_1.2.1         tzdb_0.3.0          
## [31] timechange_0.1.1     googledrive_2.0.0    mgcv_1.8-40         
## [34] generics_0.1.3       farver_2.1.1         car_3.1-2           
## [37] ellipsis_0.3.2       cachem_1.0.6         withr_2.5.0         
## [40] cli_3.4.0            magrittr_2.0.3       crayon_1.5.2        
## [43] readxl_1.4.1         evaluate_0.18        fs_1.5.2            
## [46] fansi_1.0.3          nlme_3.1-157         rstatix_0.7.1       
## [49] xml2_1.3.3           tools_4.2.1          hms_1.1.2           
## [52] gargle_1.2.1         lifecycle_1.0.3      munsell_0.5.0       
## [55] reprex_2.0.2         compiler_4.2.1       jquerylib_0.1.4     
## [58] rlang_1.0.6          grid_4.2.1           rstudioapi_0.14     
## [61] labeling_0.4.2       rmarkdown_2.18       gtable_0.3.1        
## [64] abind_1.4-5          DBI_1.1.3            R6_2.5.1            
## [67] lubridate_1.9.0      knitr_1.41           fastmap_1.1.0       
## [70] utf8_1.2.2           stringi_1.7.8        vctrs_0.5.0         
## [73] dbplyr_2.2.1         tidyselect_1.2.0     xfun_0.35