Department of Speech-Language-Hearing Sciences, University of Minnesota
Department of Educational Psychology, University of Minnesota
Load packages
library(tidyverse)
library(cowplot)
library(ggpubr)
library(ggdist)
Read in the two main dataframes:
1. Automated measurements from the formant triple tracker for all 117
children 2. Hand-coded manual measurements for a subset of 14
children
# 1) read in the rw_auto_all.csv file
<- read.csv(file.choose())
rw_auto_all
# 2) read in the rw_manual_subset.csv file
<- read.csv(file.choose()) rw_manual_subset
Create a subset of the rw_auto_all
dataframe to only
include each unique word token for each child speaker (i.e. remove the
individual listener ratings data, which would duplicate each word spoken
by each child)
#removes duplicate files for when you aren't using the listener ratings
<- distinct(rw_auto_all, Filename, .keep_all = TRUE)
rw_auto_unique <- distinct(rw_manual_subset, Filename, .keep_all = TRUE)
rw_manual_subset_uniq
# filter rw_auto_unique to only include those 14 children
<- unique(rw_manual_subset$subject)
child_subest
<- rw_auto_unique %>%
rw_auto_subset ::filter(subject %in% child_subest) dplyr
Add a column calculating the F3-F2 difference to the
rw_manual_subset
and rw_manual_subset_uniq
dataframes
#adds a column calculating the F3-F2 difference
<- rw_manual_subset %>% mutate(gcf3f2 = gcf3 - gcf2)
rw_manual_subset <- rw_manual_subset_uniq %>% mutate(gcf3f2 = gcf3 - gcf2) rw_manual_subset_uniq
Convert the automated measurements dataframe into long format so frequency is one column and measurement (min/onset) is another column
# auto measurements of all children
<- rw_auto_unique %>%
rw_auto_unique_long select(c("subject", "transc", "target", "f3f2onset", "f3f2min")) %>%
pivot_longer(cols = c("f3f2onset", "f3f2min"),
names_to = "measurement",
values_to = "frequency") %>%
mutate(transc_labels = factor(transc, levels = c("r", "rw", "wr", "w"),
labels = c("[\u0279]", "[\u0279]:[w]", "[w]:[\u0279]", "[w]")),
measurement_facet = factor(measurement,
levels = c("f3f2onset", "f3f2min"),
labels = c("Onset", "Minimum F3")))
# filter rw_auto_unique_long to only include those 14 children
<- unique(rw_manual_subset$subject)
subest
<- rw_auto_unique_long %>%
rw_auto_subset_long ::filter(subject %in% subest) dplyr
Convert the manually coded measurements dataframe into long format so frequency is one column and measurement (min/onset) is another column
# hand-coded measurements for subset of 14 children
<- rw_manual_subset_uniq %>%
rw_manual_subset_uniq_long select(c("subject", "transc", "target", "f3f2onset", "f3f2min", "gcf3f2")) %>%
pivot_longer(cols = c("f3f2onset", "f3f2min", "gcf3f2"),
names_to = "measurement",
values_to = "frequency") %>%
mutate(transc_labels = factor(transc, levels = c("r", "rw", "wr", "w"),
labels = c("[\u0279]", "[\u0279]:[w]", "[w]:[\u0279]", "[w]")),
measurement_facet = factor(measurement,
levels = c("f3f2onset", "f3f2min", "gcf3f2"),
labels = c("Onset", "Minimum F3", "Minimum F3 \nVisually Determined")))
Create a y-axis scale that includes the /ɹ/-/w/ labels
# create y-axis labels to include phonetic transcription of /ɹ/ and /w/
<- scale_y_continuous(breaks = c(0,0.25,0.50,0.75,1), labels = c("/\u0279/ 0", 0.25,0.5,0.75, "/w/ 1")) y_axis_rw
<- ggplot(rw_auto_unique_long) +
px_auto_onset_min_all aes(x = transc_labels, y = frequency, fill = transc_labels) +
::stat_dots(aes(color = transc_labels),
ggdistside = "top", dotsize = 5, alpha = 0.6) +
geom_boxplot(fatten = 3, width = 0.15, outlier.shape = NA, alpha = 0.6,
position = position_nudge(x = -0.1, y = 0)) +
stat_summary(geom = "point", fun = "mean", size = 2, color = "white",
position = position_nudge(x = -0.1, y = 0)) +
stat_summary(geom = "point", fun = "mean", size = 1.25, color = "black",
position = position_nudge(x = -0.1, y = 0)) +
stat_summary(aes(label = round(..y.., 0)), geom = "label", fun = "mean", size = 2, fill = "white",
position = position_nudge(x = -0.32, y = 0)) +
coord_flip(ylim = c(0,4000)) +
labs(title = "Automated Measures for All Children (n = 117)",
x = "Transcription", y = "F3-F2 (Hz)", fill = "Acoustic \nMeasurement") +
scale_fill_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
scale_color_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
guides(color = "none", fill = "none") +
facet_wrap(~measurement_facet) +
theme_bw() +
theme(plot.title = element_text(hjust = .5, size = 12),
axis.text = element_text(size = 10),
axis.title = element_text(size = 12))
px_auto_onset_min_all
## Warning: The dot-dot notation (`..y..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(y)` instead.
# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave(px_auto_onset_min_all, file = "YOUR FILE PATH AND FIGURE NAME HERE",
# dpi = 300, height = 6.1, width = 6.1)
Now look at the auto measures for the subset of 14 children who were hand coded
# auto measurements
<- ggplot(rw_auto_subset_long) +
px_auto_subset aes(x = transc_labels, y = frequency, fill = transc_labels) +
::stat_dots(aes(color = transc_labels),
ggdistside = "top", dotsize = 1, alpha = 0.6) +
geom_boxplot(fatten = 3, width = 0.15, outlier.shape = NA, alpha = 0.6,
position = position_nudge(x = -0.1, y = 0)) +
stat_summary(geom = "point", fun = "mean", size = 2, color = "white",
position = position_nudge(x = -0.1, y = 0)) +
stat_summary(geom = "point", fun = "mean", size = 1.25, color = "black",
position = position_nudge(x = -0.1, y = 0)) +
stat_summary(aes(label = round(..y.., 0)), geom = "label", fun = "mean", size = 2, fill = "white",
position = position_nudge(x = -0.32, y = 0)) +
coord_flip(ylim = c(0,4000)) +
labs(title = "Automated Subset of Children (n = 14)", x = "Transcription", y = "F3-F2 (Hz)", fill = "Acoustic \nMeasurement") +
scale_fill_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
scale_color_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
guides(color = "none", fill = "none") +
facet_wrap(~measurement_facet) +
theme_bw() +
theme(plot.title = element_text(hjust = .5, size = 12),
axis.text = element_text(size = 10),
axis.title = element_text(size = 12))
px_auto_subset
# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave(px_auto_subset, file = "YOUR FILE PATH AND FIGURE NAME HERE",
# dpi = 300, height = 6.1, width = 6.1)
Plot the manually coded f3-f2
<- rw_manual_subset_uniq_long %>%
px_hand_subset ::filter(measurement == "gcf3f2") %>%
dplyrggplot() +
aes(x = transc_labels, y = frequency, fill = transc_labels) +
::stat_dots(aes(color = transc_labels),
ggdistside = "top", dotsize = 1, alpha = 0.6) +
geom_boxplot(fatten = 3, width = 0.15, outlier.shape = NA, alpha = 0.6,
position = position_nudge(x = -0.1, y = 0)) +
stat_summary(geom = "point", fun = "mean", size = 2, color = "white",
position = position_nudge(x = -0.1, y = 0)) +
stat_summary(geom = "point", fun = "mean", size = 1.25, color = "black",
position = position_nudge(x = -0.1, y = 0)) +
stat_summary(aes(label = round(..y.., 0)), geom = "label", fun = "mean", size = 2, fill = "white",
position = position_nudge(x = -0.32, y = 0)) +
coord_flip(ylim = c(0,4000)) +
labs(title = "Manually Coded Subset of Children (n = 14)", x = "Transcription", y = "F3-F2 (Hz)", fill = "Acoustic \nMeasurement",
subtitle = "Minimum F3 Visually Determined") +
scale_fill_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
scale_color_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
guides(color = "none", fill = "none") +
theme_bw() +
theme(plot.title = element_text(hjust = .5, size = 12),
axis.text = element_text(size = 10),
axis.title = element_text(size = 12),
plot.subtitle = element_text(hjust = .5))
px_hand_subset
# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave(px_hand_subset, file = "YOUR FILE PATH AND FIGURE NAME HERE",
# dpi = 300, height = 6.1, width = 6.1)
Combine the plots using the cowplot
package to create
Figure 1
# create a list of plots that will be the bottom row of the combined figure
# warning messages are referring to the [ɹ] character and can be igonred
<- cowplot::plot_grid(px_auto_subset, px_hand_subset, labels = c("B", "C"), rel_widths = c(2,1.5), ncol = 2)
bottom_row
<- plot_grid(px_auto_onset_min_all, bottom_row, labels = c("A",""), ncol = 1)
figure_1 figure_1
# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave(figure_1, file = "YOUR FILE PATH AND FIGURE NAME HERE",
# dpi = 300, height = 7.2, width = 9.3)
Plot the average untrained listener ratings for a given child as a function of automated F3-F2 difference
<- rw_manual_subset %>%
fig2A group_by(subject, ChildWordKey, f3f2min, target) %>%
summarise(avg_rating = mean(rating)) %>%
mutate(target_phon = factor(target, levels = c("r", "w"), labels = c("/\u0279/", "/w/"))) %>%
ggplot() +
aes(x = f3f2min, y = avg_rating) +
geom_smooth(method = "loess", se = FALSE, color = "grey50") +
geom_point(aes(color = target_phon), alpha = 0.75, size = 2) +
+
y_axis_rw labs(x = "Automated F3-F2 Difference (Hz)", y = "Average Untrained \nListener Rating", color = "Target\nPhoneme") +
coord_cartesian(ylim = c(0,1), xlim = c(100,4000)) +
scale_color_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
theme_bw() +
theme(legend.position = "bottom",
plot.title = element_text(hjust = .5),
axis.text = element_text(size = 10),
axis.title = element_text(size = 12))
fig2A
Plot the average untrained listener ratings for a given child as a function of manually coded F3-F2 difference
<- rw_manual_subset %>%
fig2B group_by(subject, ChildWordKey, gcf3f2, target) %>%
summarise(avg_rating = mean(rating)) %>%
mutate(target_phon = factor(target, levels = c("r", "w"), labels = c("/\u0279/", "/w/"))) %>%
ggplot() +
aes(x = gcf3f2, y = avg_rating) +
geom_smooth(method = "loess", se = FALSE, color = "grey50") +
geom_point(aes(color = target_phon), alpha = 0.75, size = 2) +
+
y_axis_rw labs(x = "Manually Coded F3-F2 Difference (Hz)", y = "Average Untrained \nListener Rating", color = "Target\nPhoneme") +
coord_cartesian(ylim = c(0,1), xlim = c(100,4000)) +
guides(color = "none") +
scale_color_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
theme_bw() +
theme(plot.title = element_text(hjust = .5),
axis.text = element_text(size = 10),
axis.title = element_text(size = 12))
fig2B
Combine the plots using the cowplot
package to create
Figure 2
<- cowplot::plot_grid(fig2A, fig2B, labels = "AUTO", ncol = 1)
figure_2 figure_2
# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave2(figure_2, file = "YOUR FILE PATH AND FIGURE NAME HERE",
# dpi = 300, height = 7.2, width = 6.1)
Correlating listener ratings based from minimum F3-F2 distance from the automated measures for all children
<- rw_auto_all %>%
figure_3 group_by(subject, ChildWordKey, f3f2min, target) %>%
summarise(avg_rating = mean(rating)) %>%
mutate(target_phon = factor(target, levels = c("r", "w"), labels = c("/\u0279/", "/w/"))) %>%
ggplot() +
aes(x = f3f2min, y = avg_rating) +
geom_smooth(method = "loess", se = FALSE, color = "grey50") +
geom_point(aes(color = target_phon), alpha = 0.3, size = 2) +
+
y_axis_rw labs(x = "Automated F3-F2 Difference (Hz)", y = "Average Untrained \nListener Rating", color = "Target\nPhoneme") +
coord_cartesian(ylim = c(0,1), xlim = c(100,4000)) +
scale_color_viridis_d(option = "magma", begin = 0.2, end = 0.7) +
theme_bw() +
theme(plot.title = element_text(hjust = .5),
axis.text = element_text(size = 10),
axis.title = element_text(size = 12))
figure_3
# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave2(figure_3, file = "YOUR FILE PATH AND FIGURE NAME HERE",
# dpi = 300, height = 6.1, width = 6.1)
The code for ROC curves is in a separate file called “ROC_Curves_Share.RMD”
The code used to create the supplemental figures. Supplemental Figure 1 is a screenshot of the VAS interface and thus no code is provided here.
Correlation between manually coded F3 value and the automated F3 minimum.
<- ggplot(rw_manual_subset_uniq, aes(x=gcf3, y=f3minmed)) +
Supp_Fig2 geom_smooth(method = "lm", se = FALSE, color = "red3") +
geom_abline(size = 0.75, slope = 1, linetype = "dashed") +
geom_point(size = 2, alpha = 0.5) +
::stat_cor(aes(label = ..r.label..), label.x = 1550, label.y = 4750) +
ggpubrcoord_cartesian(xlim = c(1500, 5000), ylim = c(1500,5000)) +
labs(x = "Manually Coded F3 (Hz)", y = "Automated F3 Minimum (Hz)") +
theme_bw() +
theme(plot.title = element_text(hjust = .5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
Supp_Fig2
# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave2(Supp_Fig2, file = "YOUR FILE PATH AND FIGURE NAME HERE",
# dpi = 300, height = 6.3, width = 6.1)
Correlation between manually coded F3 value and the automated F3 minimum
<- ggplot(rw_manual_subset_uniq, aes(x=gcf3, y=f3_onset_med)) +
Supp_Fig3 geom_smooth(method = "lm", se = FALSE, color = "red3") +
geom_abline(size = 0.75, slope = 1, linetype = "dashed") +
geom_point(size = 2, alpha = 0.5) +
::stat_cor(aes(label = ..r.label..), label.x = 1550, label.y = 4750) +
ggpubrcoord_cartesian(xlim = c(1500, 5000), ylim = c(1500,5000)) +
labs(x = "Manually Coded F3 (Hz)", y = "Automated F3 Onset (Hz)") +
theme_bw() +
theme(plot.title = element_text(hjust = .5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14))
Supp_Fig3
# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave2(Supp_Fig3, file = "YOUR FILE PATH AND FIGURE NAME HERE",
# dpi = 300, height = 6.3, width = 6.1)
sessionInfo()
## R version 4.2.1 (2022-06-23 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggdist_3.2.1 ggpubr_0.5.0 cowplot_1.1.1 forcats_1.0.0
## [5] stringr_1.5.0 dplyr_1.0.10 purrr_0.3.5 readr_2.1.3
## [9] tidyr_1.2.1 tibble_3.1.8 ggplot2_3.4.0 tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.4 sass_0.4.4 splines_4.2.1
## [4] jsonlite_1.8.4 viridisLite_0.4.1 carData_3.0-5
## [7] modelr_0.1.10 bslib_0.4.1 assertthat_0.2.1
## [10] distributional_0.3.2 highr_0.9 googlesheets4_1.0.1
## [13] cellranger_1.1.0 yaml_2.3.6 lattice_0.20-45
## [16] pillar_1.8.1 backports_1.4.1 glue_1.6.2
## [19] quadprog_1.5-8 digest_0.6.29 ggsignif_0.6.4
## [22] rvest_1.0.3 colorspace_2.0-3 Matrix_1.5-3
## [25] htmltools_0.5.3 pkgconfig_2.0.3 broom_1.0.1
## [28] haven_2.5.1 scales_1.2.1 tzdb_0.3.0
## [31] timechange_0.1.1 googledrive_2.0.0 mgcv_1.8-40
## [34] generics_0.1.3 farver_2.1.1 car_3.1-2
## [37] ellipsis_0.3.2 cachem_1.0.6 withr_2.5.0
## [40] cli_3.4.0 magrittr_2.0.3 crayon_1.5.2
## [43] readxl_1.4.1 evaluate_0.18 fs_1.5.2
## [46] fansi_1.0.3 nlme_3.1-157 rstatix_0.7.1
## [49] xml2_1.3.3 tools_4.2.1 hms_1.1.2
## [52] gargle_1.2.1 lifecycle_1.0.3 munsell_0.5.0
## [55] reprex_2.0.2 compiler_4.2.1 jquerylib_0.1.4
## [58] rlang_1.0.6 grid_4.2.1 rstudioapi_0.14
## [61] labeling_0.4.2 rmarkdown_2.18 gtable_0.3.1
## [64] abind_1.4-5 DBI_1.1.3 R6_2.5.1
## [67] lubridate_1.9.0 knitr_1.41 fastmap_1.1.0
## [70] utf8_1.2.2 stringi_1.7.8 vctrs_0.5.0
## [73] dbplyr_2.2.1 tidyselect_1.2.0 xfun_0.35