Department of Speech-Language-Hearing Sciences, University of Minnesota
Department of Educational Psychology, University of Minnesota
Load packages
library(tidyverse)
library(plyr)
library(cowplot)
library(ggpubr)
library(lme4)
Read in the 3 main dataframes:
1. Automated measurements from the formant triple tracker for all 117
children 2. Accuracy data
3. Hand-coded manual measurements for a subset of 14 children
# 1) read in the rw_auto_all.csv file
<- read.csv(file.choose())
rw_auto_all
# 2) read in the rw_accuracy_all.csv file
<- read.csv(file.choose())
rw_accuracy_all
# 3) read in the rw_manual_subset.csv file
<- read.csv(file.choose()) rw_manual_subset
Select the columns we care about: ChildWordKey
,
subject
, target
, and f3f2min
Only
give us the unique observations Add a column that codes target words
that start with “w” as 1, and “r” words as 0
<- rw_auto_all %>% select(ChildWordKey, subject, target, f3f2min)
rw_auto_all_pcc
<- unique(rw_auto_all_pcc)
rw_auto_all_pcc_item
$targetBin <- ifelse(rw_auto_all_pcc_item$target == "w", 1, 0) rw_auto_all_pcc_item
Create a logit mixed effects model to predict r/w based on f3f2min
<- glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun=2000000))
control
<- glmer(
mod_min_F3F2 data = rw_auto_all_pcc_item,
family = 'binomial',
control = control,
formula = targetBin ~ f3f2min + (1 + f3f2min | subject)
)
summary(mod_min_F3F2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: targetBin ~ f3f2min + (1 + f3f2min | subject)
## Data: rw_auto_all_pcc_item
## Control: control
##
## AIC BIC logLik deviance df.resid
## 3066.4 3095.1 -1528.2 3056.4 2273
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.00846 -0.96090 -0.04244 0.96446 1.72207
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 1.219e+00 1.1041968
## f3f2min 3.977e-07 0.0006307 -1.00
## Number of obs: 2278, groups: subject, 117
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.234e-01 1.558e-01 -5.286 1.25e-07 ***
## f3f2min 4.763e-04 8.465e-05 5.627 1.84e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## f3f2min -0.957
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Use that model to predict whether the production is an r or a w, and compare this to the target
<- within(
pred_rw_F3F2
rw_auto_all_pcc_item, {<- predict(object = mod_min_F3F2, newdata = rw_auto_all_pcc_item)
LogOddsf3f2 <- exp(LogOddsf3f2) / (1 + exp(LogOddsf3f2))
Probabilityf3f2 <- ifelse(Probabilityf3f2 > 0.50, 'w', 'r')
PredictedCf3f2 <- as.numeric(PredictedCf3f2 == as.character(target))
PredictedAccuracyf3f2
} )
Compute average accuracy for each child talker(i.e., model prediction = target)
<- plyr::ddply(pred_rw_F3F2, ~ subject, summarise, Avg = mean(PredictedAccuracyf3f2))
ROC_f3f2_ID
# add ID column for merging
$ID <- ROC_f3f2_ID$subject ROC_f3f2_ID
Merge the rw_accuracy_all
dataframe (which has a bunch
of other measures) with the ROC_f3f2_ID
accuracy file
created above
<- merge(rw_accuracy_all, ROC_f3f2_ID, by="ID") VASbyAcoustics
Create the F3-F2 difference column for the gold hand-coded children
Select the columns we care about: ChildWordKey
,
subject
, target
, and gcf3f2min
for each child utterance Add a column that codes target words that start
with “w” as 1, and “r” words as 0
$gcf3f2min = rw_manual_subset$gcf3 - rw_manual_subset$gcf2
rw_manual_subset
<- rw_manual_subset[,c("ChildWordKey", "subject", "target", "gcf3f2min")] %>% unique()
rw_auto_all_pcc_item_HC
$targetBin=ifelse(rw_auto_all_pcc_item_HC$target=="w",1,0) rw_auto_all_pcc_item_HC
Create a logit mixed effects model to predict r/w based on gcf3f2min
= glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun=2000000))
control.HC
<- glmer(
mod_min_F3F2_HC data = rw_auto_all_pcc_item_HC,
family = 'binomial',
control = control,
formula = targetBin~gcf3f2min + (1 + gcf3f2min | subject)
)
summary(mod_min_F3F2_HC)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: targetBin ~ gcf3f2min + (1 + gcf3f2min | subject)
## Data: rw_auto_all_pcc_item_HC
## Control: control
##
## AIC BIC logLik deviance df.resid
## 251.0 269.0 -120.5 241.0 264
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7962 -0.5382 0.0620 0.5018 4.7455
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 1.443e+00 1.201221
## gcf3f2min 9.255e-07 0.000962 -1.00
## Number of obs: 269, groups: subject, 14
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.9454536 0.5992529 -6.584 4.58e-11 ***
## gcf3f2min 0.0023491 0.0003868 6.074 1.25e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## gcf3f2min -0.940
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Use that model to predict whether the production is an r or a w, and compare this to the target
<- within(
pred_rw_F3F2_HC
rw_auto_all_pcc_item_HC, {<- predict(object = mod_min_F3F2_HC, newdata = rw_auto_all_pcc_item_HC)
LogOddsf3f2.HC <- exp(LogOddsf3f2.HC) / (1 + exp(LogOddsf3f2.HC))
Probabilityf3f2.HC <- ifelse(Probabilityf3f2.HC > 0.50, 'w', 'r')
PredictedCf3f2.HC <- as.numeric(PredictedCf3f2.HC == as.character(target))
PredictedAccuracyf3f2.HC
} )
Compute average accuracy for each child talker (i.e., model prediction = target)
<- plyr::ddply(pred_rw_F3F2_HC,~subject,summarise, avg.hc = mean(PredictedAccuracyf3f2.HC))
ROC_f3f2_ID_HC
$ID <- ROC_f3f2_ID_HC$subject ROC_f3f2_ID_HC
Merge the rw_accuracy_all
dataframe (which has a bunch
of other measures) with the ROC_f3f2_ID_HC
accuracy file
created above
<- merge(rw_accuracy_all, ROC_f3f2_ID_HC, by="ID") VASbyAcoustics_HC
**Please Note: VASbyAcoustics_HC
is a dataframe that
just has the hand coded kids, but is otherwise the same as
VASbyAcoustics
Comparing VAS Measures of the Robustness of the /r/-/w/ contrast with Transcribed Accuracy for all Children
<- ggplot(data = VASbyAcoustics) +
fig4A aes(x = TP2_both_len, y = ETP_PCC) +
stat_smooth(method ="lm", se = FALSE, col="red3") +
geom_point(size = 2, alpha = 0.5) +
::stat_cor(method = "pearson", p.accuracy = 0.001, label.x = 0.23, label.y = 0.92) +
ggpubrcoord_cartesian(xlim = c(0.2, 1), ylim = c(0.2, 1)) +
labs(x = "Proportion of /\u0279/ and /w/ Transcribed as Accurate",
y = "Robustness of /\u0279/-/w/ Contrast from VAS") +
theme_bw() +
theme(plot.title = element_text(hjust = .5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14))
fig4A
Comparing Automatically Generated Acoustic Measures of the Robustness of the /r/-/w/ contrast with Transcribed Accuracy for all Children
<- ggplot(data = VASbyAcoustics) +
fig4B aes(x = TP2_both_len, y = Avg) +
stat_smooth(method ="lm", se = FALSE, col="red3") +
geom_point(size = 2, alpha = 0.5) +
::stat_cor(method = "pearson", p.accuracy = 0.001, label.x = 0.23, label.y = 0.92) +
ggpubrcoord_cartesian(xlim = c(0.2, 1), ylim = c(0.2, 1)) +
labs(x = "Proportion of /\u0279/ and /w/ Transcribed as Accurate",
y = "Robustness of /\u0279/-/w/ Contrast from Acoustics") +
theme_bw() +
theme(plot.title = element_text(hjust = .5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14))
fig4B
Combine the plots using the cowplot
package to create
Figure 4
<- cowplot::plot_grid(fig4A, fig4B, labels = "AUTO", nrow = 1)
fig4 fig4
# If you want to save, simply uncomment the ggsave code below and add the file path of where you want to save the figure,
# We saved all of the figures in the PNG format using the dpi, height, and width values below
# ggsave2(fig4, file = "YOUR FILE PATH AND FIGURE NAME HERE",
# dpi = 300, height = 6.1, width = 10.1)
Comparing VAS Measures of the Robustness of the /r/-/w/ contrast with Transcribed Accuracy for Subset of 14 Children
<- ggplot(data = VASbyAcoustics_HC) +
fig1X aes(x = TP2_both_len, y = ETP_PCC) +
stat_smooth(method ="lm", se = FALSE, col="red3") +
geom_point(size = 2, alpha = 0.5) +
::stat_cor(method = "pearson", p.accuracy = 0.001, label.x = 0.23, label.y = 0.92) +
ggpubrcoord_cartesian(xlim = c(0.2, 1), ylim = c(0.2, 1)) +
labs(x = "Proportion of /\u0279/ and /w/ Transcribed as Accurate",
y = "Robustness of /\u0279/-/w/ Contrast from VAS") +
theme_bw() +
theme(plot.title = element_text(hjust = .5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14))
fig1X
Comparing Manually-coded Acoustic Measures of the Robustness of the /r/-/w/ contrast with Transcribed Accuracy for Subset of 14 Children
<- ggplot(data = VASbyAcoustics_HC) +
fig2X aes(x = TP2_both_len, y = avg.hc) +
stat_smooth(method ="lm", se = FALSE, col="red3") +
geom_point(size = 2, alpha = 0.5) +
::stat_cor(method = "pearson", p.accuracy = 0.001, label.x = 0.23, label.y = 0.92) +
ggpubrcoord_cartesian(xlim = c(0.2, 1), ylim = c(0.2, 1)) +
labs(x = "Proportion of /\u0279/ and /w/ Transcribed as Accurate",
y = "Robustness of /\u0279/-/w/ Contrast from Acoustics") +
theme_bw() +
theme(plot.title = element_text(hjust = .5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14))
fig2X
Comparing Automatically Generated Acoustic Measures of the Robustness of the /r/-/w/ contrast with Transcribed Accuracy for Subset of 14 Children
<- VASbyAcoustics %>% filter(ID %in% c("020L", "030L", "040L", "058L", "064L", "097L", "101L", "106L", "612L", "615L", "638L", "665L", "669L", "681L"))
VASbyAcoustics_short
<-
fig3X ggplot(data = VASbyAcoustics_short) +
aes(x = TP2_both_len, y = Avg) +
stat_smooth(method ="lm", se = FALSE, col="red3") +
geom_point(size = 2, alpha = 0.5) +
::stat_cor(method = "pearson", p.accuracy = 0.001, label.x = 0.23, label.y = 0.92) +
ggpubrcoord_cartesian(xlim = c(0.2, 1), ylim = c(0.2, 1)) +
labs(x = "Proportion of /\u0279/ and /w/ Transcribed as Accurate",
y = "Robustness of /\u0279/-/w/ Contrast from Acoustics") +
theme_bw() +
theme(plot.title = element_text(hjust = .5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14))
fig3X
sessionInfo()
## R version 4.2.1 (2022-06-23 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] lme4_1.1-31 Matrix_1.5-3 ggpubr_0.5.0 cowplot_1.1.1
## [5] plyr_1.8.8 forcats_1.0.0 stringr_1.5.0 dplyr_1.0.10
## [9] purrr_0.3.5 readr_2.1.3 tidyr_1.2.1 tibble_3.1.8
## [13] ggplot2_3.4.0 tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.4 sass_0.4.4 jsonlite_1.8.4
## [4] splines_4.2.1 carData_3.0-5 modelr_0.1.10
## [7] bslib_0.4.1 assertthat_0.2.1 highr_0.9
## [10] googlesheets4_1.0.1 cellranger_1.1.0 yaml_2.3.6
## [13] pillar_1.8.1 backports_1.4.1 lattice_0.20-45
## [16] glue_1.6.2 digest_0.6.29 ggsignif_0.6.4
## [19] minqa_1.2.5 rvest_1.0.3 colorspace_2.0-3
## [22] htmltools_0.5.3 pkgconfig_2.0.3 broom_1.0.1
## [25] haven_2.5.1 scales_1.2.1 tzdb_0.3.0
## [28] timechange_0.1.1 googledrive_2.0.0 mgcv_1.8-40
## [31] farver_2.1.1 generics_0.1.3 car_3.1-2
## [34] ellipsis_0.3.2 cachem_1.0.6 withr_2.5.0
## [37] cli_3.4.0 magrittr_2.0.3 crayon_1.5.2
## [40] readxl_1.4.1 evaluate_0.18 fs_1.5.2
## [43] fansi_1.0.3 nlme_3.1-157 MASS_7.3-57
## [46] rstatix_0.7.1 xml2_1.3.3 tools_4.2.1
## [49] hms_1.1.2 gargle_1.2.1 lifecycle_1.0.3
## [52] munsell_0.5.0 reprex_2.0.2 compiler_4.2.1
## [55] jquerylib_0.1.4 rlang_1.0.6 nloptr_2.0.3
## [58] grid_4.2.1 rstudioapi_0.14 labeling_0.4.2
## [61] rmarkdown_2.18 boot_1.3-28 gtable_0.3.1
## [64] abind_1.4-5 DBI_1.1.3 R6_2.5.1
## [67] lubridate_1.9.0 knitr_1.41 fastmap_1.1.0
## [70] utf8_1.2.2 stringi_1.7.8 Rcpp_1.0.9
## [73] vctrs_0.5.0 dbplyr_2.2.1 tidyselect_1.2.0
## [76] xfun_0.35