Department of Speech-Language-Hearing Sciences, University of Minnesota
Department of Educational Psychology, University of Minnesota
Load packages
library(tidyverse)
library(cowplot)
library(ggpubr)
library(ggdist)
library(lme4)
library(MuMIn)
library(lmerTest)
library(car)
Read in the two main dataframes:
1. Automated measurements from the formant triple tracker for all 117
children 2. Hand-coded manual measurements for a subset of 14
children
# 1) read in the rw_auto_all.csv file
<- read.csv(file.choose())
rw_auto_all
# 2) read in the rw_manual_subset.csv file
<- read.csv(file.choose()) rw_manual_subset
Add two variables to the rw_manual_subset
file which
will later be used in modelling
<- rw_auto_all %>%
rw_auto_all mutate(
transc.acc = case_when(
%in% c("r","rw") & target=="r" ~ 1,
transc %in% c("w","wr") & target=="w" ~ 1,
transc %in% c("r","rw") & target=="w" ~ 0,
transc %in% c("w","wr") & target=="r" ~ 0
transc
)
)
<- rw_manual_subset %>%
rw_manual_subset mutate(
R.Indicator=1*(transc %in% c("r", "rw")),
gcf3f2 = gcf3 - gcf2,
transc.acc = case_when(
%in% c("r","rw") & target=="r" ~ 1,
transc %in% c("w","wr") & target=="w" ~ 1,
transc %in% c("r","rw") & target=="w" ~ 0,
transc %in% c("w","wr") & target=="r" ~ 0
transc
) )
Create a subset of the rw_auto_all
dataframe to only
include each unique word token for each child speaker (i.e. remove the
individual listener ratings data, which would duplicate each word spoken
by each child)
#removes duplicate files for when you aren't using the listener ratings
<- distinct(rw_auto_all, Filename, .keep_all = TRUE)
rw_auto_unique <- distinct(rw_manual_subset, Filename, .keep_all = TRUE)
rw_manual_subset_uniq
# filter rw_auto_unique to only include those 14 children
<- unique(rw_manual_subset$subject)
child_subest
<- rw_auto_unique %>%
rw_auto_subset ::filter(subject %in% child_subest) dplyr
Convert the automated measurements dataframe into long format so frequency is one column and measurement (min/onset) is another column
# auto measurements of all children
<- rw_auto_unique %>%
rw_auto_unique_long select(c("subject", "transc", "target", "f3f2onset", "f3f2min")) %>%
pivot_longer(cols = c("f3f2onset", "f3f2min"),
names_to = "measurement",
values_to = "frequency") %>%
mutate(transc_labels = factor(transc, levels = c("r", "rw", "wr", "w"),
labels = c("[\u0279]", "[\u0279]:[w]", "[w]:[\u0279]", "[w]")),
measurement_facet = factor(measurement,
levels = c("f3f2onset", "f3f2min"),
labels = c("Onset", "Minimum F3")))
# filter rw_auto_unique_long to only include those 14 children
<- unique(rw_manual_subset$subject)
subest
<- rw_auto_unique_long %>%
rw_auto_subset_long ::filter(subject %in% subest) dplyr
Convert the manually coded measurements dataframe into long format so frequency is one column and measurement (min/onset) is another column
# hand-coded measurements for subset of 14 children
<- rw_manual_subset_uniq %>%
rw_manual_subset_uniq_long select(c("subject", "transc", "target", "f3f2onset", "f3f2min", "gcf3f2")) %>%
pivot_longer(cols = c("f3f2onset", "f3f2min", "gcf3f2"),
names_to = "measurement",
values_to = "frequency") %>%
mutate(transc_labels = factor(transc, levels = c("r", "rw", "wr", "w"),
labels = c("[\u0279]", "[\u0279]:[w]", "[w]:[\u0279]", "[w]")),
measurement_facet = factor(measurement,
levels = c("f3f2onset", "f3f2min", "gcf3f2"),
labels = c("Onset", "Minimum F3", "Minimum F3 \nVisually Determined")))
The following R code is organized by sub-heading of the results section.
Frequencies table of professional transcriber characterizations
table(rw_auto_unique$transc)
##
## r rw w wr
## 423 201 1245 409
%>% filter(target=="r") %>%
rw_auto_unique summarize(mean(transc=="r"))
mean(transc == "r") <dbl> | ||||
---|---|---|---|---|
0.3687445 |
%>% filter(target=="r") %>%
rw_auto_unique summarize(mean(transc %in% c("r", "rw")))
mean(transc %in% c("r", "rw")) <dbl> | ||||
---|---|---|---|---|
0.5223881 |
%>% filter(target=="w") %>%
rw_auto_unique summarize(mean(transc=="w"))
mean(transc == "w") <dbl> | ||||
---|---|---|---|---|
0.8235294 |
%>% filter(target=="w") %>%
rw_auto_unique summarize(mean(transc %in% c("w", "wr")))
mean(transc %in% c("w", "wr")) <dbl> | ||||
---|---|---|---|---|
0.9745391 |
Average VAS Rations by transcription
%>%
rw_auto_unique group_by(transc) %>%
summarize(mean(rating))
transc <chr> | mean(rating) <dbl> | |||
---|---|---|---|---|
r | 0.2246196 | |||
rw | 0.3368821 | |||
w | 0.7082958 | |||
wr | 0.5512385 |
Correlation between lowest F3 and onset F3 with manually-coded F3
cor(rw_manual_subset_uniq$gcf3, rw_manual_subset_uniq$f3minmed) -> r1
cor(rw_manual_subset_uniq$gcf3, rw_manual_subset_uniq$f3_onset_med) -> r2
= dim(rw_manual_subset_uniq)[1]-1
df
r1;r2;df
## [1] 0.5864477
## [1] 0.4731004
## [1] 267
Fisher’s Z transformation
= .5*(log(1+r1)-log(1-r1))
z1 = .5*(log(1+r2)-log(1-r2))
z2 = sqrt(2 / (df))
s
= (z2 - z1)/s
z_d = pnorm(z_d, lower.tail=TRUE)
p p
## [1] 0.03380382
table(rw_auto_unique$f3mincolshort)
##
## f3_endpt_med f3_midpt_med f3_onset_med f3_qrtr_med f3_thrqrtr_med
## 381 461 649 476 311
table(rw_auto_unique$f3mincolshort) / dim(rw_auto_unique)[1]
##
## f3_endpt_med f3_midpt_med f3_onset_med f3_qrtr_med f3_thrqrtr_med
## 0.1672520 0.2023705 0.2848990 0.2089552 0.1365233
Correlation between automated F3-F2 difference and manually coded F3-F2 difference
cor(rw_manual_subset_uniq$gcf3f2, rw_manual_subset_uniq$f3f2min, method="pearson")
## [1] 0.4604875
cor.test(rw_manual_subset_uniq$gcf3f2, rw_manual_subset_uniq$f3f2min, method="pearson")
##
## Pearson's product-moment correlation
##
## data: rw_manual_subset_uniq$gcf3f2 and rw_manual_subset_uniq$f3f2min
## t = 8.4608, df = 266, p-value = 1.797e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3605607 0.5499639
## sample estimates:
## cor
## 0.4604875
Model fitting F3-F2 differences and narrow transcriptions
<- rw_auto_all %>%
rw_auto_all mutate(R.Indicator=1*(transc %in% c("r", "rw")))
<- glmer(R.Indicator ~ f3f2min + (1|ChildID),
mdl1 data = rw_auto_all,
family = poisson(link="log")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0918093 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
r.squaredLR(mdl1)
## [1] 0.2304827
## attr(,"adj.r.squared")
## [1] 0.3179875
Model with automated F3-F2 differences for the subset of 14 children
<- glmer(R.Indicator ~ f3f2min + (1|ChildID),
mdl2 data = rw_manual_subset_uniq,
family = poisson(link="log")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0600304 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
r.squaredLR(mdl2)
## [1] 0.1127922
## attr(,"adj.r.squared")
## [1] 0.1456392
Model with manually coded F3-F2 differences for the subset of 14 children
<- glmer(R.Indicator ~ gcf3f2 + (1|ChildID),
mdl3 data = rw_manual_subset_uniq,
family = poisson(link="log")
)
## boundary (singular) fit: see help('isSingular')
r.squaredLR(mdl3)
## [1] 0.2824487
## attr(,"adj.r.squared")
## [1] 0.3647025
Model fitting automated F3-F2 differences and listener ratings
<- lmer(data = rw_auto_all,
mdl4 formula = "rating ~ f3f2min + (1|ChildID)")
r.squaredLR(mdl4)
## [1] 0.1544851
## attr(,"adj.r.squared")
## [1] 0.3691238
Anova(mdl4)
Chisq <dbl> | Df <dbl> | Pr(>Chisq) <dbl> | ||
---|---|---|---|---|
f3f2min | 1989.821 | 1 | 0 |
Model fitting automated F3-F2 differences for the subset of 14 children
<- lmer(data = rw_manual_subset_uniq, formula = "rating ~ f3f2min + (1|ChildID)")
mdl5 r.squaredLR(mdl5)
## [1] 0.08437719
## attr(,"adj.r.squared")
## [1] 0.190504
Anova(mdl5)
Chisq <dbl> | Df <dbl> | Pr(>Chisq) <dbl> | ||
---|---|---|---|---|
f3f2min | 24.61439 | 1 | 7.002703e-07 |
Model fitting manually coded F3-F2 differences for the subset of 14 children
<- lmer(data = rw_manual_subset_uniq, formula = "rating ~ gcf3f2 + (1|ChildID)")
mdl6 r.squaredLR(mdl6)
## [1] 0.3792751
## attr(,"adj.r.squared")
## [1] 0.8563147
Anova(mdl6)
Chisq <dbl> | Df <dbl> | Pr(>Chisq) <dbl> | ||
---|---|---|---|---|
gcf3f2 | 163.9149 | 1 | 1.578862e-37 |
For the correlations between ROC and other factors, see the
ROC_Curves_Share.RMD
file
sessionInfo()
## R version 4.2.1 (2022-06-23 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] car_3.1-2 carData_3.0-5 lmerTest_3.1-3 MuMIn_1.47.1
## [5] lme4_1.1-31 Matrix_1.5-3 ggdist_3.2.1 ggpubr_0.5.0
## [9] cowplot_1.1.1 forcats_1.0.0 stringr_1.5.0 dplyr_1.0.10
## [13] purrr_0.3.5 readr_2.1.3 tidyr_1.2.1 tibble_3.1.8
## [17] ggplot2_3.4.0 tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.4 sass_0.4.4 jsonlite_1.8.4
## [4] splines_4.2.1 modelr_0.1.10 bslib_0.4.1
## [7] assertthat_0.2.1 distributional_0.3.2 stats4_4.2.1
## [10] googlesheets4_1.0.1 cellranger_1.1.0 yaml_2.3.6
## [13] numDeriv_2016.8-1.1 pillar_1.8.1 backports_1.4.1
## [16] lattice_0.20-45 glue_1.6.2 digest_0.6.29
## [19] ggsignif_0.6.4 minqa_1.2.5 rvest_1.0.3
## [22] colorspace_2.0-3 htmltools_0.5.3 pkgconfig_2.0.3
## [25] broom_1.0.1 haven_2.5.1 scales_1.2.1
## [28] tzdb_0.3.0 timechange_0.1.1 googledrive_2.0.0
## [31] generics_0.1.3 farver_2.1.1 ellipsis_0.3.2
## [34] cachem_1.0.6 withr_2.5.0 cli_3.4.0
## [37] magrittr_2.0.3 crayon_1.5.2 readxl_1.4.1
## [40] evaluate_0.18 fs_1.5.2 fansi_1.0.3
## [43] nlme_3.1-157 MASS_7.3-57 rstatix_0.7.1
## [46] xml2_1.3.3 tools_4.2.1 hms_1.1.2
## [49] gargle_1.2.1 lifecycle_1.0.3 munsell_0.5.0
## [52] reprex_2.0.2 compiler_4.2.1 jquerylib_0.1.4
## [55] rlang_1.0.6 nloptr_2.0.3 grid_4.2.1
## [58] rstudioapi_0.14 rmarkdown_2.18 boot_1.3-28
## [61] gtable_0.3.1 abind_1.4-5 DBI_1.1.3
## [64] R6_2.5.1 lubridate_1.9.0 knitr_1.41
## [67] fastmap_1.1.0 utf8_1.2.2 stringi_1.7.8
## [70] Rcpp_1.0.9 vctrs_0.5.0 dbplyr_2.2.1
## [73] tidyselect_1.2.0 xfun_0.35