### Loading in required packages and data
library(tidyverse)

KeywordList <- read_csv("KeywordAnalysis.csv")
KeywordList[is.na(KeywordList)] <- 0 # replacing NAs with 0

### Variable set up
# The first three columns are identification columns. IDs has to be assigned
# before nStudies, because nStudies drops those columns to count the studies.
IDs <- c(1, 2, 3)
nStudies <- ncol(KeywordList[, -IDs]) # total number of studies in sample

# Row positions of two extra variables (the last two rows) that are not
# keywords:
#   funding is whether or not the study was funded
#   primary author is whether the primary author was affiliated with an
#   academic institution
fundingAndAuthor <- c(
  which(KeywordList$Keyword == "Funding"),
  which(KeywordList$Keyword == "Primary Author")
)

###
### Overall
###

### Overall summaries
# number of keywords in each study (funding/author rows excluded)
colSums(KeywordList[-fundingAndAuthor, -IDs])
# number of times each unique keyword (row) appears across the studies
cbind(KeywordList$Keyword, rowSums(KeywordList[, -IDs]))

### Our keyword analysis was divided into three categories: sample
### characteristics, assessment characteristics, and analytic characteristics

## Analysis of Sample characteristics
# The first column holds the category label; pull it out as a plain vector so
# the row mask is a logical vector rather than a one-column matrix.
Sample <- KeywordList[KeywordList[[1]] == "Sample", ]

# the number of studies with each sample keyword
SampleRowSums <- tibble(
  keyword = Sample$Keyword,
  rowsums = rowSums(Sample[, -IDs])
)
# the total number of sample keywords across all studies
sum(SampleRowSums$rowsums)

# the number of sample keywords in each study
sampleColSums <- colSums(Sample[, -IDs])
# the number of studies with at least one sample keyword
totalColWithSampleKeyword <- sum(sampleColSums > 0)
# the proportion of studies with a sample keyword
propTotalColWithSampleKeyword <- sum(sampleColSums > 0) / nStudies

## Analysis of Assessment
Assessment <- KeywordList[KeywordList[[1]] == "Assessment", ]

# the number of studies with each assessment keyword
assessmentRowSums <- tibble(
  keyword = Assessment$Keyword,
  rowsums = rowSums(Assessment[, -IDs])
)
# the number of assessment keywords in each study
AssessmentColSums <- colSums(Assessment[, -IDs])
totalColWithAssessmentKeyword <- sum(AssessmentColSums > 0)
#the number of studies with at least one assessment keyword
# the proportion of studies with an assessment keyword
propTotalColWithAssessmentKeyword <- sum(AssessmentColSums > 0) / nStudies

## The assessment category was also broken into 3 subcategories:
## purpose, type, and test subject
# the total number of 'purpose' keywords cited across all studies
sum(assessmentRowSums$rowsums[which(Assessment$Subtheme == "Purpose")])
# the total number of 'type' keywords cited across all studies
sum(assessmentRowSums$rowsums[which(Assessment$Subtheme == "Type")])
# the total number of 'test subject' keywords cited across all studies
sum(assessmentRowSums$rowsums[which(Assessment$Subtheme == "Test Subject")])

## Analysis of Analytic
Analytic <- KeywordList[KeywordList[[1]] == "Analytic", ]

# the number of studies with each analytic keyword
AnalyticRowSums <- tibble(
  keyword = Analytic$Keyword,
  rowsums = rowSums(Analytic[, -IDs])
)
# the number of analytic keywords in each study
AnalyticColSums <- colSums(Analytic[, -IDs])
# the number of studies with at least one analytic keyword
totalColWithAnalyticKeyword <- sum(AnalyticColSums > 0)
# the proportion of studies with an analytic keyword
propTotalColWithAnalyticKeyword <- sum(AnalyticColSums > 0) / nStudies

###
# Analysis of Funded
###

# Positions of the study columns (everything except the ID columns). The
# funding flag is evaluated on these columns only, so the character ID cells
# never take part in the comparison.
studyCols <- seq_len(ncol(KeywordList))[-IDs]
isFunded <- unlist(KeywordList[KeywordList$Keyword == "Funding", studyCols]) == 1

nFunded <- sum(isFunded) # total number of studies that were funded
as.numeric(isFunded)     # 0/1 funding indicator, one entry per study

# creating a data frame with just the ID columns and the funded studies
funded <- KeywordList[, c(IDs, studyCols[isFunded])]

# the number of keywords in each funded study
colSums(funded[-fundingAndAuthor, -IDs])

# the number of each keyword in funded studies
rowSumsFunded <- tibble(
  keyword = funded$Keyword,
  rowsums = rowSums(funded[, -IDs])
)
# every keyword that was in a funded study at least once
rowSumsFunded[rowSumsFunded$rowsums != 0, ]
# Per-keyword comparison of funded vs non-funded studies.
#   rowsumsF  - count of the keyword in funded studies
#   rowsumsNF - count of the keyword in non-funded studies
#   propDif   - ratio of the funded proportion to the non-funded proportion
#               (non-finite when a keyword never appears in non-funded studies)
comparison <- tibble(
  keyword = funded$Keyword,
  rowsumsF = rowSums(funded[, -IDs]),
  rowsumsNF = rowSums(KeywordList[, -IDs]) - rowSums(funded[, -IDs]),
  propDif = (rowsumsF / nFunded) / (rowsumsNF / (nStudies - nFunded))
)

### Analysis of Sample characteristics (funded studies only)
# This section is exactly the same as the previous sample section, except
# with funded studies only.
sample <- funded[funded[[1]] == "Sample", ]

# the number of funded studies with each sample keyword
# (built from the funded subset `sample`, not the all-studies `Sample`)
SampleRowSums <- tibble(
  keyword = sample$Keyword,
  rowsums = rowSums(sample[, -IDs])
)
# the number of sample keywords in each funded study
sampleColSums <- colSums(sample[, -IDs])
# the number of funded studies with at least one sample keyword
totalColWithSamplefunded <- sum(sampleColSums > 0)
# the proportion of funded studies with a sample keyword
propTotalColWithSamplefunded <- sum(sampleColSums > 0) / nFunded

# test: ho: pi_sample_nonfunded = pi_sample_funded
# Is there a difference in the proportion of studies with a sample keyword in
# funded vs non funded studies?
# No there is not
prop.test(
  c(totalColWithSampleKeyword - totalColWithSamplefunded, totalColWithSamplefunded),
  c(nStudies - nFunded, nFunded)
)

## Analysis of Assessment (funded studies only)
# NOTE: from here on Assessment, assessmentRowSums and AssessmentColSums hold
# the funded-only values and replace the all-studies versions.
Assessment <- funded[funded[[1]] == "Assessment", ]

# the number of funded studies with each assessment keyword
assessmentRowSums <- tibble(
  keyword = Assessment$Keyword,
  rowsums = rowSums(Assessment[, -IDs])
)
# the number of assessment keywords in each funded study
AssessmentColSums <- colSums(Assessment[, -IDs])
# the number of funded studies with at least one assessment keyword
totalColWithAssessmentfunded <- sum(AssessmentColSums > 0)
# the proportion of funded studies with an assessment keyword
propTotalColWithAssessmentfunded <- sum(AssessmentColSums > 0) / nFunded

# test: ho: pi_assessment_nonfunded = pi_assessment_funded
# Is there a difference in the proportion of assessment keywords in funded vs non funded studies?
# No there is not
prop.test(
  c(
    totalColWithAssessmentKeyword - totalColWithAssessmentfunded,
    totalColWithAssessmentfunded
  ),
  c(nStudies - nFunded, nFunded),
  alternative = "two.sided"
)

## Analysis of Analytic (funded studies only)
AnalyticF <- funded[funded[[1]] == "Analytic", ]

# the number of funded studies with each analytic keyword
# NOTE: AnalyticRowSums and AnalyticColSums now hold the funded-only values.
AnalyticRowSums <- tibble(
  keyword = AnalyticF$Keyword,
  rowsums = rowSums(AnalyticF[, -IDs])
)
# the number of analytic keywords in each funded study
AnalyticColSums <- colSums(AnalyticF[, -IDs])
# the number of funded studies with at least one analytic keyword
totalColWithAnalyticfunded <- sum(AnalyticColSums > 0)
# the proportion of funded studies with an analytic keyword
propTotalColWithAnalyticfunded <- sum(AnalyticColSums > 0) / nFunded

# test: ho: pi_analytic_nonfunded = pi_analytic_funded
# Is there a difference in the proportion of analytic keywords in funded vs
# non funded studies?
# No there is not
prop.test(
  c(
    totalColWithAnalyticKeyword - totalColWithAnalyticfunded,
    totalColWithAnalyticfunded
  ),
  c(nStudies - nFunded, nFunded),
  alternative = "two.sided"
)

### This code is extraneous and did not make it into the final analysis.
### It is exactly the same as the previous section, except it
### only includes studies where the primary author was affiliated with an
### academic institution

###
# Analysis of College Affiliated
###

# Positions of the study columns (everything except the ID columns). The
# affiliation flag is evaluated on these columns only: comparing the whole row
# with `!= 0` would also flag the character ID cells and inflate the count.
studyCols <- seq_len(ncol(KeywordList))[-IDs]
isCollegeAffiliated <- unlist(
  KeywordList[KeywordList$Keyword == "Primary Author", studyCols]
) != 0

# total number of studies whose primary author was college affiliated
nCollegeAffiliated <- sum(isCollegeAffiliated)
as.numeric(isCollegeAffiliated) # 0/1 indicator, one entry per study

# data frame with the ID columns plus the college-affiliated studies; the ID
# columns are kept explicitly so the `-IDs` indexing below stays valid
collegeAffiliated <- KeywordList[, c(IDs, studyCols[isCollegeAffiliated])]

# the number of keywords in each college-affiliated study
colSums(collegeAffiliated[-fundingAndAuthor, -IDs])

# the number of each keyword in college-affiliated studies
rowSumscollegeAffiliated <- tibble(
  keyword = collegeAffiliated$Keyword,
  rowsums = rowSums(collegeAffiliated[, -IDs])
)
# every keyword that was in a college-affiliated study at least once
rowSumscollegeAffiliated[rowSumscollegeAffiliated$rowsums != 0, ]

# Per-keyword comparison of affiliated vs non-affiliated studies.
#   propDif is the ratio of the affiliated proportion to the non-affiliated
#   proportion (non-finite when the non-affiliated count is zero).
# NOTE: this overwrites any earlier `comparison` object.
comparison <- tibble(
  keyword = collegeAffiliated$Keyword,
  rowsumsCA = rowSums(collegeAffiliated[, -IDs]),
  rowsumsNCA = rowSums(KeywordList[, -IDs]) - rowSums(collegeAffiliated[, -IDs]),
  propDif = (rowsumsCA / nCollegeAffiliated) /
    (rowsumsNCA / (nStudies - nCollegeAffiliated))
)

### Analysis of Sample characteristics (college-affiliated studies only)
sample <- collegeAffiliated[collegeAffiliated[[1]] == "Sample", ]

# built from the affiliated subset `sample`, not the all-studies `Sample`
SampleRowSums <- tibble(
  keyword = sample$Keyword,
  rowsums = rowSums(sample[, -IDs])
)
sampleColSums <- colSums(sample[, -IDs])
totalColWithSamplecollegeAffiliated <- sum(sampleColSums > 0)
propTotalColWithSamplecollegeAffiliated <-
  sum(sampleColSums > 0) / nCollegeAffiliated

# test: ho: pi_sample_nonaffiliated = pi_sample_affiliated
prop.test(
  c(
    totalColWithSampleKeyword - totalColWithSamplecollegeAffiliated,
    totalColWithSamplecollegeAffiliated
  ),
  c(nStudies - nCollegeAffiliated, nCollegeAffiliated)
)

## Analysis of Assessment (college-affiliated studies only)
Assessment <- collegeAffiliated[collegeAffiliated[[1]] == "Assessment", ]
assessmentRowSums <- tibble(
  keyword = Assessment$Keyword,
  rowsums = rowSums(Assessment[, -IDs])
)
AssessmentColSums <- colSums(Assessment[, -IDs])
totalColWithAssessmentcollegeAffiliated <- sum(AssessmentColSums > 0)
propTotalColWithAssessmentcollegeAffiliated <-
  sum(AssessmentColSums > 0) / nCollegeAffiliated

# test: ho: pi_assessment_nonaffiliated = pi_assessment_affiliated
prop.test(
  c(
    totalColWithAssessmentKeyword - totalColWithAssessmentcollegeAffiliated,
    totalColWithAssessmentcollegeAffiliated
  ),
  c(nStudies - nCollegeAffiliated, nCollegeAffiliated)
)

## Analysis of Analytic (college-affiliated studies only)
Analytic <- collegeAffiliated[collegeAffiliated[[1]] == "Analytic", ]
AnalyticRowSums <- tibble(
  keyword = Analytic$Keyword,
  rowsums = rowSums(Analytic[, -IDs])
)
AnalyticColSums <- colSums(Analytic[, -IDs])
totalColWithAnalyticcollegeAffiliated <- sum(AnalyticColSums > 0)
propTotalColWithAnalyticcollegeAffiliated <-
  sum(AnalyticColSums > 0) / nCollegeAffiliated

# test: ho: pi_analytic_nonaffiliated = pi_analytic_affiliated
prop.test(
  c(
    totalColWithAnalyticKeyword - totalColWithAnalyticcollegeAffiliated,
    totalColWithAnalyticcollegeAffiliated
  ),
  c(nStudies - nCollegeAffiliated, nCollegeAffiliated),
  alternative = "two.sided"
)