#########################################################################
##
## Data Preprocessing Script for Hofelich Mohr, Sell, & Lindsay (in prep)
##
## UPDATED: 3/23/2015
#########################################################################
## Reads in files:
##
## HMSL_data_allcompleted.csv
##   (dataset with only complete responses)
##
## HMSL_Elaboration_scores_all.csv
##   (combined raw elaboration scores from judges)
##
## HMSL_Originality_scores_all.csv
##   (combined raw originality scores from judges)
##
## HMSL_Flexibility_scores_all.csv
##   (combined raw flexibility scores from judges)
##
#########################################################################
## Required libraries
# Install a package only if it is missing, then attach it.
# (`require(x) || install.packages(x)` fails when the package is missing:
# install.packages() returns NULL, which `||` rejects, and the freshly
# installed package would never be attached.)
for (pkg in c("stringr", "psych")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

#########################################################################
## Reading in the data files
#########################################################################
# NOTE(review): machine-specific path; every relative file name below
# (inputs and the final output) is resolved against this directory.
setwd("/Volumes/survey/591330_RSS/591330_dd/Data Sharing")
data <- read.csv(file = "HMSL_data_allcompleted.csv", header = TRUE)
elaboration.scores <- read.csv(file = "HMSL_Elaboration_scores_all.csv", header = TRUE)
originality.scores <- read.csv(file = "HMSL_Originality_scores_all.csv", header = TRUE)
flexibility.scores <- read.csv(file = "HMSL_Flexibility_scores_all.csv", header = TRUE)

#########################################################################
## Splitting responses from the unsegmented boxes
#########################################################################
#Qseg_1 to Qseg_50 seem to be the fill in the blank
#Qunseg is essay box
#looks like people used "." "," "" or "-" which appears as multiple spaces
unseg <- subset(data, data$Segmented == "unsegmented")

# Number of response slots each essay is split into (Qunseg_1 .. Qunseg_50)
n_slots <- 50
essay <- as.character(unseg$Qunseg)

#count different separators to determine which was used for each respondent
# str_count() is vectorised over the essays; a missing essay counts as 0 hits.
count_separator <- function(pattern) {
  hits <- str_count(essay, pattern)
  hits[is.na(hits)] <- 0L
  hits
}
unseg$ncommas_unseg <- count_separator(",")
unseg$nperiods_unseg <- count_separator("\\.")
unseg$nenters_unseg <- count_separator(" ")
unseg$ndash_unseg <- count_separator("-")
unseg$nsemicol_unseg <- count_separator(";")

# Highest separator count per respondent, taken from the named count columns
# rather than from fixed column positions.
unseg$maxused <- pmax(
  unseg$ncommas_unseg,
  unseg$nperiods_unseg,
  unseg$nenters_unseg,
  unseg$ndash_unseg,
  unseg$nsemicol_unseg
)

#Split responses using the separator used most often
# Ties are resolved in the priority comma > period > dash > semicolon > space,
# so the lowest-priority separator is assigned first and overwritten by the
# higher-priority ones.
separator <- rep(" ", nrow(unseg))
separator[unseg$nsemicol_unseg == unseg$maxused] <- ";"
separator[unseg$ndash_unseg == unseg$maxused] <- "-"
separator[unseg$nperiods_unseg == unseg$maxused] <- "\\."
separator[unseg$ncommas_unseg == unseg$maxused] <- ","

pieces <- matrix(NA_character_, nrow = nrow(unseg), ncol = n_slots)
for (i in seq_len(nrow(unseg))) {
  pieces[i, ] <- str_split_fixed(essay[i], pattern = separator[i], n = n_slots)
}
split <- as.data.frame(pieces, stringsAsFactors = FALSE)
names(split) <- paste("Qunseg", seq_len(n_slots), sep = "_")

unseg1 <- data.frame(QLogin_1 = unseg$QLogin_1, split, check.names = FALSE)

#merge split unsegmented responses with data
data1 <- merge(data, unseg1, by = "QLogin_1", all.x = TRUE, all.y = TRUE)

#check to make sure unsegmented split correctly (look at file in excel)
#write.csv(data1, file="Datawithresponses_split.csv")

#########################################################################
## Correcting split errors
#########################################################################
#Need
# to separate responses:
# Each group below first shows the respondent's row, then overwrites the
# individual Qunseg_* cells with the hand-corrected split.

data1[which(data1$QLogin_1 == "989BoJRx"), ]
data1[which(data1$QLogin_1 == "989BoJRx"), "Qunseg_3"] <- "Used as a bracelet."
data1[which(data1$QLogin_1 == "989BoJRx"), "Qunseg_4"] <- "Can be used to make a sling shot."

data1[which(data1$QLogin_1 == "619m9P17"), ]
data1[which(data1$QLogin_1 == "619m9P17"), "Qunseg_3"] <- "fireplace."
data1[which(data1$QLogin_1 == "619m9P17"), "Qunseg_4"] <- "weight"

data1[which(data1$QLogin_1 == "L9otc4KT"), ]
data1[which(data1$QLogin_1 == "L9otc4KT"), "Qunseg_4"] <- "hit together to use as an instrument."
data1[which(data1$QLogin_1 == "L9otc4KT"), "Qunseg_5"] <- "Put at the bottom of a big flower pot to take up space so use less dirt."
data1[which(data1$QLogin_1 == "L9otc4KT"), "Qunseg_6"] <- "Use as a paver"

data1[which(data1$QLogin_1 == "oP2kzb61"), ]
data1[which(data1$QLogin_1 == "oP2kzb61"), "Qunseg_3"] <- "It is a foundation"
data1[which(data1$QLogin_1 == "oP2kzb61"), "Qunseg_4"] <- "it can be used as a weapon"
data1[which(data1$QLogin_1 == "oP2kzb61"), "Qunseg_5"] <- "it can be used as an anchor"

data1[which(data1$QLogin_1 == "DXQb2R2M"), ]
data1[which(data1$QLogin_1 == "DXQb2R2M"), "Qunseg_1"] <- "Wall"
data1[which(data1$QLogin_1 == "DXQb2R2M"), "Qunseg_2"] <- "Street"
data1[which(data1$QLogin_1 == "DXQb2R2M"), "Qunseg_3"] <- "building"
data1[which(data1$QLogin_1 == "DXQb2R2M"), "Qunseg_4"] <- "sidewalk"
data1[which(data1$QLogin_1 == "DXQb2R2M"), "Qunseg_5"] <- "school"
data1[which(data1$QLogin_1 == "DXQb2R2M"), "Qunseg_6"] <- "house"
data1[which(data1$QLogin_1 == "DXQb2R2M"), "Qunseg_7"] <- "pixies"
data1[which(data1$QLogin_1 == "DXQb2R2M"), "Qunseg_8"] <- "firehouse"

data1[which(data1$QLogin_1 == "57ZLnnlc"), ]
# Hand-corrected splits (continued): overwrite individual Qunseg_* cells.
data1[which(data1$QLogin_1 == "57ZLnnlc"), "Qunseg_7"] <- "check if a baked item (cake, muffin) is moist or able to be removed from the oven"
data1[which(data1$QLogin_1 == "57ZLnnlc"), "Qunseg_8"] <- ""

subset(data1, data1$QLogin_1 == "V8mxLJpC")
data1[which(data1$QLogin_1 == "V8mxLJpC"), "Qunseg_4"] <- "use to open a key lock"
data1[which(data1$QLogin_1 == "V8mxLJpC"), "Qunseg_5"] <- "make paperclip sculptures"
data1[which(data1$QLogin_1 == "V8mxLJpC"), "Qunseg_6"] <- "jewelry"
data1[which(data1$QLogin_1 == "V8mxLJpC"), "Qunseg_7"] <- "hang things with it"
data1[which(data1$QLogin_1 == "V8mxLJpC"), "Qunseg_8"] <- "use as a clothes pin if you lose a button"
data1[which(data1$QLogin_1 == "V8mxLJpC"), "Qunseg_9"] <- "Use as a utensil if you forget your fork"

subset(data1, data1$QLogin_1 == "l9pCqCGT")
data1[which(data1$QLogin_1 == "l9pCqCGT"), "Qunseg_1"] <- "button for clothes"
data1[which(data1$QLogin_1 == "l9pCqCGT"), "Qunseg_2"] <- "earing"
data1[which(data1$QLogin_1 == "l9pCqCGT"), "Qunseg_3"] <- "key"
data1[which(data1$QLogin_1 == "l9pCqCGT"), "Qunseg_4"] <- "key chain"
data1[which(data1$QLogin_1 == "l9pCqCGT"), "Qunseg_5"] <- "art"
data1[which(data1$QLogin_1 == "l9pCqCGT"), "Qunseg_6"] <- "pick"
data1[which(data1$QLogin_1 == "l9pCqCGT"), "Qunseg_7"] <- "zipper"
data1[which(data1$QLogin_1 == "l9pCqCGT"), "Qunseg_8"] <- "clip"
data1[which(data1$QLogin_1 == "l9pCqCGT"), "Qunseg_9"] <- "poker"

subset(data1, data1$QLogin_1 == "bU801XR6")
data1[which(data1$QLogin_1 == "bU801XR6"), "Qunseg_1"] <- "FOR PAPERS"
data1[which(data1$QLogin_1 == "bU801XR6"), "Qunseg_2"] <- "FOR MONEY"
data1[which(data1$QLogin_1 == "bU801XR6"), "Qunseg_3"] <- "FOR BOOKMARKS"
data1[which(data1$QLogin_1 == "bU801XR6"), "Qunseg_4"] <- "FOR OPENING LOCKS"

#########################################################################
## Prepare data for scoring
#########################################################################
#Isolate the creativity responses for scoring - want to put in long version
# Segmented respondents answered in Qseg_1..Qseg_50, unsegmented ones in the
# split Qunseg_1..Qunseg_50; both are renamed to resp.1..resp.50 so the two
# groups can be stacked.
n_resp_slots <- 50
resp_cols <- paste("resp", seq_len(n_resp_slots), sep = ".")

seg <- subset(
  data1, data1$Segmented == "segmented",
  select = c("QLogin_1", paste("Qseg", seq_len(n_resp_slots), sep = "_"), "Item", "Segmented", "lines")
)
names(seg)[1 + seq_len(n_resp_slots)] <- resp_cols

unseg <- subset(
  data1, data1$Segmented == "unsegmented",
  select = c("QLogin_1", paste("Qunseg", seq_len(n_resp_slots), sep = "_"), "Item", "Segmented", "lines")
)
names(unseg)[1 + seq_len(n_resp_slots)] <- resp_cols

dvt <- rbind(seg, unseg)

# One long row per respondent x slot; `time` holds the slot number (1..50).
# The row names are sized from the data instead of a fixed 1:33451, so the
# reshape keeps working if the number of respondents changes.
dvtlong <- reshape(
  dvt,
  varying = resp_cols,
  v.names = "response",
  times = seq_len(n_resp_slots),
  idvar = "QLogin_1",
  direction = "long",
  new.row.names = seq_len(nrow(dvt) * n_resp_slots)
)
row.names(dvtlong) <- seq_len(nrow(dvtlong))
dvtlong <- dvtlong[order(dvtlong$QLogin_1), ]

#get rid of NAs and blanks in responses
dvtlong1 <- subset(
  dvtlong,
  !is.na(dvtlong$response) & dvtlong$response != "" & dvtlong$response != " "
)

#remove subjects who entered non-responses (like "none")
dvtlong1[which(dvtlong1$response == "None"), ]
dvtlong1 <- subset(dvtlong1, dvtlong1$QLogin_1 != "2aA4wIGJ")

##################################################################################
## Checking incomplete and blank responses (shouldn't count towards score)
##################################################################################
#Checking ids based on notes:
subset(dvtlong1, dvtlong1$QLogin_1 == "iR7JwQyj")
subset(data, data$QLogin_1 == "iR7JwQyj")
#as is how it was entered - will not count this towards score
# This statement creates the badresp column: 1 = exclude from scoring, 0 = keep.
dvtlong1$badresp <- ifelse(dvtlong1$QLogin_1 == "iR7JwQyj" & dvtlong1$response == "as", 1, 0)

subset(dvtlong1, dvtlong1$QLogin_1 == "9mlyecrJ")
subset(data$Qunseg, data$QLogin_1 == "9mlyecrJ")
#this is how it was entered - will not count this towards score
dvtlong1$badresp[which(dvtlong1$QLogin_1 == "9mlyecrJ" & dvtlong1$response == " burn and use as ")] <- 1

# For each respondent in `entries`: print their long-format rows and (where
# requested) their raw survey entry, then mark the listed response as bad.
#   long     long-format response table with QLogin_1, response, badresp
#   raw      raw survey data with QLogin_1 and Qunseg
#   entries  data frame with character columns
#              id        respondent login
#              response  exact response text to flag (NA = inspect only)
#              raw       "row"    = print the respondent's row of `raw`
#                        "Qunseg" = print only the raw essay text
#                        "none"   = do not print the raw entry
# Returns `long` with badresp set to 1 on the matching rows.
flag_bad_responses <- function(long, raw, entries) {
  for (k in seq_len(nrow(entries))) {
    id <- entries$id[k]
    print(subset(long, long$QLogin_1 == id))
    if (entries$raw[k] == "row") {
      print(subset(raw, raw$QLogin_1 == id))
    } else if (entries$raw[k] == "Qunseg") {
      print(subset(raw$Qunseg, raw$QLogin_1 == id))
    }
    if (!is.na(entries$response[k])) {
      long$badresp[which(long$QLogin_1 == id & long$response == entries$response[k])] <- 1
    }
  }
  long
}

# Truncated entries: this is how each was entered - will not count towards score.
# Leading/trailing blanks inside the response strings are significant.
incomplete_entries <- as.data.frame(
  matrix(
    c(
      "w2E2r112", "c", "row",
      "Fh8745VU", "Can be used to hold ", "row",
      "G1u8Znj3", "dig s", "Qunseg",
      "IT6ztEpf", "Get something out a", "row",
      "G63FgMsT", "hold", "Qunseg",
      "BaQk6mT1", " reset ", "Qunseg",
      "VRjUpNjw", "roast mar", "row"
    ),
    ncol = 3, byrow = TRUE,
    dimnames = list(NULL, c("id", "response", "raw"))
  ),
  stringsAsFactors = FALSE
)
dvtlong1 <- flag_bad_responses(dvtlong1, data, incomplete_entries)

subset(dvtlong1, dvtlong1$QLogin_1 == "UUx7idJG")
subset(data, data$QLogin_1 == "UUx7idJG")
#this is how it was entered - will not count this towards score
dvtlong1$badresp[which(dvtlong1$QLogin_1 == "UUx7idJG" & dvtlong1$response == "scra")] <- 1

# For each respondent in `entries`: print their long-format rows and (where
# requested) their raw survey entry, then mark the listed response as bad.
#   long     long-format response table with QLogin_1, response, badresp
#   raw      raw survey data with QLogin_1 and Qunseg
#   entries  data frame with character columns
#              id        respondent login
#              response  exact response text to flag (NA = inspect only)
#              raw       "row"    = print the respondent's row of `raw`
#                        "Qunseg" = print only the raw essay text
#                        "none"   = do not print the raw entry
# Returns `long` with badresp set to 1 on the matching rows.
flag_bad_responses <- function(long, raw, entries) {
  for (k in seq_len(nrow(entries))) {
    id <- entries$id[k]
    print(subset(long, long$QLogin_1 == id))
    if (entries$raw[k] == "row") {
      print(subset(raw, raw$QLogin_1 == id))
    } else if (entries$raw[k] == "Qunseg") {
      print(subset(raw$Qunseg, raw$QLogin_1 == id))
    }
    if (!is.na(entries$response[k])) {
      long$badresp[which(long$QLogin_1 == id & long$response == entries$response[k])] <- 1
    }
  }
  long
}

# Truncated or blank entries: this is how each was entered - will not count
# towards score. Rows with response NA are only inspected:
#   lw50p93r  seemed to be responding as paperclip
#   989BoJRx, vLu5G9ae  inspected, nothing flagged
# NOTE(review): responses equal to " " were already dropped when dvtlong1 was
# built, so the " " entries match nothing unless the literal is meant to hold
# more whitespace - verify against the raw data.
incomplete_entries <- as.data.frame(
  matrix(
    c(
      "6xuQ1Z9X", "use a a weapon when fl", "row",
      "jZeyKHnS", "1", "row",
      "46nE85l6", " ", "row",
      "lw50p93r", NA, "none",
      "989BoJRx", NA, "none",
      "vLu5G9ae", NA, "none",
      "LUdy216o", "Clip together for b", "row",
      "4RcV7Sp1", "push r", "row",
      "365hZA15", " ", "row",
      "9Xxv7HVW", " ", "row"
    ),
    ncol = 3, byrow = TRUE,
    dimnames = list(NULL, c("id", "response", "raw"))
  ),
  stringsAsFactors = FALSE
)
dvtlong1 <- flag_bad_responses(dvtlong1, data, incomplete_entries)

subset(dvtlong1, dvtlong1$QLogin_1 == "mv2wBo0e")
subset(data, data$QLogin_1 == "mv2wBo0e")
#this is
# how it was entered - will not count this towards score
dvtlong1$badresp[which(dvtlong1$QLogin_1 == "mv2wBo0e" & dvtlong1$response == " ")] <- 1

# Work on plain character responses from here on. Extending factor levels
# with c(levels(x), "new text") silently turns every response into NA when the
# column is character (levels() is NULL then), which is what read.csv()
# produces by default since R 4.0. With character data the corrected text can
# simply be assigned.
dvtlong1$response <- as.character(dvtlong1$response)

# For each respondent in `entries`: print their long-format rows and (where
# requested) their raw survey entry, then mark the listed response as bad.
#   long     long-format response table with QLogin_1, response, badresp
#   raw      raw survey data with QLogin_1 and Qunseg
#   entries  data frame with character columns
#              id        respondent login
#              response  exact response text to flag (NA = inspect only)
#              raw       "row"    = print the respondent's row of `raw`
#                        "Qunseg" = print only the raw essay text
#                        "none"   = do not print the raw entry
# Returns `long` with badresp set to 1 on the matching rows.
flag_bad_responses <- function(long, raw, entries) {
  for (k in seq_len(nrow(entries))) {
    id <- entries$id[k]
    print(subset(long, long$QLogin_1 == id))
    if (entries$raw[k] == "row") {
      print(subset(raw, raw$QLogin_1 == id))
    } else if (entries$raw[k] == "Qunseg") {
      print(subset(raw$Qunseg, raw$QLogin_1 == id))
    }
    if (!is.na(entries$response[k])) {
      long$badresp[which(long$QLogin_1 == id & long$response == entries$response[k])] <- 1
    }
  }
  long
}

# Sequentially rewrite slot numbers for one respondent: rows whose `time`
# equals old[k] get new[k], applied in the order given.
renumber_time <- function(long, id, old, new) {
  for (k in seq_along(old)) {
    long$time[which(long$QLogin_1 == id & long$time == old[k])] <- new[k]
  }
  long
}

# Truncated or blank entries: this is how each was entered - will not count
# towards score. zvXd0H7K had no incompletes and is only inspected.
# Leading/trailing blanks inside the response strings are significant.
# NOTE(review): responses equal to " " were already dropped when dvtlong1 was
# built, so the " " entries match nothing unless the literal is meant to hold
# more whitespace - verify against the raw data.
incomplete_entries <- as.data.frame(
  matrix(
    c(
      "VGv554rI", " A", "row",
      "zvXd0H7K", NA, "row",
      "IcVu0w8g", " be decor on party f", "row",
      "eKeAvV0P", "blo", "row",
      "mhcrc4iq", " ", "row",
      "UUuMg7dW", " ", "row",
      "v1f48OH2", " ", "row",
      "vWx8G5y2", " ", "row",
      "B0o9M8xC", "when combined with a rubber band", "row",
      "EwBSSB2k", "Use to pin insects for", "row",
      "A26c3z9n", "ti==", "row",
      "14BdrD3U", "Stir a very ", "row",
      "3drmN5S2", " peice our", "row",
      "uirSLy6r", "Ma", "row",
      "wJ77G1q1", "k", "row",
      "MXicONTd", "dig goo out of the grooves on a ", "row",
      "16scmgQe", "to b", "row",
      "5SRJR5D5", "U", "row",
      "gIk5xOKf", "hiding sp", "row",
      "mqMj47H6", "make a start fi", "row",
      "Y2ov9UYW", " to brea", "row",
      "QnU42iu0", "Break for a fire p", "row",
      "Pg80l09K", " One side can be sharpend to be used", "row"
    ),
    ncol = 3, byrow = TRUE,
    dimnames = list(NULL, c("id", "response", "raw"))
  ),
  stringsAsFactors = FALSE
)
dvtlong1 <- flag_bad_responses(dvtlong1, data, incomplete_entries)

# --- Responses that the separator split cut into several pieces -------------

# RRWg2Zzl: entered as one item
subset(dvtlong1, dvtlong1$QLogin_1 == "RRWg2Zzl")
subset(data, data$QLogin_1 == "RRWg2Zzl")
dvtlong1$response[which(dvtlong1$QLogin_1 == "RRWg2Zzl" & dvtlong1$response == " Making a necklace")] <- "Making a necklace... of paperclips"
dvtlong1$badresp[which(dvtlong1$QLogin_1 == "RRWg2Zzl" & dvtlong1$response == "of paperclips")] <- 1
dvtlong1$badresp[which(dvtlong1$QLogin_1 == "RRWg2Zzl" & dvtlong1$response == " ")] <- 1

# D9ZeYC3r: rejoin the response, drop the leftover piece, and shift the later
# slot numbers down by one.
subset(dvtlong1, dvtlong1$QLogin_1 == "D9ZeYC3r")
subset(data, data$QLogin_1 == "D9ZeYC3r")
dvtlong1$response[which(dvtlong1$QLogin_1 == "D9ZeYC3r" & dvtlong1$response == " a miniature bow and arrow (with rubber band")] <- "a miniature bow and arrow (with rubber band, of course)"
dvtlong1$badresp[which(dvtlong1$QLogin_1 == "D9ZeYC3r" & dvtlong1$response == " of course)")] <- 1
dvtlong1 <- renumber_time(dvtlong1, "D9ZeYC3r", old = 5:9, new = 4:8)

# l66h8TSK: strip the typed "5. " prefix and renumber the remaining slots.
subset(dvtlong1, dvtlong1$QLogin_1 == "l66h8TSK")
subset(data, data$QLogin_1 == "l66h8TSK")
dvtlong1$response[which(dvtlong1$QLogin_1 == "l66h8TSK" & dvtlong1$response == "5. Graffiti on plastic scratch tool")] <- "Graffiti on plastic scratch tool"
dvtlong1 <- renumber_time(dvtlong1, "l66h8TSK", old = c(3, 5, 7, 9, 11), new = 2:6)

# 2Xywd76Z: rejoin the response and drop the pieces left in slots 2 and 3.
subset(dvtlong1, dvtlong1$QLogin_1 == "2Xywd76Z")
subset(data, data$QLogin_1 == "2Xywd76Z")
dvtlong1$response[which(dvtlong1$QLogin_1 == "2Xywd76Z" & dvtlong1$response == "straightened out")] <- "straightened out, it can be used to reset a modem"
dvtlong1$badresp[which(dvtlong1$QLogin_1 == "2Xywd76Z" & dvtlong1$time %in% c(2, 3))] <- 1

#Removing all the bad responses
summary(factor(dvtlong1$badresp))
dvtlong2 <- subset(dvtlong1, dvtlong1$badresp == 0)
dvtlong1 <- dvtlong2

###############################################################################
## Prepare data
## for scoring
###############################################################################

#1) fluency score: take max of time
# `time` is the response slot number, so the maximum per respondent is the
# highest slot that still holds a kept response.
fluency <- aggregate(
  dvtlong1$time,
  list(dvtlong1$QLogin_1, dvtlong1$Item, dvtlong1$Segmented, dvtlong1$lines),
  max
)
names(fluency) <- c("QLogin_1", "Item", "Segmented", "Lines", "Fluency")

#2) elaboration... try a couple things
#a. number of words (then take average overall)
# Words are the pieces obtained by splitting on a single space, computed for
# all responses at once (also safe when dvtlong1 has no rows).
dvtlong1$wordcount <- vapply(
  strsplit(as.character(dvtlong1$response), " "),
  length,
  integer(1)
)
avgwc <- aggregate(dvtlong1$wordcount, list(dvtlong1$QLogin_1), mean)
names(avgwc) <- c("QLogin_1", "AvgWordCount")

#b. more subjective rating - like Hommel et al., 2011: The amount of detail
#   (e.g., “a doorstop” counts 0, whereas “a door stop to prevent a door
#   slamming shut in a strong wind” counts 2 (1 point for explanation of door
#   slamming and another for further detail about the wind).
#will need to export and do this outside of R

#3-4) Originality and flexibility will need to be rated offline as well

#Combine to create fluency and word count scores
scores <- merge(fluency, avgwc, by = "QLogin_1")

#Prepare rest of data for scoring - break up by item, then alphabetize responses (only run commented code once).
#paperclip = subset(dvtlong1, dvtlong1$Item=="paperclip", select=-c(wordcount))
#brick = subset(dvtlong1, dvtlong1$Item=="brick", select=-c(wordcount))
#paperclip = paperclip[order(paperclip$response),]
#brick = brick[order(brick$response),]

#For originality/creativity and elaboration (response-based scores)
#write.csv(paperclip, file="ForCodingResponses_Paperclip_ROUND2.csv")
#write.csv(brick, file="ForCodingResponses_Brick_ROUND2.csv")

## PROCEDURE FOR SCORING PREP
# 1. Insert new row at top, with rating scale (1 to 5), sort data by Response
# 2. Save .csv files as excel documents
# 3. Put into Google Docs
# 4. Share separate copies of the response with each rater (3-4 google docs)
# 5. Hide all columns but the response and score columns
# 6.
#    Share google doc with raters, along with instructions from "Data Scoring Procedures" word document

#For flexibility:
#excel = subset(paperclip, select=c("QLogin_1", "response"))
#write.table(" ", file="PaperclipbySs_ROUND2.txt", row.names=TRUE, sep="\t")
#for (i in levels(factor(excel$QLogin_1))) {
#  cat(i, "\n", file="PaperclipbySs_ROUND2.txt", append=T)
#  a = paste(rbind(subset(as.character(excel$response), excel$QLogin_1==i)))
#  for (j in 1:length(a)){
#    cat("\t", a[j], "\n", file="PaperclipbySs_ROUND2.txt", append=T)
#  }
#  cat("\n\n\n\n\n", file="PaperclipbySs_ROUND2.txt", append=T)
#}

#excel = subset(brick, select=c("QLogin_1", "response"))
#write.table(" ", file="BrickbySs_ROUND2.txt", row.names=TRUE, sep="\t")
#for (i in levels(factor(excel$QLogin_1))) {
#  cat(i, "\n", file="BrickbySs_ROUND2.txt", append=T)
#  a = paste(rbind(subset(as.character(excel$response), excel$QLogin_1==i)))
#  for (j in 1:length(a)){
#    cat("\t", a[j], "\n", file="BrickbySs_ROUND2.txt", append=T)
#  }
#  cat("\n\n\n\n\n", file="BrickbySs_ROUND2.txt", append=T)
#}

## Once scoring is complete, combine data from all google docs and put back into R

###############################################################################
## Examining and combining score data
###############################################################################

#Elaboration
head(elaboration.scores)

#Calculate discrepancy in ratings and average rating
elaboration.scores$Rating_Discrepancy <-
  elaboration.scores$J2_Rating - elaboration.scores$J1_Rating
# NOTE(review): columns 7:8 are presumably the two judges' ratings - confirm
# against the CSV layout.
elaboration.scores$Average_Rating <- rowMeans(elaboration.scores[, 7:8])

# match = 1 when both judges gave the same rating, 0 otherwise
elaboration.scores$match <- as.numeric(elaboration.scores$Rating_Discrepancy == 0)

# Counts per match value, then the second count as a share of the first two
match_counts <- summary(factor(elaboration.scores$match))
match_counts
match_counts[2] / (match_counts[1] + match_counts[2])
#overall agreement of 92% in ratings.
# Each merge() below uses the default inner join, so a participant must be
# present in every score table to remain in `scores`.

#take average of ratings for each participant
elab_avg <- aggregate(
  elaboration.scores$Average_Rating,
  list(elaboration.scores$QLogin_1),
  mean,
  na.rm = TRUE
)
names(elab_avg) <- c("QLogin_1", "elaboration.scores")
scores1 <- merge(scores, elab_avg, by = "QLogin_1")
scores <- scores1

#Originality
head(originality.scores)
# psych::alpha is called with its namespace so another attached package that
# also exports alpha() cannot mask it.
psych::alpha(originality.scores[, 7:10])
#overall a = .73 (raw)

#is it different for brick and paperclip?
psych::alpha(subset(originality.scores, originality.scores$Item == "paperclip")[, 7:10])
#.69
psych::alpha(subset(originality.scores, originality.scores$Item == "brick")[, 7:10])
#.74
#slightly higher for brick than for paperclip ratings

#take average across ratings
originality.scores$Avg_rating <- rowMeans(originality.scores[, 7:10], na.rm = TRUE)

#Take average of these ratings across participants
creat_avg <- aggregate(
  originality.scores$Avg_rating,
  list(originality.scores$QLogin_1),
  mean,
  na.rm = TRUE
)
names(creat_avg) <- c("QLogin_1", "originality.scores")
scores2 <- merge(scores, creat_avg, by = "QLogin_1")
scores <- scores2

#Flexibility
head(flexibility.scores)
psych::alpha(flexibility.scores[, 2:4])
#overall a = .97 (raw)

#take average across ratings
flexibility.scores$Avg_rating <- rowMeans(flexibility.scores[, 2:4], na.rm = TRUE)
flex_avg <- subset(flexibility.scores, select = c("QLogin_1", "Avg_rating"))
names(flex_avg)[2] <- "flexibility.scores"

# Strip trailing whitespace from the logins so they match the IDs in `scores`
trim.trailing <- function(x) sub("\\s+$", "", x)
flex_avg$QLogin_1 <- trim.trailing(flex_avg$QLogin_1)
scores2 <- merge(scores, flex_avg, by = "QLogin_1")
scores <- scores2
head(scores)

# Rename the three merged score columns by name rather than by position, so
# the labels stay correct if the column layout of `scores` changes.
score_labels <- c(
  elaboration.scores = "Elaboration",
  originality.scores = "Originality",
  flexibility.scores = "Flexibility"
)
names(scores)[match(names(score_labels), names(scores))] <- score_labels

###############################################################################
## Combine with timing data and personality scales/demographics
###############################################################################
otherdata <- subset(
  data,
  select = c(
    "QLogin_1", "Qpagetime_2",
    paste("QBFI_A", 1:44, sep = "_"),
    "Qage_1", "Qsex", "Qeducation"
  )
)
scoresdemo <- merge(scores, otherdata, by = c("QLogin_1"))
head(scoresdemo)

#write out csv for data analysis
write.csv(scoresdemo, file = "HMSL_ScoreData.csv", row.names = FALSE)