#########################################################################
##
##  Data Preprocessing Script for Hofelich Mohr, Sell, & Lindsay (in prep)
##
##	UPDATED: 3/23/2015
#########################################################################
## Reads in files:
## 		
## 		HMSL_data_allcompleted.csv 
##			(dataset with only complete responses)
## 		
## 		HMSL_Elaboration_scores_all.csv
##			(combined raw elaboration scores from judges)
##		
## 		HMSL_Originality_scores_all.csv
##			(combined raw originality scores from judges)
##		
## 		HMSL_Flexibility_scores_all.csv
##			(combined raw flexibility scores from judges)
##
#########################################################################
##  Required libraries

# Attach the packages this script needs, installing any that are missing.
# The previous form `require(pkg) || install.packages(pkg)` stopped with an
# error on a machine without the package, because install.packages() returns
# NULL (not a logical value) and the new package was never attached.
for (pkg in c("stringr", "psych")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
rm(pkg)

#########################################################################
## Reading in the data files
#########################################################################

# Relative file names below (and any relative output paths later in the
# script) resolve against this directory.
setwd("/Volumes/survey/591330_RSS/591330_dd/Data Sharing")

# stringsAsFactors is spelled out on purpose: later steps call levels() on the
# response column, which only works when text columns are read as factors.
# That was the read.csv() default before R 4.0.0 but is no longer the default.
data <- read.csv(
  file = "HMSL_data_allcompleted.csv", header = TRUE,
  stringsAsFactors = TRUE
)
elaboration.scores <- read.csv(
  file = "HMSL_Elaboration_scores_all.csv", header = TRUE,
  stringsAsFactors = TRUE
)
originality.scores <- read.csv(
  file = "HMSL_Originality_scores_all.csv", header = TRUE,
  stringsAsFactors = TRUE
)
flexibility.scores <- read.csv(
  file = "HMSL_Flexibility_scores_all.csv", header = TRUE,
  stringsAsFactors = TRUE
)

#########################################################################
## Splitting responses from the unsegmented boxes
#########################################################################


#Qseg_1 to Qseg_50 seem to be the fill in the blank
#Qunseg is essay box

#looks like people used "." "," "<enter>" or "-" which appears as multiple spaces
# Respondents in the unsegmented condition (one essay box, column Qunseg).
unseg <- subset(data, data$Segmented == "unsegmented")

# Count, per respondent, how often each candidate separator occurs in the
# essay-box text. Names are the count columns added to `unseg`; values are the
# regular expressions searched for. The `nenters` count looks for two
# consecutive spaces (per the note above, that is how line breaks appear).
separator_patterns <- c(
  ncommas_unseg  = ",",
  nperiods_unseg = "\\.",
  nenters_unseg  = "  ",
  ndash_unseg    = "-",
  nsemicol_unseg = ";"
)

# as.character() mirrors the splitting step, which also converts Qunseg
# before handing it to stringr.
essay_text <- as.character(unseg$Qunseg)
for (count_col in names(separator_patterns)) {
  # str_locate_all() returns one match matrix per string; its row count is
  # the number of occurrences.
  unseg[[count_col]] <- vapply(
    str_locate_all(essay_text, separator_patterns[[count_col]]),
    nrow,
    integer(1)
  )
}

# Highest of the five counts for each respondent. The count columns are
# selected by name rather than by position (previously 107:111), so the result
# does not depend on how many columns the input file happens to have.
unseg$maxused <- apply(unseg[, names(separator_patterns)], 1, max)

# One row per unsegmented respondent, one column per split response (max 50).
split <- as.data.frame(matrix(1, nrow = nrow(unseg), ncol = 50))

# Split each essay-box text on the separator that respondent used most often.
# When counts tie, the first separator in this list wins; if none of them
# reaches the maximum, the text is split on two consecutive spaces.
separator_precedence <- c(
  ncommas_unseg  = ",",
  nperiods_unseg = "\\.",
  ndash_unseg    = "-",
  nsemicol_unseg = ";"
)

# seq_len() keeps the loop from running at all when there are no unsegmented
# respondents (1:nrow() would iterate over c(1, 0)).
for (i in seq_len(nrow(unseg))) {
  is_most_used <- vapply(
    names(separator_precedence),
    function(count_col) unseg[[count_col]][i] == unseg$maxused[i],
    logical(1)
  )
  chosen_separator <- if (any(is_most_used)) {
    separator_precedence[[which(is_most_used)[1]]]
  } else {
    "  "
  }
  # Pieces beyond the 50th stay together in the last column.
  split[i, ] <- str_split_fixed(
    as.character(unseg$Qunseg[i]),
    pattern = chosen_separator,
    n = 50
  )
}

names(split) <- paste("Qunseg", 1:50, sep = "_")
unseg1 <- cbind(unseg$QLogin_1, split)
names(unseg1)[1] <- "QLogin_1"

# Attach the split columns to the full data set; respondents without split
# responses get NA in the Qunseg_* columns.
data1 <- merge(data, unseg1, by = "QLogin_1", all.x = TRUE, all.y = TRUE)

#check to make sure unsegmented split correctly (look at file in excel)
#write.csv(data1, file="Datawithresponses_split.csv")

#########################################################################
## Correcting split errors
#########################################################################

#Need to separate responses:
# Hand corrections for respondents whose essay-box text was not split
# properly. For each login the current row is displayed first, then the listed
# Qunseg_* cells are overwritten with the corrected responses.

# Overwrite cells of respondent `login`; `corrected` is a character vector
# whose names are the columns to change.
set_split_responses <- function(df, login, corrected) {
  rows <- which(df$QLogin_1 == login)
  for (col in names(corrected)) {
    df[rows, which(names(df) == col)] <- corrected[[col]]
  }
  df
}

subset(data1, data1$QLogin_1 == "989BoJRx")
data1 <- set_split_responses(data1, "989BoJRx", c(
  Qunseg_3 = "Used as a bracelet.",
  Qunseg_4 = "Can be used to make a sling shot."
))

subset(data1, data1$QLogin_1 == "619m9P17")
data1 <- set_split_responses(data1, "619m9P17", c(
  Qunseg_3 = "fireplace.",
  Qunseg_4 = "weight"
))

subset(data1, data1$QLogin_1 == "L9otc4KT")
data1 <- set_split_responses(data1, "L9otc4KT", c(
  Qunseg_4 = "hit together to use as an instrument.",
  Qunseg_5 = "Put at the bottom of a big flower pot to take up space so use less dirt.",
  Qunseg_6 = "Use as a paver"
))

subset(data1, data1$QLogin_1 == "oP2kzb61")
data1 <- set_split_responses(data1, "oP2kzb61", c(
  Qunseg_3 = "It is a foundation",
  Qunseg_4 = "it can be used as a weapon",
  Qunseg_5 = "it can be used as an anchor"
))

subset(data1, data1$QLogin_1 == "DXQb2R2M")
data1 <- set_split_responses(data1, "DXQb2R2M", c(
  Qunseg_1 = "Wall",
  Qunseg_2 = "Street",
  Qunseg_3 = "building",
  Qunseg_4 = "sidewalk",
  Qunseg_5 = "school",
  Qunseg_6 = "house",
  Qunseg_7 = "pixies",
  Qunseg_8 = "firehouse"
))

subset(data1, data1$QLogin_1 == "57ZLnnlc")
data1 <- set_split_responses(data1, "57ZLnnlc", c(
  Qunseg_7 = "check if a baked item (cake, muffin) is moist or able to be removed from the oven",
  Qunseg_8 = ""
))

subset(data1, data1$QLogin_1 == "V8mxLJpC")
data1 <- set_split_responses(data1, "V8mxLJpC", c(
  Qunseg_4 = "use to open a key lock",
  Qunseg_5 = "make paperclip sculptures",
  Qunseg_6 = "jewelry",
  Qunseg_7 = "hang things with it",
  Qunseg_8 = "use as a clothes pin if you lose a button",
  Qunseg_9 = "Use as a utensil if you forget your fork"
))

subset(data1, data1$QLogin_1 == "l9pCqCGT")
data1 <- set_split_responses(data1, "l9pCqCGT", c(
  Qunseg_1 = "button for clothes",
  Qunseg_2 = "earing",
  Qunseg_3 = "key",
  Qunseg_4 = "key chain",
  Qunseg_5 = "art",
  Qunseg_6 = "pick",
  Qunseg_7 = "zipper",
  Qunseg_8 = "clip",
  Qunseg_9 = "poker"
))

subset(data1, data1$QLogin_1 == "bU801XR6")
data1 <- set_split_responses(data1, "bU801XR6", c(
  Qunseg_1 = "FOR PAPERS",
  Qunseg_2 = "FOR MONEY",
  Qunseg_3 = "FOR BOOKMARKS",
  Qunseg_4 = "FOR OPENING LOCKS"
))


#########################################################################
## Prepare data for scoring
#########################################################################

#Isolate the creativity responses for scoring - want to put in long version
# Isolate the creativity responses for scoring and stack them in long format:
# one row per respondent x response slot.
response_cols <- paste("resp", 1:50, sep = ".")

# Segmented respondents answered in Qseg_1..Qseg_50; unsegmented respondents
# use the Qunseg_1..Qunseg_50 columns produced by the splitting step. Both are
# renamed to resp.1..resp.50 so the two groups can be stacked.
seg <- subset(
  data1, data1$Segmented == "segmented",
  select = c("QLogin_1", paste("Qseg", 1:50, sep = "_"), "Item", "Segmented", "lines")
)
names(seg)[2:51] <- response_cols
unseg <- subset(
  data1, data1$Segmented == "unsegmented",
  select = c("QLogin_1", paste("Qunseg", 1:50, sep = "_"), "Item", "Segmented", "lines")
)
names(unseg)[2:51] <- response_cols

dvt <- rbind(seg, unseg)

# The long table has 50 rows per respondent. The row names are derived from
# the data instead of the former hard-coded 1:33451, which stops being long
# enough as soon as the number of respondents grows.
dvtlong <- reshape(
  dvt,
  varying = response_cols,
  v.names = "response",
  times = 1:50,
  idvar = "QLogin_1",
  direction = "long",
  new.row.names = seq_len(nrow(dvt) * 50)
)

row.names(dvtlong) <- seq_len(nrow(dvtlong))
dvtlong <- dvtlong[order(dvtlong$QLogin_1), ]


#get rid of NAs and blanks in responses
# Keep only response slots that actually contain text: not missing, not empty
# and not a single space.
has_text <- !is.na(dvtlong$response) &
  dvtlong$response != "" &
  dvtlong$response != " "
dvtlong1 <- dvtlong[which(has_text), ]

# Show responses that are literally "None" (non-responses) ...
subset(dvtlong1, dvtlong1$response == "None")

# ... and drop this respondent entirely
# (presumably the one identified by the listing above -- confirm).
dvtlong1 <- dvtlong1[which(dvtlong1$QLogin_1 != "2aA4wIGJ"), ]


##################################################################################
## Checking incomplete and blank responses (shouldn't count towards score)
##################################################################################

#Checking ids based on notes:
# For each login below, the long-format rows and the raw survey entry are
# displayed for inspection. Fragments that are exactly how the respondent
# entered them (cut off or blank) get badresp = 1 and are removed at the end
# of this section, so they do not count towards the scores.

# Mark rows of respondent `login` as non-scorable. Rows are matched on the
# exact response text and/or on the response position `time`.
flag_bad <- function(df, login, response = NULL, time = NULL) {
  hit <- df$QLogin_1 == login
  if (!is.null(response)) {
    hit <- hit & df$response == response
  }
  if (!is.null(time)) {
    hit <- hit & df$time == time
  }
  df$badresp[which(hit)] <- 1
  df
}

# Replace the text of one response. The column type is left as it is: for a
# factor the new text is added as a level first; for a character column the
# text is assigned directly. (Rebuilding the column with
# factor(x, levels = c(levels(x), new)) turns every other response into NA
# when x is a character vector, because levels(x) is then NULL.)
replace_response <- function(df, login, old, new) {
  rows <- which(df$QLogin_1 == login & df$response == old)
  if (is.factor(df$response)) {
    levels(df$response) <- union(levels(df$response), new)
  }
  df$response[rows] <- new
  df
}

# Renumber response positions of one respondent, one mapping at a time and in
# the order given.
renumber_times <- function(df, login, from, to) {
  for (k in seq_along(from)) {
    df$time[which(df$QLogin_1 == login & df$time == from[k])] <- to[k]
  }
  df
}

subset(dvtlong1, dvtlong1$QLogin_1 == "iR7JwQyj")
subset(data, data$QLogin_1 == "iR7JwQyj")
# Every response starts out as scorable (0).
dvtlong1$badresp <- rep(0, nrow(dvtlong1))
dvtlong1 <- flag_bad(dvtlong1, "iR7JwQyj", "as")

subset(dvtlong1, dvtlong1$QLogin_1 == "9mlyecrJ")
subset(data$Qunseg, data$QLogin_1 == "9mlyecrJ")
dvtlong1 <- flag_bad(dvtlong1, "9mlyecrJ", " burn and use as ")

subset(dvtlong1, dvtlong1$QLogin_1 == "w2E2r112")
subset(data, data$QLogin_1 == "w2E2r112")
dvtlong1 <- flag_bad(dvtlong1, "w2E2r112", "c")

subset(dvtlong1, dvtlong1$QLogin_1 == "Fh8745VU")
subset(data, data$QLogin_1 == "Fh8745VU")
dvtlong1 <- flag_bad(dvtlong1, "Fh8745VU", "Can be used to hold ")

subset(dvtlong1, dvtlong1$QLogin_1 == "G1u8Znj3")
subset(data$Qunseg, data$QLogin_1 == "G1u8Znj3")
dvtlong1 <- flag_bad(dvtlong1, "G1u8Znj3", "dig s")

subset(dvtlong1, dvtlong1$QLogin_1 == "IT6ztEpf")
subset(data, data$QLogin_1 == "IT6ztEpf")
dvtlong1 <- flag_bad(dvtlong1, "IT6ztEpf", "Get something out a")

subset(dvtlong1, dvtlong1$QLogin_1 == "G63FgMsT")
subset(data$Qunseg, data$QLogin_1 == "G63FgMsT")
dvtlong1 <- flag_bad(dvtlong1, "G63FgMsT", "hold")

subset(dvtlong1, dvtlong1$QLogin_1 == "BaQk6mT1")
subset(data$Qunseg, data$QLogin_1 == "BaQk6mT1")
dvtlong1 <- flag_bad(dvtlong1, "BaQk6mT1", " reset ")

subset(dvtlong1, dvtlong1$QLogin_1 == "VRjUpNjw")
subset(data, data$QLogin_1 == "VRjUpNjw")
dvtlong1 <- flag_bad(dvtlong1, "VRjUpNjw", "roast mar")

subset(dvtlong1, dvtlong1$QLogin_1 == "UUx7idJG")
subset(data, data$QLogin_1 == "UUx7idJG")
dvtlong1 <- flag_bad(dvtlong1, "UUx7idJG", "scra")

subset(dvtlong1, dvtlong1$QLogin_1 == "6xuQ1Z9X")
subset(data, data$QLogin_1 == "6xuQ1Z9X")
dvtlong1 <- flag_bad(dvtlong1, "6xuQ1Z9X", "use a a weapon when fl")

subset(dvtlong1, dvtlong1$QLogin_1 == "jZeyKHnS")
subset(data, data$QLogin_1 == "jZeyKHnS")
dvtlong1 <- flag_bad(dvtlong1, "jZeyKHnS", "1")

subset(dvtlong1, dvtlong1$QLogin_1 == "46nE85l6")
subset(data, data$QLogin_1 == "46nE85l6")
dvtlong1 <- flag_bad(dvtlong1, "46nE85l6", "  ")

# Inspected only; nothing flagged.
subset(dvtlong1, dvtlong1$QLogin_1 == "lw50p93r") # seemed to be responding as paperclip
subset(dvtlong1, dvtlong1$QLogin_1 == "989BoJRx")
subset(dvtlong1, dvtlong1$QLogin_1 == "vLu5G9ae")

subset(dvtlong1, dvtlong1$QLogin_1 == "LUdy216o")
subset(data, data$QLogin_1 == "LUdy216o")
dvtlong1 <- flag_bad(dvtlong1, "LUdy216o", "Clip together for b")

subset(dvtlong1, dvtlong1$QLogin_1 == "4RcV7Sp1")
subset(data, data$QLogin_1 == "4RcV7Sp1")
dvtlong1 <- flag_bad(dvtlong1, "4RcV7Sp1", "push r")

subset(dvtlong1, dvtlong1$QLogin_1 == "365hZA15")
subset(data, data$QLogin_1 == "365hZA15")
dvtlong1 <- flag_bad(dvtlong1, "365hZA15", "  ")

subset(dvtlong1, dvtlong1$QLogin_1 == "9Xxv7HVW")
subset(data, data$QLogin_1 == "9Xxv7HVW")
dvtlong1 <- flag_bad(dvtlong1, "9Xxv7HVW", "  ")

subset(dvtlong1, dvtlong1$QLogin_1 == "mv2wBo0e")
subset(data, data$QLogin_1 == "mv2wBo0e")
dvtlong1 <- flag_bad(dvtlong1, "mv2wBo0e", "  ")

subset(dvtlong1, dvtlong1$QLogin_1 == "VGv554rI")
subset(data, data$QLogin_1 == "VGv554rI")
dvtlong1 <- flag_bad(dvtlong1, "VGv554rI", "  A")

subset(dvtlong1, dvtlong1$QLogin_1 == "zvXd0H7K")
subset(data, data$QLogin_1 == "zvXd0H7K")
# no incompletes

subset(dvtlong1, dvtlong1$QLogin_1 == "IcVu0w8g")
subset(data, data$QLogin_1 == "IcVu0w8g")
dvtlong1 <- flag_bad(dvtlong1, "IcVu0w8g", " be decor on party f")

subset(dvtlong1, dvtlong1$QLogin_1 == "eKeAvV0P")
subset(data, data$QLogin_1 == "eKeAvV0P")
dvtlong1 <- flag_bad(dvtlong1, "eKeAvV0P", "blo")

subset(dvtlong1, dvtlong1$QLogin_1 == "mhcrc4iq")
subset(data, data$QLogin_1 == "mhcrc4iq")
dvtlong1 <- flag_bad(dvtlong1, "mhcrc4iq", "  ")

subset(dvtlong1, dvtlong1$QLogin_1 == "RRWg2Zzl")
subset(data, data$QLogin_1 == "RRWg2Zzl")
# Entered as one item: join the two pieces, then discard the second piece and
# the blank fragment.
dvtlong1 <- replace_response(
  dvtlong1, "RRWg2Zzl",
  old = "  Making a necklace",
  new = "Making a necklace... of paperclips"
)
dvtlong1 <- flag_bad(dvtlong1, "RRWg2Zzl", "of paperclips")
dvtlong1 <- flag_bad(dvtlong1, "RRWg2Zzl", "  ")

subset(dvtlong1, dvtlong1$QLogin_1 == "UUuMg7dW")
subset(data, data$QLogin_1 == "UUuMg7dW")
dvtlong1 <- flag_bad(dvtlong1, "UUuMg7dW", "  ")

subset(dvtlong1, dvtlong1$QLogin_1 == "v1f48OH2")
subset(data, data$QLogin_1 == "v1f48OH2")
dvtlong1 <- flag_bad(dvtlong1, "v1f48OH2", "  ")

subset(dvtlong1, dvtlong1$QLogin_1 == "vWx8G5y2")
subset(data, data$QLogin_1 == "vWx8G5y2")
dvtlong1 <- flag_bad(dvtlong1, "vWx8G5y2", "  ")

subset(dvtlong1, dvtlong1$QLogin_1 == "B0o9M8xC")
subset(data, data$QLogin_1 == "B0o9M8xC")
dvtlong1 <- flag_bad(dvtlong1, "B0o9M8xC", "when combined with a rubber band")

subset(dvtlong1, dvtlong1$QLogin_1 == "EwBSSB2k")
subset(data, data$QLogin_1 == "EwBSSB2k")
dvtlong1 <- flag_bad(dvtlong1, "EwBSSB2k", "Use to pin insects for")

subset(dvtlong1, dvtlong1$QLogin_1 == "A26c3z9n")
subset(data, data$QLogin_1 == "A26c3z9n")
dvtlong1 <- flag_bad(dvtlong1, "A26c3z9n", "ti==")

subset(dvtlong1, dvtlong1$QLogin_1 == "14BdrD3U")
subset(data, data$QLogin_1 == "14BdrD3U")
dvtlong1 <- flag_bad(dvtlong1, "14BdrD3U", "Stir a very ")

subset(dvtlong1, dvtlong1$QLogin_1 == "3drmN5S2")
subset(data, data$QLogin_1 == "3drmN5S2")
dvtlong1 <- flag_bad(dvtlong1, "3drmN5S2", " peice our")

subset(dvtlong1, dvtlong1$QLogin_1 == "D9ZeYC3r")
subset(data, data$QLogin_1 == "D9ZeYC3r")
# One response was cut in two at its comma: restore the full text, discard the
# leftover piece and move the later responses up one position.
dvtlong1 <- replace_response(
  dvtlong1, "D9ZeYC3r",
  old = " a miniature bow and arrow (with rubber band",
  new = "a miniature bow and arrow (with rubber band, of course)"
)
dvtlong1 <- flag_bad(dvtlong1, "D9ZeYC3r", " of course)")
dvtlong1 <- renumber_times(dvtlong1, "D9ZeYC3r", from = 5:9, to = 4:8)

subset(dvtlong1, dvtlong1$QLogin_1 == "l66h8TSK")
subset(data, data$QLogin_1 == "l66h8TSK")
# Strip the respondent's own numbering from this response and renumber the
# positions 3, 5, 7, 9, 11 to 2..6.
dvtlong1 <- replace_response(
  dvtlong1, "l66h8TSK",
  old = "5. Graffiti on plastic scratch tool",
  new = "Graffiti on plastic scratch tool"
)
dvtlong1 <- renumber_times(
  dvtlong1, "l66h8TSK",
  from = c(3, 5, 7, 9, 11), to = 2:6
)

subset(dvtlong1, dvtlong1$QLogin_1 == "uirSLy6r")
subset(data, data$QLogin_1 == "uirSLy6r")
dvtlong1 <- flag_bad(dvtlong1, "uirSLy6r", "Ma")

subset(dvtlong1, dvtlong1$QLogin_1 == "wJ77G1q1")
subset(data, data$QLogin_1 == "wJ77G1q1")
dvtlong1 <- flag_bad(dvtlong1, "wJ77G1q1", "k")

subset(dvtlong1, dvtlong1$QLogin_1 == "2Xywd76Z")
subset(data, data$QLogin_1 == "2Xywd76Z")
# Restore the full text of the first response and discard positions 2 and 3.
# NOTE(review): positions after 3 are not renumbered for this respondent --
# confirm there are none, since fluency is later taken as the largest position.
dvtlong1 <- replace_response(
  dvtlong1, "2Xywd76Z",
  old = "straightened out",
  new = "straightened out, it can be used to reset a modem"
)
dvtlong1 <- flag_bad(dvtlong1, "2Xywd76Z", time = 2)
dvtlong1 <- flag_bad(dvtlong1, "2Xywd76Z", time = 3)

subset(dvtlong1, dvtlong1$QLogin_1 == "MXicONTd")
subset(data, data$QLogin_1 == "MXicONTd")
dvtlong1 <- flag_bad(dvtlong1, "MXicONTd", "dig goo out of the grooves on a ")

subset(dvtlong1, dvtlong1$QLogin_1 == "16scmgQe")
subset(data, data$QLogin_1 == "16scmgQe")
dvtlong1 <- flag_bad(dvtlong1, "16scmgQe", "to b")

subset(dvtlong1, dvtlong1$QLogin_1 == "5SRJR5D5")
subset(data, data$QLogin_1 == "5SRJR5D5")
dvtlong1 <- flag_bad(dvtlong1, "5SRJR5D5", "U")

subset(dvtlong1, dvtlong1$QLogin_1 == "gIk5xOKf")
subset(data, data$QLogin_1 == "gIk5xOKf")
dvtlong1 <- flag_bad(dvtlong1, "gIk5xOKf", "hiding sp")

subset(dvtlong1, dvtlong1$QLogin_1 == "mqMj47H6")
subset(data, data$QLogin_1 == "mqMj47H6")
dvtlong1 <- flag_bad(dvtlong1, "mqMj47H6", "make a start fi")

subset(dvtlong1, dvtlong1$QLogin_1 == "Y2ov9UYW")
subset(data, data$QLogin_1 == "Y2ov9UYW")
dvtlong1 <- flag_bad(dvtlong1, "Y2ov9UYW", " to brea")

# Same login and fragment as flagged earlier in this section; flagging again
# changes nothing.
subset(dvtlong1, dvtlong1$QLogin_1 == "VGv554rI")
subset(data, data$QLogin_1 == "VGv554rI")
dvtlong1 <- flag_bad(dvtlong1, "VGv554rI", "  A")

subset(dvtlong1, dvtlong1$QLogin_1 == "QnU42iu0")
subset(data, data$QLogin_1 == "QnU42iu0")
dvtlong1 <- flag_bad(dvtlong1, "QnU42iu0", "Break for a fire p")

subset(dvtlong1, dvtlong1$QLogin_1 == "Pg80l09K")
subset(data, data$QLogin_1 == "Pg80l09K")
dvtlong1 <- flag_bad(dvtlong1, "Pg80l09K", " One side can be sharpend to be used")

# Removing all the bad responses
summary(factor(dvtlong1$badresp))

dvtlong2 <- subset(dvtlong1, dvtlong1$badresp == 0)

dvtlong1 <- dvtlong2


###############################################################################
## Compute fluency and word-count scores; export responses for rating
###############################################################################

#1) fluency score: take max of time

# 1) Fluency score: the largest response position (time) per respondent.
# NOTE(review): this equals the number of scored responses only when the
# remaining positions have no gaps -- confirm for respondents whose blank or
# flagged responses were not the last ones.
fluency <- aggregate(
  dvtlong1$time,
  list(dvtlong1$QLogin_1, dvtlong1$Item, dvtlong1$Segmented, dvtlong1$lines),
  max
)
names(fluency) <- c("QLogin_1", "Item", "Segmented", "Lines", "Fluency")

# 2) Elaboration... try a couple of things
# a. Number of words per response (then take the average per respondent).
# Responses are trimmed and split on runs of whitespace. Splitting the raw
# text on single spaces counted an extra "word" for every leading or doubled
# space, which inflated the count for responses cut out of the essay box
# (those often start with a space, e.g. " reset ").
response_text <- gsub("^\\s+|\\s+$", "", as.character(dvtlong1$response))
dvtlong1$wordcount <- vapply(strsplit(response_text, "\\s+"), length, integer(1))

avgwc <- aggregate(dvtlong1$wordcount, list(dvtlong1$QLogin_1), mean)
names(avgwc) <- c("QLogin_1", "AvgWordCount")

# b. More subjective rating - like Hommel et al., 2011: the amount of detail (e.g., “a doorstop” counts 0, whereas “a door stop to prevent a door slamming shut in a strong wind” counts 2 (1 point for explanation of door slamming and another for further detail about the wind)).
#    Will need to export and do this outside of R.


# 3-4) Originality and flexibility will need to be rated offline as well.

# Combine to create fluency and word count scores

scores <- merge(fluency, avgwc, by = "QLogin_1")


#Prepare rest of data for scoring - break up by item, then alphabetize responses (only run commented code once).

#paperclip = subset(dvtlong1, dvtlong1$Item=="paperclip", select=-c(wordcount))
#brick = subset(dvtlong1, dvtlong1$Item=="brick", select=-c(wordcount))

#paperclip = paperclip[order(paperclip$response),]
#brick = brick[order(brick$response),]

#For originality/creativity and elaboration (response-based scores)
#write.csv(paperclip, file="ForCodingResponses_Paperclip_ROUND2.csv")
#write.csv(brick, file="ForCodingResponses_Brick_ROUND2.csv")

## PROCEDURE FOR SCORING PREP
# 1. Insert new row at top, with rating scale (1 to 5), sort data by Response
# 2. Save .csv files as excel documents
# 3. Put into Google Docs
# 4. Share separate copies of the response with each rater (3-4 google docs)
# 5. Hide all columns but the response and score columns
# 6. Share google doc with raters, along with instructions from "Data Scoring Procedures" word document


#For flexibility:
#excel = subset(paperclip, select=c("QLogin_1", "response"))
#write.table(" ", file="PaperclipbySs_ROUND2.txt", row.names=TRUE, sep="\t")
#for (i in levels(factor(excel$QLogin_1))) {
#	cat(i, "\n", file="PaperclipbySs_ROUND2.txt", append=T)
#	a = paste(rbind(subset(as.character(excel$response), excel$QLogin_1==i)))
#	for (j in 1:length(a)){
#	cat("\t", a[j], "\n", file="PaperclipbySs_ROUND2.txt", append=T)
#	}
#	cat("\n\n\n\n\n", file="PaperclipbySs_ROUND2.txt", append=T)
#}

#excel = subset(brick, select=c("QLogin_1", "response"))
#write.table(" ", file="BrickbySs_ROUND2.txt", row.names=TRUE, sep="\t")
#for (i in levels(factor(excel$QLogin_1))) {
#	cat(i, "\n", file="BrickbySs_ROUND2.txt", append=T)
#	a = paste(rbind(subset(as.character(excel$response), excel$QLogin_1==i)))
#	for (j in 1:length(a)){
#	cat("\t", a[j], "\n", file="BrickbySs_ROUND2.txt", append=T)
#	}
#	cat("\n\n\n\n\n", file="BrickbySs_ROUND2.txt", append=T)
#}

## Once scoring is complete, combine data from all google docs and put back into R

###############################################################################
## Examining and combining score data
###############################################################################

#Elaboration
head(elaboration.scores)

# Calculate the discrepancy between the two judges and their average rating.
elaboration.scores$Rating_Discrepancy <-
  elaboration.scores$J2_Rating - elaboration.scores$J1_Rating
# Columns 7:8 are presumably J1_Rating and J2_Rating -- confirm against the file.
elaboration.scores$Average_Rating <- rowMeans(elaboration.scores[, 7:8])

# 1 when both judges gave the same rating, 0 otherwise.
elaboration.scores$match <- ifelse(elaboration.scores$Rating_Discrepancy == 0, 1, 0)

summary(factor(elaboration.scores$match))
# Proportion of responses on which the judges agree. mean() also gives the
# right answer when every rating matches or none does; indexing the summary
# table by position returned NA in those cases.
mean(elaboration.scores$match, na.rm = TRUE)
	#overall agreement of 92% in ratings.

# Take the average of the ratings for each participant.
elab_avg <- aggregate(
  elaboration.scores$Average_Rating,
  list(elaboration.scores$QLogin_1),
  mean,
  na.rm = TRUE
)
names(elab_avg) <- c("QLogin_1", "elaboration.scores")

# Inner join: only participants present in both tables are kept.
scores1 <- merge(scores, elab_avg, by = "QLogin_1")
scores <- scores1


#Originality
head(originality.scores)

# Reliability (psych::alpha) across the rating columns 7:10.
alpha(originality.scores[,7:10])
	#overall a = .73 (raw)

# Is it different for brick and paperclip?
alpha(subset(originality.scores, originality.scores$Item=="paperclip")[,7:10]) #.69
alpha(subset(originality.scores, originality.scores$Item=="brick")[,7:10]) #.74
	#slightly higher for brick than for paperclip ratings

# Mean of the available ratings for each response ...
originality.scores$Avg_rating <- rowMeans(originality.scores[, 7:10], na.rm = TRUE)

# ... then the mean of those per participant.
creat_avg <- setNames(
  aggregate(
    originality.scores$Avg_rating,
    list(originality.scores$QLogin_1),
    mean,
    na.rm = TRUE
  ),
  c("QLogin_1", "originality.scores")
)

# Inner join onto the running score table.
scores <- scores2 <- merge(scores, creat_avg, by = "QLogin_1")


#Flexibility
head(flexibility.scores)

# Reliability (psych::alpha) across the rating columns 2:4.
alpha(flexibility.scores[,2:4])
	#overall a = .97 (raw)

# Mean of the available ratings in each row, kept together with the login.
flexibility.scores$Avg_rating <- rowMeans(flexibility.scores[, 2:4], na.rm = TRUE)
flex_avg <- flexibility.scores[, c("QLogin_1", "Avg_rating")]
names(flex_avg)[2] <- "flexibility.scores"
# Remove whitespace at the end of each string; leading and inner whitespace is
# left untouched. Non-character input is returned as character.
trim.trailing <- function(x) {
  sub(pattern = "\\s+$", replacement = "", x = x)
}
# Logins in the flexibility file can carry trailing whitespace; strip it so
# they match the logins in the score table.
flex_avg$QLogin_1 <- trim.trailing(flex_avg$QLogin_1)

# Inner join onto the running score table.
scores <- scores2 <- merge(scores, flex_avg, by = "QLogin_1")

head(scores)

# Columns 7 to 9 hold the three rating-based scores; give them short names.
names(scores)[seq(7, 9)] <- c("Elaboration", "Originality", "Flexibility")

###############################################################################
## Combine with timing data and personality scales/demographics
###############################################################################

# Columns taken over from the survey data: login, Qpagetime_2, the 44
# QBFI_A_* items, age, sex and education.
extra_cols <- c(
  "QLogin_1", "Qpagetime_2",
  paste0("QBFI_A_", 1:44),
  "Qage_1", "Qsex", "Qeducation"
)
otherdata <- data[, extra_cols]

# Inner join: one row per participant that has both scores and survey data.
scoresdemo <- merge(scores, otherdata, by = "QLogin_1")

head(scoresdemo)

# Write out the csv used for data analysis.
write.csv(scoresdemo, file = "HMSL_ScoreData.csv", row.names = FALSE)