#########################################################################
##
## Data Analysis Script for Hofelich Mohr, Sell, & Lindsay (in prep)
##
## UPDATED: 3/25/2015
#########################################################################
## Reads in files:
##
## HMSL_ScoreData.csv
## (dataset with only complete responses)
##
## HMSL_data_full.csv
## (dataset with all consented responses)
##
## HMSL_data_allcompleted.csv
## (dataset with all completed responses)
##
#########################################################################

## Required libraries
if(!require(car)) {install.packages("car"); library(car)}
if(!require(psych)) {install.packages("psych"); library(psych)}

#########################################################################
## Reading in the data files
#########################################################################
setwd("/Volumes/survey/591330_RSS/591330_dd/Data Sharing")
scoresdemo = read.csv(file="HMSL_ScoreData.csv", header=T)
data = read.csv(file="HMSL_data_full.csv", header=T)
datac = read.csv(file="HMSL_data_allcompleted.csv", header=T)

scoresdemo$Item = factor(scoresdemo$Item)
scoresdemo$Segmented = factor(scoresdemo$Segmented)
scoresdemo$Lines = factor(scoresdemo$Lines, levels=c(5, 10, 15), ordered=TRUE)

#########################################################################
## Plotting time spent on AUT
#########################################################################
#First, let's take a look at the participants who spent longer than expected
#on the timed page (which is possible if they ignored the pop-up warning).
hist(scoresdemo$Qpagetime_2, xlab="Time spent on page", main="Histogram of time spent on page")
#Some obvious outliers... (likely the JavaScript timer broke; these cases
#will need to be excluded anyway)
hist(subset(scoresdemo$Qpagetime_2, scoresdemo$Qpagetime_2 < 10000), xlab="Time spent on page", main="Histogram with outliers > 10,000s removed")
#The cutoff was supposed to be 120s, and it looks like the majority are
#around that time or less.
scoresdemo$LongTimes = ifelse(scoresdemo$Qpagetime_2 > 140, "> 140s", "<= 140s")
as.data.frame(table(factor(scoresdemo$LongTimes)))
#21 participants took longer than 140s on the timed portion. We will remove
#these cases from analysis, then look at counts across conditions.
scoresdemo1 = subset(scoresdemo, scoresdemo$LongTimes=="<= 140s")
summary(scoresdemo1[,2:4])
xtabs(~scoresdemo1$Item + scoresdemo1$Segmented + scoresdemo1$Lines)
#Also remove the one person who reported they were under 18.
scoresdemo1 = scoresdemo1[-(which(scoresdemo1$Qage_1 < 18)),]

#########################################################################
## Hypotheses related to FLUENCY
#########################################################################
#More visible lines (in both segmented & unsegmented cases) will increase fluency.
hist(scoresdemo1$Fluency)
#Because the line counts are ordered points along an interval scale, we will
#treat them numerically rather than categorically. This captures the linear
#effect of increasing the number of lines, rather than looking for
#differences between the three specific line numbers we chose.
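#Side note (illustrative sketch, not part of the original analysis):
#as.numeric() on a factor returns the underlying level codes (1, 2, 3), not
#the labels (5, 10, 15). Because 5, 10, and 15 are equally spaced, the codes
#preserve the linear trend and only rescale the slope. To work with the
#actual line counts instead, convert via the labels:
LinesNumeric = as.numeric(as.character(scoresdemo1$Lines))
table(LinesNumeric, scoresdemo1$Lines)   #confirm the mapping is 5/10/15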
Anova(lm(Fluency ~ as.numeric(Lines), data=scoresdemo1), type="II", white.adjust=TRUE)
summary(lm(Fluency ~ as.numeric(Lines), data=scoresdemo1))
by(scoresdemo1$Fluency, scoresdemo1$Lines, mean)
#Follow-up Welch t-tests on the pairwise means (5 vs 10, 5 vs 15, 10 vs 15)
t.test(Fluency ~ Lines, data=subset(scoresdemo1, scoresdemo1$Lines!=15), var.equal=F)
t.test(Fluency ~ Lines, data=subset(scoresdemo1, scoresdemo1$Lines!=10), var.equal=F)
t.test(Fluency ~ Lines, data=subset(scoresdemo1, scoresdemo1$Lines!=5), var.equal=F)
#Check it as a Poisson model, since fluency is a count
pmf = glm(Fluency ~ as.numeric(Lines), family="poisson", data=scoresdemo1)
summary(pmf)
pmfs = glm(Fluency ~ as.numeric(Lines)*Segmented, family="poisson", data=scoresdemo1)
summary(pmfs)
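#Quick check (sketch, not part of the original analysis): for a Poisson
#model the residual deviance should be close to its degrees of freedom; a
#ratio well above 1 suggests overdispersion, in which case a quasipoisson
#fit gives more honest standard errors.
pmf$deviance / pmf$df.residual
summary(glm(Fluency ~ as.numeric(Lines), family="quasipoisson", data=scoresdemo1))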
#When controlling for number of rows visible (as well as potential
#interactions between rows and segmentation), segmented rows will have
#higher fluency scores than unsegmented rows.
Anova(lm(Fluency ~ as.numeric(Lines)*Segmented, data=scoresdemo1), white.adjust=TRUE)
#The effect of segmentation on fluency is not significant. The interaction
#between lines and segmentation is also not significant for fluency.

#########################################################################
## Hypotheses related to ELABORATION
#########################################################################
#Participants will have less elaboration in the segmented than in the
#non-segmented boxes.
#Elaboration was measured in two different ways: the first was a traditional
#elaboration rating score (0, 1, 2, 3), and the second was the word count of
#the response.
#First we will look at the elaboration scores.
hist(scoresdemo1$Elaboration)
#As expected, these scores are pretty skewed.
#Do they differ by segmentation?
t.test(scoresdemo1$Elaboration ~ scoresdemo1$Segmented, paired=F, var.equal=F)
#Yes. It appears that respondents in the unsegmented condition had higher
#elaboration scores than those in the segmented condition.
#Now for word count.
hist(scoresdemo1$AvgWordCount)
#Are the two measures of elaboration correlated?
plot(scoresdemo1$Elaboration, scoresdemo1$AvgWordCount)
cor.test(scoresdemo1$Elaboration, scoresdemo1$AvgWordCount)
#Yes, quite strongly, with r(594) = .74. Do the effects of segmentation on
#elaboration also hold for word count?
t.test(scoresdemo1$AvgWordCount ~ scoresdemo1$Segmented, paired=F, var.equal=F)
#Yes, again in the same direction, with unsegmented higher than segmented.
#Does the effect differ by the number of boxes/lines?
Anova(lm(Elaboration ~ as.numeric(Lines)*Segmented, data=scoresdemo1), type="II", white.adjust=TRUE)
Anova(lm(AvgWordCount ~ as.numeric(Lines)*Segmented, data=scoresdemo1), type="II", white.adjust=TRUE)
#No.
#Does elaboration decrease with an increasing number of lines?
Anova(lm(Elaboration ~ as.numeric(Lines), data=scoresdemo1), white.adjust=TRUE)
Anova(lm(AvgWordCount ~ as.numeric(Lines), data=scoresdemo1), white.adjust=TRUE)
#Nope.

#########################################################################
## Hypotheses related to FLEXIBILITY
#########################################################################
#Participants will have higher flexibility in the segmented than in the
#unsegmented boxes.
#Because the flexibility scores are necessarily bounded by the number of
#items people put down (fluency), flexibility needs to be examined relative
#to fluency. There are a few ways to treat the flexibility scores.
#First, we can use a ratio of flexibility to fluency scores. This is often
#done in the literature, but it can get hairy with participants who gave
#only 1 response, as they will have the same ratio as someone with 10
#responses in 10 different categories, even though they are arguably
#showing less flexibility.
scoresdemo1$FlexFluRatio = scoresdemo1$Flexibility / scoresdemo1$Fluency
t.test(scoresdemo1$FlexFluRatio ~ scoresdemo1$Segmented)
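#Illustration with toy numbers (not from the data): one response in one
#category and ten responses in ten different categories both yield a ratio
#of 1, which is why the ratio can overstate the flexibility of low-fluency
#participants.
c(one_of_one = 1/1, ten_of_ten = 10/10)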
#The second way is to remove the participants who only gave one response, so
#that ratios of 1 are based on people with multiple responses. But the best
#option, we think, is to look at the flexibility scores themselves (rather
#than the ratio) and include fluency as a covariate in the model, to
#determine whether segmentation predicts flexibility over and above fluency.
hist(scoresdemo1$Flexibility)
Anova(lm(Flexibility ~ Segmented + Fluency, data=scoresdemo1), type="II", white.adjust=T)
#No effect of segmentation once fluency is controlled.

#########################################################################
## Hypotheses related to non-response
#########################################################################
#Segmentation or number of lines could lead to greater non-compliance (not responding).
data$nogo = ifelse(data$Segmented=="unsegmented" & data$Qunseg=="", "no response",
                   ifelse(data$Segmented=="segmented" & data$Qseg_1=="", "no response", "responded"))
as.data.frame(table(data$nogo))
datac$nogo = ifelse(datac$Segmented=="unsegmented" & datac$Qunseg=="", "no response",
                    ifelse(datac$Segmented=="segmented" & datac$Qseg_1=="", "no response", "responded"))
as.data.frame(table(datac$nogo))
#Of the initial consenters, 115 participants did not complete the Originality
#task (left the unsegmented box or the first segmented box blank). Only 1
#completer left it blank.
xtabs(~ data$nogo + data$Segmented)
chisq.test(xtabs(~ data$nogo + data$Segmented))
xtabs(~ data$nogo + data$lines)
chisq.test(xtabs(~ data$nogo + as.numeric(data$lines)))
#Incidents of non-response did not differ by number of lines or by whether
#the boxes were segmented or unsegmented.

#########################################################################
## Hypotheses related to Originality
#########################################################################
#No general hypotheses here; we are just seeing whether any of our
#manipulations affect it. We can start with the obvious test of whether
#lines and segmentation influence creativity scores.
hist(scoresdemo1$Originality)
Anova(lm(Originality ~ as.numeric(Lines)*Segmented, data=scoresdemo1), type="II", white.adjust=T)
#Significant interaction between the two.
by(scoresdemo1$Originality, list(scoresdemo1$Lines, scoresdemo1$Segmented), mean)
barplot(by(scoresdemo1$Originality, list(scoresdemo1$Lines, scoresdemo1$Segmented), mean),
        beside=T, legend=T, args.legend=list(x="bottomright"))
#Follow-up ANOVAs on segmented and unsegmented separately to see what is happening.
Anova(lm(Originality ~ as.numeric(Lines), data=subset(scoresdemo1, scoresdemo1$Segmented=="segmented")), white.adjust=T)
Anova(lm(Originality ~ as.numeric(Lines), data=subset(scoresdemo1, scoresdemo1$Segmented=="unsegmented")), white.adjust=T)
summary(lm(Originality ~ as.numeric(Lines), data=subset(scoresdemo1, scoresdemo1$Segmented=="segmented")))
summary(lm(Originality ~ as.numeric(Lines), data=subset(scoresdemo1, scoresdemo1$Segmented=="unsegmented")))

#########################################################################
## Hypotheses related to PERSONALITY SCALES
#########################################################################
#BFI scale scoring ("R" denotes reverse-scored items):
#Extraversion: 1, 6R, 11, 16, 21R, 26, 31R, 36
#Agreeableness: 2R, 7, 12R, 17, 22, 27R, 32, 37R, 42
#Conscientiousness: 3, 8R, 13, 18R, 23R, 28, 33, 38, 43R
#Neuroticism: 4, 9R, 14, 19, 24R, 29, 34R, 39
#Openness: 5, 10, 15, 20, 25, 30, 35R, 40, 41R, 44

#Helper to convert a factor's labels (not its level codes) to numeric.
as.numeric.factor <- function(x) {as.numeric(levels(x))[x]}
#Recode "[Not Answered]" to NA, then convert the BFI items to numeric.
for (i in grep("BFI", names(scoresdemo1))){
  scoresdemo1[which(scoresdemo1[,i]=="[Not Answered]"),i] = NA
  scoresdemo1[,i] = as.numeric.factor(scoresdemo1[,i])
}
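#A more compact equivalent of the explicit scale scoring below (a sketch;
#the helper name and approach are ours, not part of the original script).
#Items are on a 1-5 scale, so a reverse-scored item is 6 minus the response.
scoreBFI = function(df, items, rev) {
  m = sapply(items, function(i) df[[paste0("QBFI_A_", i)]])   #item matrix
  m[, items %in% rev] = 6 - m[, items %in% rev]               #reverse-score
  rowMeans(m)
}
#e.g., Extraversion: scoreBFI(scoresdemo1, c(1,6,11,16,21,26,31,36), rev=c(6,21,31))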
scoresdemo1$BFI.Extraversion = rowMeans(cbind(scoresdemo1$QBFI_A_1, (6-scoresdemo1$QBFI_A_6),
    scoresdemo1$QBFI_A_11, scoresdemo1$QBFI_A_16, (6-scoresdemo1$QBFI_A_21),
    scoresdemo1$QBFI_A_26, (6-scoresdemo1$QBFI_A_31), scoresdemo1$QBFI_A_36))
alpha(as.data.frame(cbind(scoresdemo1$QBFI_A_1, (6-scoresdemo1$QBFI_A_6),
    scoresdemo1$QBFI_A_11, scoresdemo1$QBFI_A_16, (6-scoresdemo1$QBFI_A_21),
    scoresdemo1$QBFI_A_26, (6-scoresdemo1$QBFI_A_31), scoresdemo1$QBFI_A_36)))
#alpha = .85
scoresdemo1$BFI.Agreeableness = rowMeans(cbind((6-scoresdemo1$QBFI_A_2), scoresdemo1$QBFI_A_7,
    (6-scoresdemo1$QBFI_A_12), scoresdemo1$QBFI_A_17, scoresdemo1$QBFI_A_22,
    (6-scoresdemo1$QBFI_A_27), scoresdemo1$QBFI_A_32, (6-scoresdemo1$QBFI_A_37), scoresdemo1$QBFI_A_42))
alpha(as.data.frame(cbind((6-scoresdemo1$QBFI_A_2), scoresdemo1$QBFI_A_7,
    (6-scoresdemo1$QBFI_A_12), scoresdemo1$QBFI_A_17, scoresdemo1$QBFI_A_22,
    (6-scoresdemo1$QBFI_A_27), scoresdemo1$QBFI_A_32, (6-scoresdemo1$QBFI_A_37), scoresdemo1$QBFI_A_42)))
#alpha = .78
scoresdemo1$BFI.Conscientiousness = rowMeans(cbind(scoresdemo1$QBFI_A_3, (6-scoresdemo1$QBFI_A_8),
    scoresdemo1$QBFI_A_13, (6-scoresdemo1$QBFI_A_18), (6-scoresdemo1$QBFI_A_23),
    scoresdemo1$QBFI_A_28, scoresdemo1$QBFI_A_33, scoresdemo1$QBFI_A_38, (6-scoresdemo1$QBFI_A_43)))
alpha(as.data.frame(cbind(scoresdemo1$QBFI_A_3, (6-scoresdemo1$QBFI_A_8),
    scoresdemo1$QBFI_A_13, (6-scoresdemo1$QBFI_A_18), (6-scoresdemo1$QBFI_A_23),
    scoresdemo1$QBFI_A_28, scoresdemo1$QBFI_A_33, scoresdemo1$QBFI_A_38, (6-scoresdemo1$QBFI_A_43))))
#alpha = .81
scoresdemo1$BFI.Neuroticism = rowMeans(cbind(scoresdemo1$QBFI_A_4, (6-scoresdemo1$QBFI_A_9),
    scoresdemo1$QBFI_A_14, scoresdemo1$QBFI_A_19, (6-scoresdemo1$QBFI_A_24),
    scoresdemo1$QBFI_A_29, (6-scoresdemo1$QBFI_A_34), scoresdemo1$QBFI_A_39))
alpha(as.data.frame(cbind(scoresdemo1$QBFI_A_4, (6-scoresdemo1$QBFI_A_9),
    scoresdemo1$QBFI_A_14, scoresdemo1$QBFI_A_19, (6-scoresdemo1$QBFI_A_24),
    scoresdemo1$QBFI_A_29, (6-scoresdemo1$QBFI_A_34), scoresdemo1$QBFI_A_39)))
#alpha = .84
scoresdemo1$BFI.Openness = rowMeans(cbind(scoresdemo1$QBFI_A_5, scoresdemo1$QBFI_A_10,
    scoresdemo1$QBFI_A_15, scoresdemo1$QBFI_A_20, scoresdemo1$QBFI_A_25,
    scoresdemo1$QBFI_A_30, (6-scoresdemo1$QBFI_A_35), scoresdemo1$QBFI_A_40,
    (6-scoresdemo1$QBFI_A_41), scoresdemo1$QBFI_A_44))
alpha(as.data.frame(cbind(scoresdemo1$QBFI_A_5, scoresdemo1$QBFI_A_10,
    scoresdemo1$QBFI_A_15, scoresdemo1$QBFI_A_20, scoresdemo1$QBFI_A_25,
    scoresdemo1$QBFI_A_30, (6-scoresdemo1$QBFI_A_35), scoresdemo1$QBFI_A_40,
    (6-scoresdemo1$QBFI_A_41), scoresdemo1$QBFI_A_44)))
#alpha = .81

#Means, SDs, and medians of the scale scores, plus correlations between the
#scales (columns 60:64 are the five BFI scale scores just created):
colMeans(scoresdemo1[,60:64], na.rm=T)
apply(scoresdemo1[,60:64], 2, sd, na.rm=T)
apply(scoresdemo1[,60:64], 2, median, na.rm=T)
cor(scoresdemo1[,60:64], use="complete.obs")

#Take a median split of conscientiousness: above the median = high, below =
#low; scores at the median are excluded.
hist(scoresdemo1$BFI.Conscientiousness)
median(scoresdemo1$BFI.Conscientiousness, na.rm=T)
scoresdemo1$MedianSplit.Conscientiousness = ifelse(scoresdemo1$BFI.Conscientiousness < median(scoresdemo1$BFI.Conscientiousness, na.rm=T), "low",
    ifelse(scoresdemo1$BFI.Conscientiousness > median(scoresdemo1$BFI.Conscientiousness, na.rm=T), "high", NA))
#Check the split
summary(scoresdemo1$BFI.Conscientiousness)
summary(as.factor(scoresdemo1$MedianSplit.Conscientiousness))
highc = subset(scoresdemo1, scoresdemo1$MedianSplit.Conscientiousness=="high")
lowc = subset(scoresdemo1, scoresdemo1$MedianSplit.Conscientiousness=="low")
mean(highc$BFI.Conscientiousness)
sd(highc$BFI.Conscientiousness)
mean(lowc$BFI.Conscientiousness)
sd(lowc$BFI.Conscientiousness)

#Is the interaction between lines and segmentation for Originality heightened
#in high- versus low-conscientiousness individuals?
Anova(lm(Originality ~ as.numeric(Lines)*Segmented*MedianSplit.Conscientiousness, data=scoresdemo1), white.adjust=T)
#The 3-way interaction is ns, but since this was an a priori hypothesis we
#will check each group individually.
Anova(lm(Originality ~ as.numeric(Lines)*Segmented, data=highc), white.adjust=T)
Anova(lm(Originality ~ as.numeric(Lines)*Segmented, data=lowc), white.adjust=T)
#The interaction is significant in high-, but not low-, conscientiousness participants.
par(mfrow=c(1, 2))
barplot(by(highc$Originality, list(highc$Lines, highc$Segmented), mean, na.rm=T),
        beside=T, main="High Conscientiousness", legend=T, args.legend=list(x="bottom"))
barplot(by(lowc$Originality, list(lowc$Lines, lowc$Segmented), mean, na.rm=T),
        beside=T, main="Low Conscientiousness", legend=T, args.legend=list(x="bottom"))
#Follow-up tests to examine the direction
summary(lm(Originality ~ as.numeric(Lines), data=subset(highc, highc$Segmented=="unsegmented")))
summary(lm(Originality ~ as.numeric(Lines), data=subset(highc, highc$Segmented=="segmented")))

#Are the other effects we found heightened in highly conscientious participants?
Anova(lm(Fluency ~ as.numeric(Lines)*MedianSplit.Conscientiousness, data=scoresdemo1), white.adjust=T)
Anova(lm(Fluency ~ as.numeric(Lines), data=highc), white.adjust=T)
Anova(lm(Fluency ~ as.numeric(Lines), data=lowc), white.adjust=T)
#ns
summary(lm(Fluency ~ as.numeric(Lines), data=highc))
summary(lm(Fluency ~ as.numeric(Lines), data=lowc))
Anova(lm(Elaboration ~ Segmented*MedianSplit.Conscientiousness, data=scoresdemo1), white.adjust=T)
t.test(Elaboration ~ Segmented, data=highc, var.equal=F)
t.test(Elaboration ~ Segmented, data=lowc, var.equal=F)
Anova(lm(AvgWordCount ~ Segmented*MedianSplit.Conscientiousness, data=scoresdemo1), white.adjust=T)
t.test(AvgWordCount ~ Segmented, data=highc, var.equal=F)
t.test(AvgWordCount ~ Segmented, data=lowc, var.equal=F)
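#Alternative check (sketch, not part of the original analysis): median
#splits discard information, so the same moderation question can also be
#asked with conscientiousness kept continuous.
Anova(lm(Originality ~ as.numeric(Lines)*Segmented*BFI.Conscientiousness, data=scoresdemo1), white.adjust=T)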
#########################################################################
## Does the variance in scores differ between manipulations?
#########################################################################
#Looking at whether the variance within each group differs between groups
#(e.g., whether some designs give a greater or worse ability to distinguish
#between respondents).
#First we need to check whether a normal distribution can be assumed:
hist(scoresdemo1$Fluency)
hist(scoresdemo1$Elaboration)
hist(scoresdemo1$AvgWordCount)
hist(scoresdemo1$Flexibility)
hist(scoresdemo1$FlexFluRatio)
hist(scoresdemo1$Originality)
#Fluency, Flexibility, and Originality are approximately normal; average
#word count and elaboration are not, so we use Bartlett's test for the
#former and Levene's test for the latter.

#FLUENCY
by(scoresdemo1$Fluency, scoresdemo1$Lines, var)
bartlett.test(Fluency ~ Lines, data=scoresdemo1)
bartlett.test(Fluency ~ Lines, data=subset(scoresdemo1, scoresdemo1$Lines!=15))
bartlett.test(Fluency ~ Lines, data=subset(scoresdemo1, scoresdemo1$Lines!=10))
bartlett.test(Fluency ~ Lines, data=subset(scoresdemo1, scoresdemo1$Lines!=5))
#The group variances are different, as Bartlett's test for homogeneity of
#variances is significant. Follow-up tests reveal the biggest difference is
#between 5 and 15 lines, with 10 differing from neither.
#Does the variance of fluency also differ by segmentation?
by(scoresdemo1$Fluency, scoresdemo1$Segmented, var)
bartlett.test(Fluency ~ Segmented, data=scoresdemo1)
#Not significantly different.

#ELABORATION
by(scoresdemo1$Elaboration, scoresdemo1$Lines, var)
leveneTest(scoresdemo1$Elaboration, scoresdemo1$Lines)
by(scoresdemo1$AvgWordCount, scoresdemo1$Lines, var)
leveneTest(scoresdemo1$AvgWordCount, scoresdemo1$Lines)
#No difference by number of lines.
#Differ by segmentation?
by(scoresdemo1$Elaboration, scoresdemo1$Segmented, var)
leveneTest(scoresdemo1$Elaboration, scoresdemo1$Segmented)
by(scoresdemo1$AvgWordCount, scoresdemo1$Segmented, var)
leveneTest(scoresdemo1$AvgWordCount, scoresdemo1$Segmented)
#Yes, significantly: more variance in elaboration scores and average word
#count for the unsegmented than the segmented group.

#FLEXIBILITY
#Because there is no way to control for fluency in this test, we use the
#ratio scores (which behaved similarly above).
leveneTest(scoresdemo1$FlexFluRatio, scoresdemo1$Lines)
leveneTest(scoresdemo1$FlexFluRatio, scoresdemo1$Segmented)

#ORIGINALITY
by(scoresdemo1$Originality, scoresdemo1$Lines, var)
bartlett.test(Originality ~ Lines, data=scoresdemo1)
bartlett.test(Originality ~ Lines, data=subset(scoresdemo1, scoresdemo1$Lines!=15))
bartlett.test(Originality ~ Lines, data=subset(scoresdemo1, scoresdemo1$Lines!=10))
bartlett.test(Originality ~ Lines, data=subset(scoresdemo1, scoresdemo1$Lines!=5))
bartlett.test(Originality ~ Segmented, data=scoresdemo1)
by(scoresdemo1$Originality, scoresdemo1$Segmented, var)
#Variance in Originality scores differed by number of lines, with the
#greatest variance at 5 and 15 lines; these did not differ from one another,
#but both differed significantly from 10 lines. No difference by
#segmentation (only a trend).
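#Robustness note (sketch, not part of the original analysis): the
#Fligner-Killeen test is a rank-based homogeneity-of-variance test that is
#robust to non-normality and can double-check the Levene results above.
fligner.test(Elaboration ~ Segmented, data=scoresdemo1)
fligner.test(AvgWordCount ~ Segmented, data=scoresdemo1)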
#########################################################################
## TABLES FOR PAPER
#########################################################################
#Table 1 - means (SD) by box number/type. Columns 5:9 hold the five outcome
#scores (Fluency, AvgWordCount, Elaboration, Originality, Flexibility).
roundmeans = function(x) {round(mean(x, na.rm=T), digits=2)}
roundsd = function(x) {round(sd(x, na.rm=T), digits=2)}
meansT1 = aggregate(scoresdemo1[,5:9], list(scoresdemo1$Lines, scoresdemo1$Segmented), roundmeans)
sdT1 = aggregate(scoresdemo1[,5:9], list(scoresdemo1$Lines, scoresdemo1$Segmented), roundsd)
meansT1$condition = paste(meansT1$Group.1, meansT1$Group.2, sep="_")
#Format each cell as "mean (SD)"
meansT1$Fluency.c = paste(meansT1$Fluency, " (", sdT1$Fluency, ")", sep="")
meansT1$AvgWordCount.c = paste(meansT1$AvgWordCount, " (", sdT1$AvgWordCount, ")", sep="")
meansT1$Elaboration.c = paste(meansT1$Elaboration, " (", sdT1$Elaboration, ")", sep="")
meansT1$Originality.c = paste(meansT1$Originality, " (", sdT1$Originality, ")", sep="")
meansT1$Flexibility.c = paste(meansT1$Flexibility, " (", sdT1$Flexibility, ")", sep="")
write.csv(t(meansT1[,8:13]), file="Table1.csv")

#Table 2 - means (SD) for high/low conscientiousness
meansT2 = aggregate(scoresdemo1[,5:9], list(scoresdemo1$Lines, scoresdemo1$Segmented, scoresdemo1$MedianSplit.Conscientiousness), roundmeans)
sdT2 = aggregate(scoresdemo1[,5:9], list(scoresdemo1$Lines, scoresdemo1$Segmented, scoresdemo1$MedianSplit.Conscientiousness), roundsd)
meansT2$condition = paste(meansT2$Group.1, meansT2$Group.2, meansT2$Group.3, sep="_")
meansT2$Fluency.c = paste(meansT2$Fluency, " (", sdT2$Fluency, ")", sep="")
meansT2$AvgWordCount.c = paste(meansT2$AvgWordCount, " (", sdT2$AvgWordCount, ")", sep="")
meansT2$Elaboration.c = paste(meansT2$Elaboration, " (", sdT2$Elaboration, ")", sep="")
meansT2$Originality.c = paste(meansT2$Originality, " (", sdT2$Originality, ")", sep="")
meansT2$Flexibility.c = paste(meansT2$Flexibility, " (", sdT2$Flexibility, ")", sep="")
write.csv(t(meansT2[,9:14]), file="Table2.csv")

#########################################################################
## Looking at ceiling effects
#########################################################################
#Did participants report more items than the artificial ceiling?
by(scoresdemo1$Fluency, scoresdemo1$Lines, summary)
length(which(subset(scoresdemo1, scoresdemo1$Lines==5)$Fluency > 5))
dim(subset(scoresdemo1, scoresdemo1$Lines==5))
length(which(subset(scoresdemo1, scoresdemo1$Lines==10)$Fluency > 10))
dim(subset(scoresdemo1, scoresdemo1$Lines==10))
length(which(subset(scoresdemo1, scoresdemo1$Lines==15)$Fluency > 15))
dim(subset(scoresdemo1, scoresdemo1$Lines==15))
#Look only at the condition where the ceiling matters: segmented
length(which(subset(scoresdemo1, scoresdemo1$Lines==5 & scoresdemo1$Segmented=="segmented")$Fluency > 5))
dim(subset(scoresdemo1, scoresdemo1$Lines==5 & scoresdemo1$Segmented=="segmented"))
length(which(subset(scoresdemo1, scoresdemo1$Lines==10 & scoresdemo1$Segmented=="segmented")$Fluency > 10))
dim(subset(scoresdemo1, scoresdemo1$Lines==10 & scoresdemo1$Segmented=="segmented"))
length(which(subset(scoresdemo1, scoresdemo1$Lines==15 & scoresdemo1$Segmented=="segmented")$Fluency > 15))
dim(subset(scoresdemo1, scoresdemo1$Lines==15 & scoresdemo1$Segmented=="segmented"))
#More people went over the ceiling in the 5-line condition than in the 10 or
#15, but every condition had at least one.

#########################################################################
## DEMOGRAPHICS of our sample
#########################################################################
#Columns 56:58 hold the demographic variables.
summary(scoresdemo1[,56:58])
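#Closing check (sketch, not part of the original script): final analyzed
#sample size and cell counts after the exclusions above.
nrow(scoresdemo1)
xtabs(~ scoresdemo1$Segmented + scoresdemo1$Lines)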