###################################################### ## Patterson et al - Code to create score variables ## ## This script provides code for calculating the scores ## presented in Patterson et al. (under review). ## ## 2016-09-26 ###################################################### #required libraries library(reshape) library(ggplot2) library(stringr) #Set working directory to location of file setwd("") data = read.csv(file="Pattersonetal2016_FMD_Data.csv", header=T) ######################################################## ## Data processing ######################################################## #Need to reshape the data into long format DR_long = melt(subset(data, select=c("ResponseID", grep("DR_", names(data), value=T))), id.vars="ResponseID") IB_long = melt(subset(data, select=c("ResponseID", grep("IB_", names(data), value=T))), id.vars="ResponseID") IB_long1 = subset(IB_long, IB_long$variable %in% c("IB_title","IB_text", "IB_text2", "IB_text3")==FALSE) DR_long1 = subset(DR_long, DR_long$variable!="DR_title") DR_long1$variable = gsub("DR_", "", DR_long1$variable) names(DR_long1)[3] = "DR" IB_long1$variable = gsub("IB_", "", IB_long1$variable) names(IB_long1)[3] = "IB" #Merge DR (risk of disease spread) and IB (impact on business) datasets datalong = merge(DR_long1, IB_long1, by=c("ResponseID", "variable"), all=T) #Reverse IB, so that higher values = more critical (time until hurts business) datalong$IB_rev = 5 - datalong$IB #Add group for movement type datalong$mvgroup = factor(gsub("_[[:digit:]]*", "", as.character(datalong$variable))) #Create aggregate data - mean, sd, se by movement dataagg = aggregate(subset(datalong, select=c("DR", "IB_rev")), list(datalong$variable), mean, na.rm=T) dataaggsd = aggregate(subset(datalong, select=c("DR", "IB_rev")), list(datalong$variable), sd, na.rm=T) names(dataaggsd)[2:3] = c("DR_sd", "IB_rev_sd") dataagg1 = merge(dataagg, dataaggsd, by="Group.1") dataagg1$DR_se = dataagg1$DR_sd/sqrt(colSums(!is.na((data[,which(names(data) %in% paste("DR_", dataagg1$Group.1, sep=""))])))) dataagg1$IB_rev_se = dataagg1$IB_rev_sd/sqrt(colSums(!is.na((data[,which(names(data) %in% paste("IB_", dataagg1$Group.1, sep=""))])))) #Add group for movement type dataagg1$mvgroup = factor(gsub("_[[:digit:]]*", "", as.character(dataagg1$Group.1))) #create reversed IB scores in the original data for (i in paste("IB_", dataagg1$Group.1, sep="")) { data$a = 5 - data[,which(names(data)==i)] names(data)[which(names(data)=="a")] = paste(i, "rev", sep="_") } #add column with number that corresponds to movement question dataagg1$MovmtNumber = gsub(".*_", "", dataagg1$Group.1) dataagg1$MovmtNumberContin = 1:30 ######################################################## ## Creating Scores ######################################################## #Based on Reviewer feedback: present the average reversed IB score on the graph along with the consensus score for Risk of Disease Spread Presentation of the RDS variable will be changed to indicate the \% majority saying the top two categories (some or high risk = above 50) or the bottom two categories (no or low risk = below 50). The table will include bucketed reverse IB scores based on placement of the means on the rated scale: 3-4 (48 hours to a week); 2.5-3 (7 days to 14 days); 2 - 2.5 (14 days to 21 days); 1-2 (21 days to 60 days). #create new variables for each DR rating that codes 1 as "high" (3 or 4) and 0 as "low" risk (1 or 2) for (i in paste("DR_", dataagg1$Group.1, sep="")) { data$a = ifelse(data[,which(names(data)==i)] > 2, 1, 0) names(data)[which(names(data)=="a")] = paste(i, "phigh", sep="_") } #create function to calculate percent of 1's for each variable percentof1s = function(x) {round((sum(x, na.rm=T)/sum(complete.cases(x)))*100, 2)} #calculate for each new variable dataagg_p = apply(subset(data, select=grep("phigh", names(data), value=T)), 2, percentof1s) #add to aggregated dataset dataagg1$Percent_DR_High = dataagg_p[1:30] dataagg1$checknames = names(dataagg_p)[1:30] #create a new variable for IB means that codes which above category they fall into: dataagg1$IB_rev_cat = ifelse(dataagg1$IB_rev > 3.01, "2 to 7 days", ifelse(dataagg1$IB_rev > 2.51 & dataagg1$IB_rev < 3.01, "7 to 14 days", ifelse(dataagg1$IB_rev > 2.01 & dataagg1$IB_rev < 2.51, "14 to 21 days", ifelse(dataagg1$IB_rev >= 1 & dataagg1$IB_rev < 2.01, "21 to 60 days", NA)))) ######################################################## ## Figure 1 ######################################################## #Set colors for plot and adjust text labels so they don't overlap colors = c("#d7191c","#fdae61", "#ffffbf", "#abdda4", "#2b83ba") dataagg1$Percent_DR_high_nudge = dataagg1$Percent_DR_High dataagg1$Percent_DR_high_nudge[1] = dataagg1$Percent_DR_high_nudge[1]-3 dataagg1$Percent_DR_high_nudge[21] = dataagg1$Percent_DR_high_nudge[21]-3.3 dataagg1$Percent_DR_high_nudge[6] = dataagg1$Percent_DR_high_nudge[6]+3 dataagg1$Percent_DR_high_nudge[10] = dataagg1$Percent_DR_high_nudge[10]-3 #Create static figure ggplot(dataagg1, aes(x=Percent_DR_High, y=IB_rev)) + coord_cartesian(ylim=c(1,4), xlim=c(0,105)) + labs(title="Expert Consensus on Risk of Disease Spread and Average Time to Impact on Business", x="Percent majority consensus of risk of disease spread", y="Average time until negative impact on business") + geom_vline(xintercept=c(25,75), color="gray") + geom_hline(yintercept=c(2, 2.5, 3), lty=2, color="gray") + geom_errorbar(aes(ymin=IB_rev-IB_rev_se, ymax=IB_rev+IB_rev_se, color=dataagg1$mvgroup), width=1) + geom_point(aes(x= dataagg1$Percent_DR_High, y=dataagg1$IB_rev, shape=mvgroup, fill=mvgroup), alpha=.75, size=8.5) + scale_shape_manual(values = c(21:25), name="Movement Type", labels=c("Equipment", "General", "Genetic", "Harvest", "Person")) + scale_fill_manual(name="Movement Type", labels=c("Equipment", "General", "Genetic", "Harvest", "Person"), values=colors) + scale_color_manual(name="Movement Type", labels=c("Equipment", "General", "Genetic", "Harvest", "Person"), values=rep("darkgray", 5)) + scale_y_continuous(breaks = c(1, 2, 3, 4), labels=rev(c("< 48 hours", "7 days", "21 days", "> 60 days"))) + geom_text(aes(x=dataagg1$Percent_DR_high_nudge, y=dataagg1$IB_rev, label=dataagg1$MovmtNumberContin), size=4) + annotate("text", x=12.5, y=1.2, label="Majority Low", size=4) + annotate("text", x=50, y=1.2, label="Unclear Consensus", size=4) + annotate("text", x=90, y=1.2, label="Majority High", size=4) + theme_classic() ######################################################## ## Information for Table ######################################################## dataagg1$Category = ifelse(dataagg1$mvgroup=="equip", "Equipment", ifelse(dataagg1$mvgroup=="general", "General", ifelse(dataagg1$mvgroup=="genetic", "Genetic", ifelse(dataagg1$mvgroup=="harvest", "Harvest", "Person")))) dataagg1$NumCat = paste(dataagg1$Category, dataagg1$MovmtNumberContin, sep=" ") #Create one column for bin (high, even, low), one for % majority in that bin #Mirror scores to match figure - percent majority low or high dataagg1$Placement = ifelse(dataagg1$Percent_DR_High > 25 & dataagg1$Percent_DR_High < 75, "Unclear Consensus", ifelse(dataagg1$Percent_DR_High <= 25, "Low", ifelse(dataagg1$Percent_DR_High >= 75, "High", NA))) scores1 = subset(dataagg1, select=c("Percent_DR_High", "NumCat", "Percent_DR_High", "Placement", "IB_rev_cat")) names(scores1) = c("remove", "Movement", "Percent High RDS", "Binned Risk of Disease Spread", "Time to Negative Business Impact") #order the factors scores1$`Binned Risk of Disease Spread` = factor(scores1$`Binned Risk of Disease Spread`, levels=c("Low", "Unclear Consensus", "High")) scores1$`Time to Negative Business Impact` = factor(scores1$`Time to Negative Business Impact`, levels=c("14 to 21 days", "7 to 14 days", "2 to 7 days")) scores1[order(scores1$`Binned Risk of Disease Spread`, scores1$`Time to Negative Business Impact`, scores1$`Percent High RDS`, decreasing=T) ,-1]