Figure 3

Description: Scores by reproducibility criterion

Coder name(s): Althea ArchMiller

Preamble

Load libraries

library(ezknitr)
library(knitr)
library(devtools)
library(ggplot2)

Clear environment and set seed

# Drop every object visible in the current environment before the analysis
# starts (attached packages are not affected).
rm(list = ls())
# Fix the random-number seed; presumably this keeps the jittered points in
# the figure identical between runs.
set.seed(seed = 8675)

Load Data

# Restore the saved objects from the processed-data file into the current
# environment; the rest of the script reads `averages`, which is not created
# anywhere else here, so it is expected to come from this file.
load("data/processed_data/averages_of_reviewed_studies.Rdata")

Empty dataset to fill in for plotting

# Pre-allocate the 30-row summary table (5 questions x 2 responses x 3
# categories). Every cell starts as NA and is filled in row by row.
plottingdata <- as.data.frame(
  matrix(
    NA,
    nrow = 30,
    ncol = 7,
    dimnames = list(
      NULL,
      c("Score", "SD", "upper", "lower", "Category", "Question", "Response")
    )
  )
)

Fill in with Data Availability Question

Yes, graphs

# Row 1 of plottingdata: mean and SD of averages$graphsReproduced over the
# rows where dataAvailable is "True" (missing scores are ignored).
plottingdata[1, c("Score", "SD")] <- with(averages, {
  kept <- graphsReproduced[dataAvailable == "True"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[1, c("Category", "Question", "Response")] <-
  c("Figures reproduced", "Q5: Data available?", "Yes")

No, graphs

# Row 2 of plottingdata: mean and SD of averages$graphsReproduced over the
# rows where dataAvailable is "False" (missing scores are ignored).
plottingdata[2, c("Score", "SD")] <- with(averages, {
  kept <- graphsReproduced[dataAvailable == "False"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[2, c("Category", "Question", "Response")] <-
  c("Figures reproduced", "Q5: Data available?", "No")

Yes, numbers

# Row 3 of plottingdata: mean and SD of averages$numbersReproduced over the
# rows where dataAvailable is "True" (missing scores are ignored).
plottingdata[3, c("Score", "SD")] <- with(averages, {
  kept <- numbersReproduced[dataAvailable == "True"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[3, c("Category", "Question", "Response")] <-
  c("Numbers reproduced", "Q5: Data available?", "Yes")

No, numbers

# Row 4 of plottingdata: mean and SD of averages$numbersReproduced over the
# rows where dataAvailable is "False" (missing scores are ignored).
plottingdata[4, c("Score", "SD")] <- with(averages, {
  kept <- numbersReproduced[dataAvailable == "False"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[4, c("Category", "Question", "Response")] <-
  c("Numbers reproduced", "Q5: Data available?", "No")

Yes, conclusions

# Row 5 of plottingdata: mean and SD of averages$conclusionsReproduced over
# the rows where dataAvailable is "True" (missing scores are ignored).
plottingdata[5, c("Score", "SD")] <- with(averages, {
  kept <- conclusionsReproduced[dataAvailable == "True"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[5, c("Category", "Question", "Response")] <-
  c("Conclusions reproduced", "Q5: Data available?", "Yes")

No, conclusions

# Row 6 of plottingdata: mean and SD of averages$conclusionsReproduced over
# the rows where dataAvailable is "False" (missing scores are ignored).
plottingdata[6, c("Score", "SD")] <- with(averages, {
  kept <- conclusionsReproduced[dataAvailable == "False"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[6, c("Category", "Question", "Response")] <-
  c("Conclusions reproduced", "Q5: Data available?", "No")

Fill in with Raw Data Question

Yes, graphs

# Row 7 of plottingdata: mean and SD of averages$graphsReproduced over the
# rows where preProcessed is "Raw format" (missing scores are ignored).
plottingdata[7, c("Score", "SD")] <- with(averages, {
  kept <- graphsReproduced[preProcessed == "Raw format"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[7, c("Category", "Question", "Response")] <-
  c("Figures reproduced", "Q6: Raw data?", "Yes")

No, graphs

# Row 8 of plottingdata: mean and SD of averages$graphsReproduced over the
# rows where preProcessed is "Pre-processed" (missing scores are ignored).
plottingdata[8, c("Score", "SD")] <- with(averages, {
  kept <- graphsReproduced[preProcessed == "Pre-processed"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[8, c("Category", "Question", "Response")] <-
  c("Figures reproduced", "Q6: Raw data?", "No")

Yes, numbers

# Row 9 of plottingdata: mean and SD of averages$numbersReproduced over the
# rows where preProcessed is "Raw format" (missing scores are ignored).
plottingdata[9, c("Score", "SD")] <- with(averages, {
  kept <- numbersReproduced[preProcessed == "Raw format"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[9, c("Category", "Question", "Response")] <-
  c("Numbers reproduced", "Q6: Raw data?", "Yes")

No, numbers

# Row 10 of plottingdata: mean and SD of averages$numbersReproduced over the
# rows where preProcessed is "Pre-processed" (missing scores are ignored).
plottingdata[10, c("Score", "SD")] <- with(averages, {
  kept <- numbersReproduced[preProcessed == "Pre-processed"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[10, c("Category", "Question", "Response")] <-
  c("Numbers reproduced", "Q6: Raw data?", "No")

Yes, conclusions

# Row 11 of plottingdata: mean and SD of averages$conclusionsReproduced over
# the rows where preProcessed is "Raw format" (missing scores are ignored).
plottingdata[11, c("Score", "SD")] <- with(averages, {
  kept <- conclusionsReproduced[preProcessed == "Raw format"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[11, c("Category", "Question", "Response")] <-
  c("Conclusions reproduced", "Q6: Raw data?", "Yes")

No, conclusions

# Row 12 of plottingdata: mean and SD of averages$conclusionsReproduced over
# the rows where preProcessed is "Pre-processed" (missing scores are ignored).
plottingdata[12, c("Score", "SD")] <- with(averages, {
  kept <- conclusionsReproduced[preProcessed == "Pre-processed"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[12, c("Category", "Question", "Response")] <-
  c("Conclusions reproduced", "Q6: Raw data?", "No")

Fill in with Code Based Question

Yes, graphs

# Row 13 of plottingdata: mean and SD of averages$graphsReproduced over the
# rows where codeBased is "True" (missing scores are ignored).
plottingdata[13, c("Score", "SD")] <- with(averages, {
  kept <- graphsReproduced[codeBased == "True"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[13, c("Category", "Question", "Response")] <-
  c("Figures reproduced", "Q8: Code based?", "Yes")

No, graphs

# Row 14 of plottingdata: mean and SD of averages$graphsReproduced over the
# rows where codeBased is "False" (missing scores are ignored).
plottingdata[14, c("Score", "SD")] <- with(averages, {
  kept <- graphsReproduced[codeBased == "False"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[14, c("Category", "Question", "Response")] <-
  c("Figures reproduced", "Q8: Code based?", "No")

Yes, numbers

# Row 15 of plottingdata: mean and SD of averages$numbersReproduced over the
# rows where codeBased is "True" (missing scores are ignored).
plottingdata[15, c("Score", "SD")] <- with(averages, {
  kept <- numbersReproduced[codeBased == "True"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[15, c("Category", "Question", "Response")] <-
  c("Numbers reproduced", "Q8: Code based?", "Yes")

No, numbers

# Row 16 of plottingdata: mean and SD of averages$numbersReproduced over the
# rows where codeBased is "False" (missing scores are ignored).
plottingdata[16, c("Score", "SD")] <- with(averages, {
  kept <- numbersReproduced[codeBased == "False"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[16, c("Category", "Question", "Response")] <-
  c("Numbers reproduced", "Q8: Code based?", "No")

Yes, conclusions

# Row 17 of plottingdata: mean and SD of averages$conclusionsReproduced over
# the rows where codeBased is "True" (missing scores are ignored).
plottingdata[17, c("Score", "SD")] <- with(averages, {
  kept <- conclusionsReproduced[codeBased == "True"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[17, c("Category", "Question", "Response")] <-
  c("Conclusions reproduced", "Q8: Code based?", "Yes")

No, conclusions

# Row 18 of plottingdata: mean and SD of averages$conclusionsReproduced over
# the rows where codeBased is "False" (missing scores are ignored).
plottingdata[18, c("Score", "SD")] <- with(averages, {
  kept <- conclusionsReproduced[codeBased == "False"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[18, c("Category", "Question", "Response")] <-
  c("Conclusions reproduced", "Q8: Code based?", "No")

Fill in with Code Available Question

Yes, graphs

# Row 19 of plottingdata: mean and SD of averages$graphsReproduced over the
# rows where codeAvailable is "True" (missing scores are ignored).
plottingdata[19, c("Score", "SD")] <- with(averages, {
  kept <- graphsReproduced[codeAvailable == "True"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[19, c("Category", "Question", "Response")] <-
  c("Figures reproduced", "Q7: Code available?", "Yes")

No, graphs

# Row 20 of plottingdata: mean and SD of averages$graphsReproduced over the
# rows where codeAvailable is "False" (missing scores are ignored).
plottingdata[20, c("Score", "SD")] <- with(averages, {
  kept <- graphsReproduced[codeAvailable == "False"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[20, c("Category", "Question", "Response")] <-
  c("Figures reproduced", "Q7: Code available?", "No")

Yes, numbers

# Row 21 of plottingdata: mean and SD of averages$numbersReproduced over the
# rows where codeAvailable is "True" (missing scores are ignored).
plottingdata[21, c("Score", "SD")] <- with(averages, {
  kept <- numbersReproduced[codeAvailable == "True"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[21, c("Category", "Question", "Response")] <-
  c("Numbers reproduced", "Q7: Code available?", "Yes")

No, numbers

# Row 22 of plottingdata: mean and SD of averages$numbersReproduced over the
# rows where codeAvailable is "False" (missing scores are ignored).
plottingdata[22, c("Score", "SD")] <- with(averages, {
  kept <- numbersReproduced[codeAvailable == "False"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[22, c("Category", "Question", "Response")] <-
  c("Numbers reproduced", "Q7: Code available?", "No")

Yes, conclusions

# Row 23 of plottingdata: mean and SD of averages$conclusionsReproduced over
# the rows where codeAvailable is "True" (missing scores are ignored).
plottingdata[23, c("Score", "SD")] <- with(averages, {
  kept <- conclusionsReproduced[codeAvailable == "True"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[23, c("Category", "Question", "Response")] <-
  c("Conclusions reproduced", "Q7: Code available?", "Yes")

No, conclusions

# Row 24 of plottingdata: mean and SD of averages$conclusionsReproduced over
# the rows where codeAvailable is "False" (missing scores are ignored).
plottingdata[24, c("Score", "SD")] <- with(averages, {
  kept <- conclusionsReproduced[codeAvailable == "False"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[24, c("Category", "Question", "Response")] <-
  c("Conclusions reproduced", "Q7: Code available?", "No")

Fill in with Open Source Question

Yes, graphs

# Row 25 of plottingdata: mean and SD of averages$graphsReproduced over the
# rows where openSource is "True" (missing scores are ignored).
plottingdata[25, c("Score", "SD")] <- with(averages, {
  kept <- graphsReproduced[openSource == "True"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[25, c("Category", "Question", "Response")] <-
  c("Figures reproduced", "Q9: Open source?", "Yes")

No, graphs

# Row 26 of plottingdata: mean and SD of averages$graphsReproduced over the
# rows where openSource is "False" (missing scores are ignored).
plottingdata[26, c("Score", "SD")] <- with(averages, {
  kept <- graphsReproduced[openSource == "False"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[26, c("Category", "Question", "Response")] <-
  c("Figures reproduced", "Q9: Open source?", "No")

Yes, numbers

# Row 27 of plottingdata: mean and SD of averages$numbersReproduced over the
# rows where openSource is "True" (missing scores are ignored).
plottingdata[27, c("Score", "SD")] <- with(averages, {
  kept <- numbersReproduced[openSource == "True"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[27, c("Category", "Question", "Response")] <-
  c("Numbers reproduced", "Q9: Open source?", "Yes")

No, numbers

# Row 28 of plottingdata: mean and SD of averages$numbersReproduced over the
# rows where openSource is "False" (missing scores are ignored).
plottingdata[28, c("Score", "SD")] <- with(averages, {
  kept <- numbersReproduced[openSource == "False"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[28, c("Category", "Question", "Response")] <-
  c("Numbers reproduced", "Q9: Open source?", "No")

Yes, conclusions

# Row 29 of plottingdata: mean and SD of averages$conclusionsReproduced over
# the rows where openSource is "True" (missing scores are ignored).
plottingdata[29, c("Score", "SD")] <- with(averages, {
  kept <- conclusionsReproduced[openSource == "True"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[29, c("Category", "Question", "Response")] <-
  c("Conclusions reproduced", "Q9: Open source?", "Yes")

No, conclusions

# Row 30 of plottingdata: mean and SD of averages$conclusionsReproduced over
# the rows where openSource is "False" (missing scores are ignored).
plottingdata[30, c("Score", "SD")] <- with(averages, {
  kept <- conclusionsReproduced[openSource == "False"]
  c(mean(kept, na.rm = TRUE), sd(kept, na.rm = TRUE))
})
plottingdata[30, c("Category", "Question", "Response")] <-
  c("Conclusions reproduced", "Q9: Open source?", "No")

Calculate upper and lower limits for plotting

# Error-bar limits: one standard deviation above and below each group mean.
plottingdata[c("upper", "lower")] <- with(
  plottingdata,
  list(Score + SD, Score - SD)
)

Compile raw data in long format

# Stack the three score columns of `averages` into one long table: each study
# appears once per category, with its score in `Score`, the category label in
# `Category`, and the question columns carried along unchanged.
plottingdata.long <- local({
  criteria <- c("codeRunsAsIs", "dataAvailable", "preProcessed",
                "codeBased", "codeAvailable", "openSource")
  # score column in `averages` -> label used for the facet panels
  outcomes <- c(graphsReproduced = "Figures reproduced",
                numbersReproduced = "Numbers reproduced",
                conclusionsReproduced = "Conclusions reproduced")
  per_outcome <- lapply(names(outcomes), function(outcome) {
    slice <- averages[, c("studyID", outcome, criteria)]
    names(slice)[2] <- "Score"
    slice$Category <- outcomes[[outcome]]
    slice
  })
  # Figures first, then numbers, then conclusions.
  do.call(rbind, per_outcome)
})

# Reshape once more so that every (study, category, question) combination is
# its own row, with the question label in `Question`, the raw answer in
# `originalResponse` and a Yes/No recoding in `Response`.
plottingdata.longComplete <- local({
  # question column -> c(label shown on the x axis, value recoded as "Yes").
  # Any other non-missing value is recoded as "No".
  # NOTE(review): the group summaries only count "False" / "Pre-processed" as
  # "No"; confirm the question columns hold no other values.
  criteria <- list(
    dataAvailable = c("Q5: Data available?", "True"),
    preProcessed  = c("Q6: Raw data?", "Raw format"),
    codeBased     = c("Q8: Code based?", "True"),
    codeAvailable = c("Q7: Code available?", "True"),
    openSource    = c("Q9: Open source?", "True")
  )
  pieces <- lapply(names(criteria), function(criterion) {
    piece <- plottingdata.long[, c("studyID", "Score", "Category", criterion)]
    names(piece)[4] <- "originalResponse"
    piece$Question <- criteria[[criterion]][1]
    piece$Response <- ifelse(
      piece$originalResponse == criteria[[criterion]][2],
      yes = "Yes",
      no = "No"
    )
    piece
  })
  stacked <- do.call(rbind, pieces)
  # Rows whose answer was missing have an NA Response; drop them.
  stacked[!is.na(stacked$Response), ]
})

Cut error bars off at 5

# Flag the groups whose upper limit reaches 5 (cutoff is 5 there, NA
# elsewhere), then clip those upper limits to 5. The flag has to be computed
# before the clipping.
plottingdata[["cutoff"]] <- with(plottingdata, ifelse(upper >= 5, 5, NA))
plottingdata[["upper"]] <- with(plottingdata, ifelse(upper > 5, 5, upper))

Create figure

# Figure 3: one panel per category, questions along the x axis, Yes/No
# responses dodged side by side and distinguished by shape and colour.
ggplot(
  data = plottingdata.longComplete,
  mapping = aes(x = Question, y = Score, shape = Response, colour = Response)
) +
  # Group summaries from plottingdata: point at Score, bar from lower to upper.
  geom_pointrange(
    data = plottingdata,
    mapping = aes(ymin = lower, ymax = upper),
    position = position_dodge(width = 0.6)
  ) +
  # Individual scores from plottingdata.longComplete, jittered and drawn in
  # black on top of the summaries.
  # NOTE(review): these points dodge by 0.7 while the summaries dodge by 0.6;
  # confirm the small offset is intended.
  geom_point(
    colour = "black",
    position = position_jitterdodge(
      jitter.width = 0.15,
      jitter.height = 0.15,
      dodge.width = 0.7
    )
  ) +
  facet_wrap(~Category, nrow = 3) +
  # ylim(1, 5.9) +
  labs(y = "Reproducibility score") +
  theme_classic() +
  theme(legend.position = "top")

## Warning: Removed 4 rows containing missing values (geom_point).

Footer

# Print the R version, platform settings and package versions used for this
# run so the report records the environment it was generated in.
devtools::session_info()

## - Session info ----------------------------------------------------------
##  setting  value                       
##  version  R version 3.4.3 (2017-11-30)
##  os       Windows >= 8 x64            
##  system   x86_64, mingw32             
##  ui       RStudio                     
##  language (EN)                        
##  collate  English_United States.1252  
##  ctype    English_United States.1252  
##  tz       America/Chicago             
##  date     2019-06-03                  
## 
## - Packages --------------------------------------------------------------
##  package     * version date       lib source        
##  assertthat    0.2.0   2017-04-11 [1] CRAN (R 3.4.4)
##  backports     1.1.2   2017-12-13 [1] CRAN (R 3.4.4)
##  bindr         0.1.1   2018-03-13 [1] CRAN (R 3.4.4)
##  bindrcpp      0.2.2   2018-03-29 [1] CRAN (R 3.4.4)
##  callr         3.1.0   2018-12-10 [1] CRAN (R 3.4.4)
##  cli           1.0.1   2018-09-25 [1] CRAN (R 3.4.4)
##  colorspace    1.3-2   2016-12-14 [1] CRAN (R 3.4.4)
##  crayon        1.3.4   2017-09-16 [1] CRAN (R 3.4.4)
##  desc          1.2.0   2018-05-01 [1] CRAN (R 3.4.4)
##  devtools    * 2.0.1   2018-10-26 [1] CRAN (R 3.4.4)
##  digest        0.6.18  2018-10-10 [1] CRAN (R 3.4.4)
##  dplyr         0.7.8   2018-11-10 [1] CRAN (R 3.4.4)
##  evaluate      0.12    2018-10-09 [1] CRAN (R 3.4.4)
##  ezknitr     * 0.6     2016-09-16 [1] CRAN (R 3.4.4)
##  fs            1.2.6   2018-08-23 [1] CRAN (R 3.4.4)
##  ggplot2     * 3.1.0   2018-10-25 [1] CRAN (R 3.4.4)
##  glue          1.3.0   2018-07-17 [1] CRAN (R 3.4.4)
##  gtable        0.2.0   2016-02-26 [1] CRAN (R 3.4.4)
##  highr         0.7     2018-06-09 [1] CRAN (R 3.4.4)
##  knitr       * 1.21    2018-12-10 [1] CRAN (R 3.4.4)
##  labeling      0.3     2014-08-23 [1] CRAN (R 3.4.1)
##  lazyeval      0.2.1   2017-10-29 [1] CRAN (R 3.4.4)
##  magrittr      1.5     2014-11-22 [1] CRAN (R 3.4.3)
##  markdown      0.9     2018-12-07 [1] CRAN (R 3.4.4)
##  memoise       1.1.0   2017-04-21 [1] CRAN (R 3.4.4)
##  mime          0.6     2018-10-05 [1] CRAN (R 3.4.4)
##  munsell       0.5.0   2018-06-12 [1] CRAN (R 3.4.4)
##  pillar        1.3.0   2018-07-14 [1] CRAN (R 3.4.4)
##  pkgbuild      1.0.2   2018-10-16 [1] CRAN (R 3.4.4)
##  pkgconfig     2.0.2   2018-08-16 [1] CRAN (R 3.4.4)
##  pkgload       1.0.2   2018-10-29 [1] CRAN (R 3.4.4)
##  plyr          1.8.4   2016-06-08 [1] CRAN (R 3.4.4)
##  prettyunits   1.0.2   2015-07-13 [1] CRAN (R 3.4.4)
##  processx      3.2.1   2018-12-05 [1] CRAN (R 3.4.4)
##  ps            1.2.1   2018-11-06 [1] CRAN (R 3.4.4)
##  purrr         0.2.5   2018-05-29 [1] CRAN (R 3.4.4)
##  R.methodsS3   1.7.1   2016-02-16 [1] CRAN (R 3.4.1)
##  R.oo          1.22.0  2018-04-22 [1] CRAN (R 3.4.4)
##  R.utils       2.7.0   2018-08-27 [1] CRAN (R 3.4.4)
##  R6            2.3.0   2018-10-04 [1] CRAN (R 3.4.4)
##  Rcpp          1.0.0   2018-11-07 [1] CRAN (R 3.4.4)
##  remotes       2.0.2   2018-10-30 [1] CRAN (R 3.4.4)
##  rlang         0.3.0.1 2018-10-25 [1] CRAN (R 3.4.4)
##  rprojroot     1.3-2   2018-01-03 [1] CRAN (R 3.4.4)
##  scales        1.0.0   2018-08-09 [1] CRAN (R 3.4.4)
##  sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 3.4.4)
##  stringi       1.2.4   2018-07-20 [1] CRAN (R 3.4.4)
##  stringr       1.3.1   2018-05-10 [1] CRAN (R 3.4.4)
##  testthat      2.0.1   2018-10-13 [1] CRAN (R 3.4.4)
##  tibble        1.4.2   2018-01-22 [1] CRAN (R 3.4.4)
##  tidyselect    0.2.5   2018-10-11 [1] CRAN (R 3.4.4)
##  usethis     * 1.4.0   2018-08-14 [1] CRAN (R 3.4.4)
##  withr         2.1.2   2018-03-15 [1] CRAN (R 3.4.4)
##  xfun          0.4     2018-10-23 [1] CRAN (R 3.4.4)
##  yaml          2.2.0   2018-07-25 [1] CRAN (R 3.4.4)
## 
## [1] C:/Users/aarchmil/Documents/R/win-library/3.4
## [2] C:/Program Files/R/R-3.4.3/library

spun with: ezknitr::ezspin(file = "programs/04_reproducibility_criteria_figure.R", keep_md = FALSE, out_dir = "html_reports", fig_dir = "figures")