# p-hacker functions and data generation code # merged into a single file April 11 2016 # Modified Dec 9 2015: this is the version being used for dissertation figure # The following packages/libraries are required: library(dplyr) library(metafor) library(meta) library(pwr) library(readxl) # If you encounter errors when loading these libraries, check to ensure that # you've installed them first. # In order to generate the p-hacked datasets and figure in Ch. 5 of the # dissertation text, first run: # generate_phacked_datasets() # this will take a while to run - when it finishes, try: # bigfig() # Note that the p-hacking functions in this file are dependent on general p-curve # functions in the pcurve-general-functions.R file, so load that first (may need to set working directory to match file location). source('pcurve-general-functions.R', chdir = TRUE) generate_phacked_datasets = function(){ # RNGkind(kind="Mersenne") set.seed(10097) # First group of digits printed in RAND table # number of significant studies to generate for each researcher/ES combination: n_studies = 5000 # sample sizes: note that for experimenters A and B, subjects aren't added to nonsignifcant experiments. # so for A and B, the sample size is drawn from between the first starting_n value and the last max_n value. # if the distance between these values is less than the # of Ss added, histograms of the distribution of sample # sizes are all weird and discontinuous. starting_n = c(10, 20) # max sample size range: max_n = c(30, 40) # print("researcher A") rA_es0.0 <<- p_hacker(n_studies = n_studies, true_ES = 0.0, starting_n = starting_n, max_n = max_n, researcher = "A") rA_es0.2 <<- p_hacker(n_studies = n_studies, true_ES = 0.2, starting_n = starting_n, max_n = max_n, researcher = "A") rA_es0.4 <<- p_hacker(n_studies = n_studies, true_ES = 0.4, starting_n = starting_n, max_n = max_n, researcher = "A") rA_es0.6 <<- p_hacker(n_studies = n_studies, true_ES = 0.6, starting_n = starting_n, max_n = max_n, researcher = "A") rA_es0.8 <<- p_hacker(n_studies = n_studies, true_ES = 0.8, starting_n = starting_n, max_n = max_n, researcher = "A") print("researcher B") rB_es0.0 <<- p_hacker(n_studies = n_studies, true_ES = 0.0, starting_n = starting_n, max_n = max_n, researcher = "B") rB_es0.2 <<- p_hacker(n_studies = n_studies, true_ES = 0.2, starting_n = starting_n, max_n = max_n, researcher = "B") rB_es0.4 <<- p_hacker(n_studies = n_studies, true_ES = 0.4, starting_n = starting_n, max_n = max_n, researcher = "B") rB_es0.6 <<- p_hacker(n_studies = n_studies, true_ES = 0.6, starting_n = starting_n, max_n = max_n, researcher = "B") rB_es0.8 <<- p_hacker(n_studies = n_studies, true_ES = 0.8, starting_n = starting_n, max_n = max_n, researcher = "B") print("researcher C") rC_es0.0 <<- p_hacker(n_studies = n_studies, true_ES = 0.0, starting_n = starting_n, max_n = max_n, researcher = "C") rC_es0.2 <<- p_hacker(n_studies = n_studies, true_ES = 0.2, starting_n = starting_n, max_n = max_n, researcher = "C") rC_es0.4 <<- p_hacker(n_studies = n_studies, true_ES = 0.4, starting_n = starting_n, max_n = max_n, researcher = "C") rC_es0.6 <<- p_hacker(n_studies = n_studies, true_ES = 0.6, starting_n = starting_n, max_n = max_n, researcher = "C") rC_es0.8 <<- p_hacker(n_studies = n_studies, true_ES = 0.8, starting_n = starting_n, max_n = max_n, researcher = "C") print("researcher D") rD_es0.0 <<- p_hacker(n_studies = n_studies, true_ES = 0.0, starting_n = starting_n, max_n = max_n, researcher = "D") rD_es0.2 <<- p_hacker(n_studies = n_studies, true_ES = 0.2, starting_n = starting_n, max_n = max_n, researcher = "D") rD_es0.4 <<- p_hacker(n_studies = n_studies, true_ES = 0.4, starting_n = starting_n, max_n = max_n, researcher = "D") rD_es0.6 <<- p_hacker(n_studies = n_studies, true_ES = 0.6, starting_n = starting_n, max_n = max_n, researcher = "D") rD_es0.8 <<- p_hacker(n_studies = n_studies, true_ES = 0.8, starting_n = starting_n, max_n = max_n, researcher = "D") print("researcher E") rE_es0.0 <<- p_hacker(n_studies = n_studies, true_ES = 0.0, starting_n = starting_n, max_n = max_n, researcher = "E") rE_es0.2 <<- p_hacker(n_studies = n_studies, true_ES = 0.2, starting_n = starting_n, max_n = max_n, researcher = "E") rE_es0.4 <<- p_hacker(n_studies = n_studies, true_ES = 0.4, starting_n = starting_n, max_n = max_n, researcher = "E") rE_es0.6 <<- p_hacker(n_studies = n_studies, true_ES = 0.6, starting_n = starting_n, max_n = max_n, researcher = "E") rE_es0.8 <<- p_hacker(n_studies = n_studies, true_ES = 0.8, starting_n = starting_n, max_n = max_n, researcher = "E") print("researcher F") rF_es0.0 <<- p_hacker(n_studies = n_studies, true_ES = 0.0, starting_n = starting_n, max_n = max_n, researcher = "F") rF_es0.2 <<- p_hacker(n_studies = n_studies, true_ES = 0.2, starting_n = starting_n, max_n = max_n, researcher = "F") rF_es0.4 <<- p_hacker(n_studies = n_studies, true_ES = 0.4, starting_n = starting_n, max_n = max_n, researcher = "F") rF_es0.6 <<- p_hacker(n_studies = n_studies, true_ES = 0.6, starting_n = starting_n, max_n = max_n, researcher = "F") rF_es0.8 <<- p_hacker(n_studies = n_studies, true_ES = 0.8, starting_n = starting_n, max_n = max_n, researcher = "F") } ################################################################################ #################### bigfig ################################################################################ # this function generates the large 7x6 pcurve plot using simulated data bigfig = function(){ plot.new() par(mfrow=c(7,6)) par(mar=c(3, 3, 3, 3)) plot.new() mtext(" \n ", side = 1) plot.new() mtext("d = 0.0 \n", side = 1, cex = 1.2) plot.new() mtext("d = 0.2 \n", side = 1, cex = 1.2) plot.new() mtext("d = 0.4 \n", side = 1, cex = 1.2) plot.new() mtext("d = 0.6 \n", side = 1, cex = 1.2) plot.new() mtext("d = 0.8 \n", side = 1, cex = 1.2) plot.new() mtext(" \nresearcher A\n1 DV, no peeking", padj = 1) pcqh(rA_es0.0, ylabel = "Frequency") pcqh(rA_es0.2) pcqh(rA_es0.4) pcqh(rA_es0.6) pcqh(rA_es0.8) plot.new() mtext("\nresearcher B\n1 DV, add n=10 if NS", padj = 1) pcqh(rB_es0.0, ylabel = "Frequency") pcqh(rB_es0.2) pcqh(rB_es0.4) pcqh(rB_es0.6) pcqh(rB_es0.8) plot.new() mtext("\nresearcher C\n1 DV, add n=2 if NS", padj = 1) pcqh(rC_es0.0, ylabel = "Frequency") pcqh(rC_es0.2) pcqh(rC_es0.4) pcqh(rC_es0.6) pcqh(rC_es0.8) plot.new() mtext("researcher D\n2 DVs, no peeking\nexclude >2SD if NS\n exclude bad Ss if NS", padj = 1) pcqh(rD_es0.0, ylabel = "Frequency") pcqh(rD_es0.2) pcqh(rD_es0.4) pcqh(rD_es0.6) pcqh(rD_es0.8) plot.new() mtext("researcher E\n2 DVs, add n=10 if NS\nexclude >2SD if NS\nexclude bad Ss if NS", padj = 1) pcqh(rE_es0.0, ylabel = "Frequency") pcqh(rE_es0.2) pcqh(rE_es0.4) pcqh(rE_es0.6) pcqh(rE_es0.8) plot.new() mtext("researcher F\n2 DVs, add n=2 if NS\nexclude >2SD if NS\nexclude bad Ss if NS", padj = 1) pcqh(rF_es0.0, ylabel = "Frequency") pcqh(rF_es0.2) pcqh(rF_es0.4) pcqh(rF_es0.6) pcqh(rF_es0.8) } ################################################################################ #################### p_hacker # this function is a wrapper for p_hacker_single. it conducts multiple studies # and returns a table, one row per study. p_hacker <- function(n_studies, true_ES, starting_n, max_n, researcher){ start_time = Sys.time() studies = data.frame() for(junk in 1:n_studies){ study = p_hacker_single(true_ES = true_ES, starting_n = starting_n, max_n = max_n, researcher) studies = rbind(studies, study) } rownames(studies) = NULL print(paste0("Time to simulate ", n_studies, " testing ES = ", sprintf("%.2f", true_ES), " in seconds: ", (Sys.time()-start_time))) return(studies) } ################################################################################ #################### p_hacker_single # this is the main function that simulates p-hacking behaviors. it takes in # a true effect size, min and max sample sizes, and a predefined researcher # 'profile' that determines a set of p-hacking methods. Researchers are lettered # A through F. The function returns results from a single study formatted like: # ID_string, t_observed, df_observed, p_value, d_value # Note that this function always returns a set of results; if an initial attempt # fails or reaches a max sample size, a new sample will be generated. p_hacker_single <- function(true_ES, starting_n, max_n, researcher){ # this p-hacks to produce a single significant study # first generate an experiment ID identifying the researcher and true ES, to be attached to each study expID = paste0(researcher, "-", sprintf("%.2f", true_ES)) # note that if using researchers A or D, max_n is not used, sample sizes generated from starting_n alone. # if researcher is A or D, have starting_n represent the full range like so: if(researcher %in% c("A", "D")){ starting_n[2] = max_n[2] } # if using researchers (B, C, E, F), generate dataset and stopping point ahead of time if(researcher %in% c("B", "C", "E", "F")){ data = gen_sample(true_ES = true_ES, starting_n = starting_n) stop_n = round(runif(1, min=max_n[1], max=max_n[2])) } # plain = NULL outliers = NULL dropbad = NULL # # n_attempts records the number of experiments conducted to achieve a significant result (including the 'successful' experiment) n_attempts = 1 # ############################################# ################################ researcher A while(researcher == "A"){ # researcher A does not p-hack, only tests one DV # first generate data data = gen_sample(true_ES = true_ES, starting_n = starting_n) # now test one = one_t_test(data, expID) if(!is.null(one)){ return(cbind(one, n_attempts)) } # record the 'loss' n_attempts = n_attempts + 1 # if results weren't significant, give up and try a new sample (return to top of while loop) } # ############################################# ################################ researcher B while(researcher == "B"){ # just like A, except add n=10 if NS n_to_add = 5 # add 5 Ss per group # plain = one_t_test(data, expID) if(!is.null(plain)){ return(cbind(plain, n_attempts)) } data = add_subjects(data, n_to_add) # if we are over the max sample size, give up and start a new experiment. if(data$ctl_n[1] + data$exp_n[1] > (stop_n * 2)){ data = gen_sample(true_ES = true_ES, starting_n = starting_n) n_attempts = n_attempts + 1 } } ############################################# ################################ researcher C while(researcher == "C"){ # adds n=1 per cell if NS n_to_add = 1 plain = one_t_test(data, expID) if(!is.null(plain)){ return(cbind(plain, n_attempts)) } # if we got to this point, nothing worked. add n=1 per cell. data = add_subjects(data, n_to_add) # if we are over the max sample size, give up and start a new experiment. if(data$ctl_n[1] + data$exp_n[1] > (stop_n * 2)){ data = gen_sample(true_ES = true_ES, starting_n = starting_n) n_attempts = n_attempts + 1 } } # ############################################# ################################ researcher D while(researcher == "D"){ # # first generate data data = gen_sample(true_ES = true_ES, starting_n = starting_n) # now test plain = plain_t_test(data, expID) if(!is.null(plain)){ return(cbind(plain, n_attempts)) } # outliers = outlier_t_test(data, expID, outSD = 2) if(!is.null(outliers)){ return(cbind(outliers, n_attempts)) } # dropSs = drop_bad(data, expID) if(!is.null(dropSs)){ return(cbind(dropSs, n_attempts)) } # record the 'loss' n_attempts = n_attempts + 1 # if results weren't significant, give up and try a new sample (return to top of while loop) } # ############################################# ################################ researcher E while(researcher == "E"){ # n_to_add = 5 # add 5 Ss per group # plain = plain_t_test(data, expID) if(!is.null(plain)){ return(cbind(plain, n_attempts)) } # outliers = outlier_t_test(data, expID, outSD = 2) if(!is.null(outliers)){ return(cbind(outliers, n_attempts)) } # dropSs = drop_bad(data, expID) if(!is.null(dropSs)){ return(cbind(dropSs, n_attempts)) } data = add_subjects(data, n_to_add) # if we are over the max sample size, give up and start a new experiment. if(data$ctl_n[1] + data$exp_n[1] > (stop_n * 2)){ data = gen_sample(true_ES = true_ES, starting_n = starting_n) n_attempts = n_attempts + 1 } } ############################################# ################################ researcher F while(researcher == "F"){ # n_to_add = 1 plain = plain_t_test(data, expID) if(!is.null(plain)){ return(cbind(plain, n_attempts)) } # outliers = outlier_t_test(data, expID, outSD = 2) if(!is.null(outliers)){ return(cbind(outliers, n_attempts)) } # dropSs = drop_bad(data, expID) if(!is.null(dropSs)){ return(cbind(dropSs, n_attempts)) } # if we got to this point, nothing worked. add n=1 per cell. data = add_subjects(data, n_to_add) # if we are over the max sample size, give up and start a new experiment. if(data$ctl_n[1] + data$exp_n[1] > (stop_n * 2)){ data = gen_sample(true_ES = true_ES, starting_n = starting_n) n_attempts = n_attempts + 1 } } # } # close the whole thing, duh ######################################################## ######################################################## ######################################################## ########## Component functions for p_hacker ############ ######################################################## ######################################################## ######################################################## ######################################################## ################################################################################ #################### gen_sample # this function generates a dataset where 2 independent groups each 'perform' # two uncorrelated DVs. A sample of n=100 per cell is generated at the start # of the simualted experiment; the 'last' subject in each experiment is defined # using ctl_n or exp_n. When subjects are added to a study, no new data gets # generated, instead ctl_n and exp_n are incremented. Note that sample sizes # are drawn from a uniform distribution. gen_sample <- function(true_ES, starting_n){ # n_init = round(runif(1, min=starting_n[1], max=starting_n[2])) # ctl_dv1 = rnorm(100) + true_ES exp_dv1 = rnorm(100) # ctl_dv2 = rnorm(100) + true_ES exp_dv2 = rnorm(100) # ctl_n = n_init exp_n = n_init data = data.frame(ctl_dv1, ctl_dv2, exp_dv1, exp_dv2, ctl_n, exp_n) return(data) } ################################################################################ #################### add_subjects # This function simply increments the last subject counter in the supplied data # frame. add_subjects <- function(data, n_to_add){ # data$ctl_n[1] = data$ctl_n[1] + n_to_add data$exp_n[1] = data$exp_n[1] + n_to_add return(data) } ################################################################################ #################### one_t_test # Just conduct one t-test on one DV one_t_test <- function(data, expID){ # just test one dv # # ctl_dv1 = data$ctl_dv1[1:data$ctl_n[1]] # exp_dv1 = data$exp_dv1[1:data$exp_n[1]] dv1 = t.test(ctl_dv1, exp_dv1, var.equal = TRUE) if(dv1$statistic > 0 & dv1$p.value < .05){ # if effects are in the right direction and significant... publish. t_obs = dv1$statistic df_obs = data$ctl_n[1] + data$exp_n[1] - 2 cohen_d = d_from_t(t_obs = t_obs, df_obs = df_obs) p_vals = p_from_t(t_obs = t_obs, df_obs = df_obs) return(data.frame(expID, t_obs, df_obs, p_vals, cohen_d)) } } ################################################################################ #################### plain_t_test # Conduct t-tests on DV1 and DV2. plain_t_test <- function(data, expID){ # here we have a plain two-tailed t test to check for depletion effects. # even though two-tailed, still needs to be in the right direction. this is how it's published. # ctl_dv1 = data$ctl_dv1[1:data$ctl_n[1]] ctl_dv2 = data$ctl_dv2[1:data$ctl_n[1]] exp_dv1 = data$exp_dv1[1:data$exp_n[1]] exp_dv2 = data$exp_dv2[1:data$exp_n[1]] # dv1 = t.test(ctl_dv1, exp_dv1, var.equal = TRUE) dv2 = t.test(ctl_dv2, exp_dv2, var.equal = TRUE) # if(dv1$statistic > 0 & dv1$p.value < .05){ # if effects are in the right direction and significant... publish. t_obs = dv1$statistic df_obs = data$ctl_n[1] + data$exp_n[1] - 2 cohen_d = d_from_t(t_obs = t_obs, df_obs = df_obs) p_vals = p_from_t(t_obs = t_obs, df_obs = df_obs) return(data.frame(expID, t_obs, df_obs, p_vals, cohen_d)) } if(dv2$statistic > 0 & dv2$p.value < .05){ # if effects are in the right direction and significant... publish. t_obs = dv2$statistic df_obs = data$ctl_n[1] + data$exp_n[1] - 2 cohen_d = d_from_t(t_obs = t_obs, df_obs = df_obs) p_vals = p_from_t(t_obs = t_obs, df_obs = df_obs) return(data.frame(expID, t_obs, df_obs, p_vals, cohen_d)) } } ################################################################################ #################### outlier_t_test # Run t-tests on DV1 and DV2 with outliers removed. outlier_t_test <- function(data, expID, outSD){ # just like the plain t-test, except we remove outliers first. # # ctl_dv1 = remove_sd(data$ctl_dv1[1:data$ctl_n[1]], outSD = outSD) ctl_dv2 = remove_sd(data$ctl_dv2[1:data$ctl_n[1]], outSD = outSD) exp_dv1 = remove_sd(data$exp_dv1[1:data$exp_n[1]], outSD = outSD) exp_dv2 = remove_sd(data$exp_dv2[1:data$exp_n[1]], outSD = outSD) # dv1 = t.test(ctl_dv1, exp_dv1, var.equal = TRUE) dv2 = t.test(ctl_dv2, exp_dv2, var.equal = TRUE) if(dv1$statistic > 0 & dv1$p.value < .05){ # if effects are in the right direction and significant... publish. t_obs = dv1$statistic df_obs = length(ctl_dv1) + length(exp_dv1) - 2 cohen_d = d_from_t(t_obs = t_obs, df_obs = df_obs) p_vals = p_from_t(t_obs = t_obs, df_obs = df_obs) return(data.frame(expID, t_obs, df_obs, p_vals, cohen_d)) } if(dv2$statistic > 0 & dv2$p.value < .05){ # if effects are in the right direction and significant... publish. t_obs = dv2$statistic df_obs = length(ctl_dv2) + length(exp_dv2) - 2 cohen_d = d_from_t(t_obs = t_obs, df_obs = df_obs) p_vals = p_from_t(t_obs = t_obs, df_obs = df_obs) return(data.frame(expID, t_obs, df_obs, p_vals, cohen_d)) } } ################################################################################ #################### drop_bad # Run t-tests on DV1 and DV2 with the 'worst' subject from each group removed. drop_bad <- function(data, expID){ # # ctl_dv1 = sort(data$ctl_dv1[1:data$ctl_n[1]], decreasing = FALSE) ctl_dv2 = sort(data$ctl_dv2[1:data$ctl_n[1]], decreasing = FALSE) exp_dv1 = sort(data$exp_dv1[1:data$exp_n[1]], decreasing = TRUE) exp_dv2 = sort(data$exp_dv2[1:data$exp_n[1]], decreasing = TRUE) ctl_dv1 = ctl_dv1[2:data$ctl_n[1]] ctl_dv2 = ctl_dv2[2:data$ctl_n[1]] exp_dv1 = exp_dv1[2:data$exp_n[1]] exp_dv2 = exp_dv2[2:data$exp_n[1]] # now we proceed normally dv1 = t.test(ctl_dv1, exp_dv1, var.equal = TRUE) dv2 = t.test(ctl_dv2, exp_dv2, var.equal = TRUE) if(dv1$statistic > 0 & dv1$p.value < .05){ # if effects are in the right direction and significant... publish. t_obs = dv1$statistic df_obs = length(ctl_dv1) + length(exp_dv1) - 2 cohen_d = d_from_t(t_obs = t_obs, df_obs = df_obs) p_vals = p_from_t(t_obs = t_obs, df_obs = df_obs) return(data.frame(expID, t_obs, df_obs, p_vals, cohen_d)) } if(dv2$statistic > 0 & dv2$p.value < .05){ # if effects are in the right direction and significant... publish. t_obs = dv2$statistic df_obs = length(ctl_dv2) + length(exp_dv2) - 2 cohen_d = d_from_t(t_obs = t_obs, df_obs = df_obs) p_vals = p_from_t(t_obs = t_obs, df_obs = df_obs) return(data.frame(expID, t_obs, df_obs, p_vals, cohen_d)) } } ################################################################################ #################### remove_sd # Remove values beyond a given SD threshold remove_sd <- function(x, outSD){ # mean = mean(x) # mean is the mean sd = sd(x) # sd is the sd bound = sd * outSD # this is how far from the mean we will return values for, at maximum upperbound = mean + bound lowerbound = mean - bound outputvalues = x[lowerbound < x] outputvalues = outputvalues[upperbound > outputvalues] return(outputvalues) }