# pcurve general functions
# the following libraries are required to run these functions:
library(dplyr)
library(metafor)
library(meta)
library(pwr)
library(readxl)

# to load in meta-analytic data from the included Excel file and run p-curve
# on it, try:
# rxdpl_data = import_xls()
# choose the Excel file where the meta-analytic dataset is, and then:
# pcurve_plot(rxdpl_data)
# this should produce a two-panel figure with a p-curve on the left and a
# bootstrapped sampling distribution on the right.

################################################################################
#################### pp_curve
# this returns the pp-values from p-curve, given a candidate effect size
pp_curve = function(tdata, d_est = 0) {
  t_obs  = tdata$t_obs
  df_obs = tdata$df_obs
  ncp_est = sqrt((df_obs + 2) / 4) * d_est
  tc = qt(.975, df_obs)
  power_est = 1 - pt(tc, df_obs, ncp_est)
  p_larger = 1 - pt(t_obs, df = df_obs, ncp = ncp_est)
  ppr = p_larger / power_est
  # no KS test here, just want pp-values
  return(ppr)
}

################################################################################
#################### loss
# this is the original loss function from Simonsohn et al.
loss = function(t_obs, df_obs, d_est) {
  ncp_est = sqrt((df_obs + 2) / 4) * d_est        # noncentrality parameter implied by d_est
  tc = qt(.975, df_obs)                           # critical t value for p = .05
  power_est = 1 - pt(tc, df_obs, ncp_est)         # power to detect d_est at these df
  p_larger = pt(t_obs, df = df_obs, ncp = ncp_est)
  ppr = (p_larger - (1 - power_est)) / power_est  # pp-values
  KSD = ks.test(ppr, punif)$statistic             # distance between pp-values and the uniform
  return(KSD)
}

################################################################################
#################### f_loss
# this operates the same way as Simonsohn's loss function, but is set up to
# ingest a data.frame() formatted with expID etc.; it returns the KS D
# statistic, which optimize() minimizes to obtain the ES estimate.
f_loss = function(d_est, matrix_in) {
  t_obs  = matrix_in$t_obs
  df_obs = matrix_in$df_obs
  # first obtain the noncentrality parameter value
  ncp_est = sqrt((df_obs + 2) / 4) * d_est
  # what's the critical t value for p = .05?
  tc = qt(.975, df_obs)
  # find how well this is powered, given d, t, df: the power for obtaining
  # that t-value or bigger, given the noncentrality parameter
  power_est = 1 - pt(tc, df_obs, ncp_est)
  # what's the probability of obtaining a p-value smaller than observed,
  # given d and df? (i.e., of a t-value bigger than the one observed)
  p_larger = pt(t_obs, df = df_obs, ncp = ncp_est)
  ppr = (p_larger - (1 - power_est)) / power_est
  # Sys.sleep(0.1)
  KSD = ks.test(ppr, punif)
  return(KSD$statistic)
}
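# A minimal, made-up example of how f_loss() is meant to be used: optimize()
# searches candidate effect sizes and returns the d that minimizes the KS
# distance. The data frame below is illustrative only; real data come from
# import_xls() or import_csv() further down.
# example_dat = data.frame(expID  = 1:4,
#                          t_obs  = c(2.3, 2.8, 2.1, 3.0),
#                          df_obs = c(38, 52, 44, 60))
# d_hat = optimize(f_loss, interval = c(-2, 2), matrix_in = example_dat)$minimum
# d_hat  # p-curve's effect size estimate for these four studies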
################################################################################
#################### plotloss
# plotloss code from Simonsohn et al.
plotloss = function(matrix_in, dmin = -2, dmax = 2) {
  ##############################################################################
  # SYNTAX:
  # t_obs  : vector with observed t-values
  # df_obs : vector with degrees of freedom associated with each t-value
  # dmin   : smallest effect size to consider
  # dmax   : largest effect size to consider
  # e.g., dmin=-1, dmax=1 would look for the best fitting effect size in the
  # d>=-1 and d<=1 range
  ##############################################################################
  t_obs  = matrix_in$t_obs
  df_obs = matrix_in$df_obs
  # results will be stored in these vectors, create them first
  loss.all = c()
  di = c()
  # compute loss for effect sizes between d=c(dmin,dmax) in steps of .02
  for (i in 0:((dmax - dmin) * 50)) {
    d = dmin + i / 50       # effect size being considered
    di = c(di, d)           # add it to the vector (kind of silly, but kept for symmetry)
    options(warn = -1)      # turn off warnings because R does not like its own pt() function!
    loss.all = c(loss.all, loss(df_obs = df_obs, t_obs = t_obs, d_est = d))  # apply loss function to that effect size, store result
    options(warn = 0)       # turn warnings back on
  }
  # find the effect size leading to the smallest loss in that set; that becomes
  # the starting point for the optimize command
  imin = match(min(loss.all), loss.all)  # which tested effect size led to the overall minimum?
  dstart = dmin + imin / 50              # convert that i into a d
  # optimize around the global minimum
  dhat = optimize(loss, c(dstart - .1, dstart + .1), df_obs = df_obs, t_obs = t_obs)
  options(warn = 0)
  # plot results
  par(mfrow = c(1, 2))
  plot(di, loss.all, xlab = "Effect size\nCohen-d",
       ylab = "Loss (D stat in KS test)", ylim = c(0, 1),
       main = "How well does each effect size fit? (lower is better)")
  points(dhat$minimum, dhat$objective, pch = 19, col = "red", cex = 2)
  text(dhat$minimum, dhat$objective + .6,
       paste0("p-curve's estimate of effect size:\nd=", round(dhat$minimum, 3)),
       col = "red")
  hist(matrix_in$p_vals, breaks = 5, xlab = "observed p-values", main = "p-curve",
       ylab = "number of p-values",
       col = c("gray10", "gray25", "gray40", "gray55", "gray70"))
  return(dhat$minimum)
}

################################################################################
#################### new_loss
# modified loss function from Simonsohn et al.
new_loss = function(d_est, matrix_in) {
  t_obs  = matrix_in$t_obs
  df_obs = matrix_in$df_obs
  # one-tailed p-values, used only for the significance check below
  p_vals = (1 - pt(t_obs, df_obs))
  if (sum(p_vals > .050) > 0) {
    stop("dataset includes p value above .05, stopping")
  }
  # first obtain the noncentrality parameter value
  ncp_est = sqrt((df_obs + 2) / 4) * d_est
  # what's the critical t value for p = .05?
  tc = qt(.975, df_obs)
  # find how well this is powered, given d, t, df.
  power_est = 1 - pt(tc, df_obs, ncp_est)
  # what's the probability of obtaining a p-value smaller than observed, given d, df?
  p_larger = pt(t_obs, df = df_obs, ncp = ncp_est)
  ppr = (p_larger - (1 - power_est)) / power_est
  # Sys.sleep(0.1)
  KSD = ks.test(ppr, punif)
  # hist(ppr, breaks=50, xlab = c(KSD$p.value, KSD$statistic))
  return(KSD$statistic)
}

################################################################################
#################### gen_phack
# old code used to generate p-hacked datasets
# (relies on a p_hacker() function, defined elsewhere, that simulates one
#  p-hacked set of studies)
gen_phack <- function(n_samples, n_studies = 50, true_ES, researcher) {
  d_estimates = c()
  all_data = data.frame()
  print("Generating samples...")
  pb = txtProgressBar(min = 1, max = n_samples)
  for (i in 1:n_samples) {
    # p-hack a set of n_studies studies
    dataset = p_hacker(n_studies = n_studies, true_ES = true_ES, researcher = researcher)
    all_data = rbind(dataset, all_data)
    pc_est = optimize(f_loss, matrix_in = dataset, c(-2, 2))$minimum
    d_estimates = append(d_estimates, pc_est)
    setTxtProgressBar(pb, i)
  }
  close(pb)
  # TODO: make starting_n into starting_n = c(10, 15), and also n_maximum = c(30, 45)
  # pc_est = paste0("pcurve d = ", round(optimize(f_loss, matrix_in=studies, c(-2,2))$minimum, 5))
  par(mfrow = c(1, 2))
  hist(d_estimates)
  hist(all_data$p_vals, breaks = 5, xlab = "observed p-values",
       main = "pvals from all studies", ylab = "number of p-values",
       col = c("gray10", "gray25", "gray40", "gray55", "gray70"))
  return(d_estimates)
}
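# Illustrative call of plotloss(), continuing the made-up example_dat sketch
# from above. plotloss() also needs a p_vals column for its histogram;
# p_from_t() is defined further down in this file, so source the whole script first.
# example_dat$p_vals = p_from_t(example_dat$t_obs, example_dat$df_obs)
# plotloss(example_dat, dmin = -2, dmax = 2)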
################################################################################
#################### pcqh
# p-curve quick histogram generator
pcqh = function(tdata, ylabel = "") {
  # pcqh = quick p-curve with histogram; ES estimates are rounded for printing.
  # this has evolved to include the RE meta-analysis and PET-PEESE.
  pc_est = optimize(f_loss, matrix_in = tdata, c(-2, 2))$minimum
  pc_est = sprintf("%.2f", pc_est)
  trad_est = trad_ma(tdata)$TE.random
  trad_est = sprintf("%.2f", trad_est)
  # petpeese_est = petpeese_ma(tdata)
  # petpeese_est = sprintf("%.2f", petpeese_est)
  varname = deparse(substitute(tdata))
  # varname = strsplit(varname, "s")[[1]][2]
  maintext = paste0(" RE MA= ", trad_est, "\npcurve= ", pc_est)
  hist(tdata$p_vals, breaks = 5, main = maintext,
       col = c("gray10", "gray25", "gray40", "gray55", "gray70"),
       xlab = "", axes = FALSE)
  # hist(tdata$p_vals, xlim = c(.00, .02), breaks = 100, xlab = "", ylab = "", axes = FALSE)
  axis(side = 1, tick = TRUE, cex.axis = 0.8)
  mtext(text = ylabel, side = 2, cex = 0.9)
  print(paste0("for ", varname,
               " number of studies to get 1 out: ", round(mean(tdata$n_attempts), 2),
               " FP rate: ", round(1 / mean(tdata$n_attempts), 3)))
}

################################################################################
#################### pcurve
# p-curve wrapper function: returns bootstrapped p-curve ES estimates
pcurve <- function(tdata, n_samples_boot) {
  # pc_est = plotloss(matrix_in=tdata, dmin=-2, dmax=2)
  pc_est = optimize(f_loss, matrix_in = tdata, c(-2, 4))$minimum
  # ok, so now we have the p-curve estimate; next we need a 95% CI around it
  estimates = c()
  pb = txtProgressBar(min = 0, max = n_samples_boot)
  for (n in 1:n_samples_boot) {
    samp = sample_frac(tdata, replace = TRUE)
    samp_est = optimize(f_loss, matrix_in = samp, c(-2, 4))$minimum
    estimates = append(samp_est, estimates)
    setTxtProgressBar(pb, n)
  }
  close(pb)
  return(estimates)
}

################################################################################
#################### pcurve_plot
# p-curve visualization
pcurve_plot <- function(tdata, histbounds = c(-2, 2)) {
  estimates = pcurve(tdata, n_samples_boot = 1000)
  pcurve_d = optimize(f_loss, matrix_in = tdata, c(-2, 4))$minimum
  plot.new()
  par(mfrow = c(1, 2))
  par(mar = c(5, 5, 5, 1))
  # first the p-curve
  hist(tdata$p_vals, breaks = 5, xlab = "observed p-values", main = "p-curve",
       ylab = "number of p-values",
       col = c("gray10", "gray25", "gray40", "gray55", "gray70"))
  # then the bootstrapped sampling distribution of the ES estimate
  par(mar = c(5, 1, 5, 5))
  estimates = sort(estimates)
  upper95th = round(length(estimates) * 0.975, 0)
  lower95th = round(length(estimates) * 0.025, 0)
  upper = round(estimates[upper95th], 3)
  lower = round(estimates[lower95th], 3)
  mainlabel = paste0("p-curve ES estimate: d = ", round(pcurve_d, 3),
                     "\n95% CI [ ", lower, ", ", upper, " ]")
  hist(estimates, xlim = histbounds, axes = FALSE, xlab = mainlabel, ylab = "",
       main = "Bootstrapped distribution of\np-curve ES estimates", col = "gray85")
  axis(side = 4, pos = histbounds[2] + 0.05, cex.axis = .9)
  mtext("bootstrapped estimates", side = 4, line = 2, cex = 1.0)
  axis(side = 1)
  abline(v = pcurve_d, col = "red", lwd = 4)
  abline(v = lower, col = "blue", lty = 3, lwd = 3)
  abline(v = upper, col = "blue", lty = 3, lwd = 3)
  print(c(estimates[lower95th], pcurve_d, estimates[upper95th]))
}

################################################################################
#################### trad_ma
# conduct traditional meta-analysis
trad_ma <- function(tdata) {
  # trad_ma is a traditional random-effects meta-analysis based on the
  # Carter & McCullough (2014) R code supplement. note that the pwr, meta,
  # and metafor packages must be loaded for this to work right.
  d = tdata$cohen_d
  n1 = floor((tdata$df_obs + 2) / 2)    # half the sample size if even, the lower value if odd
  n2 = ceiling((tdata$df_obs + 2) / 2)  # half the sample size if even, the higher value if odd
  variance = ((n1 + n2) / (n1 * n2) + d^2 / (2 * (n1 + n2)))
  stderr = sqrt(variance)
  # metagen() expects the standard error (not the variance) as its second argument
  trad_meta = metagen(d, stderr)
  return(trad_meta)
}
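# Worked example of the variance formula used in trad_ma() (made-up numbers):
# for d = 0.5 with n1 = n2 = 20 (df_obs = 38), variance = 40/400 + 0.25/80
# = 0.103, so the standard error is sqrt(0.103), roughly 0.32.
# To run trad_ma() on the illustrative example_dat from above, a cohen_d
# column is needed first (d_from_t() is defined further down in this file):
# example_dat$cohen_d = d_from_t(example_dat$t_obs, example_dat$df_obs)
# trad_ma(example_dat)$TE.random   # random-effects estimate of d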
################################################################################
#################### import_xls
# imports an XLS file with MA data, removes p > .05 studies, returns a 'clean'
# dataset. note that this requires the readxl package.
import_xls <- function() {
  mat = read_excel(file.choose(), sheet = 2)
  mat$p_vals = p_from_t(mat$t_obs, mat$df_obs)
  mat$cohen_d = d_from_t(mat$t_obs, mat$df_obs)
  mat = filter(mat, p_vals < .05)
  return(mat)
}

################################################################################
#################### import_csv
# imports a csv file with MA data, removes p > .05 studies, returns a 'clean'
# dataset. same functionality as import_xls, except a csv file must first be
# generated.
import_csv <- function() {
  mat = read.csv(file.choose(), header = TRUE)
  # this function takes in a matrix with studyID, df_obs, t_obs, p_val, cohen_d
  # and calculates p_vals and cohen_d from df_obs and t_obs
  mat$p_vals = p_from_t(mat$t_obs, mat$df_obs)
  mat$cohen_d = d_from_t(mat$t_obs, mat$df_obs)
  mat = filter(mat, p_vals < .05)
  return(mat)
}

d_from_t <- function(t_obs, df_obs) {
  # calculate Cohen's d from t and df (two-sample t-test, equal group sizes)
  return((t_obs * 2) / sqrt(df_obs))
}

t_from_d <- function(d, df) {
  # calculate t statistic from Cohen's d
  t = (d * sqrt(df)) / 2
  return(t)
}

p_from_t <- function(t_obs, df_obs) {
  # calculate two-tailed p-value from t, df
  p = (1 - pt(t_obs, df_obs)) * 2
  return(p)
}

################################################################################
#################### pc_boot
# p-curve bootstrapping routine
pc_boot <- function(t_obs, df_obs, iter) {
  len = length(t_obs)
  estimates = c()
  while (length(estimates) < iter) {
    # resample studies with replacement
    index = sample(seq_len(len), len, replace = TRUE)
    samp_t_obs = t_obs[index]
    samp_df_obs = df_obs[index]
    ksd = optimize(loss, c(-2, 2), df_obs = samp_df_obs, t_obs = samp_t_obs)
    d_est = ksd$minimum
    estimates = append(estimates, d_est)
    # print(d_est)
  }
  return(estimates)
}
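# Illustrative checks of the conversion helpers and pc_boot(), continuing the
# made-up example_dat sketch from above:
# d_from_t(2.3, 38)                  # Cohen's d implied by t = 2.3, df = 38
# t_from_d(d_from_t(2.3, 38), 38)    # round-trips back to t = 2.3
# boot_est = pc_boot(example_dat$t_obs, example_dat$df_obs, iter = 200)
# quantile(boot_est, c(.025, .975))  # rough bootstrap 95% CI around the p-curve d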