Let’s look at the relationship between statistical power and false discoveries, as outlined in Ioannidis’ 2005 paper.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
alpha=0.05 # false positive rate
beta = seq(1.,0.05,-0.05) # false negative rate
powerVals = 1-beta
priorVals=c(.01,0.1,0.5,0.9)
nstudies=100
df=data.frame(power=rep(powerVals,length(priorVals))) %>%
mutate(priorVal=kronecker(priorVals,rep(1,length(powerVals))),
alpha=alpha)
# Positive Predictive Value (PPV) - the likelihood that a positive finding is true
PPV = function(df) {
df$PPV = (df$power*df$priorVal)/(df$power*df$priorVal + df$alpha*(1-df$priorVal))
return(df)
}
df=PPV(df)
ggplot(df,aes(power,PPV,color=as.factor(priorVal))) +
geom_line(size=1) +
ylim(0,1) +
xlim(0,1) +
ylab('Posterior predictive value (PPV)')
set.seed(123456)
trueEffectSize=0.2
dfCurse=data.frame(sampSize=seq(20,300,20)) %>%
mutate(effectSize=trueEffectSize,
alpha=0.05)
simCurse = function(df,nruns=1000){
sigResults=0
sigEffects=c()
for (i in 1:nruns){
tmpData=rnorm(df$sampSize,mean=df$effectSize,sd=1)
ttestResult=t.test(tmpData)
if (ttestResult$p.value<df$alpha){
sigResults = sigResults + 1
sigEffects=c(sigEffects,ttestResult$estimate)
}
}
df$power=sigResults/nruns
df$effectSizeEstimate=mean(sigEffects)
return(df)
}
dfCurse = dfCurse %>% group_by(sampSize) %>% do(simCurse(.))
ggplot(dfCurse,aes(power,effectSizeEstimate)) +
geom_line(size=1) +
ylim(0,max(dfCurse$effectSizeEstimate)*1.2) +
geom_hline(yintercept = trueEffectSize,size=1,linetype='dotted',color='red')
Let’s look more closely at an example:
set.seed(123456)
sampSize=60
effectSize=0.2
nruns=1000
alpha=0.05
df=data.frame(idx=seq(1,nruns)) %>%
mutate(pval=NA,
estimate=NA)
for (i in 1:nruns){
tmpData=rnorm(sampSize,mean=effectSize,sd=1)
ttestResult=t.test(tmpData)
df$pval[i]=ttestResult$p.value
df$estimate[i]=ttestResult$estimate
}
df = df %>%
mutate(significant=pval<alpha) %>%
group_by(significant)
power=mean(df$pval<alpha)
power
## [1] 0.331
meanSigEffect=mean(df$estimate[df$pval<alpha])
meanSigEffect
## [1] 0.3351666
meanTrueEffect=mean(df$estimate)
meanTrueEffect
## [1] 0.2022374
ggplot(df,aes(estimate,fill=significant)) +
geom_histogram(bins=50)