######################################################################################## # # # This code is used to to determine the appropriate sample size to achieve a specified # # predefined level of power for a test when the differece between level of disease in # # two population is defined. # # # # Functions for determining the various parts of the problem are defined first and # # then the main code is given below # # # ######################################################################################## true.power=function(p1,p2, n1, n2,num.sims=500000) { ######################################################################################## # # # This function implements the Monte Carlo approach to calculating the true power # # # ######################################################################################## #draw a large sample from each population x1=rbinom(num.sims,round(n1),p1) x2=rbinom(num.sims,round(n2),p2) f1=x1/n1 f2=x2/n2 f=(x1+x2)/(n1+n2) s.sq1=f1*(1.0-f1)/n1 s.sq2=f2*(1.0-f2)/n2 s.sq.d=f*(1.0-f)*(1.0/n1 + 1.0/n2) s.d=sqrt(s.sq.d) t.stat=(f1-f2)/s.d t.stat=ifelse(x1+x2 == 0, 0,t.stat) #deal with undefined values reject=ifelse(t.stat > qnorm(0.975,0,1),1,0) return(sum(reject)/num.sims) #return power of the test } ### End true.power ### ######################################################################################## # # # Main program: Specify the anticipated prevalence and the desired significance and # # # and power # # # ######################################################################################## #Specify prevalence under the alternative hypothesis (p1 must be higher prevalence population) p1=0.001 p2=0.0001 #Set parameters that describe the relationship of sample size, significance level and power fraction=1/5 alpha=0.05 power=0.80 #Specify allowable tolerance between estimated and true power (don't get greedy because tolerance can't always be achieved) tol=0.00015 #Calculate the sample size without the continuity correction derived by Fleiss et al. and #implemented in Hmisc() library. This is used as a starting point z.alpha =qnorm(1 - alpha/2) z.beta = qnorm(power) ratio = (1 - fraction)/fraction p = fraction * p1 + (1 - fraction) * p2 n1 = round((z.alpha * sqrt((ratio + 1) * p * (1 - p)) + z.beta * sqrt(ratio * p1 * (1 - p1) + p2 * (1 - p2)))^2/ratio/((p1 - p2)^2)) n2 = round(ratio * n1) #add the continuity correction n1.cc=round(n1/4.0 * (1+ sqrt( 1 + 2*(ratio+1)/(ratio*n1*(p1-p2)) ) )^2) n2.cc = round(ratio * n1.cc) print(cbind("Using Fleiss et al. results the sample sizes are ", n1, n2)) #Use a binary search to find the agreement between calculated and Monte-carlo based power. #Start by defining search interval end-points left1=round(n1/4) right1=n1 mid1=(right1+left1)/2 left2=round(n2/4) right2=n2 mid2=(right2+left2)/2 #Binary search code while (abs(true.power(p1,p2, mid1 ,mid2)-power) > tol & (right1- left1) > 1) { mid1=round((left1+right1)/2) mid2=round(ratio*mid1) temp.pow=true.power(p1,p2, mid1 ,mid2) print(cbind(mid1, mid2, temp.pow, left1,right1)) if (abs(temp.pow-power) < tol) { print(cbind(mid1, mid2, temp.pow, left1,right1)) break } if (temp.pow > power) {right1 = round(mid1)} if (temp.pow < power) {left1 = round(mid1)} } print(cbind("The Monte-Carlo based sample sizes are ", mid1, mid2)) print(cbind("The power of the test is", 100*round(true.power(p1,p2, mid1 ,mid2),4),100*round(true.power(p1,p2, n1 ,n2),4), 100*round(true.power(p1,p2, n1.cc ,n2.cc),4))) print(cbind("The Monte-Carlo, uncorrected and continuity corrected sample sizes are ", mid1, mid2, n1, n2, n1.cc, n2.cc)) print(cbind("The Monte-Carlo, uncorrected and continuity corrected total sample sizes are ", mid1+mid2, n1+n2, n1.cc+n2.cc)) print(cbind("Total savings in sample size ", (n1.cc+n2.cc)-(mid1+mid2), (n1+n2)-(mid1+mid2))) print(cbind("Percent savings in sample size ", 100*((n1.cc+n2.cc)-(mid1+mid2))/(n1.cc+n2.cc), 100*((n1+n2)-(mid1+mid2))/(n1+n2))) ######################################################################################## # # # Graph : Do a graph that describes the power as a function of sample size # # This is unnecessary, but makes a nice picture # # # ######################################################################################## new.n1=seq(round(n1/3), n1.cc, 5) new.n2= round(ratio*new.n1) pow.tmp=rep(0,length(new.n1)) for (i in 1:length(new.n1)) { pow.tmp[i]=true.power(p1,p2, new.n1[i] ,new.n2[i]) print(cbind(new.n1[i],new.n2[i],pow.tmp[i])) } plot(new.n1, pow.tmp, main='Power as a function of sample size. Horizontal line indicates desired power',xlab='sample size in population 1',ylab='Power') plot(new.n1, pow.tmp, main=' ',xlab='sample size draw from population 1',ylab='Power') abline(h=0.8) plot(new.n1, pow.tmp, main=' ',xlab='sample size drawn from the exposed population',ylab='Power', lwd=2) abline(h=0.8) abline(v=c(mid1,n1,n1.cc), lty=c(1,2,3), lwd=c(2,2,2)) leg.names=c("Monte-Carlo", "Uncorrected", "Continuity Corrected") legend(450,0.87,leg.names, lty=c(1,2,3), lwd=c(2,2,2))