Epidemiology Research International

Research Article

Estimating Prevalence Using an Imperfect Test

Algorithm 1

R code.

#
# R function for Bayesian estimation of prevalence using an
# imperfect test.
#
# Notes
#
# 1. Prior for prevalence is uniform on (0,1)
# 2. Priors for sensitivity and specificity are independent scaled
# beta distributions
# 3. Function uses a simple quadrature algorithm with number of
# quadrature points as an optional argument "ngrid" (see below);
# the default value ngrid=20 has been sufficient for all examples
# tried by the author, but is not guaranteed to give accurate
# results for all possible values of the other arguments.
#
prevalence.bayes<-function(theta,T,n,lowse=0.5,highse=1.0,
sea=1,seb=1,lowsp=0.5,highsp=1.0,spa=1,spb=1,ngrid=20,coverage=0.95) {
#
# arguments
# theta: vector of prevalences for which posterior density is required
# (will be converted internally to increasing sequence of equally
# spaced values, see "result" below)
# T: number of positive test results
# n: number of indiviudals tested
# lowse: lower limit of prior for sensitivity
# highse: upper limit of prior for sensitivity
# sea,seb: parameters of scaled beta prior for sensitivity
# lowsp: lower limit of prior for specificity
# highsp: upper limit of prior for specificity
# spa,spb: parameters of scaled beta prior for specificity
# ngrid: number of grid-cells in each dimension for quadrature
# coverage: required coverage of posterior credible interval
# (warning message given if not achieveable)
#
# result is a list with components
# theta: vector of prevalences for which posterior density has
# been calculated
# post: vector of posterior densities
# mode: posterior mode
# interval: maximum a posteriori credible interval
# coverage: achieved coverage
#
ibeta<-function(x,a,b) {
pbeta(x,a,b)*beta(a,b)
}
ntheta<-length(theta)
bin.width<-(theta[ntheta]-theta[1])/(ntheta-1)
theta<-theta[1]+bin.width*(0:(ntheta-1))
integrand<-array(0,c(ntheta,ngrid,ngrid))
h1<-(highse-lowse)/ngrid
h2<-(highsp-lowsp)/ngrid
for (i in 1:ngrid) {
se<-lowse+h1*(i-0.5)
pse<-(1/(highse-lowse))*dbeta((se-lowse)/(highse-lowse),sea,seb)
for (j in 1:ngrid) {
sp<-lowsp+h2*(j-0.5)
psp<-(1/(highsp-lowsp))*dbeta((sp-lowsp)/(highsp-lowsp),spa,spb)
c1<-1-sp
c2<-se+sp-1
f<-(1/c2)choose(n,T)(ibeta(c1+c2,T+1,n-T+1)-ibeta(c1,T+1,n-T+1))
p<-c1+c2*theta
density<-rep(0,ntheta)
for (k in 1:ntheta) {
density[k]<-dbinom(T,n,p[k])/f
}
integrand[,i,j]<-densitypsepsp
}
}
post<-rep(0,ntheta)
for (i in 1:ntheta) {
post[i]<-h1h2sum(integrand[i,,])
}
ord<-order(post,decreasing=T)
mode<-theta[ord[1]]
take<-NULL
prob<-0
i<-0
while ((prob<coverage/bin.width)&(i<ntheta)) {
i<-i+1
take<-c(take,ord[i])
prob<-prob+post[ord[i]]
}
if (i==ntheta) {
print("WARNING: range of values of theta too narrow")
}
interval<-theta[range(take)]
list(theta=theta,post=post,mode=mode,interval=interval,coverage=prob*bin.width)
}
#
# example
#
n<-100
T<-20
ngrid<-25
lowse<-0.7
highse<-0.95
lowsp<-0.8
highsp<-1.0
sea<-2
seb<-2
spa<-4
spb<-6
theta<-0.001*(1:400)
coverage<-0.9
result<-prevalence.bayes(theta,T,n,lowse,highse,
sea,seb,lowsp,highsp,spa,spb,ngrid,coverage)
result$mode # 0.115
result$interval # 0.011 0.226
plot(result$theta,result$post,type="l",xlab="theta",ylab="p(theta)")