Sample size for 2-stage freedom survey with fixed cluster-sensitivity

Calculate sample sizes for 2-stage surveys for demonstrating disease freedom, for specified target cluster-level sensitivity and system (population) sensitivity. This analysis calculates the number of clusters and the number of units within each cluster to be tested to provide specified cluster and system sensitivities (probability of detecting disease) for the given unit and cluster-level design prevalences and test sensitivity. Test specificity is assumed to be 100% (or follow-up testing of any positive will be undertaken to confirm or exclude disease).

Numbers of clusters to test are calculated using the hypergeometric approximation if the number of clusters in the population is specified, as well as using the binomial formula assuming an unknown (large) number of clusters in the population. If the population size is not specified, only the binomial results are presented.

Numbers of units to test in each cluster are calculated for a range of cluster sizes using the hypergeometric approximation and for unknown (large) cluster sizes using the binomial calculation.

Design prevalence (specified level of disease to be detected) must be specified at both unit and cluster levels. Design prevalence can be specified as either:

a proportion of the population infected; or
a specific (integer) number of units or clusters infected. For cluster-prevalence, the number of clusters in the population must be specified if design prevalence is specified as an integer.

Inputs

Inputs required include:

unit-level design prevalence as either a proportion or an integer number of units;
cluster-level design prevalence as either a proportion or an integer number of clusters;
the estimated test sensitivity;
the target cluster-level sensitivity (SeH) which is the probability of detecting disease if it is present in a cluster at the specified unit-level design prevalence;
the target system sensitivity (SSe) which is the probability of detecting disease if it is present in the population at the specified unit and cluster level design prevalences;
The number of clusters in the population (optional).

Outputs

Outputs from the analysis include:

The total numbers of clusters to be sampled;
The maximum total sample size;
The numbers of units to test in each cluster, for a range of cluster sizes to achieve the specified value for SeH; and
The numbers of units to test in each cluster and the corresponding numbers of clusters to test, for a range of cluster sizes and SeH values.

Note:

If it is not possible to achieve the desired cluster sensitivity by testing all units in a cluster of given size, the sample size is left blank;
If unit-level (animal-level) design prevalence is specified as a number of units, the sample size for an unknown cluster (herd/flock) size cannot be calculated;
If cluster-level prevalence is specified as a number of clusters, the number of clusters to be sampled for a large or unknown population cannot be calculated.

No results

No example available

No references available

				######################################
# Program to calculate sample size for 2-stage freedom survey 
# assuming perfect test specificity
# for specified SeH or n
######################################
# uses RSurveillance package

# Clear every object from the workspace before the helper scripts are loaded.
rm(list = ls())

# Debugging aid (disabled): print the number of command-line arguments.
# The statement must stay on a single comment line; when it was split over
# two lines the second half became live code and the script failed to parse.
# cat("\nTest:", length(commandArgs()))

# `test` is TRUE when fewer than 3 command-line arguments are present and
# selects the base path that the helper scripts are loaded from.
# NOTE(review): "webRootUrl" and "rtoolsPath" look like template placeholders
# that are meant to be replaced by real paths -- confirm before deployment.
test <- length(commandArgs()) < 3
fpath <- if (test) "webRootUrl" else "rtoolsPath"

# load header scripts
source(paste0(fpath, "R/epi_head.R"))
source(paste0(fpath, "R/HTMLStream.R"))
source(paste0(fpath, "R/epitools_functions.r"))

# extract command arguments (elements 8 to 13 of a0)
# a0 is not created in this file; presumably it is set up by one of the
# sourced header scripts -- TODO confirm.
# 1 = unit (animal) design prevalence,
# 2 = cluster (herd) design prevalence,
# 3 = test sensitivity,
# 4 = target cluster sensitivity (SeH),
# 5 = target system sensitivity (SSe),
# 6 = number of clusters (herds) in the population
# There is no separate "format" argument: a design prevalence below 1 is
# handled as a proportion, a value of 1 or more as a number of units/clusters.
a1 <- type.convert(a0[8:13], as.is = TRUE)
# Debugging aid (disabled), kept on one line so it stays a comment.
# cat("\n", a1)

dp.a <- a1[1]
dp.a.int <- dp.a >= 1
dp.h <- a1[2]
dp.h.int <- dp.h >= 1
sens <- a1[3]
# Test specificity is fixed at 1 (perfect specificity).
spec <- 1
seh <- a1[4]
sse <- a1[5]
herds <- a1[6]

heading <- "2-Stage Freedom Sample Size"
heading2 <- "Number of clusters to be sampled"
heading3 <- paste("Numbers of units to be sampled for different cluster sizes for SeH =", seh)
heading4 <- "Numbers of units to be sampled for different cluster sizes and varying SeH"
heading5 <- "Numbers of clusters to be sampled for varying SeH"

# spec, digits, d1, tmp.path and sinkfile are not referenced again in the
# visible script; they are kept because the sourced helpers may read them
# from the global environment (unconfirmed).
digits <- 4

# Time stamp assembled from fixed character positions of date(): characters
# 1-10, then 20-24, then " @", then 11-16. date() is captured once so all
# three pieces come from the same instant.
d1 <- local({
  now <- date()
  paste(substr(now, 1, 10), substr(now, 20, 24), " @", substr(now, 11, 16))
})

# Hash the current time, not the Sys.time function object: hashing the
# function gives the same file name on every run, so separate runs would
# overwrite each other's temporary output.
filename <- digest(Sys.time())
tmp.path <- paste0(fpath, "tmp/")
tmp.file <- paste0(tmp.path, filename)
sinkfile <- paste0(tmp.path, filename, ".txt")

# set up table of inputs
# One-column character matrix with one labelled row per supplied value.
# Design prevalences are shown as a count when flagged as integer and as a
# percentage otherwise; a non-positive cluster count is shown as "Unknown".
inputs <- local({
  labels <- c(
    "Unit-level design prevalence",
    "Cluster-level design prevalence",
    "Test sensitivity",
    "Target cluster sensitivity (SeH)",
    "Target system sensitivity (SSe)",
    "No. clusters in population"
  )
  values <- c(
    ifelse(dp.a.int, paste(dp.a, " unit(s)"), paste0(dp.a * 100, "%")),
    ifelse(dp.h.int, paste(dp.h, " cluster(s)"), paste0(dp.h * 100, "%")),
    sens,
    seh,
    sse,
    ifelse(herds > 0, herds, "Unknown")
  )
  matrix(values, nrow = length(a1), ncol = 1, dimnames = list(labels, NULL))
})

# From here on an unknown population size is represented by NA.
herds <- ifelse(herds > 0, herds, NA)
 
 # calculating sample sizes for varying herd sizes  
herd.size <- c(10, 20, 30, 40, 50, 100, 200, 500, 1000, 5000, 10000)

# n.2stage() returns a list; this script takes element 1 as the number of
# clusters to sample and element 2 as the sample size for each cluster size.
tmp <- n.2stage(
  H = herds, N = herd.size,
  sep.sys = sse, sep.c = seh,
  pstar.c = dp.h, pstar.u = dp.a,
  se = sens
)

# units to sample per cluster size, as a labelled one-column matrix
n <- tmp[[2]]
dim(n) <- c(length(n), 1)
dimnames(n) <- list(
  paste("Cluster size =", herd.size),
  "Number of units to sample"
)

# number of clusters (herds) to test
h <- tmp[[1]]
if (!is.na(h)) {
  # Maximum total sample size: the largest per-cluster sample size (missing
  # values ignored) times the number of clusters to sample.
  n.herds <- matrix(
    c(h, max(n, na.rm = TRUE) * h),
    nrow = 1,
    dimnames = list(
      paste("Clusters in population =", inputs[6, 1]),
      c("Clusters to sample", "Maximum number of samples")
    )
  )
} else {
  # No cluster sample size was returned: report that as a 1 x 1 text table.
  n.herds <- matrix(
    "Unable to achieve target by testing all clusters",
    nrow = 1, ncol = 1
  )
}

 
 # table of sample sizes for range of SeH values
# SeH values to tabulate: 0.1 to 0.5 in steps of 0.1, then 0.6 to 0.95 in
# steps of 0.05.
seh.l <- c(seq(0.1, 0.5, by = 0.1), seq(0.6, 0.95, by = 0.05))

# One row per cluster size (labels reused from n), one column per SeH value.
n.tab <- matrix(
  0,
  nrow = length(herd.size),
  ncol = length(seh.l),
  dimnames = list(
    rownames(n)[seq_along(herd.size)],
    paste("SeH =", seh.l)
  )
)
for (r in seq_len(nrow(n.tab))) {
  n.tab[r, ] <- n.freedom(N = herd.size[r], sep = seh.l, pstar = dp.a, se = sens)
  # A sample size larger than the cluster itself cannot be taken: blank it.
  n.tab[r, which(n.tab[r, ] > herd.size[r])] <- NA
}

# With a proportional unit-level design prevalence, add a row for an unknown
# cluster size (N = NA).
if (!dp.a.int) {
  n.x <- n.freedom(N = NA, sep = seh.l, pstar = dp.a, se = sens)
  n.tab <- rbind(n.tab, "Cluster size = unknown" = n.x)
}


 # calculate numbers of herds for varying seh
# Table layout:
#   population size known, integer design prevalence   -> one row (known N)
#   population size known, proportional prevalence     -> known N + unknown N
#   population size unknown                            -> one row (unknown N)
n.herds.l <- local({
  if (is.na(herds)) {
    labels <- "No. clusters in population unknown"
  } else if (dp.h.int) {
    labels <- paste("Population =", herds, "clusters")
  } else {
    labels <- c(
      paste("Population =", herds, "clusters"),
      "Population size unknown"
    )
  }

  tab <- matrix(
    0,
    nrow = length(labels),
    ncol = length(seh.l),
    dimnames = list(labels, colnames(n.tab))
  )

  if (!is.na(herds)) {
    tab[1, ] <- n.freedom(N = herds, sep = sse, pstar = dp.h, se = seh.l)
    # More clusters than exist in the population cannot be sampled: blank.
    tab[1, which(tab[1, ] > herds)] <- NA
  }
  if (is.na(herds) || !dp.h.int) {
    # The unknown-population row is always the last row of the table.
    tab[nrow(tab), ] <- n.freedom(N = NA, sep = sse, pstar = dp.h, se = seh.l)
  }
  tab
})
 
# write to html and file
# The four result tables are passed in the same order as their subheadings.
subheadings <- c(heading2, heading3, heading4, heading5)
result.txt <- ""
output <- html.output(
  heading, subheadings, inputs,
  results = list(n.herds, n, n.tab, n.herds.l),
  graphs = "", graph.headings = "",
  show.inputs = TRUE, show.graphs = FALSE,
  tmp.file,
  result.txt = result.txt
)
write.html(output, tmp.file)
# Also send the generated output to standard output.
cat(output)