Sample size for 2-stage freedom survey with fixed cluster-sensitivity
Calculate sample sizes for 2-stage surveys for demonstrating disease freedom, for specified target cluster-level sensitivity and system (population) sensitivity. This analysis calculates the number of clusters and the number of units within each cluster to be tested to provide specified cluster and system sensitivities (probability of detecting disease) for the given unit and cluster-level design prevalences and test sensitivity. Test specificity is assumed to be 100% (or follow-up testing of any positive will be undertaken to confirm or exclude disease).
Numbers of clusters to test are calculated using the hypergeometric approximation if the number of clusters in the population is specified, as well as using the binomial formula assuming an unknown (large) number of clusters in the population. If the population size is not specified, only the binomial results are presented.
Numbers of units to test in each cluster are calculated for a range of cluster sizes using the hypergeometric approximation and for unknown (large) cluster sizes using the binomial calculation.
Design prevalence (specified level of disease to be detected) must be specified at both unit and cluster levels. Design prevalence can be specified as either:
- a proportion of the population infected; or
- a specific (integer) number of units or clusters infected. For cluster-prevalence, the number of clusters in the population must be specified if design prevalence is specified as an integer.
Inputs
Inputs required include:
- unit-level design prevalence as either a proportion or an integer number of units;
- cluster-level design prevalence as either a proportion or an integer number of clusters;
- the estimated test sensitivity;
- the target cluster-level sensitivity (SeH) which is the probability of detecting disease if it is present in a cluster at the specified unit-level design prevalence;
- the target system sensitivity (SSe) which is the probability of detecting disease if it is present in the population at the specified unit and cluster level design prevalences;
- The number of clusters in the population (optional).
Outputs
Outputs from the analysis include:
- The total numbers of clusters to be sampled;
- The maximum total sample size;
- The numbers of units to test in each cluster, for a range of cluster sizes to achieve the specified value for SeH; and
- The numbers of units to test in each cluster and the corresponding numbers of clusters to test, for a range of cluster sizes and SeH values.
Note:
- If it is not possible to achieve the desired cluster sensitivity by testing all units in a cluster of given size, the sample size is left blank;
- If animal-level prevalence is specified as a number of animals, sample size for an unknown herd/flock size cannot be calculated;
- If cluster-level prevalence is specified as a number of clusters, the number of clusters to be sampled for a large or unknown population cannot be calculated.
No results
######################################
# Program to calculate sample size for 2-stage freedom survey
# assuming perfect test specificity
# for specified SeH or n
######################################
# uses RSurveillance package

# Start from an empty global workspace (kept from the original script);
# everything this script needs is created or sourced below.
rm(list = ls())

# cat("\nTest:", length(commandArgs()))
# Fewer than 3 command arguments is treated as a test run; the flag only
# selects which root path prefix is used for the helper scripts and tmp files.
test <- length(commandArgs()) < 3
fpath <- if (test) "webRootUrl" else "rtoolsPath"

# load header scripts
source(paste0(fpath, "R/epi_head.R"))
source(paste0(fpath, "R/HTMLStream.R"))
source(paste0(fpath, "R/epitools_functions.r"))

# extract command arguments
# NOTE(review): a0 is not defined in this script; presumably it is the
# command-argument vector created by the sourced header scripts - confirm.
# Elements 8 to 13 of a0 are read, in this order:
#   a1[1] = unit (animal) design prevalence
#   a1[2] = cluster (herd) design prevalence
#   a1[3] = test sensitivity
#   a1[4] = target cluster sensitivity (SeH)
#   a1[5] = target system sensitivity (SSe)
#   a1[6] = number of clusters (herds) in the population (<= 0 = unknown)
# There is no separate format flag: a design prevalence < 1 is handled as a
# proportion and a value >= 1 as a number of units/clusters.
# as.is = TRUE avoids the "'as.is' should be specified" warning in R >= 4.0.
a1 <- type.convert(a0[8:13], as.is = TRUE)
# cat("\n", a1)

# Fail early with a clear message instead of an obscure error further down.
if (!is.numeric(a1) || anyNA(a1[1:5])) {
  stop("Design prevalences, test sensitivity, SeH and SSe must all be ",
       "supplied as numbers", call. = FALSE)
}

dp.a <- a1[1]
dp.a.int <- dp.a >= 1   # TRUE when given as a number of units
dp.h <- a1[2]
dp.h.int <- dp.h >= 1   # TRUE when given as a number of clusters
sens <- a1[3]
spec <- 1
seh <- a1[4]
sse <- a1[5]
herds <- a1[6]

heading <- "2-Stage Freedom Sample Size"
heading2 <- "Number of clusters to be sampled"
heading3 <- paste(
  "Numbers of units to be sampled for different cluster sizes for SeH =", seh
)
heading4 <- paste0(
  "Numbers of units to be sampled for different cluster sizes ",
  "and varying SeH"
)
heading5 <- "Numbers of clusters to be sampled for varying SeH"

# NOTE(review): spec, digits, d1, tmp.path and sinkfile are not used again in
# this script; they are kept in case the sourced helpers read them as globals.
digits <- 4
# Take the timestamp once so the three pieces always come from the same instant.
date.str <- date()
d1 <- paste(substr(date.str, 1, 10), substr(date.str, 20, 24), " @",
            substr(date.str, 11, 16))

# Hash the current time (call Sys.time(); hashing the function object itself
# gives the same name on every run) plus the process id, so that separate runs
# do not write to the same temporary file.
filename <- digest(list(Sys.time(), Sys.getpid()))
tmp.path <- paste0(fpath, "tmp/")
tmp.file <- paste0(tmp.path, filename)
sinkfile <- paste0(tmp.file, ".txt")

# set up table of inputs
inputs <- array("", dim = c(length(a1), 1))
# The last label contains an embedded line break, preserved from the original.
rownames(inputs) <- c("Unit-level design prevalence",
                      "Cluster-level design prevalence",
                      "Test sensitivity",
                      "Target cluster sensitivity (SeH)",
                      "Target system sensitivity (SSe)",
                      "No. \nclusters in population")
inputs[1, 1] <- if (dp.a.int) {
  paste(dp.a, " unit(s)")
} else {
  paste0(dp.a * 100, "%")
}
inputs[2, 1] <- if (dp.h.int) {
  paste(dp.h, " cluster(s)")
} else {
  paste0(dp.h * 100, "%")
}
inputs[3, 1] <- sens
inputs[4, 1] <- seh
inputs[5, 1] <- sse
# A missing or non-positive cluster count means the population size is unknown.
herds.known <- !is.na(herds) && herds > 0
inputs[6, 1] <- if (herds.known) herds else "Unknown"
if (!herds.known) {
  herds <- NA
}

# calculating sample sizes for varying herd sizes
herd.size <- c(10, 20, 30, 40, 50, 100, 200, 500, 1000, 5000, 10000)
# First element of the result is used as the number of clusters to sample,
# the second as the units to sample for each cluster size in herd.size.
tmp <- n.2stage(H = herds, N = herd.size, sep.sys = sse, sep.c = seh,
                pstar.c = dp.h, pstar.u = dp.a, se = sens)
n <- tmp[[2]]  # n.freedom(N=herd.size, sep=seh, pstar=dp.a, se=sens)
dim(n) <- c(length(n), 1)
rownames(n) <- paste("Cluster size =", herd.size)
colnames(n) <- "Number of units to sample"

# calculate number of herds to test
h <- tmp[[1]]  # n.freedom(N=herds, sep=sse, pstar=dp.h, se=seh)
if (is.na(h)) {
  n.herds <- array("", dim = c(1, 1))
  n.herds[1, 1] <- "Unable to achieve target by testing all clusters"
  # rownames(n.herds)<- paste("Clusters in population =", inputs[6,1])
  # colnames(n.herds)<- c("Clusters to sample")
} else {
  # Largest per-cluster sample size times the number of clusters; NA when no
  # cluster size can reach the target (avoids max() returning -Inf).
  max.units <- if (all(is.na(n))) NA else max(n, na.rm = TRUE)
  n.herds <- array(c(h, max.units * h), dim = c(1, 2))
  rownames(n.herds) <- paste("Clusters in population =", inputs[6, 1])
  colnames(n.herds) <- c("Clusters to sample", "Maximum number of samples")
}

# table of sample sizes for range of SeH values
seh.l <- c(seq(0.1, 0.5, by = 0.1), seq(0.6, 0.95, by = 0.05))
n.tab <- array(0, dim = c(length(herd.size), length(seh.l)))
rownames(n.tab) <- rownames(n)[seq_len(nrow(n.tab))]
colnames(n.tab) <- paste("SeH =", seh.l)
for (r in seq_len(nrow(n.tab))) {
  n.tab[r, ] <- n.freedom(N = herd.size[r], sep = seh.l, pstar = dp.a,
                          se = sens)
  # Blank out sample sizes that exceed the cluster size.
  too.big <- which(n.tab[r, ] > herd.size[r])
  n.tab[r, too.big] <- NA
}
# The unknown-cluster-size row is only added when the unit-level design
# prevalence is a proportion.
if (!dp.a.int) {
  n.x <- n.freedom(N = NA, sep = seh.l, pstar = dp.a, se = sens)
  n.tab <- rbind(n.tab, "Cluster size = unknown" = n.x)
}

# calculate numbers of herds for varying seh
if (!is.na(herds)) {
  if (dp.h.int) {
    # Known population, integer cluster-level prevalence: one row only.
    n.herds.l <- array(0, dim = c(1, length(seh.l)))
    rownames(n.herds.l) <- paste("Population \n=", herds, "clusters")
    n.herds.l[1, ] <- n.freedom(N = herds, sep = sse, pstar = dp.h,
                                se = seh.l)
    n.herds.l[1, which(n.herds.l[1, ] > herds)] <- NA
  } else {
    # Known population, proportional prevalence: known and unknown-size rows.
    n.herds.l <- array(0, dim = c(2, length(seh.l)))
    rownames(n.herds.l) <- c(paste("Population =", herds, "clusters"),
                             "Population size unknown")
    n.herds.l[1, ] <- n.freedom(N = herds, sep = sse, pstar = dp.h,
                                se = seh.l)
    n.herds.l[1, which(n.herds.l[1, ] > herds)] <- NA
    n.herds.l[2, ] <- n.freedom(N = NA, sep = sse, pstar = dp.h, se = seh.l)
  }
} else {
  # NOTE(review): this branch is also reached when dp.h is a number of
  # clusters (>= 1) with an unknown population size - confirm that
  # n.freedom() copes with that combination.
  n.herds.l <- array(0, dim = c(1, length(seh.l)))
  rownames(n.herds.l) <- "No. clusters in population unknown"
  n.herds.l[1, ] <- n.freedom(N = NA, sep = sse, pstar = dp.h, se = seh.l)
}
# Column labels are taken from n.tab (the "SeH = ..." labels).
colnames(n.herds.l) <- colnames(n.tab)

# write to html and file
subheadings <- c(heading2, heading3, heading4, heading5)
result.txt <- ""
output <- html.output(heading, subheadings, inputs,
                      results = list(n.herds, n, n.tab, n.herds.l),
                      graphs = "", graph.headings = "",
                      show.inputs = TRUE, show.graphs = FALSE,
                      tmp.file, result.txt = result.txt)
write.html(output, tmp.file)
cat(output)