Analysis of 2-stage freedom survey data
Analyse cluster-testing data for 2-stage surveys for demonstrating disease freedom. This analysis calculates the overall system sensitivity for the survey and the resulting probability of population freedom from disease. It assumes that a random sample of clusters (or all clusters) has been selected for testing from the population and that a random sample of units (or all units) has been tested within each selected clusters. It also assumes that the test system has a specificity of 100% (any positive results are further investigated to exclude false positives) and that no positive results were recorded. The analysis adjusts for imperfect sensitivity of the test used.
The analysis calculates both cluster and system (population) level sensitivity estimates using three different methods depending on the available data:
- assumed binomial sampling (sampling with replacement) where population size is unknown or not specified;
- a hypergeometric approximation (sampling without replacement) where population size is specified; or
- exact probability calculations where the entire population has been sampled.
Design prevalence (specified level of disease to be detected) must be specified at both unit and cluster levels. Design prevalence can be specified as either:
- a proportion of the population infected; or
- a specific (integer) number of clusters or units (within clusters) infected.
Inputs
Inputs required include:
- unit-level design prevalence as either a proportion or an integer number of units;
- cluster-level design prevalence as either a proportion or an integer number of clusters;
- the number of clusters in the population as either unknown, all clusters tested or a specified number of clusters;
- the estimated test sensitivity;
- the assumed prior probability that the population is free of disease; and
- testing data for each clusters, including clusters id (optional), number of units tested and (optionally) clusters size. Cluster size is required if unit-level design prevalence is specified as number rather than proportion of units.
Outputs
Outputs from the analysis include:
- Overall system sensitivity (probability of detecting disease if it was present at the specified animal and herd-level design prevalences);
- Probability of freedom of the population from disease (at the specified unit and cluster-level design prevalences; and
- A summary of cluster-level sensitivity (SeH) values and specific values for each cluster tested.
A spreadsheet of sample data can be downloaded from the Example tab or by clicking here.
No results
###################################### # Program to do analyse 2-stage freedom urvey ###################################### # uses RSurveillance package rm(list = ls()) # cat("
Test:",length(commandArgs())) test<- ifelse(length(commandArgs()) < 3, TRUE, FALSE) fpath<- ifelse(test, "c:/ms4w/Apache/htdocs/epitools/", "") # load header scripts source(paste(fpath, "R/epi_head.R", sep = "")) source(paste(fpath, "R/HTMLStream.R", sep = "")) source(paste(fpath, "R/epitools_functions.r", sep = "")) # extract command arguments # 1 = animal design prevalence, # 2 = herd design prevalence, # 3 = herds in popn (0=unknown, 1=all tested, 2=specified), 4 = no herds specified (-1 if herd numbers not specified), # 5 = test sensitivity, 6 = prior probability of freedom, 9 = herd size provided (1 = True, 0 = False) a1<- type.convert(a0[8:13]) # cat(a1) dp.a<- a1[1] dp.a.int<- ifelse(dp.a <1, F, T) dp.h<- a1[2] dp.h.int<- ifelse(dp.h <1, F, T) pop.herds<- a1[3] n.herds<- a1[4] sens<- a1[5] prior.pfree<- a1[6] # P.inf<- 1 - prior.pfree heading<- "2-Stage Freedom Analysis" heading2<- "Summary results" heading3<- "Individual cluster results" digits<- 4 # cat(42) filename<- digest(Sys.time) tmp.path<- paste(fpath, "tmp/", sep = "") tmp.file<- paste(fpath, "tmp/", filename, sep = "") sinkfile<- paste(fpath, "tmp/", filename, ".txt", sep="") # fpath, # cat("
", data.file) data.file<- ifelse(test, paste(fpath, "docs/2StageFreedomAnalysisDemo.txt", sep = ""), a0[length(a0)]) #test name dat<- read.delim(data.file, header=TRUE, sep=",") # first clumn = cluster id, 2nd = test result (T/F or 1/0) if (pop.herds <= 1) n.herds<- nrow(dat) if (pop.herds <= 0) n.herds<- NA # cat(53) # set up table of inputs inputs<- array("", dim = c(length(a1)-1, 1)) rownames(inputs)<- c("Unit-level design prevalence", "Cluster-level design prevalence", "Clusters in population", "Test sensitivity", "Prior probability of freedom") inputs[1, 1]<- ifelse(dp.a.int, paste(dp.a, " units(s)"), paste(dp.a*100, "%", sep = "")) inputs[2, 1]<- ifelse(dp.h.int, paste(dp.h, " clusters(s)"), paste(dp.h*100, "%", sep = "")) inputs[3, 1]<- ifelse(is.na(n.herds), "Unknown", paste(n.herds, " clusters")) inputs[4:5, 1]<- a1[5:6] # cat("
", data.file) # calculate seh for each herd attach(dat) if (!exists("Tested")) { cat("
I cannot do the analysis because there is no column labeled 'Tested' in the data.
Please correct the data and re-try the analysis.
") quit() } # cat(exists("HerdSize")) if (exists("HerdSize")) { # herd sizes provided if (sum(HerdSize < Tested & !is.na(HerdSize)) > 0) { cat("
I cannot calculate SeH because one or more cluster sizes are less than the number of units tested for that cluster.
Please correct the data and re-try the analysis.
") quit() } else { if(dp.a.int & sum(is.na(HerdSize)) >0) { cat("
I cannot calculate SeH because one or more cluster sizes are missing and design prevalence is specified as a number of units.
Please correct the data and re-try the analysis.
") quit() } N<- HerdSize n<- Tested } } else { N<- rep(NA, nrow(dat)) } # cat(N) tmp<- sep.sys(H=n.herds, N=N, n=n, dp.h, dp.a, sens) dat$SeH<- tmp[[2]] SSe<- tmp[[1]] sink(sinkfile) graphfile<- paste(tmp.file, "_stripchart.png", sep="") OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 6) stripchart(dat$SeH[dat$SeH>0], method = "jitter", jitter = 0.2, pch = 1, col = "blue", main = "Distribution of SeH values", xlab = "Cluster sensitivity") CloseGraphOutput("R") sink() # Calculate probability of freedom PFree<- pfree.1(SSe, p.intro=0, prior=prior.pfree)[1,4] tmp<- Summarise(na.omit(dat$SeH), digits=digits) result<- array("", dim = c(12, 1)) # colnames(result)<- c("Sensitivity", "Probability of freedom") rownames(result)<- c("System Sensitivity", "Probability of freedom", paste("SeH", names(tmp))) rownames(result)[nrow(result)]<- "Number of clusters tested" colnames(result)<- "Value" result[1,1]<- round(SSe, digits) result[2,1]<- round(PFree, digits) result[3:12,1]<- tmp # result[2:11,2]<- Summarise(npv, digits=digits) dat$SeH<- round(dat$SeH, digits) # npv<- round(npv, digits) detach(dat) # write to html and file subheadings<- c(heading2, heading3) result.txt<- "" output<- html.output(heading, subheadings, inputs, results = list(result, dat), graphs = graphfile, graph.headings = "Distribution of herd sensitivities", show.inputs = T, show.graphs = T, tmp.file, result.txt = result.txt) write.html(output, tmp.file) cat(output)