trace tracker logo

ROC analysis

This utility calculates test sensitivity and specificity for a test producing a continuous outcome. Suggested cut-points are calculated for a range of target values for sensitivity and specificity. A ROC curve and two-grah ROC curve are generated and Youden's index (J and test efficiency (for selected prevalence values (are also calculated).

Inputs are the desired level of confidence in the resulting sensitivity and specificity estimates and two columns of data for analysis.

Data required is a series of test results for both infected and uninfected individuals. This data can be pasted in either of two formats:

  • Stacked - the first column contains status identifiers as either "Infected" or "Uninfected" and the second column contains the corresponding test result; or
  • Unstacked - separate columns contain test results for infected and uninfected individuals. Column order is unimportant but columns must be labelled appropriately as "Infected" or "Uninfected" in a header row.

Regardless of the format used, the first row must contain column headers. Additional columns of data will be ignored.

Outputs include:

  • numerical and graphical summaries of testing results for both infected and uninfected groups;
  • cut-point values to achieve minimum target vaues for both sensitivity and specificity along with corresponding estimates and Wilson binomial confidence intervals;
  • one- and two-graph ROC curves, with estimated AUC for the one-graph curve;
  • Area under the ROC curve (AUC) and associated DeLong confidence limits and Z test. See DeLong et al. (1988). Comparing the areas under two or more correlated receiver operating characteristic curves: a nonparametric approach. Biometrics 44:837-845;
  • graphs of Youden's J Index and test efficiency for a range of prevalence values;
  • graphs of mis-classification cost terms for a range of prevalence values and relative costs of false negative/false positive; and
  • detailed sensitivity and specificity results in a downloadable spreadsheet file.

For more information, see Greiner, M, Pfeiffer, D and Smith, RD (2000). Principles and practical application of the receiver-operating characteristic analysis for diagmostic tests. Preventive Veterinary Medicine 45:23-41.

Download example data here.


No results

No example available
ROC_Data_Demo.xls
No references available
				#########################################################################3333
# ROC Calculator
#########################################################################3333

# check version and load header script
# cat(5)
rm(list = ls())
test<- ifelse(length(commandArgs()) < 3, TRUE, FALSE)
fpath<- ifelse(test, "webRootUrl", "rtoolsPath")
# load header scripts
# cat(10)
  source(paste(fpath, "R/epi_head.R", sep = ""))
  source(paste(fpath, "R/HTMLStream.R", sep = ""))
  source(paste(fpath, "R/epitools_functions.r", sep = ""))
# cat(15)

# extract command arguments
    a1<- type.convert(a0[8:9])

# cat(a0)
# 
filename<- digest(Sys.time())
tmp.path<- paste(fpath, "tmp/", sep = "")
tmp.file<- paste(fpath, "tmp/", filename, sep = "")
sinkfile<- paste(fpath, "tmp/", filename, ".txt", sep="")   # 
  
# cat("
", 27) sink(sinkfile) stacked<- ifelse(test, 1, a1[1]) # format of data - 1 = stacked, 0 = unstacked conf<- ifelse(test, 0.95, a1[2]) test.name<- ifelse(test, "ELISA", a0[10]) #test name # test.name<- "test name" data.file<- ifelse(test, paste(fpath, "docs/ROC_Data_Demo.txt", sep = ""), a0[11]) #"ELISA_ROC_Data_Unstacked.csv" # data.file<- paste(fpath, "ELISA_ROC_Data_Unstacked.csv", sep = "") digits<- 3 target.vals<- c(0.999, 0.995, 0.99, 0.98, 0.95, 0.9, 0.8) prev.vals<- c(0.01, 0.05, 0.1, 0.2, 0.3, 0.5) rel.cost<- c(0.1, 0.2, 0.5, 1, 2, 5, 10) # relative cost of False negatives compared to false positives dark.cols<- c("darkblue", "red", "darkgreen", "brown", "purple", "darkorange", "aquamarine4", "maroon4", "black", "salmon", "cyan4", "darkviolet") light.cols<- c("lightblue", "pink", "lightgreen", "khaki", "plum2", "yellow", "aquamarine", "orchid1", "gray", "moccasin", "lightcyan2", "violet") label.names<- c("Infected", "Uninfected") # cat(a1) # read testing data roc.data<- read.csv(data.file, as.is = 1) # cat("
", 50) if (stacked == 1) { roc.data[tolower(roc.data[,1]) == tolower(label.names[1]),1]<- label.names[1] roc.data[tolower(roc.data[,1]) == tolower(label.names[2]),1]<- label.names[2] infected<- roc.data[which(roc.data[,1] == label.names[1]),2] uninfected<- roc.data[which(roc.data[,1] == label.names[2]),2] all.results<- roc.data[,2] rd<- roc.data } else { infected<- na.omit(roc.data$Infected) uninfected<- na.omit(roc.data$Uninfected) r.labels<- c(rep(label.names[1], length(infected)), rep(label.names[2], length(uninfected))) all.results<- c(infected, uninfected) rd<- data.frame(r.labels, all.results) } # end of if/else colnames(rd)[1:2]<- c("Status", "Result") n.inf<- length(infected) n.uninf<- length(uninfected) if (n.inf == 0) { sink() cat("No infected units in the data so can't do analysis.") quit() } if (n.uninf == 0) { sink() cat("No uninfected units in the data so can't do analysis.") quit() } roc.table<- table(rd[,2], rd[,1]) cum.count<- apply(roc.table, FUN = cumsum, MARGIN = 2) tmp.inf<- n.inf tmp.uninf<- 0 for (i in 2:nrow(cum.count)) { tmp.inf[i]<- n.inf - cum.count[i-1, 1] tmp.uninf[i]<- cum.count[i-1, 2] } # end of i loop cum.count[ ,1]<- tmp.inf cum.count[ ,2]<- tmp.uninf cp.list<- rownames(cum.count) Se<- array(0, dim = c(length(cp.list), 3)) Sp<- array(0, dim = c(length(cp.list), 3)) efficiency<- array(0, dim = c(length(cp.list), length(prev.vals))) colnames(efficiency)<- prev.vals colnames(Se)<- c("Sensitivity", paste("Se Lower ", conf*100, "% CL", sep = ""), paste("Se Upper ", conf*100, "% CL", sep = "")) colnames(Sp)<- c("Specificity", paste("Sp Lower ", conf*100, "% CL", sep = ""), paste("Sp Upper ", conf*100, "% CL", sep = "")) MCT<- array(0, dim = c(length(cp.list), length(prev.vals), length(rel.cost))) colnames(MCT)<- paste("Prev =", prev.vals) dimnames(MCT)[[3]]<- paste("FN/FP =", rel.cost) # calc Sensitivity wilson.conf<- binom.wilson(cum.count[,1], n.inf, conf) Se[, 1]<- wilson.conf$proportion Se[, 2]<- wilson.conf$lower Se[, 3]<- wilson.conf$upper # calc Specificity wilson.conf<- binom.wilson(cum.count[,2], n.uninf, conf) Sp[, 1]<- wilson.conf$proportion Sp[, 2]<- wilson.conf$lower Sp[, 3]<- wilson.conf$upper J<- Se[, 1] + Sp[, 1] - 1 for (p in 1:length(prev.vals)) { efficiency[,p]<- prev.vals[p] * Se[,1] + (1 - prev.vals[p]) * Sp[,1] for (r in 1:length(rel.cost)) { MCT[, p, r]<- rel.cost[r]*prev.vals[p]*(1 - Se[,1]) + (1 - prev.vals[p])*(1 - Sp[,1]) } # end of r loop } # end of p loop # library(pROC) roc.lst<- roc(rd$Status == "Infected", rd$Result) auc<- array("", dim = c(2, 1)) rownames(auc)<- c("Area under curve (AUC)", paste(conf*100, "% CI for AUC", sep = "")) auc.ci<- round(ci.auc(roc.lst, conf.level = conf), digits) auc[1, 1]<- auc.ci[2] auc[2,1]<- paste(auc.ci[1], auc.ci[3], sep = "-") z.conf<- qnorm(1 - (1 - conf)/2, 0, 1) # calculate corrsponding (2-tail) z value rd$Rank<- rank(rd$Result) R<- sum(rd$Rank[rd$Status == "Uninfected"]) E.R<- 0.5*n.uninf*(n.uninf+n.inf+1) var.R<- n.uninf*n.inf*var(rd$Rank)/(n.uninf+n.inf) z<- (R-E.R)/sqrt(var.R) p<- pnorm(z, lower = T) # cat("
", R) # cat("
AUC:", auc) cp.list<- as.numeric(cp.list) Se.Sp.list<- cbind(cp.list, Se, Sp, J, efficiency) colnames(Se.Sp.list)<- c("Cut-point", colnames(Se), colnames(Sp), c("Youden's J", paste("Eff. at P =", prev.vals))) Se.cp<- array(0, dim = c(length(target.vals), 7)) Sp.cp<- array(0, dim = c(length(target.vals), 7)) # cat("
", class(cp.list), mode(cp.list)) # cat(test) for (cp in 1:length(target.vals)) { #cp<- 1 # tmp<- sort(cp.list[which(Sp[, 1] <= target.vals[cp])]) # Sp.cp[cp, 1]<- tmp[length(tmp)] Sp.cp[cp, 1]<- min(cp.list[which(Sp[, 1] >= target.vals[cp])]) # cat("
", Sp.cp[cp, 1]) Sp.cp[cp, 2:4]<- Sp[match(Sp.cp[cp, 1], cp.list), ] Sp.cp[cp, 5:7]<- Se[match(Sp.cp[cp, 1], cp.list), ] # tmp<- cp.list[which(Se[, 1] >= target.vals[cp])] # Se.cp[cp, 1]<- tmp[length(tmp)] # cat("
", Se.cp[cp, 1]) Se.cp[cp, 1]<- max(cp.list[which(Se[, 1] >= target.vals[cp])]) Se.cp[cp, 2:4]<- Se[match(Se.cp[cp, 1], cp.list), ] Se.cp[cp, 5:7]<- Sp[match(Se.cp[cp, 1], cp.list), ] } # end of cp loop Se.cp<- cbind(target.vals, Se.cp) colnames(Se.cp)<- c("Target Se", "Cut-point", colnames(Se), colnames(Sp)) Sp.cp<- cbind(target.vals, Sp.cp) colnames(Sp.cp)<- c("Target Sp", "Cut-point", colnames(Sp), colnames(Se)) inf.summary<- Summarise(infected, digits, pc = c(0.05, 0.25, 0.5, 0.75, 0.95)) uninf.summary<- Summarise(uninfected, digits, pc = c(0.05, 0.25, 0.5, 0.75, 0.95)) result.summary<- rbind(inf.summary[1:10], uninf.summary[1:10]) colnames(result.summary)<- c("Min", "5%", "25%", "Median", "75%", "95%", "Max", "Mean", "Std. deviation", "Count") rownames(result.summary)<- label.names # cat("
", mode(Se.cp)) # maximise J and Efficiency max.effic<- array(0, dim = c(length(prev.vals)+1, 3)) rownames(max.effic)<- c("Youden's J", paste("Eff. at P =", prev.vals)) colnames(max.effic)<- c("Cut-point", "Sensitivity", "Specificity") for (j in 1:(length(prev.vals)+1)) { tmp<- Se.Sp.list[which(Se.Sp.list[, j+7] == max(Se.Sp.list[, j+7])), ] max.effic[j, 1:2]<- tmp[1:2] max.effic[j, 3]<- tmp[5] } # minimise MCT min.MCT<- array(0, dim = c(length(prev.vals), 4, length(rel.cost))) rownames(min.MCT)<- paste("P =", prev.vals) colnames(min.MCT)<- c("Cut-point", "MCT", "Sensitivity", "Specificity") # cat("
", colnames(MCT)) dimnames(min.MCT)[[3]]<- dimnames(MCT)[[3]] for (p in 1:(length(prev.vals))) { # p<- 1 for (r in 1:(length(rel.cost))) { # r<- 1 tmp<- which.min(MCT[, p, r]) min.MCT[p, 1, r]<- cp.list[tmp] min.MCT[p, 2, r]<- MCT[tmp, p, r] min.MCT[p, 3, r]<- Se.Sp.list[tmp, 2] min.MCT[p, 4, r]<- Se.Sp.list[tmp, 5] } # } # cat("
", 200) # density curve of all results Title<-c("Density curve of all results") x.label<- "Test Result" graphfile<- paste(fpath, "tmp/", filename, "_density.png", sep="") OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 6) tmp<- density(all.results) plot(density(all.results), main = Title, col = dark.cols[1], xlab = x.label, xlim = c(min(all.results), max(all.results)), ylim = c(0, max(tmp$y))) CloseGraphOutput("R") # cat("
", test) # bar plot of infected and uninfected Title<- c("Distribution of test results for infected", "Distribution of test results for uninfected") x.label<- "Test Result" tmp1<- hist(infected, plot = F) tmp2<- hist(uninfected, plot = F) ymax<- max(tmp1$counts, tmp2$counts) graphfile<- paste(fpath, "tmp/", filename, "_barplot.png", sep="") OpenGraphOutput(graphfile, pointsize = 12, ht = 8, wd = 6) par(mfrow = c(2, 1)) hist(infected, col = light.cols[2], main = Title[1], border = dark.cols[2], xlim = range(all.results), ylim = c(0, ymax)) hist(uninfected, col = light.cols[3], main = Title[2], border = dark.cols[3], xlim = range(all.results), ylim = c(0, ymax)) CloseGraphOutput("R") # box plot of infected and uninfected Title<-c("Box plot of results by infection status") x.label<- "Test Result" graphfile<- paste(fpath, "tmp/", filename, "_boxplot.png", sep="") OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 6) boxplot(infected, uninfected, ylab = x.label, border = "black", #dark.cols[2:1], light.cols[2:1] col = "white", main = Title, names = label.names) CloseGraphOutput("R") # strip plot of infected and uninfected Title<-c("Strip chart of results by infection status") x.label<- "Test Result" graphfile<- paste(fpath, "tmp/", filename, "_stripchart.png", sep="") OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 6) stripchart(rd$Result ~ rd$Status, xlab = x.label, col = dark.cols[2:1], method = "jitter", jitter = 0.4, main = Title, vertical = T, pch = 1, cex = 0.8, ylab = "Test result") CloseGraphOutput("R") # roc curve Title<-c("ROC Curve") x.label<- "False positive rate (1 - Specificity)" y.label<- "True positive rate (Sensitivity)" graphfile<- paste(fpath, "tmp/", filename, "_ROC.png", sep="") OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 8) plot(1-Sp[, 1], Se[, 1], type = "l", col = dark.cols[1], xlab = x.label, ylab = y.label, main = Title) # plot(perf1, type = "l", col = dark.cols[1], xlab = x.label, # ylab = y.label, main = Title) segments(0,0, 1,1, col = dark.cols[2]) CloseGraphOutput("R") # 2-graph roc curve Title<-c("Two graph ROC Curve") x.label<- "Test Result" y.label<- "Sensitivity/Specificity" graphfile<- paste(fpath, "tmp/", filename, "_2GROC.png", sep="") OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 8) plot(x = cp.list, y = Se[, 1], type = "l", col = dark.cols[2], xlab = x.label, ylab = y.label, main = Title, lty = "dashed", lwd = 1.5) lines(x = cp.list, y = Sp[, 1], col = dark.cols[3], lwd = 1.5) legend("right", legend = c("Sensitivity", "Specificity"), col = dark.cols[2:3], cex = 0.8, lty = c("dashed", "solid"), lwd = 1.5) CloseGraphOutput("R") # plot Youden's index and efficiency Title<-c("Youden's J and test efficiency") x.label<- "Cut-off value" y.label<- "J/Efficiency" graphfile<- paste(fpath, "tmp/", filename, "_J.png", sep="") OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 8) plot(x = cp.list, y = J, type = "l", col = dark.cols[1], xlab = x.label, ylab = y.label, main = Title, ylim = c(0,1)) for (l in 1:length(prev.vals)) { lines(x = cp.list, y = efficiency[, l], col = dark.cols[l+1]) } #endof l loop legend("bottomright", legend = c("Youden's J", paste("Eff. at P =", prev.vals)), col = dark.cols, cex = 0.6, lty = 1) CloseGraphOutput("R") # plot mis-classification cost term Title<-c("Mis-classification Cost Term") x.label<- "Cut-off value" y.label<- "MCT" graphfile<- paste(fpath, "tmp/", filename, "_MCT.png", sep="") OpenGraphOutput(graphfile, pointsize = 12, ht = 10, wd = 7) par(mfrow = c(ceiling(length(prev.vals)/2), 2)) for (g in 1:length(prev.vals)) { plot(x = cp.list, y = MCT[, g, 1], type = "l", col = dark.cols[1], xlab = x.label, ylab = y.label, main = Title, ylim = c(0, max(MCT[,g,]))) for (l in 2:length(rel.cost)) { lines(x = cp.list, y = MCT[, g, l], col = dark.cols[l]) } #endof l loop legend("topright", legend = dimnames(MCT)[[3]], col = dark.cols, cex = 0.8, lty = 1) mtext(paste("Prevalence =", prev.vals[g]), side = 3, line = 0.5, cex = 0.7) } # end of g loop CloseGraphOutput("R") # html output d1<- paste(substr(date(), 1, 10), substr(date(), 20,24), " @", substr(date(), 11, 16)) # reformat headers output<- paste("

Sensitivity and specificity and ROC Analysis for", test.name, "

\n") output<- paste(output, "

Results

\n") # cat("
", d1) # start building HTML output output<- paste(output, "

", "Analysed: ", d1, "

\n", sep="") output<- paste(output, "

Summary of testing data

\n", sep="") tmp1<- HTMLStream(result.summary, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar", cellalign = "center", align = "left", digits = digits) tmp2<- "

Summary graphs

\n" output<- paste(output, tmp1, tmp2, sep="") temp<- array("", dim = c(1, 4)) colnames(temp)<- c("Density curve of
all test results", "Histogram of test results
by disease status", "Boxplots of test results
by disease status", "Stripchart of test results
by disease status") graph<- paste(fpath, "tmp/", filename, "_density.png", sep="") # img.url<- paste(filename, "_density.png", sep="") temp[1]<- paste("\n", sep="") # graph<- paste(fpath, "tmp/", filename, "_barplot.png", sep="") # img.url<- paste(filename, "_barplot.png", sep="") temp[2]<- paste("\n", sep="") # graph<- paste(fpath, "tmp/", filename, "_boxplot.png", sep="") # img.url<- paste(filename, "_boxplot.png", sep="") temp[3]<- paste("\n", sep="") # graph<- paste(fpath, "tmp/", filename, "_stripchart.png", sep="") # img.url<- paste(filename, "_boxplot.png", sep="") temp[4]<- paste("\n", sep="") # output<- paste(output, HTMLStream(temp,Border = 0,classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar", digits = 2, append = TRUE, align = "", caption = "", captionalign = "", classcaption = "", classtable = "", cellalign = "", cellborder = 0)) # # summary of sensitivity and specificity output<- paste(output, "

Summary of Sensitivity and Specificity

\n", sep="") tmp<- "

Cut-point results for target Sensitivity

\n" tmp1<- HTMLStream(Se.cp, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar", cellalign = "center", align = "left", digits = digits) tmp2<- "

Cut-point results for target Specificity

\n" tmp3<- HTMLStream(Sp.cp, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar", cellalign = "center", align = "left", digits = digits) tmp4<- "

Sensitivity and Specificity graph

\n" graph<- paste(fpath, "tmp/", filename, "_2GROC.png", sep="") # img.url<- paste(filename, "_density.png", sep="") tmp5<- paste("
\n", sep="") # output<- paste(output, tmp, tmp1, tmp2, tmp3, tmp4, tmp5, sep="") # graph of ROC analysis output<- paste(output, "

Graph of ROC Curve

\n", sep="") graph<- paste(fpath, "tmp/", filename, "_ROC.png", sep="") # img.url<- paste(filename, "_density.png", sep="") tmp<- paste("
\n", sep="") # tmp1<- paste("

Area under ROC curve

\n", HTMLStream(auc, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar", cellalign = "center", align = "left", digits = digits), sep = "") # tmp2<- "

Graph of Youden's J and test Efficiency

\n" graph<- paste(fpath, "tmp/", filename, "_J.png", sep="") tmp3<- paste("
\n", sep="") # tmp3a<- paste("

Cut-points to maximise Youden's J and test Efficiency

", HTMLStream(max.effic, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar", cellalign = "center", align = "left", digits = digits)) tmp4<- "

Graph of Mis-classification Cost Term

\n" graph<- paste(fpath, "tmp/", filename, "_MCT.png", sep="") tmp5<- paste("
\n", sep="") # output<- paste(output, tmp, tmp1, tmp2, tmp3, tmp3a, tmp4, tmp5, sep="") output<- paste(output, "

Cut-points to minimise Mis-classification Cost Term

\n") for (p in 1:length(prev.vals)) { output<- paste(output, "
", colnames(MCT)[p], "
", HTMLStream(min.MCT[p,,], cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar", cellalign = "center", align = "left", digits = digits)) } # cat("
", conf) # Detailed results file.name<- file(paste(tmp.file, "_Se_Sp_details.xls", sep = ""), open = "wt") writeLines(c("Detailed Sensitivity and Specificity results", paste("Test =", test.name), date(), ""), con = file.name) write.table(Se.Sp.list, file = file.name, sep = "\t", append = T, col.names = NA) writeLines("", con = file.name) writeLines("Mis-classification cost estimates", con = file.name) for (p in 1:length(prev.vals)) { writeLines(paste("Prevalence =", prev.vals[p]), con = file.name) temp<- cbind(cp.list, MCT[, p,]) colnames(temp)[1]<- "Cut-point" write.table(temp, file = file.name, sep = "\t", append = T, col.names = NA) writeLines("", con = file.name) } # end of p loop writeLines("", con = file.name) writeLines("Original data", con = file.name) write.table(rd, file = file.name, sep = "\t", append = T, col.names = NA) close(file.name) output<- paste(output, "

Download detailed results

\n") output<- paste(output, "Detailed results
\n", sep = "") sink() # write output to file file.name<- file(paste(tmp.file, "_result.html", sep = ""), open = "wt") cat(output, file=file.name) #) cat(output)