ROC analysis

This utility calculates test sensitivity and specificity for a test producing a continuous outcome. Suggested cut-points are calculated for a range of target values for sensitivity and specificity. A ROC curve and two-grah ROC curve are generated and Youden's index (J and test efficiency (for selected prevalence values (are also calculated).

Inputs are the desired level of confidence in the resulting sensitivity and specificity estimates and two columns of data for analysis.

Data required is a series of test results for both infected and uninfected individuals. This data can be pasted in either of two formats:

Stacked - the first column contains status identifiers as either "Infected" or "Uninfected" and the second column contains the corresponding test result; or
Unstacked - separate columns contain test results for infected and uninfected individuals. Column order is unimportant but columns must be labelled appropriately as "Infected" or "Uninfected" in a header row.

Regardless of the format used, the first row must contain column headers. Additional columns of data will be ignored.

Outputs include:

numerical and graphical summaries of testing results for both infected and uninfected groups;
cut-point values to achieve minimum target vaues for both sensitivity and specificity along with corresponding estimates and Wilson binomial confidence intervals;
one- and two-graph ROC curves, with estimated AUC for the one-graph curve;
Area under the ROC curve (AUC) and associated DeLong confidence limits and Z test. See DeLong et al. (1988). Comparing the areas under two or more correlated receiver operating characteristic curves: a nonparametric approach. Biometrics 44:837-845;
graphs of Youden's J Index and test efficiency for a range of prevalence values;
graphs of mis-classification cost terms for a range of prevalence values and relative costs of false negative/false positive; and
detailed sensitivity and specificity results in a downloadable spreadsheet file.

For more information, see Greiner, M, Pfeiffer, D and Smith, RD (2000). Principles and practical application of the receiver-operating characteristic analysis for diagmostic tests. Preventive Veterinary Medicine 45:23-41.

Download example data here.

No results

No example available
ROC_Data_Demo.xls

No references available

				#########################################################################3333
# ROC Calculator
#########################################################################3333

# check version and load header script
# cat(5)
rm(list = ls())
test<- ifelse(length(commandArgs()) < 3, TRUE, FALSE)
fpath<- ifelse(test, "webRootUrl", "rtoolsPath")
# load header scripts
# cat(10)
  source(paste(fpath, "R/epi_head.R", sep = ""))
  source(paste(fpath, "R/HTMLStream.R", sep = ""))
  source(paste(fpath, "R/epitools_functions.r", sep = ""))
# cat(15)

# extract command arguments
    a1<- type.convert(a0[8:9])

# cat(a0)
# 
filename<- digest(Sys.time())
tmp.path<- paste(fpath, "tmp/", sep = "")
tmp.file<- paste(fpath, "tmp/", filename, sep = "")
sinkfile<- paste(fpath, "tmp/", filename, ".txt", sep="")   # 
  
# cat("
", 27)
   sink(sinkfile)

stacked<- ifelse(test, 1, a1[1])           # format of data - 1 = stacked, 0 = unstacked
conf<- ifelse(test, 0.95, a1[2])
test.name<- ifelse(test, "ELISA", a0[10])              #test name
# test.name<- "test name"
data.file<- ifelse(test, paste(fpath, "docs/ROC_Data_Demo.txt", sep = ""), a0[11])              #"ELISA_ROC_Data_Unstacked.csv"
# data.file<- paste(fpath, "ELISA_ROC_Data_Unstacked.csv", sep = "")
digits<- 3
target.vals<- c(0.999, 0.995, 0.99, 0.98, 0.95, 0.9, 0.8)
prev.vals<- c(0.01, 0.05, 0.1, 0.2, 0.3, 0.5)
rel.cost<- c(0.1, 0.2, 0.5, 1, 2, 5, 10)     # relative cost of False negatives compared to false positives
dark.cols<- c("darkblue", "red", "darkgreen", "brown", "purple", "darkorange", "aquamarine4", "maroon4", "black", "salmon", "cyan4", "darkviolet")
light.cols<- c("lightblue", "pink", "lightgreen", "khaki", "plum2", "yellow", "aquamarine", "orchid1", "gray", "moccasin", "lightcyan2", "violet")
label.names<- c("Infected", "Uninfected")
# cat(a1)

# read testing data
  roc.data<- read.csv(data.file, as.is = 1)
# cat("
", 50)

if (stacked == 1) {
  roc.data[tolower(roc.data[,1]) == tolower(label.names[1]),1]<- label.names[1]
  roc.data[tolower(roc.data[,1]) == tolower(label.names[2]),1]<- label.names[2]
    infected<- roc.data[which(roc.data[,1] == label.names[1]),2]
    uninfected<- roc.data[which(roc.data[,1] == label.names[2]),2]
    all.results<- roc.data[,2]
    rd<- roc.data
} else {
    infected<- na.omit(roc.data$Infected)
    uninfected<- na.omit(roc.data$Uninfected)
    r.labels<- c(rep(label.names[1], length(infected)), rep(label.names[2], length(uninfected)))
    all.results<- c(infected, uninfected)
    rd<- data.frame(r.labels, all.results)
} # end of if/else
    colnames(rd)[1:2]<- c("Status", "Result")

n.inf<- length(infected)
n.uninf<- length(uninfected)
if (n.inf == 0) {
  sink()
  cat("No infected units in the data so can't do analysis.")
  quit()
}
if (n.uninf == 0) {
  sink()
  cat("No uninfected units in the data so can't do analysis.")
  quit()
}
roc.table<- table(rd[,2], rd[,1])
cum.count<- apply(roc.table, FUN = cumsum, MARGIN = 2)
tmp.inf<- n.inf
tmp.uninf<- 0
for (i in 2:nrow(cum.count)) {
    tmp.inf[i]<- n.inf - cum.count[i-1, 1]
    tmp.uninf[i]<- cum.count[i-1, 2]
} # end of i loop
cum.count[ ,1]<- tmp.inf
cum.count[ ,2]<- tmp.uninf

cp.list<- rownames(cum.count)
Se<- array(0, dim = c(length(cp.list), 3))
Sp<- array(0, dim = c(length(cp.list), 3))
efficiency<- array(0, dim = c(length(cp.list), length(prev.vals)))
colnames(efficiency)<- prev.vals
colnames(Se)<- c("Sensitivity", paste("Se Lower ", conf*100, "% CL", sep = ""), paste("Se Upper ", conf*100, "% CL", sep = ""))
colnames(Sp)<- c("Specificity", paste("Sp Lower ", conf*100, "% CL", sep = ""), paste("Sp Upper ", conf*100, "% CL", sep = ""))
MCT<- array(0, dim = c(length(cp.list), length(prev.vals), length(rel.cost)))
colnames(MCT)<- paste("Prev =", prev.vals)
dimnames(MCT)[[3]]<- paste("FN/FP =", rel.cost)

# calc Sensitivity
wilson.conf<- binom.wilson(cum.count[,1], n.inf, conf)
Se[, 1]<- wilson.conf$proportion
Se[, 2]<- wilson.conf$lower
Se[, 3]<- wilson.conf$upper

# calc Specificity
wilson.conf<- binom.wilson(cum.count[,2], n.uninf, conf)
Sp[, 1]<- wilson.conf$proportion
Sp[, 2]<- wilson.conf$lower
Sp[, 3]<- wilson.conf$upper

J<- Se[, 1] + Sp[, 1] - 1
for (p in 1:length(prev.vals)) {
    efficiency[,p]<- prev.vals[p] * Se[,1] + (1 - prev.vals[p]) * Sp[,1]
    for (r in 1:length(rel.cost)) {
        MCT[, p, r]<- rel.cost[r]*prev.vals[p]*(1 - Se[,1]) + (1 - prev.vals[p])*(1 - Sp[,1])
    } # end of r loop
} # end of p loop
# library(pROC)
roc.lst<- roc(rd$Status == "Infected", rd$Result)
auc<- array("", dim = c(2, 1))
rownames(auc)<- c("Area under curve (AUC)", paste(conf*100, "% CI for AUC", sep = ""))
auc.ci<- round(ci.auc(roc.lst, conf.level = conf), digits)
auc[1, 1]<- auc.ci[2]
auc[2,1]<- paste(auc.ci[1], auc.ci[3], sep = "-")

z.conf<- qnorm(1 - (1 - conf)/2, 0, 1)        # calculate corrsponding (2-tail) z value
rd$Rank<- rank(rd$Result)
R<- sum(rd$Rank[rd$Status == "Uninfected"])
E.R<- 0.5*n.uninf*(n.uninf+n.inf+1)
var.R<- n.uninf*n.inf*var(rd$Rank)/(n.uninf+n.inf)
z<- (R-E.R)/sqrt(var.R)
p<- pnorm(z, lower = T)

# cat("
", R)
# cat("
AUC:", auc)
cp.list<- as.numeric(cp.list)

Se.Sp.list<-  cbind(cp.list, Se, Sp, J, efficiency)
colnames(Se.Sp.list)<- c("Cut-point", colnames(Se), colnames(Sp),
                       c("Youden's J", paste("Eff. at P =", prev.vals)))

Se.cp<- array(0, dim = c(length(target.vals), 7))
Sp.cp<- array(0, dim = c(length(target.vals), 7))
# cat("
", class(cp.list), mode(cp.list))
# cat(test)


for (cp in 1:length(target.vals)) {
#cp<- 1
#    tmp<- sort(cp.list[which(Sp[, 1] <= target.vals[cp])])
#    Sp.cp[cp, 1]<- tmp[length(tmp)]
    Sp.cp[cp, 1]<- min(cp.list[which(Sp[, 1] >= target.vals[cp])])
#    cat("
", Sp.cp[cp, 1])
    Sp.cp[cp, 2:4]<- Sp[match(Sp.cp[cp, 1], cp.list), ]
    Sp.cp[cp, 5:7]<- Se[match(Sp.cp[cp, 1], cp.list), ]
#    tmp<- cp.list[which(Se[, 1] >= target.vals[cp])]
#    Se.cp[cp, 1]<- tmp[length(tmp)]
#    cat("
", Se.cp[cp, 1])
    Se.cp[cp, 1]<- max(cp.list[which(Se[, 1] >= target.vals[cp])])
    Se.cp[cp, 2:4]<- Se[match(Se.cp[cp, 1], cp.list), ]
    Se.cp[cp, 5:7]<- Sp[match(Se.cp[cp, 1], cp.list), ]
} # end of cp loop
Se.cp<- cbind(target.vals, Se.cp)
colnames(Se.cp)<- c("Target Se", "Cut-point", colnames(Se), colnames(Sp))
Sp.cp<- cbind(target.vals, Sp.cp)
colnames(Sp.cp)<- c("Target Sp", "Cut-point", colnames(Sp), colnames(Se))

inf.summary<- Summarise(infected, digits, pc = c(0.05, 0.25, 0.5, 0.75, 0.95))
uninf.summary<- Summarise(uninfected, digits, pc = c(0.05, 0.25, 0.5, 0.75, 0.95))
result.summary<- rbind(inf.summary[1:10], uninf.summary[1:10])
colnames(result.summary)<- c("Min", "5%", "25%", "Median", "75%", "95%", "Max", "Mean", "Std. deviation", "Count")
rownames(result.summary)<- label.names
#    cat("
", mode(Se.cp))

# maximise J and Efficiency
max.effic<- array(0, dim = c(length(prev.vals)+1, 3))
rownames(max.effic)<- c("Youden's J", paste("Eff. at P =", prev.vals))
colnames(max.effic)<- c("Cut-point", "Sensitivity", "Specificity")
for (j in 1:(length(prev.vals)+1)) {
    tmp<- Se.Sp.list[which(Se.Sp.list[, j+7] == max(Se.Sp.list[, j+7])), ]
    max.effic[j, 1:2]<- tmp[1:2]
    max.effic[j, 3]<- tmp[5]
}

# minimise MCT
min.MCT<- array(0, dim = c(length(prev.vals), 4, length(rel.cost)))
rownames(min.MCT)<- paste("P =", prev.vals)
colnames(min.MCT)<- c("Cut-point", "MCT", "Sensitivity", "Specificity")
# cat("
", colnames(MCT))
dimnames(min.MCT)[[3]]<- dimnames(MCT)[[3]]
for (p in 1:(length(prev.vals))) {
# p<- 1
for (r in 1:(length(rel.cost))) {
# r<- 1
    tmp<- which.min(MCT[, p, r])
    min.MCT[p, 1, r]<- cp.list[tmp]
    min.MCT[p, 2, r]<- MCT[tmp, p, r]
    min.MCT[p, 3, r]<- Se.Sp.list[tmp, 2]
    min.MCT[p, 4, r]<- Se.Sp.list[tmp, 5]
}  # 
}
#  cat("
", 200)

# density curve of all results
  Title<-c("Density curve of all results")
  x.label<- "Test Result"
  graphfile<- paste(fpath, "tmp/", filename, "_density.png", sep="")
  OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 6)
  tmp<- density(all.results)
  plot(density(all.results), main = Title, col = dark.cols[1], xlab = x.label, 
           xlim = c(min(all.results), max(all.results)), ylim = c(0, max(tmp$y)))
  CloseGraphOutput("R")

# cat("
", test)
# bar plot of infected and uninfected
  Title<- c("Distribution of test results for infected", "Distribution of test results for uninfected")
  x.label<- "Test Result"
  tmp1<- hist(infected, plot = F)
  tmp2<- hist(uninfected, plot = F)
  ymax<- max(tmp1$counts, tmp2$counts)
  graphfile<- paste(fpath, "tmp/", filename, "_barplot.png", sep="")
  OpenGraphOutput(graphfile, pointsize = 12, ht = 8, wd = 6)
  par(mfrow = c(2, 1))
  hist(infected, col = light.cols[2], main = Title[1], border = dark.cols[2], xlim = range(all.results), ylim = c(0, ymax))
  hist(uninfected, col = light.cols[3], main = Title[2], border = dark.cols[3], xlim = range(all.results), ylim = c(0, ymax))
  CloseGraphOutput("R")

# box plot of infected and uninfected
  Title<-c("Box plot of results by infection status")
  x.label<- "Test Result"
  graphfile<- paste(fpath, "tmp/", filename, "_boxplot.png", sep="")
  OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 6)
  boxplot(infected, uninfected, ylab = x.label, border = "black",         #dark.cols[2:1],  light.cols[2:1]
          col = "white", main = Title, names = label.names)
  CloseGraphOutput("R")

# strip plot of infected and uninfected
  Title<-c("Strip chart of results by infection status")
  x.label<- "Test Result"
  graphfile<- paste(fpath, "tmp/", filename, "_stripchart.png", sep="")
  OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 6)
  stripchart(rd$Result ~ rd$Status, xlab = x.label, col = dark.cols[2:1],
          method = "jitter", jitter = 0.4, main = Title, vertical = T,
          pch = 1, cex = 0.8, ylab = "Test result")
  CloseGraphOutput("R")

# roc curve
  Title<-c("ROC Curve")
  x.label<- "False positive rate (1 - Specificity)"
  y.label<- "True positive rate (Sensitivity)"
  graphfile<- paste(fpath, "tmp/", filename, "_ROC.png", sep="")
  OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 8)
  plot(1-Sp[, 1], Se[, 1], type = "l", col = dark.cols[1], xlab = x.label,
           ylab = y.label, main = Title)
#  plot(perf1, type = "l", col = dark.cols[1], xlab = x.label,
#           ylab = y.label, main = Title)
  segments(0,0, 1,1, col = dark.cols[2])
  CloseGraphOutput("R")


# 2-graph roc curve
  Title<-c("Two graph ROC Curve")
  x.label<- "Test Result"
  y.label<- "Sensitivity/Specificity"
  graphfile<- paste(fpath, "tmp/", filename, "_2GROC.png", sep="")
  OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 8)
  plot(x = cp.list, y = Se[, 1], type = "l", col = dark.cols[2], xlab = x.label,
           ylab = y.label, main = Title, lty = "dashed", lwd = 1.5)
  lines(x = cp.list, y = Sp[, 1], col = dark.cols[3], lwd = 1.5)
  legend("right", legend = c("Sensitivity", "Specificity"), col = dark.cols[2:3],
          cex = 0.8, lty = c("dashed", "solid"), lwd = 1.5)
  CloseGraphOutput("R")

# plot Youden's index and efficiency
  Title<-c("Youden's J and test efficiency")
  x.label<- "Cut-off value"
  y.label<- "J/Efficiency"
  graphfile<- paste(fpath, "tmp/", filename, "_J.png", sep="")
  OpenGraphOutput(graphfile, pointsize = 12, ht = 6, wd = 8)
  plot(x = cp.list, y = J, type = "l", col = dark.cols[1], xlab = x.label,
           ylab = y.label, main = Title, ylim = c(0,1))
  for (l in 1:length(prev.vals)) {
      lines(x = cp.list, y = efficiency[, l], col = dark.cols[l+1])
  } #endof l loop
  legend("bottomright", legend = c("Youden's J", paste("Eff. at P =", prev.vals)), col = dark.cols,
          cex = 0.6, lty = 1)
  CloseGraphOutput("R")


# plot mis-classification cost term                     
  Title<-c("Mis-classification Cost Term")
  x.label<- "Cut-off value"
  y.label<- "MCT"
  graphfile<- paste(fpath, "tmp/", filename, "_MCT.png", sep="")
  OpenGraphOutput(graphfile, pointsize = 12, ht = 10, wd = 7)
  par(mfrow = c(ceiling(length(prev.vals)/2), 2))
  for (g in 1:length(prev.vals)) {
      plot(x = cp.list, y = MCT[, g, 1], type = "l", col = dark.cols[1], xlab = x.label,
               ylab = y.label, main = Title, ylim = c(0, max(MCT[,g,])))
      for (l in 2:length(rel.cost)) {
          lines(x = cp.list, y = MCT[, g, l], col = dark.cols[l])
      } #endof l loop
      legend("topright", legend = dimnames(MCT)[[3]], col = dark.cols,
              cex = 0.8, lty = 1)
      mtext(paste("Prevalence =", prev.vals[g]), side = 3, line = 0.5, cex = 0.7)
  } # end of g loop
  CloseGraphOutput("R")

# html output
d1<- paste(substr(date(), 1, 10), substr(date(), 20,24), " @", substr(date(), 11, 16))
# reformat headers
output<- paste("
Sensitivity and specificity and ROC Analysis for", test.name, "
\n")
output<- paste(output, "
Results
\n")

# cat("
", d1)
# start building HTML output
output<- paste(output, "", "Analysed: ", d1, "\n", sep="")
output<- paste(output, "Summary of testing data\n", sep="")
tmp1<- HTMLStream(result.summary, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar",  cellalign = "center", align = "left", digits = digits)
tmp2<- "Summary graphs
\n"
output<- paste(output, tmp1, tmp2, sep="")
temp<- array("", dim = c(1, 4))
colnames(temp)<- c("Density curve of
 all test results", 
                 "Histogram of test results
 by disease status", 
                 "Boxplots of test results
 by disease status",
                 "Stripchart of test results
 by disease status")
graph<- paste(fpath, "tmp/", filename, "_density.png", sep="")
# img.url<- paste(filename, "_density.png", sep="")
temp[1]<- paste("\n", sep="")    #
graph<- paste(fpath, "tmp/", filename, "_barplot.png", sep="")
# img.url<- paste(filename, "_barplot.png", sep="")
temp[2]<- paste("\n", sep="")    #
graph<- paste(fpath, "tmp/", filename, "_boxplot.png", sep="")
# img.url<- paste(filename, "_boxplot.png", sep="")
temp[3]<- paste("\n", sep="")    #
graph<- paste(fpath, "tmp/", filename, "_stripchart.png", sep="")
# img.url<- paste(filename, "_boxplot.png", sep="")
temp[4]<- paste("\n", sep="")    #
output<- paste(output, HTMLStream(temp,Border = 0,classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar",  digits = 2, append = TRUE, align = "", caption = "", captionalign = "", classcaption = "", classtable = "", cellalign = "", cellborder = 0))    #

# summary of sensitivity and specificity
output<- paste(output, "
Summary of Sensitivity and Specificity\n", sep="")
tmp<- "Cut-point results for target Sensitivity\n"
tmp1<- HTMLStream(Se.cp, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar",  cellalign = "center", align = "left", digits = digits)
tmp2<- "Cut-point results for target Specificity\n"
tmp3<- HTMLStream(Sp.cp, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar",  cellalign = "center", align = "left", digits = digits)
tmp4<- "Sensitivity and Specificity graph\n"
graph<- paste(fpath, "tmp/", filename, "_2GROC.png", sep="")
# img.url<- paste(filename, "_density.png", sep="")
tmp5<- paste("
\n", sep="")    #
output<- paste(output, tmp, tmp1, tmp2, tmp3, tmp4, tmp5, sep="")

# graph of ROC analysis
output<- paste(output, "Graph of ROC Curve\n", sep="")
graph<- paste(fpath, "tmp/", filename, "_ROC.png", sep="")
# img.url<- paste(filename, "_density.png", sep="")
tmp<- paste("
\n", sep="")    #
tmp1<-  paste("Area under ROC curve\n", HTMLStream(auc, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar",  cellalign = "center", align = "left", digits = digits), sep = "")   # 
tmp2<- "Graph of Youden's J and test Efficiency\n"
graph<- paste(fpath, "tmp/", filename, "_J.png", sep="")
tmp3<- paste("
\n", sep="")    #
tmp3a<- paste("Cut-points to maximise Youden's J and test Efficiency", HTMLStream(max.effic, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar",  cellalign = "center", align = "left", digits = digits))

tmp4<- "Graph of Mis-classification Cost Term\n"
graph<- paste(fpath, "tmp/", filename, "_MCT.png", sep="")
tmp5<- paste("
\n", sep="")    #
output<- paste(output, tmp, tmp1, tmp2, tmp3, tmp3a, tmp4, tmp5, sep="")

output<- paste(output, "Cut-points to minimise Mis-classification Cost Term\n")
for (p in 1:length(prev.vals)) {
    output<- paste(output, "", colnames(MCT)[p], "", HTMLStream(min.MCT[p,,], cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar",  cellalign = "center", align = "left", digits = digits))
}

# cat("
", conf)
# Detailed results

    file.name<- file(paste(tmp.file, "_Se_Sp_details.xls", sep = ""), open = "wt")
    writeLines(c("Detailed Sensitivity and Specificity results", paste("Test =", test.name), date(), ""), con = file.name)
        write.table(Se.Sp.list, file = file.name, sep = "\t", append = T, col.names = NA)
        writeLines("", con = file.name)
        writeLines("Mis-classification cost estimates", con = file.name)
        for (p in 1:length(prev.vals)) {
            writeLines(paste("Prevalence =", prev.vals[p]), con = file.name)
            temp<- cbind(cp.list, MCT[, p,])
            colnames(temp)[1]<- "Cut-point"
            write.table(temp, file = file.name, sep = "\t", append = T, col.names = NA)
            writeLines("", con = file.name)
        } # end of p loop
        writeLines("", con = file.name)
        writeLines("Original data", con = file.name)
        write.table(rd, file = file.name, sep = "\t", append = T, col.names = NA)
    close(file.name)
output<- paste(output, "Download detailed results\n")
output<- paste(output, "Detailed results
\n", sep = "")

sink()
# write output to file
file.name<- file(paste(tmp.file, "_result.html", sep = ""), open = "wt")
cat(output, file=file.name)             #)
cat(output)