Random sampling from a sampling frame

Select a random sample from a sampling frame, for example of all farms or villages in a district or region. Sampling may be carried out with the following options:

Simple Random Sampling (SRS) or Probability Proportional to Size (PPS);
With or without replacement;
Stratified or unstratified; or
From the entire sampling frame or from a sub-group based on specified selection criteria.

For PPS sampling you must enter the column number containing cluster sizes for PPS calculations. For stratified sampling, you must enter the column number on which to stratify. For sub-group sampling, you must enter the column number on which to select, as well as the comparison condition and value for sub-group selection. If a sub-group of data is to be selected it must be on a numeric field. If the data is not numeric it will need to be recoded so that it is.

Download and check out the example data if you are unsure what is required.

No results

No example available
RandomClusterDemo.xls

No references available

				######################################
# Program to do random village/farm sampling
######################################

# x<- 1
rm(list = ls())

# cat("
Test:",length(commandArgs()))
test<- ifelse(length(commandArgs()) == 2, TRUE, FALSE)
fpath<- ifelse(test, "webRootUrl", "rtoolsPath")

# load header scripts
  source(paste(fpath, "R/epi_head.R", sep = ""))
  source(paste(fpath, "R/HTMLStream.R", sep = ""))
  source(paste(fpath, "R/epitools_functions.r", sep = ""))

# extract command arguments 
# 1 = sample size, 2 = SRS, 3 = PPS field #, 4 = replace, 5 = stratify (T/F), 6 = stratify field #, 
# 7 = subgroup (T/F), 8 = subgroup filed #, 9 = condition, 10 = group identifier
# cat("
", a0[8:17])
    a1<- type.convert(a0[8:17])
# cat(a0, "
")

# cat("
", a1)
n<- a1[1]           # sample size
SRS<- a1[2]           # sampling type
PPS.field<- a1[3]          # field for pps sampling
replacement<- a1[4]          # sample with/without replacement
stratify<- a1[5]           # use stratification
strat.field<- a1[6]        # stratification field
subgroup<- a1[7]       # select subgroup 
grp.field<- a1[8]        # subgroup field
grp.cond<- a1[9]        # subgroup conditional
grp.id<- a1[10]       # subgroup identifier
comp.list<- c("==", ">", ">=", "<", "<=", "!=")
comp<- comp.list[grp.cond]
heading<- "Random sampling results"

filename<- digest(Sys.time())
data.file<- ifelse(test, paste(fpath, "docs/RandomVillageDemo.txt", sep = ""), a0[length(a0)])              #test name
tmp.path<- paste(fpath, "tmp/", sep = "")
tmp.file<- paste(fpath, "tmp/", filename, sep = "")
sinkfile<- paste(fpath, "tmp/", filename, ".txt", sep="")   # fpath,
# cat("
", data.file)

size<- file.info(data.file)$size
# cat("
", size)
if (size == 0) {
  cat("
No Data entered")
  quit()
}
# cat("
", 52)
if (!SRS && PPS.field == -1) {
  cat("
Column number for PPS sampling not entered")
  quit()
}
# cat("
", 57)
if (stratify && strat.field == -1) {
  cat("
Column number for stratification not entered")
  quit()
}
# cat("
", 62)
if (subgroup && grp.field == -1) {
  cat("
Column number for sub-grouping not entered")
  quit()
}
# cat("
", 67)
if (subgroup && grp.id == -1) {
  cat("
Comparison value for sub-grouping not entered")
  quit()
}
# cat("
", 72)
# data.list<- read.delim(data.file, header = TRUE, sep = ",")
data.list<- read.csv(data.file)
# cat(nrow(data.list))

inputs<- array(a1[1:9], dim = c(8, 1))
rownames(inputs)<- c("Sample size", "Sampling strategy", "PPS Field", "With/Without replacement",
                  "Stratification", "Stratification field", "Select a sub-group",
                  "Sub-group condition")
inputs[2]<- ifelse(a1[2], "Simple Random Sampling", "Probability Proportional to Size")
inputs[3]<- ifelse(a1[2], "", colnames(data.list)[a1[3]])
inputs[4]<- ifelse(a1[4], "Sampling with replacement", "Sampling without replacement")
inputs[5]<- ifelse(a1[5], "Stratified", "Not Stratified")
inputs[6]<- ifelse(a1[5], colnames(data.list)[a1[6]], "")
inputs[7]<- ifelse(a1[7], "Yes", "No")
inputs[8]<- ifelse(a1[7], paste(colnames(data.list)[a1[8]], comp, grp.id), "")
inputs[inputs == ""]<- "N/A"

# function to do simple random sampling
# select sr sample from a dataframe with or without replacement
srs<- function(dat, n, replacement) {
    x<- dat[sample(nrow(dat), n, replace = replacement),]
    return(x)
} # end of srs 

# function to do PPS sampling
PPS.sample<- function(sub.list, PPS.field, replacement, n.strat, l) {
    # set up weighted list
    wgt.list<- 0
    for (r in 1:nrow(sub.list[[l]])) {
        wgt.list<- c(wgt.list, rep(r, sub.list[[l]][r, PPS.field]))
    } # end of r loop
    wgt.list<- wgt.list[2:length(wgt.list)]
    if (replacement) {          # sampling with replacement
        id.list<- sample(wgt.list, n.strat[l], replace = replacement)
    } else {
        count<- 0
        id.list<- 0
        while (count < n.strat[l]) {
            tmp<- sample(wgt.list, 1)
            if (count == 0) {
                id.list[1]<- tmp
                count<- count+1
            } else if (!(tmp %in% id.list)) {
                id.list[count+1]<- tmp
                count<- count+1
            } # end of if/else
        } # end of while loop
    } # end of if/else replacement
    tmp<- sub.list[[l]][id.list,]
    return(tmp)
} # end of PPS.sample

# function to do sampling (either srs or pps)
do.sample<- function(sub.list, sub.grp, replacement) {
    n.strat<- 0
    for (l in 1:length(sub.list)) {
        n.strat[l]<- max(round(nrow(sub.list[[l]])*n/nrow(sub.grp), 0), 1)
        if (SRS) {                # do SRS sampling
            tmp<- srs(sub.list[[l]], n.strat[l], replacement)
        } else {
            tmp<- PPS.sample(sub.list, PPS.field, replacement, n.strat, l)
        }
        if (l == 1) {
            ran.list<- tmp
        } else {
            ran.list<- rbind(ran.list, tmp)
        }
    }
    # check length of list and add/subtract as necessary
    if (nrow(ran.list) > n) {
        ran.list<- ran.list[sample(1:nrow(ran.list), n, replace = F),]
    } else if (nrow(ran.list) < n) {
        while(nrow(ran.list) < n) {
            tmp<- sub.grp[sample(1:nrow(sub.grp), 1),]
            if (!(tmp[1,1] %in% ran.list[,1])) {
                ran.list<- rbind(ran.list, tmp)
            }
        } # end of while loop
    } # end of if/else nrow
    return(ran.list)
} # end of SR.sample

# subset data
if (subgroup) {
    sub.grp<- subset(data.list, eval(parse(text = paste("data.list[,grp.field]", comp, grp.id))))  
} else {
    sub.grp<- data.list
} # end of ifelse subgroup

# stratify data
if (stratify) {
    sub.list<- split(sub.grp, sub.grp[, strat.field])
} else {
    sub.list<- list(sub.grp)
} # end of if/else stratify


# do random sampling
ran.list<- do.sample(sub.list, sub.grp, replacement)
ran.list<- ran.list[order(ran.list[,1]),]

# ran.list
# nrow(ran.list)

file.name<- file(paste(tmp.file, "_result.xls", sep = ""), open = "wt")
  writeLines(c(heading, date(), ""), con = file.name)
  writeLines("Inputs", con = file.name)
  write.table(inputs, file = file.name, sep = "\t", append = T, col.names = F)    
  writeLines(c("", "List of randomly sampled units"), con = file.name)
  write.table(ran.list, file = file.name, sep = "\t", append = T, row.names = F)    
close(file.name)


# output results
d1<- paste(substr(date(), 1, 10), substr(date(), 20,24), " @", substr(date(), 11, 16))
# reformat headers
output<- paste("
", heading, "
\n")
output<- paste(output, "", "Analysed: ", d1, "\n", sep="")
output<- paste(output, "
Inputs
\n")
output<- paste(output, HTMLStream(inputs, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar",  cellalign = "center", align = "left"))
output<- paste(output, "
Results
\n")
output<- paste(output, HTMLStream(ran.list, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar",  cellalign = "center", align = "left"))
output<- paste(output, "
Download excel file of results
\n")
output<- paste(output, "Detailed results
\n", sep = "")



# sink()
# write output to file
file.name<- file(paste(tmp.file, "_result.html", sep = ""), open = "wt")
cat(output, file=file.name)  
close(file.name)           #)
cat(output)