Random sampling from a sampling frame
Select a random sample from a sampling frame, for example of all farms or villages in a district or region. Sampling may be carried out with the following options:
- Simple Random Sampling (SRS) or Probability Proportional to Size (PPS);
- With or without replacement;
- Stratified or unstratified; or
- From the entire sampling frame or from a sub-group based on specified selection criteria.
For PPS sampling you must enter the column number containing cluster sizes for PPS calculations. For stratified sampling, you must enter the column number on which to stratify. For sub-group sampling, you must enter the column number on which to select, as well as the comparison condition and value for sub-group selection. If a sub-group of data is to be selected it must be on a numeric field. If the data is not numeric it will need to be recoded so that it is.
Download and check out the example data if you are unsure what is required.
No results
No references available
###################################### # Program to do random village/farm sampling ###################################### # x<- 1 rm(list = ls()) # cat("
Test:",length(commandArgs())) test<- ifelse(length(commandArgs()) == 2, TRUE, FALSE) fpath<- ifelse(test, "webRootUrl", "rtoolsPath") # load header scripts source(paste(fpath, "R/epi_head.R", sep = "")) source(paste(fpath, "R/HTMLStream.R", sep = "")) source(paste(fpath, "R/epitools_functions.r", sep = "")) # extract command arguments # 1 = sample size, 2 = SRS, 3 = PPS field #, 4 = replace, 5 = stratify (T/F), 6 = stratify field #, # 7 = subgroup (T/F), 8 = subgroup filed #, 9 = condition, 10 = group identifier # cat("
", a0[8:17]) a1<- type.convert(a0[8:17]) # cat(a0, "
") # cat("
", a1) n<- a1[1] # sample size SRS<- a1[2] # sampling type PPS.field<- a1[3] # field for pps sampling replacement<- a1[4] # sample with/without replacement stratify<- a1[5] # use stratification strat.field<- a1[6] # stratification field subgroup<- a1[7] # select subgroup grp.field<- a1[8] # subgroup field grp.cond<- a1[9] # subgroup conditional grp.id<- a1[10] # subgroup identifier comp.list<- c("==", ">", ">=", "<", "<=", "!=") comp<- comp.list[grp.cond] heading<- "Random sampling results" filename<- digest(Sys.time()) data.file<- ifelse(test, paste(fpath, "docs/RandomVillageDemo.txt", sep = ""), a0[length(a0)]) #test name tmp.path<- paste(fpath, "tmp/", sep = "") tmp.file<- paste(fpath, "tmp/", filename, sep = "") sinkfile<- paste(fpath, "tmp/", filename, ".txt", sep="") # fpath, # cat("
", data.file) size<- file.info(data.file)$size # cat("
", size) if (size == 0) { cat("
No Data entered") quit() } # cat("
", 52) if (!SRS && PPS.field == -1) { cat("
Column number for PPS sampling not entered") quit() } # cat("
", 57) if (stratify && strat.field == -1) { cat("
Column number for stratification not entered") quit() } # cat("
", 62) if (subgroup && grp.field == -1) { cat("
Column number for sub-grouping not entered") quit() } # cat("
", 67) if (subgroup && grp.id == -1) { cat("
Comparison value for sub-grouping not entered") quit() } # cat("
", 72) # data.list<- read.delim(data.file, header = TRUE, sep = ",") data.list<- read.csv(data.file) # cat(nrow(data.list)) inputs<- array(a1[1:9], dim = c(8, 1)) rownames(inputs)<- c("Sample size", "Sampling strategy", "PPS Field", "With/Without replacement", "Stratification", "Stratification field", "Select a sub-group", "Sub-group condition") inputs[2]<- ifelse(a1[2], "Simple Random Sampling", "Probability Proportional to Size") inputs[3]<- ifelse(a1[2], "", colnames(data.list)[a1[3]]) inputs[4]<- ifelse(a1[4], "Sampling with replacement", "Sampling without replacement") inputs[5]<- ifelse(a1[5], "Stratified", "Not Stratified") inputs[6]<- ifelse(a1[5], colnames(data.list)[a1[6]], "") inputs[7]<- ifelse(a1[7], "Yes", "No") inputs[8]<- ifelse(a1[7], paste(colnames(data.list)[a1[8]], comp, grp.id), "") inputs[inputs == ""]<- "N/A" # function to do simple random sampling # select sr sample from a dataframe with or without replacement srs<- function(dat, n, replacement) { x<- dat[sample(nrow(dat), n, replace = replacement),] return(x) } # end of srs # function to do PPS sampling PPS.sample<- function(sub.list, PPS.field, replacement, n.strat, l) { # set up weighted list wgt.list<- 0 for (r in 1:nrow(sub.list[[l]])) { wgt.list<- c(wgt.list, rep(r, sub.list[[l]][r, PPS.field])) } # end of r loop wgt.list<- wgt.list[2:length(wgt.list)] if (replacement) { # sampling with replacement id.list<- sample(wgt.list, n.strat[l], replace = replacement) } else { count<- 0 id.list<- 0 while (count < n.strat[l]) { tmp<- sample(wgt.list, 1) if (count == 0) { id.list[1]<- tmp count<- count+1 } else if (!(tmp %in% id.list)) { id.list[count+1]<- tmp count<- count+1 } # end of if/else } # end of while loop } # end of if/else replacement tmp<- sub.list[[l]][id.list,] return(tmp) } # end of PPS.sample # function to do sampling (either srs or pps) do.sample<- function(sub.list, sub.grp, replacement) { n.strat<- 0 for (l in 1:length(sub.list)) { n.strat[l]<- max(round(nrow(sub.list[[l]])*n/nrow(sub.grp), 0), 1) if (SRS) { # do SRS sampling tmp<- srs(sub.list[[l]], n.strat[l], replacement) } else { tmp<- PPS.sample(sub.list, PPS.field, replacement, n.strat, l) } if (l == 1) { ran.list<- tmp } else { ran.list<- rbind(ran.list, tmp) } } # check length of list and add/subtract as necessary if (nrow(ran.list) > n) { ran.list<- ran.list[sample(1:nrow(ran.list), n, replace = F),] } else if (nrow(ran.list) < n) { while(nrow(ran.list) < n) { tmp<- sub.grp[sample(1:nrow(sub.grp), 1),] if (!(tmp[1,1] %in% ran.list[,1])) { ran.list<- rbind(ran.list, tmp) } } # end of while loop } # end of if/else nrow return(ran.list) } # end of SR.sample # subset data if (subgroup) { sub.grp<- subset(data.list, eval(parse(text = paste("data.list[,grp.field]", comp, grp.id)))) } else { sub.grp<- data.list } # end of ifelse subgroup # stratify data if (stratify) { sub.list<- split(sub.grp, sub.grp[, strat.field]) } else { sub.list<- list(sub.grp) } # end of if/else stratify # do random sampling ran.list<- do.sample(sub.list, sub.grp, replacement) ran.list<- ran.list[order(ran.list[,1]),] # ran.list # nrow(ran.list) file.name<- file(paste(tmp.file, "_result.xls", sep = ""), open = "wt") writeLines(c(heading, date(), ""), con = file.name) writeLines("Inputs", con = file.name) write.table(inputs, file = file.name, sep = "\t", append = T, col.names = F) writeLines(c("", "List of randomly sampled units"), con = file.name) write.table(ran.list, file = file.name, sep = "\t", append = T, row.names = F) close(file.name) # output results d1<- paste(substr(date(), 1, 10), substr(date(), 20,24), " @", substr(date(), 11, 16)) # reformat headers output<- paste("", heading, "
\n") output<- paste(output, "", "Analysed: ", d1, "
\n", sep="") output<- paste(output, "Inputs
\n") output<- paste(output, HTMLStream(inputs, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar", cellalign = "center", align = "left")) output<- paste(output, "Results
\n") output<- paste(output, HTMLStream(ran.list, cellborder = 0, classfirstline = "mbg", classfirstcolumn = "mbg", classcellinside = "left_mar", cellalign = "center", align = "left")) output<- paste(output, "Download excel file of results
\n") output<- paste(output, "Detailed results
\n", sep = "") # sink() # write output to file file.name<- file(paste(tmp.file, "_result.html", sep = ""), open = "wt") cat(output, file=file.name) close(file.name) #) cat(output)