}
# One template, many reports.
# Two files are involved:
#   File 1 (this R script): picks the data for each report and calls the
#                           R Markdown template once per group.
#   File 2 (Report.Rmd):    the R Markdown document that produces a report.

# Packages
library(knitr)
library(markdown)
library(rmarkdown)

# Load the cereal data and tabulate the rows per manufacturer code
cereal <- read.csv(
  "cereal.csv",
  header = TRUE,
  as.is = TRUE,
  strip.white = TRUE
)
xtabs(~mfr, data = cereal)

# Folder that receives the rendered reports
dir.create("reports")

# One render per manufacturer, limited to codes "K" and "G". Output goes to
# output_dir under the name built for output_file.
# The callback parameter keeps the name my.cereal: the render call runs inside
# this function, so presumably Report.Rmd looks that name up -- verify against
# the template.
plyr::d_ply(
  cereal[cereal$mfr %in% c("K", "G"), ],
  "mfr",
  function(my.cereal) {
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = "all",
      output_file = paste0(
        "report_", my.cereal$mfr[1], "_", Sys.Date(), ".html"
      ),
      output_dir = "reports"
    )
  }
)
# Produce several reports from a single R Markdown document.
#   - This script selects the data to analyse and repeatedly calls the
#     R Markdown file.
#   - Report.Rmd (file 2) generates the report itself.

# Attach the packages used by the rendering pipeline
library(knitr)
library(markdown)
library(rmarkdown)

# Read the data; as.is keeps text columns as character vectors
cereal <- read.csv(
  "cereal.csv",
  header = TRUE, as.is = TRUE, strip.white = TRUE
)
# Row counts by manufacturer code
xtabs(~mfr, data = cereal)

# Create the directory for the reports
dir.create("reports")

# Split the "K" and "G" rows by manufacturer and render once per piece.
# Each rendered file lands in output_dir with the name given by output_file.
plyr::d_ply(
  cereal[cereal$mfr %in% c("K", "G"), ],
  "mfr",
  function(my.cereal) {
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = "all",
      output_file = paste0(
        "report_", my.cereal$mfr[1], "_", Sys.Date(), ".html"
      ),
      output_dir = "reports"
    )
  }
)
# For each manufacturer (codes "K" and "G" only) create a report.
# Reports are saved in output_dir with the name specified by output_file.
plyr::d_ply(
  cereal[cereal$mfr %in% c("K", "G"), ],
  "mfr",
  function(my.cereal) {
    # The leftover browser() debugging stop was removed so the loop renders
    # every group without pausing for input.
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = "all",
      output_file = paste0(
        "report_", my.cereal$mfr[1], "_", Sys.Date(), ".html"
      ),
      output_dir = "reports"
    )
  }
)
# Scratch statements exploring how to build the report file name.
# NOTE(review): they read my.cereal directly, and no top-level assignment of
# my.cereal is visible in this file -- presumably they were typed at an
# interactive debugger prompt inside the d_ply callback. Confirm before
# sourcing this file as a script.

# Evaluate the file name on its own to inspect it
paste("report_", my.cereal$mfr[1], '_', Sys.Date(), ".html", sep='')
# Render using that name and the "all" output format
rmarkdown::render('Report.Rmd',  # file 2
output_format="all",
output_file =  paste("report_", my.cereal$mfr[1], '_', Sys.Date(), ".html", sep=''),
output_dir = 'reports')
# Drive both the output format and the file extension from one variable.
# NOTE(review): "pdf" is probably not a valid rmarkdown format name (the next
# attempt switches to "pdf_document") -- confirm.
type="pdf"
rmarkdown::render('Report.Rmd',  # file 2
output_format=type,
output_file =  paste("report_", my.cereal$mfr[1], '_', Sys.Date(), ".", type, sep=''),
output_dir = 'reports')
# Use the full format name and take the text before its first underscore as
# the file extension; the substr() call below evaluates that prefix by itself.
type="pdf_document"
substr(type,1,-1+regexpr("_", type,fixed=TRUE)
)
# Render with the full format name and the derived extension
rmarkdown::render('Report.Rmd',  # file 2
output_format=type,
output_file =  paste("report_", my.cereal$mfr[1], '_', Sys.Date(), ".",
substr(type,1,-1+regexpr("_", type,fixed=TRUE)), sep=''),
output_dir = 'reports')
# Generate multiple reports from a single R Markdown document.
# Two files are needed:
#   File 1 (this R script): selects the data to be analysed and repeatedly
#                           calls the R Markdown file.
#   File 2 (Report.Rmd):    the R Markdown document that generates the report.

# Load packages
library(knitr)
library(markdown)
library(rmarkdown)

cereal <- read.csv(
  "cereal.csv",
  header = TRUE, as.is = TRUE, strip.white = TRUE
)
xtabs(~mfr, data = cereal)

# Create the directory for the reports; skip the call on a re-run so an
# existing folder does not raise a warning.
if (!dir.exists("reports")) {
  dir.create("reports")
}

# For each manufacturer (codes "K" and "G" only) create a report.
# Reports are saved in output_dir with the name specified by output_file.
#
# Callback arguments:
#   my.cereal  rows for one manufacturer (supplied by d_ply)
#   type       rmarkdown output format name, e.g. "pdf_document"
#
# The parameter is named `type` to match both the body and the `type = ...`
# argument forwarded by d_ply. It was previously declared as `types`, so the
# value only arrived through partial argument matching and the body read a
# global `type` instead of its own parameter. The leftover browser() stop was
# removed as well.
plyr::d_ply(
  cereal[cereal$mfr %in% c("K", "G"), ],
  "mfr",
  function(my.cereal, type) {
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = type,
      # File extension = text before the first underscore of the format name
      output_file = paste0(
        "report_", my.cereal$mfr[1], "_", Sys.Date(), ".",
        substr(type, 1, regexpr("_", type, fixed = TRUE) - 1)
      ),
      output_dir = "reports"
    )
  },
  type = "pdf_document"
)
# Generate multiple reports from a single R Markdown document.
# Two files are needed:
#   File 1 (this R script): selects the data to be analysed and repeatedly
#                           calls the R Markdown file.
#   File 2 (Report.Rmd):    the R Markdown document that generates the report.

# Load packages
library(knitr)
library(markdown)
library(rmarkdown)

cereal <- read.csv(
  "cereal.csv",
  header = TRUE, as.is = TRUE, strip.white = TRUE
)
xtabs(~mfr, data = cereal)

# Create the directory for the reports unless it is already there
if (!dir.exists("reports")) {
  dir.create("reports")
}

# For each manufacturer (codes "K" and "G" only) create a report.
# Reports are saved in output_dir with the name specified by output_file.
#
# Callback arguments:
#   my.cereal  rows for one manufacturer (supplied by d_ply)
#   type       rmarkdown output format name, e.g. "pdf_document"
#
# The second parameter was declared as `types` while the body used `type`;
# it is now `type`, so the function reads the value it is given rather than
# whatever global `type` happens to exist.
plyr::d_ply(
  cereal[cereal$mfr %in% c("K", "G"), ],
  "mfr",
  function(my.cereal, type) {
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = type,
      # File extension = text before the first underscore of the format name
      output_file = paste0(
        "report_", my.cereal$mfr[1], "_", Sys.Date(), ".",
        substr(type, 1, regexpr("_", type, fixed = TRUE) - 1)
      ),
      output_dir = "reports"
    )
  },
  type = "pdf_document"
)
# One R Markdown template, one rendered report per manufacturer.
#   File 1 (this R script): chooses the data and calls the template in a loop.
#   File 2 (Report.Rmd):    builds the report for the current group.

# Packages
library(knitr)
library(markdown)
library(rmarkdown)

# Data, plus a count of rows per manufacturer code
cereal <- read.csv(
  "cereal.csv",
  header = TRUE,
  as.is = TRUE,
  strip.white = TRUE
)
xtabs(~mfr, data = cereal)

# Destination folder for the reports
dir.create("reports")

# Render once per manufacturer ("K" and "G" only). `type` is forwarded by
# d_ply to the callback and names the rmarkdown output format; the file
# extension is the part of `type` before its first underscore.
plyr::d_ply(
  cereal[cereal$mfr %in% c("K", "G"), ],
  "mfr",
  function(my.cereal, type) {
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = type,
      output_file = paste0(
        "report_", my.cereal$mfr[1], "_", Sys.Date(), ".",
        substr(type, 1, regexpr("_", type, fixed = TRUE) - 1)
      ),
      output_dir = "reports"
    )
  },
  type = "pdf_document"
)
# Generate multiple reports from a single R Markdown document.
# Two files are needed:
#   File 1 (this R script): selects the data to be analysed and repeatedly
#                           calls the R Markdown file.
#   File 2 (Report.Rmd):    the R Markdown document that generates the report.

# Load packages
library(knitr)
library(markdown)
library(rmarkdown)

cereal <- read.csv(
  "cereal.csv",
  header = TRUE, as.is = TRUE, strip.white = TRUE
)
xtabs(~mfr, data = cereal)

# Create the directory for the reports unless it is already there
if (!dir.exists("reports")) {
  dir.create("reports")
}

# For each manufacturer (codes "K" and "G" only) create a report.
# Reports are saved in output_dir with the name specified by output_file.
#
# Callback arguments:
#   my.cereal  rows for one manufacturer (supplied by d_ply)
#   type       rmarkdown output format name, e.g. "word_document"
plyr::d_ply(
  cereal[cereal$mfr %in% c("K", "G"), ],
  "mfr",
  function(my.cereal, type) {
    # The extension is the text before the first underscore of the format
    # name. For "word_document" that gives ".word", which is not the Word
    # file extension, so it is rewritten to ".docx".
    out.filename <- paste0(
      "report_", my.cereal$mfr[1], "_", Sys.Date(), ".",
      substr(type, 1, regexpr("_", type, fixed = TRUE) - 1)
    )
    out.filename <- gsub("\\.word$", ".docx", out.filename)
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = type,
      output_file = out.filename,
      output_dir = "reports"
    )
  },
  type = "word_document"
)
# Several reports out of one R Markdown document.
#   - This R script selects the data and calls the R Markdown file in a loop.
#   - Report.Rmd (file 2) is the document that generates each report.

# Attach the rendering packages
library(knitr)
library(markdown)
library(rmarkdown)

# Read the cereal data and show how many rows each manufacturer has
cereal <- read.csv(
  "cereal.csv",
  header = TRUE, as.is = TRUE, strip.white = TRUE
)
xtabs(~mfr, data = cereal)

# Create the directory for the reports
dir.create("reports")

# One report per manufacturer, codes "K" and "G" only. The format name in
# `type` is passed through d_ply to the callback; its prefix up to the first
# underscore becomes the file extension.
plyr::d_ply(
  cereal[cereal$mfr %in% c("K", "G"), ],
  "mfr",
  function(my.cereal, type) {
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = type,
      output_file = paste0(
        "report_", my.cereal$mfr[1], "_", Sys.Date(), ".",
        substr(type, 1, regexpr("_", type, fixed = TRUE) - 1)
      ),
      output_dir = "reports"
    )
  },
  type = "html_document"
)
# You can use word_document,  pdf_document, html_document etc for the type of file.
# Consult the documentation
# Generate multiple reports from a single R Markdown document.
# Two files are needed:
#   File 1 (this R script): selects the data to be analysed and repeatedly
#                           calls the R Markdown file.
#   File 2 (Report.Rmd):    the R Markdown document that generates the report.

# Load packages
library(knitr)
library(markdown)
library(rmarkdown)

cereal <- read.csv(
  "cereal.csv",
  header = TRUE, as.is = TRUE, strip.white = TRUE
)
xtabs(~mfr, data = cereal)

# Create the directory for the reports unless it is already there
if (!dir.exists("reports")) {
  dir.create("reports")
}

# For each manufacturer create a report.
# Reports are saved in output_dir with the name specified by output_file.
select <- cereal$mfr %in% c("K", "G") # we only create two reports

# Callback arguments:
#   my.cereal  rows for one manufacturer (supplied by d_ply)
#   type       rmarkdown output format name, e.g. "html_document"
plyr::d_ply(
  cereal[select, ],
  "mfr",
  function(my.cereal, type) {
    # Build the output file name; the extension is the text before the first
    # underscore of the format name.
    out.filename <- paste0(
      "report_", my.cereal$mfr[1], "_", Sys.Date(), ".",
      substr(type, 1, regexpr("_", type, fixed = TRUE) - 1)
    )
    # A Word report must end in .docx rather than .word. The backslash is
    # doubled because "\." is not a valid escape in an R string literal and
    # stops the file from parsing; "\\." reaches the regex engine as a
    # literal dot.
    out.filename <- gsub("\\.word$", ".docx", out.filename)
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = type,
      output_file = out.filename,
      output_dir = "reports"
    )
  },
  type = "html_document"
)
# You can use word_document,  pdf_document, html_document etc for the type of file.
# Consult the documentation
# One template, many reports.
#   File 1 (this R script): selects the data and calls the R Markdown file
#                           once per group.
#   File 2 (Report.Rmd):    generates the report.

# Packages
library(knitr)
library(markdown)
library(rmarkdown)

# Cereal data and a per-manufacturer row count
cereal <- read.csv(
  "cereal.csv",
  header = TRUE,
  as.is = TRUE,
  strip.white = TRUE
)
xtabs(~mfr, data = cereal)

# Folder for the rendered reports
dir.create("reports")

# Only manufacturers "K" and "G" get a report
select <- cereal$mfr %in% c("K", "G")

# Render per manufacturer. The file name ends in the part of `type` before
# its first underscore, except that a trailing ".word" becomes ".docx".
plyr::d_ply(
  cereal[select, ],
  "mfr",
  function(my.cereal, type) {
    out.filename <- gsub(
      "\\.word$",
      ".docx",
      paste0(
        "report_", my.cereal$mfr[1], "_", Sys.Date(), ".",
        substr(type, 1, regexpr("_", type, fixed = TRUE) - 1)
      )
    )
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = type,
      output_file = out.filename,
      output_dir = "reports"
    )
  },
  type = "html_document"
)
# You can use word_document,  pdf_document, html_document etc for the type of file.
# Consult the documentation
# Build several reports from one R Markdown document.
#   - This script picks the data and calls the R Markdown file repeatedly.
#   - Report.Rmd (file 2) is the document that produces each report.

# Attach packages
library(knitr)
library(markdown)
library(rmarkdown)

# Read the data, then tabulate rows by manufacturer code
cereal <- read.csv(
  "cereal.csv",
  header = TRUE, as.is = TRUE, strip.white = TRUE
)
xtabs(~mfr, data = cereal)

# Create the directory for the reports
dir.create("reports")

# Restrict the run to two manufacturers, i.e. two reports
select <- cereal$mfr %in% c("K", "G")

# For every selected manufacturer: derive the file name, then render.
plyr::d_ply(
  cereal[select, ],
  "mfr",
  function(my.cereal, type) {
    # Name = report_<mfr>_<date>.<text of `type` before its first underscore>
    out.filename <- paste0(
      "report_", my.cereal$mfr[1], "_", Sys.Date(), ".",
      substr(type, 1, regexpr("_", type, fixed = TRUE) - 1)
    )
    # A Word report has to end in .docx instead of .word
    out.filename <- sub("\\.word$", ".docx", out.filename)
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = type,
      output_file = out.filename,
      output_dir = "reports"
    )
  },
  type = "html_document"
)
# You can use word_document,  pdf_document, html_document etc for the type of file.
# Consult the documentation
# One template, many reports.
#   File 1 (this R script): selects the data and calls the R Markdown file
#                           once per group.
#   File 2 (Report.Rmd):    generates the report.

# Packages
library(knitr)
library(markdown)
library(rmarkdown)

# Cereal data and a per-manufacturer row count
cereal <- read.csv(
  "cereal.csv",
  header = TRUE,
  as.is = TRUE,
  strip.white = TRUE
)
xtabs(~mfr, data = cereal)

# Folder for the rendered reports
dir.create("reports")

# Only manufacturers "K" and "G" get a report
select <- cereal$mfr %in% c("K", "G")

# Render per manufacturer. The file name ends in the part of `type` before
# its first underscore, except that a trailing ".word" becomes ".docx".
plyr::d_ply(
  cereal[select, ],
  "mfr",
  function(my.cereal, type) {
    out.filename <- gsub(
      "\\.word$",
      ".docx",
      paste0(
        "report_", my.cereal$mfr[1], "_", Sys.Date(), ".",
        substr(type, 1, regexpr("_", type, fixed = TRUE) - 1)
      )
    )
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = type,
      output_file = out.filename,
      output_dir = "reports"
    )
  },
  type = "word_document"
)
# You can use word_document,  pdf_document, html_document etc for the type of file.
# Consult the documentation
# Build several reports from one R Markdown document.
#   - This script picks the data and calls the R Markdown file repeatedly.
#   - Report.Rmd (file 2) is the document that produces each report.

# Attach packages
library(knitr)
library(markdown)
library(rmarkdown)

# Read the data, then tabulate rows by manufacturer code
cereal <- read.csv(
  "cereal.csv",
  header = TRUE, as.is = TRUE, strip.white = TRUE
)
xtabs(~mfr, data = cereal)

# Create the directory for the reports
dir.create("reports")

# Restrict the run to two manufacturers, i.e. two reports
select <- cereal$mfr %in% c("K", "G")

# For every selected manufacturer: derive the file name, then render.
plyr::d_ply(
  cereal[select, ],
  "mfr",
  function(my.cereal, type) {
    # Name = report_<mfr>_<date>.<text of `type` before its first underscore>
    out.filename <- paste0(
      "report_", my.cereal$mfr[1], "_", Sys.Date(), ".",
      substr(type, 1, regexpr("_", type, fixed = TRUE) - 1)
    )
    # A Word report has to end in .docx instead of .word
    out.filename <- sub("\\.word$", ".docx", out.filename)
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = type,
      output_file = out.filename,
      output_dir = "reports"
    )
  },
  type = "word_document"
)
# You can use word_document,  pdf_document, html_document etc for the type of file.
# Consult the documentation
# One template, many reports.
#   File 1 (this R script): selects the data and calls the R Markdown file
#                           once per group.
#   File 2 (Report.Rmd):    generates the report.

# Packages
library(knitr)
library(markdown)
library(rmarkdown)

# Cereal data and a per-manufacturer row count
cereal <- read.csv(
  "cereal.csv",
  header = TRUE,
  as.is = TRUE,
  strip.white = TRUE
)
xtabs(~mfr, data = cereal)

# Folder for the rendered reports
dir.create("reports")

# Only manufacturers "K" and "G" get a report
select <- cereal$mfr %in% c("K", "G")

# Render per manufacturer. The file name ends in the part of `type` before
# its first underscore, except that a trailing ".word" becomes ".docx".
plyr::d_ply(
  cereal[select, ],
  "mfr",
  function(my.cereal, type) {
    out.filename <- gsub(
      "\\.word$",
      ".docx",
      paste0(
        "report_", my.cereal$mfr[1], "_", Sys.Date(), ".",
        substr(type, 1, regexpr("_", type, fixed = TRUE) - 1)
      )
    )
    rmarkdown::render(
      "Report.Rmd", # file 2
      output_format = type,
      output_file = out.filename,
      output_dir = "reports"
    )
  },
  type = "pdf_document"
)
# You can use word_document,  pdf_document, html_document etc for the type of file.
# Consult the documentation
#-------------------------------------------------------------------
# Load the 2010 road-accident records. The path is relative, so it resolves
# against whatever working directory is in effect at this point.
accidents <- read.csv(
  file.path("..", "sampledata", "Accidents", "road-accidents-2010.csv"),
  header = TRUE,
  as.is = TRUE,
  strip.white = TRUE
)
# Change the working directory (machine-specific path); relative paths used
# after this line resolve against it.
setwd("~/Dropbox/Stat-R/CourseNotes/Rcode")
#-------------------------------------------------------------------
# Accident records: load, then add a Date column and a fatality flag
accidents <- read.csv(
  file.path("..", "sampledata", "Accidents", "road-accidents-2010.csv"),
  header = TRUE,
  as.is = TRUE,
  strip.white = TRUE
)
# Date strings are parsed as day/month/four-digit year
accidents$mydate <- as.Date(accidents$Date, format = "%d/%m/%Y")
# TRUE where the severity code equals 1
accidents$Fatality <- accidents$Accident_Severity == 1
accidents[1:5, ]

# Vehicle records, one row per vehicle
vehicles <- read.csv(
  file.path("..", "sampledata", "Accidents", "road-accidents-vehicles-2010.csv"),
  header = TRUE,
  as.is = TRUE,
  strip.white = TRUE
)
head(vehicles)

# Vehicles per accident: one output row per Acc_Index holding its row count
n.vehicles <- plyr::ddply(
  vehicles, "Acc_Index", plyr::summarize,
  n.vehicles = length(Acc_Index)
)
head(n.vehicles)
xtabs(~n.vehicles, data = n.vehicles)
head(vehicles)
head(n.vehicles)
str(n.vehicles)

# Print function definitions for inspection.
# NOTE(review): no library(plyr) call is visible before this point, so the
# first two lookups presumably rely on plyr having been attached earlier in
# the session -- confirm.
ddply
splitter_d
library(plyr)
splitter_d
View(n.vehicles)
vehicles[1:5, 1:3]
# Odd behaviour with a string: a small inline sample to check how the first
# column (Acc_Index) is read -- here it comes in as a character string.
# The base R function is textConnection(); textConnections() does not exist
# and stopped this statement with a "could not find function" error.
test.csv <- textConnection("
Acc_Index, Vehicle_Reference, Vehicle_Type
201001BS70003,   1,           19
201001BS70003,   2,            1
201001BS70004,   1,            9
201001BS70006,   1,           20
201001BS70006,   2,            1")
test <- read.csv(test.csv, as.is = TRUE, strip.white = TRUE)
# textConnection() returns an already-open connection, so release it here
# instead of leaving it open for the rest of the session.
close(test.csv)
test
str(test)
# Inline sample of the vehicle file, read the same way as the real data.
# The base R function is textConnection(); textConnections() does not exist
# and stopped this statement with a "could not find function" error.
test.csv <- textConnection("
Acc_Index, Vehicle_Reference, Vehicle_Type
201001BS70003,   1,           19
201001BS70003,   2,            1
201001BS70004,   1,            9
201001BS70006,   1,           20
201001BS70006,   2,            1")
test.data <- read.csv(test.csv, as.is = TRUE, strip.white = TRUE)
# textConnection() returns an already-open connection, so release it here
# instead of leaving it open for the rest of the session.
close(test.csv)
test.data
# Inline sample of the vehicle file, read the same way as the real data
# (as.is keeps text as character, strip.white trims the padded fields).
test.csv <- textConnection("
Acc_Index, Vehicle_Reference, Vehicle_Type
201001BS70003,   1,           19
201001BS70003,   2,            1
201001BS70004,   1,            9
201001BS70006,   1,           20
201001BS70006,   2,            1")
test.data <- read.csv(test.csv, as.is = TRUE, strip.white = TRUE)
# textConnection() returns an already-open connection, so release it here
# instead of leaving it open for the rest of the session.
close(test.csv)
test.data
str(test.data)
# Count the rows of the inline sample per Acc_Index.
# The first attempt at this call lacked the comma after plyr::summarize and
# could not be parsed; only the corrected call is kept.
plyr::ddply(
  test.data, "Acc_Index", plyr::summarize,
  n.veh = length(Acc_Index)
)
# Open the data viewer only in an interactive session; there is nothing to
# look at when the file is run as a batch script.
if (interactive()) {
  View(n.vehicles)
  View(accidents)
}
# Odd behaviour with a string: second inline sample, where the first two
# Acc_Index values consist of digits only and the others contain letters.
# Notice that the Acc_Index first column is read as a character string.
test.csv <- textConnection("
Acc_Index, Vehicle_Reference, Vehicle_Type
20100170003,   1,           19
20100170003,   2,            1
201001BS70004,   1,            9
201001BS70006,   1,           20
201001BS70006,   2,            1")
test.data <- read.csv(test.csv, as.is = TRUE, strip.white = TRUE)
# textConnection() returns an already-open connection, so release it here
# instead of leaving it open for the rest of the session.
close(test.csv)
test.data
str(test.data)
# ...now compare with what ddply reports when grouping on that column
library(plyr)
plyr::ddply(
  test.data,
  "Acc_Index",
  plyr::summarize,
  n.veh = length(Acc_Index)
)
# Count number of vehicles in accident, also keeping the first Acc_Index
# value seen in each group next to the grouping column.
n.vehicles <- plyr::ddply(
  vehicles, "Acc_Index", plyr::summarize,
  n.vehicles = length(Acc_Index),
  old = Acc_Index[1]
)

# Tag every vehicle row with its position. seq_len() is used instead of
# 1:nrow() so that an empty data frame yields an empty index rather than
# c(1, 0).
vehicles$rownum <- seq_len(nrow(vehicles))

# Count number of vehicles in accident again (this replaces the result
# above), recording the first row at which each accident appears.
n.vehicles <- plyr::ddply(
  vehicles, "Acc_Index", plyr::summarize,
  n.vehicles = length(Acc_Index),
  where = min(rownum)
)
head(n.vehicles)

# Look up the original rows at those positions: first for every accident,
# then for the first five accidents only.
vehicles[n.vehicles$where, 1:5]
vehicles[n.vehicles$where[1:5], 1:5]
# Here is something very odd. Look what happens to the Acc_Index variable
# when it is read in: it looks like a character variable, but is it
# interpreted as a number?
vehicles <- read.csv(
  file.path("..", "sampledata", "Accidents", "road-accidents-vehicles-2010.csv"),
  header = TRUE,
  as.is = TRUE,
  strip.white = TRUE
)
head(vehicles)
# seq_len() instead of 1:nrow() so an empty file yields an empty index
vehicles$rownum <- seq_len(nrow(vehicles))
vehicles[1:5, 1:3]

# Count number of vehicles in accident and record the first row at which
# each accident appears.
n.vehicles <- plyr::ddply(
  vehicles, "Acc_Index", plyr::summarize,
  n.vehicles = length(Acc_Index),
  where = min(rownum)
)
head(n.vehicles)
xtabs(~n.vehicles, data = n.vehicles)

# Are there any accidents with missing data? List the ids present in one
# table but not in the other, in both directions.
setdiff(accidents$Accident_Index, n.vehicles$Acc_Index)
setdiff(n.vehicles$Acc_Index, accidents$Accident_Index)

# Merge with accident data. Notice the key column has a different name in
# the two tables.
accidents2 <- merge(
  accidents, n.vehicles,
  by.x = "Accident_Index", by.y = "Acc_Index"
)
dim(accidents2)

# Compute the fatality proportion by number of vehicles
p.fatal <- plyr::ddply(
  accidents2, "n.vehicles", plyr::summarize,
  p.fatal = mean(Fatality)
)
head(p.fatal)

# A plot of the proportion against the vehicle count. The ggplot2 functions
# are namespace-qualified (like the plyr calls in this file) because no
# library(ggplot2) call is visible in this script.
fatal.plot <- ggplot2::ggplot(
  data = p.fatal,
  ggplot2::aes(x = n.vehicles, y = p.fatal)
) +
  ggplot2::ggtitle("Relationship between fatality proportion and # of vehicles") +
  ggplot2::geom_point()
fatal.plot

# Save the figure. Argument names are written out in full; the previous
# abbreviations (file, h, w) relied on partial argument matching.
ggplot2::ggsave(
  filename = file.path("..", "..", "MyStuff", "Images", "merge-bind-pfatal-nvehicles.png"),
  plot = fatal.plot,
  height = 6,
  width = 6,
  units = "in",
  dpi = 300
)
