# Tutorial script: descriptive statistics and scatterplot matrices for the
# Major League Baseball data set.

# Set up the working directory ----
# NOTE(review): this path is machine-specific; the relative read.csv() call
# below depends on it, so it is kept as in the original script.
setwd("C:\\Users\\authorized user\\Dropbox\\STAT445\\week1\\tutorials")
getwd()

# Import the data ----
# Read data from the .csv (comma-separated values) file directly into R
# and print it right after the import.
MLB <- read.csv("Major League Baseball Main Stats.csv")
print(MLB)

# Coerce the second column to numeric.
# NOTE(review): values that cannot be parsed become NA (with a warning);
# confirm the column holds plain numbers in the CSV.
MLB[, 2] <- as.numeric(MLB[, 2])

# Print the data set that was imported into R
print(MLB)

# Check whether MLB is a matrix
is.matrix(MLB) # FALSE

# Check whether MLB is a data.frame object
is.data.frame(MLB) # TRUE

# Turn MLB into a matrix object (left disabled, as in the original script)
# MLB <- as.matrix(MLB)

# Summary statistics ----
# Columns 2 to 6 are the columns the script treats as numeric.

# Mean vector (mean of each column), rounded to the second decimal place
print(round(colMeans(MLB[, 2:6]), 2))

# Variance-covariance matrix, rounded to the second decimal place
print(round(cov(MLB[, 2:6]), 2))

# Correlation matrix, rounded to the second decimal place
print(round(cor(MLB[, 2:6]), 2))

# Scatterplot matrices ----

# All pairs of two-variable scatterplots for columns 2 to 6
pairs(MLB[, 2:6])

# Same scatterplot matrix with a smoothed line in each scatterplot
pairs(MLB[, 2:6], panel = panel.smooth)

# Panel function for pairs(): writes the absolute correlation of x and y in
# the middle of the panel, with the text size proportional to that
# correlation, and adds significance stars for the test of the null
# hypothesis "true correlation is equal to zero".
#
# Arguments:
#   x, y     numeric vectors supplied by pairs() for the current panel
#   digits   number of significant digits used to format the correlation
#   prefix   string prepended to the formatted correlation
#   cex.cor  base character expansion; when missing it is derived from the
#            width of the label (0.8 / strwidth(label))
#   ...      absorbs extra graphical arguments that pairs() forwards to every
#            panel function; they are not used here
#
# Called for its side effect of drawing text on the current panel.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  # Graphics setup: switch the panel to unit coordinates so the text can be
  # placed at fixed relative positions, and restore the previous coordinates
  # when the function exits.
  old_par <- par(usr = c(0, 1, 0, 1))
  on.exit(par(old_par), add = TRUE)

  r <- abs(cor(x, y))

  # Formatting r together with 0.123456789 keeps `digits` significant digits
  # even when r itself would print with fewer.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)

  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(txt)
  }

  # Hypothesis test with null hypothesis "true correlation is equal to zero"
  test <- cor.test(x, y)

  # Symbolic number coding of the p-value (borrowed from printCoefmat)
  signif_mark <- symnum(
    test$p.value,
    corr = FALSE,
    na = FALSE,
    cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
    symbols = c("***", "**", "*", ".", " ")
  )

  # Absolute correlation in the centre of the panel, significance stars in
  # the upper-right area (drawn in colour 2).
  text(0.5, 0.5, txt, cex = cex.cor * r)
  text(0.8, 0.8, signif_mark, cex = cex.cor, col = 2)
}

# Scatterplot matrix with smoothed scatterplots above the diagonal and the
# correlation estimates from panel.cor in the lower-left triangle below the
# diagonal; panel.cor is executed once for each pair of variables there.
pairs(MLB[, 2:6], panel = panel.smooth, lower.panel = panel.cor)