############################################################
# Normality assessment for the data in T4-3.csv:
#   1. normal Q-Q plots for individual columns,
#   2. chi-square Q-Q plot of the squared generalized
#      (Mahalanobis) distances for the joint distribution.
############################################################

# NOTE(review): hard-coded, machine-specific working directory. The script
# only runs where this exact path exists; it is kept because "T4-3.csv" is
# resolved relative to it. Consider passing the data directory in instead.
setwd("C:\\Users\\authorized user\\Dropbox\\STAT445\\week10\\tutorials")
data <- as.matrix(read.csv("T4-3.csv"))

############################################################
# Assess univariate normality of the data
############################################################
# NOTE(review): only columns 2 and 3 are inspected here, while every column
# (including column 1) enters the multivariate check below -- confirm that
# column 1 is a measurement and not an observation index.
qqnorm(data[, 2])
qqline(data[, 2])

qqnorm(data[, 3])
qqline(data[, 3])

############################################################
# Construct the Q-Q plot for assessing normality of
# multivariate data
############################################################
# Normal marginal (univariate) distributions do not imply that the joint
# distribution is multivariate normal, so the joint distribution is checked
# separately through the squared generalized (Mahalanobis) distances
#
#     d_j^2 = t(x_j - xbar) %*% solve(S) %*% (x_j - xbar),   j = 1, ..., n.
#
# Under multivariate normality (and for reasonably large n) these distances
# behave approximately like a chi-square sample with p degrees of freedom,
# where p is the number of variables. Plotting the ordered distances against
# the matching chi-square quantiles should therefore give points close to
# the 45-degree line.
############################################################

### Set n (number of observation units) and p (dimension of the observation
### vector, i.e. the number of variables).
n <- nrow(data)
p <- ncol(data)

### Compute the mean vector and the variance-covariance matrix.
xbar <- colMeans(data)
sx <- cov(data)

### Compute the generalized distances from the sample mean.
# Centre every row at the sample mean.
diffs <- data - matrix(xbar, nrow = n, ncol = p, byrow = TRUE)
# Row-wise quadratic form. This equals diag(diffs %*% solve(sx) %*% t(diffs))
# (and stats::mahalanobis(data, xbar, sx)) without building the n x n matrix.
gdist <- rowSums((diffs %*% solve(sx)) * diffs)

### Sort these.
s.gdist <- sort(gdist)

### Find the matching list of chi-square quantiles.
quant <- qchisq((seq_len(n) - 0.5) / n, df = p)

### Plot the density of the distances and the sorted distances vs. the
### quantiles, stacked in one figure.
par(mfrow = c(2, 1))
# The density is taken over the generalized distances themselves (gdist),
# as the title states -- not over the centred data matrix.
plot(density(gdist), main = "Distribution of Generalized distances")
plot(
  quant, s.gdist,
  pch = 20,
  main = "Q-Q Plot of Generalized distances vs. Quantiles of Chi Squared"
)

### Add a 45-degree reference line.
lines(quant, quant)