setwd("C:\\Users\\authorized user\\Dropbox\\STAT445\\week13\\tutorials") ############################################################################ #q-q plot #multivariate case ############################################################################ library(MASS) data=mvrnorm(n = 1000, mu=c(4.10,2.08,0.604), Sigma=matrix(c(0.025,0.0075,0.00175,0.0075,0.0070,0.00135,0.00175, 0.00135, 0.00043),nrow=3,byrow=T) ) ###Set the values of n (number of observation units), ###and p (the dimension of the observation vector- number of variables). n<-dim(data)[1] p<-dim(data)[2] ###Compute the mean vector and variance-covariance matrix. xbar <- apply(data,2, "mean") sx <- cov(data) ###Compute the generalized distances from the sample mean. diffs <- data - matrix(xbar, nrow=n, ncol=p, byrow=TRUE) gdist <- diag(diffs%*%solve(sx)%*%t(diffs)) ###Sort these. s.gdist <-sort(gdist) ###Find the matching list of quantiles. quant <- qchisq(((1:n)-0.5)/n, p) ###Plot the sorted generalized distances vs. the quantiles. plot(quant, s.gdist,pch=20, main="Q-Q Plot of Generalized distances vs. Quantiles of Chi Squared") ###Add a 45-degree line. lines(quant,quant) #change the data so that you can obtain a big positive outlier data[1,1]=10 plot(density(data[,1])) xbar <- apply(data,2, "mean") sx <- cov(data) ###Compute the generalized distances from the sample mean. diffs <- data - matrix(xbar, nrow=n, ncol=p, byrow=TRUE) gdist <- diag(diffs%*%solve(sx)%*%t(diffs)) ###Sort these. s.gdist <-sort(gdist) ###Find the matching list of quantiles. quant <- qchisq(((1:n)-0.5)/n, p) ###Plot the sorted generalized distances vs. the quantiles. plot(quant, s.gdist,pch=20, main="Q-Q Plot of Generalized distances vs. Quantiles of Chi Squared") ###Add a 45-degree line. lines(quant,quant) #add big negative outlier data[1,1]=-10 plot(density(data[,1])) xbar <- apply(data,2, "mean") sx <- cov(data) ###Compute the generalized distances from the sample mean. diffs <- data - matrix(xbar, nrow=n, ncol=p, byrow=TRUE) gdist <- diag(diffs%*%solve(sx)%*%t(diffs)) ###Sort these. s.gdist <-sort(gdist) ###Find the matching list of quantiles. quant <- qchisq(((1:n)-0.5)/n, p) ###Plot the sorted generalized distances vs. the quantiles. plot(quant, s.gdist,pch=20, main="Q-Q Plot of Generalized distances vs. Quantiles of Chi Squared") ###Add a 45-degree line. lines(quant,quant) #add mild negative outlier data[1,1]=3 plot(density(data[,1])) xbar <- apply(data,2, "mean") sx <- cov(data) ###Compute the generalized distances from the sample mean. diffs <- data - matrix(xbar, nrow=n, ncol=p, byrow=TRUE) gdist <- diag(diffs%*%solve(sx)%*%t(diffs)) ###Sort these. s.gdist <-sort(gdist) ###Find the matching list of quantiles. quant <- qchisq(((1:n)-0.5)/n, p) ###Plot the sorted generalized distances vs. the quantiles. plot(quant, s.gdist,pch=20, main="Q-Q Plot of Generalized distances vs. Quantiles of Chi Squared") ###Add a 45-degree line. lines(quant,quant) ############################################################################ #END q-q plot multivariate case ############################################################################ ############################################################################ #q-q plot #Univariate case ############################################################################ #q-q plot of normal distribution data=rnorm(n=1000, 0, 1) plot(density(data)) qqnorm(data) qqline(data) #add big positive outlier data[1]=15 plot(density(data)) qqnorm(data) qqline(data) #add big negative outlier data[1]=-10 plot(density(data)) qqnorm(data) qqline(data) #q-q plot of right skewed distribution data=rchisq(n=1000, df=10, ncp = 0) plot(density(data)) qqnorm(data) qqline(data) #add big positive outlier data[1]=50 plot(density(data)) qqnorm(data) qqline(data) #add big negative outlier data[1]=-10 plot(density(data)) qqnorm(data) qqline(data) #q-q plot of left skewed distribution data=rbeta(10000,5,2) plot(density(data)) qqnorm(data) qqline(data) #add big positive outlier data[1]=5 plot(density(data)) qqnorm(data) qqline(data) #add big negative outlier data[1]=-3 plot(density(data)) qqnorm(data) qqline(data)