# Exercise 8.16, page 475: principal component analysis of game-fish catches.
#
# Variable explanation
#   X1 = Bluegill
#   X2 = Black crappie
#   X3 = Smallmouth bass
#   X4 = Largemouth bass
#   X5 = Walleye
#   X6 = Northern pike

# Correlation matrix of the data, transcribed from the textbook.
# The values are embedded as a literal vector rather than read with
# `scan(n = 36)` followed by bare numbers: that idiom only works when the
# script is pasted into a console, and is a parse error under source()/Rscript.
Corr <- matrix(
  c(
     1.0000,  0.4919,  0.2636,  0.4653, -0.2277,  0.0652,
     0.4919,  1.0000,  0.3217,  0.3506, -0.1917,  0.2045,
     0.2636,  0.3217,  1.0000,  0.4108,  0.0647,  0.2493,
     0.4653,  0.3506,  0.4108,  1.0000, -0.2249,  0.2293,
    -0.2277, -0.1917,  0.0647, -0.2249,  1.0000, -0.2144,
     0.0652,  0.2045,  0.2493,  0.2293, -0.2144,  1.0000
  ),
  nrow = 6,
  byrow = TRUE,
  dimnames = list(paste0("X", 1:6), paste0("X", 1:6))
)

# Correlations between members of the centrarchid family (X1 to X4) are
# positive, whereas all correlations between these and X5 (walleye) are either
# negative or very small. So X5 is not in the group with the centrarchids.

# Principal component variances (eigenvalues) and direction vectors
# (eigenvectors) for the PCA on X1-X6. The outer parentheses print the result.
(PCA <- eigen(Corr))

# Label the results: eigenvalues and eigenvector COLUMNS correspond to the
# principal components (PC1..PC6); eigenvector ROWS correspond to the original
# variables (X1..X6), i.e. each column holds one component's loadings.
names(PCA$values) <- paste0("PC", 1:6)
dimnames(PCA$vectors) <- list(paste0("X", 1:6), paste0("PC", 1:6))

# PC1 = -0.4744514*X1 + (-0.4730439)*X2 + (-0.3950955)*X3 +
#       (-0.4954348)*X4 + 0.2553558*X5 + (-0.2904526)*X6
# (An eigenvector is only determined up to its sign, so all signs of a column
# may be flipped on another platform without changing the interpretation.)
#
# The first eigenvector shows that the direction of largest variation points
# to the common variation in the centrarchids (X1-X4), with a lesser weight on
# pike (X6, same sign) and an opposite weight on walleye (X5).
# This confirms the interpretation of the correlation matrix itself.
# The second eigenvector focuses on the walleye catch (-0.80680989); the
# third, on northern pike (-0.808772699).
# The others are more difficult to interpret.
# Scree plot for the PCA on all six variables: eigenvalue (component variance)
# against component number.
PC.variance <- PCA$values
# Proportion of total variance explained by each component (printed).
(PC.prop <- PC.variance / sum(PC.variance))
plot(
  seq_along(PC.variance), PC.variance,
  main = "Scree Plot for game fish - all 6 variables",
  xlab = "Principal Component Number",
  ylab = "Principal Component Variance",
  type = "b"
)

# Now perform PCA on the first four variables only (X1-X4).
# Correlation matrix for the first four variables.
Corr4 <- Corr[1:4, 1:4]

# Principal component variances and direction vectors (printed unlabelled).
(PCA1 <- eigen(Corr4))

# Label the results: eigenvalues and eigenvector COLUMNS are principal
# components (PC1..PC4); eigenvector ROWS are the original variables (X1..X4).
names(PCA1$values) <- paste0("PC", 1:4)
dimnames(PCA1$vectors) <- list(paste0("X", 1:4), paste0("PC", 1:4))
PCA1

# Scree plot for the four-variable PCA.
PC1.variance <- PCA1$values
# Proportion of total variance explained by each component (printed).
(PC1.prop <- PC1.variance / sum(PC1.variance))
plot(
  seq_along(PC1.variance), PC1.variance,
  main = "Scree Plot for game fish - first four variables",
  xlab = "Principal Component Number",
  ylab = "Principal Component Variance",
  type = "b"
)

# The scree plot shows a substantial decline in the eigenvalues from the first
# to the remaining three.
# The first eigenvector shows that the catches of these four centrarchid
# species tend to vary together (all loadings have the same sign and are close
# to each other in magnitude).
# The second eigenvector exhibits a tendency for smallmouth bass (X3, large
# loading of -0.7602768) to vary separately from the others.
# Smallmouth bass are arguably the most popular of these four species.
# The other eigenvectors are not as easily interpreted, but again, they are
# beyond the elbow in the scree plot.