## Principal component analysis of LPGA 2008 golfer statistics.
## Fits four PCAs with princomp(): covariance- and correlation-based on
## (drive, frwy), then covariance- and correlation-based on all four
## variables (drive, frwy, grnreg, puttrnd), with diagnostic plots.

## Read in data and assign variable names
## (fixed-width file: a 30-character golfer field, then ten 8-character fields)
lpga2008 <- read.fwf(
  "http://www.stat.ufl.edu/~winner/data/lpga2008.dat",
  widths = c(30, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8),
  col.names = c(
    "golfer", "drive", "frwy", "grnreg",
    "puttrnd", "sandrnd", "sandsv", "przrnd", "lnprzrnd", "rounds", "golferid"
  )
)
# lpga2008

## Create new variables for analysis that remove outlying golfer(s)
## Rows are kept when grnreg >= 50; the mask is computed once and reused.
## Columns are read explicitly from lpga2008 instead of via attach(), so the
## search path is left untouched.
in_range <- lpga2008$grnreg >= 50
drive1 <- lpga2008$drive[in_range]
frwy1 <- lpga2008$frwy[in_range]
grnreg1 <- lpga2008$grnreg[in_range]
puttrnd1 <- lpga2008$puttrnd[in_range]

## Create new data frame with only the 4 variables of interest
lpga1 <- data.frame(drive1, frwy1, grnreg1, puttrnd1)

## Two-variable PCA input: drive and frwy only
X <- cbind(drive1, frwy1)

## PCA of (drive, frwy) on the covariance matrix
lpga.pc1 <- princomp(X, cor = FALSE)
summary(lpga.pc1, loadings = TRUE)

## Normal Q-Q plot of the second component scores
qqnorm(lpga.pc1$scores[, 2])
qqline(lpga.pc1$scores[, 2])

## Square plotting region; xlim is taken from the PC1 scores so the PC2 (x)
## axis spans the same range as the PC1 (y) axis.
par(pty = "s")
plot(
  lpga.pc1$scores[, 2], lpga.pc1$scores[, 1],
  xlim = range(lpga.pc1$scores[, 1]),
  xlab = "PC2", ylab = "PC1"
)

## First component scores
lpga.pc1$scores[, 1]

## PCA of (drive, frwy) on the correlation matrix
lpga.pc2 <- princomp(X, cor = TRUE)
summary(lpga.pc2, loadings = TRUE)

## Same diagnostics as above for the correlation-based fit
qqnorm(lpga.pc2$scores[, 2])
qqline(lpga.pc2$scores[, 2])

par(pty = "s")
plot(
  lpga.pc2$scores[, 2], lpga.pc2$scores[, 1],
  xlim = range(lpga.pc2$scores[, 1]),
  xlab = "PC2", ylab = "PC1"
)

lpga.pc2$scores[, 1]

## PCA of all four variables: covariance matrix, then correlation matrix
lpga.pc3 <- princomp(lpga1, cor = FALSE)
summary(lpga.pc3, loadings = TRUE)

lpga.pc4 <- princomp(lpga1, cor = TRUE)
summary(lpga.pc4, loadings = TRUE)

## Biplot of the four-variable covariance-based PCA
library(graphics)
par(pty = "s")
biplot(lpga.pc3, cex = 0.5)
## Scale for vars (arrows) appears to be loading * sqrt(n-1)