## Read in data and assign variable names lpga2008 <- read.fwf("http://www.stat.ufl.edu/~winner/data/lpga2008.dat", width=c(30,8,8,8,8,8,8,8,8,8,8), col.names=c("golfer","drive","frwy","grnreg", "puttrnd","sandrnd","sandsv","przrnd","lnprzrnd","rounds","golferid")) attach(lpga2008) # lpga2008 ## Create new variables for analysis that remove outlying golfer(s) drive1 <- drive[grnreg >= 50] frwy1 <- frwy[grnreg >= 50] grnreg1 <- grnreg[grnreg >= 50] puttrnd1 <- puttrnd[grnreg >= 50] ## Create new data frame with only the 4 variables of interest lpga1 <- data.frame(drive1,frwy1,grnreg1,puttrnd1) detach(lpga2008) attach(lpga1) X <- cbind(drive1, frwy1) (S2 <- cov(X)) (xbar1 <- mean(X[,1])) (xbar2 <- mean(X[,2])) (R <- cor(X)) S2.ord <- matrix(rep(0,4),ncol=2) S2.ord[1,1] <- max(S2[1,1], S2[2,2]) S2.ord[2,2] <- min(S2[1,1], S2[2,2]) S2.ord[1,2] <- S2[1,2] S2.ord[2,1] <- S2[2,1] S2 S2.ord solve(S2.ord) eigen(S2.ord)$value eigen(S2.ord)$vector lambda1 <-((S2.ord[1,1]+S2.ord[2,2]) + sqrt((S2.ord[1,1]-S2.ord[2,2])^2+4*S2.ord[1,2]^2))/2 lambda2 <-((S2.ord[1,1]+S2.ord[2,2]) - sqrt((S2.ord[1,1]-S2.ord[2,2])^2+4*S2.ord[1,2]^2))/2 cbind(lambda1, lambda2) x11 <- 1; x12 <- (lambda1 - S2.ord[1,1]) / S2.ord[1,2] e11 <- S2.ord[1,2] / sqrt(S2.ord[1,2]^2 + (lambda1 - S2.ord[1,1])^2) e12 <- (lambda1 - S2.ord[1,1]) / sqrt(S2.ord[1,2]^2 + (lambda1 - S2.ord[1,1])^2) cbind(e11,e12) e1 <- matrix(c(e11,e12), ncol=1) x21 <- 1; x22 <- (lambda2 - S2.ord[1,1]) / S2.ord[1,2] e21 <- S2.ord[1,2] / sqrt(S2.ord[1,2]^2 + (lambda2 - S2.ord[1,1])^2) e22 <- (lambda2 - S2.ord[1,1]) / sqrt(S2.ord[1,2]^2 + (lambda2 - S2.ord[1,1])^2) e2 <- matrix(c(e21,e22), ncol=1) cbind(e1,e2) lambda1+lambda2 lambda1 / (lambda1+lambda2) #### Points a constant distance from the origin: drive distance/fairway pct n <- nrow(X); p <- ncol(X) alpha <- 0.05 crit.dist <- sqrt(qchisq(1-alpha,p)) ctr <- c(xbar1, xbar2) angles <- seq(0, 2*pi, length.out=200) eigVal <- eigen(S2.ord)$values eigVec <- eigen(S2.ord)$vectors eigScl <- eigVec %*% diag(sqrt(eigVal)) # scale eigenvectors to length = square-root xMat <- rbind(ctr[1] + eigScl[1, ]*crit.dist, ctr[1] - eigScl[1, ]*crit.dist) yMat <- rbind(ctr[2] + eigScl[2, ]*crit.dist, ctr[2] - eigScl[2, ]*crit.dist) ellBase <- cbind(sqrt(eigVal[1])*crit.dist*cos(angles), sqrt(eigVal[2])*crit.dist*sin(angles)) # normal ellipse ellRot <- eigVec %*% t(ellBase) # rotated ellipse win.graph(width=7.0,height=5.5) plot((ellRot+ctr)[1, ], (ellRot+ctr)[2, ], asp=1, type="l", lwd=2, main="95% Density Ellipsoid and Principal Components", xlab="Drive Distance", ylab="Fairway Percent") # matlines(xMat, yMat, lty=1, lwd=2, col="blue") arrows(ctr[1], ctr[2], ctr[1] + eigScl[1, ]*crit.dist*1.2, ctr[2] + eigScl[2, ]*crit.dist*1.2, col="blue", lwd=3) arrows(ctr[1], ctr[2], ctr[1] + eigScl[1, ]*crit.dist*1.2, ctr[2] + eigScl[2, ]*crit.dist*1.2, col="blue", lwd=3) arrows(ctr[1], ctr[2], ctr[1] - eigScl[1, ]*crit.dist*1.2, ctr[2] - eigScl[2, ]*crit.dist*1.2, col="blue", lwd=3) arrows(ctr[1], ctr[2], ctr[1] - eigScl[1, ]*crit.dist*1.2, ctr[2] - eigScl[2, ]*crit.dist*1.2, col="blue", lwd=3) points(ctr[1], ctr[2], pch=4, col="orange", lwd=3) points(X[,1],X[,2],col="red", pch=16) ## Plot based on standardized variables #### Points a constant distance from the origin: drive distance/fairway pct n <- nrow(X); p <- ncol(X) alpha <- 0.05 crit.dist <- sqrt(qchisq(1-alpha,p)) ctr <- c(0,0) angles <- seq(0, 2*pi, length.out=200) eigVal <- eigen(R)$values eigVec <- eigen(R)$vectors eigScl <- eigVec %*% diag(sqrt(eigVal)) # scale eigenvectors to length = square-root xMat <- rbind(ctr[1] + eigScl[1, ]*crit.dist, ctr[1] - eigScl[1, ]*crit.dist) yMat <- rbind(ctr[2] + eigScl[2, ]*crit.dist, ctr[2] - eigScl[2, ]*crit.dist) ellBase <- cbind(sqrt(eigVal[1])*crit.dist*cos(angles), sqrt(eigVal[2])*crit.dist*sin(angles)) # normal ellipse ellRot <- eigVec %*% t(ellBase) # rotated ellipse win.graph(width=7.0,height=5.5) plot((ellRot+ctr)[1, ], (ellRot+ctr)[2, ], asp=1, type="l", lwd=2, main="95% Density Ellipsoid and Principal Components", xlab="Drive Distance - Z", ylab="Fairway Percent - Z") # matlines(xMat, yMat, lty=1, lwd=2, col="blue") arrows(ctr[1], ctr[2], ctr[1] + eigScl[1, ]*crit.dist*1.2, ctr[2] + eigScl[2, ]*crit.dist*1.2, col="blue", lwd=3) arrows(ctr[1], ctr[2], ctr[1] + eigScl[1, ]*crit.dist*1.2, ctr[2] + eigScl[2, ]*crit.dist*1.2, col="blue", lwd=3) arrows(ctr[1], ctr[2], ctr[1] - eigScl[1, ]*crit.dist*1.2, ctr[2] - eigScl[2, ]*crit.dist*1.2, col="blue", lwd=3) arrows(ctr[1], ctr[2], ctr[1] - eigScl[1, ]*crit.dist*1.2, ctr[2] - eigScl[2, ]*crit.dist*1.2, col="blue", lwd=3) points(ctr[1], ctr[2], pch=4, col="orange", lwd=3) points((X[,1]-xbar1)/sqrt(S2[1,1]),(X[,2]-xbar2)/sqrt(S2[2,2]), col="red", pch=16) ### Bi-plot I.n <- diag(n) J.n <- matrix(rep(1/n,n^2),ncol=n) X.c <- (I.n - J.n) %*% X Xc.val <- eigen(t(X.c) %*% X.c)$value Xc.vec <- eigen(t(X.c) %*% X.c)$vector (Xc.V <- Xc.vec) W <- X.c %*% Xc.V l1_2 <- sqrt(diag(Xc.val)) (l12_v <- l1_2 %*% t(Xc.V)) (l12_vpd2 <- t(l12_v[1:2,])) u <- X.c %*% Xc.V %*% solve(l1_2) u ud2 <- u[,1:2] theta12 <- 57.2958*acos(sum(l12_vpd2[1,] * t(l12_vpd2[2,]))) # theta13 <- 57.2958*acos(sum(l12_vpd2[1,] * t(l12_vpd2[3,]))) # theta23 <- 57.2958*acos(sum(l12_vpd2[2,] * t(l12_vpd2[3,]))) # print(cbind(theta12,theta13,theta23)) l12_vpd2 <- cbind(l12_vpd2,matrix(rep(1,nrow(l12_vpd2)),ncol=1)) ud2 <- cbind(ud2,matrix(rep(2,nrow(ud2)),ncol=1)) lvu <- rbind(l12_vpd2,ud2) plot(ud2[,1],ud2[,2],xlim=c(-1,1),ylim=c(-1,1)) abline(h=0) abline(v=0) for (i in 1:nrow(l12_vpd2)) arrows(0,0,l12_vpd2[i,1]/(n-1),l12_vpd2[i,2]/(n-1), col="red")