install.packages("FactoMineR") install.packages("factoextra") install.packages("corrplot") library("FactoMineR") library("factoextra") ## Read in data and assign variable names lpga2008 <- read.fwf("http://www.stat.ufl.edu/~winner/data/lpga2008.dat", width=c(30,8,8,8,8,8,8,8,8,8,8), col.names=c("golfer","drive","frwy","grnreg", "puttrnd","sandrnd","sandsv","przrnd","lnprzrnd","rounds","golferid")) attach(lpga2008) # lpga2008 ## Create new variables for analysis that remove outlying golfer(s) drive1 <- drive[grnreg >= 50] frwy1 <- frwy[grnreg >= 50] grnreg1 <- grnreg[grnreg >= 50] puttrnd1 <- puttrnd[grnreg >= 50] ## Create new data frame with only the 4 variables of interest lpga1 <- data.frame(drive1,frwy1,grnreg1,puttrnd1) detach(lpga2008) attach(lpga1) X <- as.matrix(lpga1) res.pca <- PCA(X, graph=FALSE) print(res.pca) eig.val <- get_eigenvalue(res.pca) eig.val win.graph(width=7.0, height=5.5) fviz_eig(res.pca, addlabels=TRUE, ylim=c(0,50)) var <- get_pca_var(res.pca) var head(var$coord) head(var$cos2) head(var$contrib) head(var$cor) ## Plot variables - Correlation Circle win.graph(width=7.0, height=5.5) fviz_pca_var(res.pca, col.var="black") ## Quality of representation head(var$cos2, 4) library("corrplot") win.graph(width=7.0, height=5.5) corrplot(var$cos2, is.corr=FALSE) # Total cos2 for variables on PC1 and PC2 win.graph(width=7.0, height=5.5) fviz_cos2(res.pca, choice="var", axes=1:2) # Color by cos2 values: quality on factor map win.graph(width=7.0, height=5.5) fviz_pca_var(res.pca, col.var="cos2", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE # Avoid text overlap" ) # Contribution of variables to PCs head(var$contrib, 4) # Check calculations/definitions sum(var$contrib[,1]) var$coord[1,1]^2 var$cos2[1,1] var$coord[1,1]^2 / sum(var$coord[,1]^2) var$cos2[1,1] / sum(var$cos2[,1]) var$contrib[1,1] win.graph(width=7.0, height=5.5) corrplot(var$contrib, is.corr=FALSE) # Barchart of contributions by variable for PC1, PC2 # includes highest 10 vars for each PC win.graph(width=7.0, height=5.5) fviz_contrib(res.pca, choice="var", axes=1, top=10) win.graph(width=7.0, height=5.5) fviz_contrib(res.pca, choice="var", axes=2, top=10) ## Combined Contribution to first 2 PCs contrib.12 <- (eig.val[1] * var$contrib[,1] + eig.val[2] * var$contrib[,2]) / (eig.val[1] + eig.val[2]) contrib.12 # Color by contrib values: quality on factor map win.graph(width=7.0, height=5.5) fviz_pca_var(res.pca, col.var="contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07") ) # Color by custom continuous variable set.seed(123) my.cont.var <- rnorm(4) win.graph(width=7.0, height=5.5) fviz_pca_var(res.pca, col.var=my.cont.var, gradient.cols = c("blue", "yellow", "red"), legend.title = "Cont. Var." ) # Color by groups # Create 3 groups of vars (centers=3) set.seed(123) res.km <- kmeans(var$coord, centers=3, nstart=25) grp <- as.factor(res.km$cluster) win.graph(width=7.0, height=5.5) fviz_pca_var(res.pca, col.var=grp, palette = c("#0073C2FF", "#EFC000FF", "#868686FF"), legend.title = "Cluster" ) # Dimension Description res.desc <- dimdesc(res.pca, axes=c(1,2), proba=0.05) Descriptions of dimensions 1 and 2 res.desc$Dim.1 res.desc$Dim.2 #### Analysis of Individuals ind <- get_pca_ind(res.pca) ind head(ind$coord) head(ind$cos2) head(ind$contrib) # Graph of Individuals # Color by cos2 values win.graph(width=7.0, height=5.5) fviz_pca_ind(res.pca, col.ind="cos2", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE # Avoid text overlap" ) # Change point size win.graph(width=7.0, height=5.5) fviz_pca_ind(res.pca, pointsize="cos2", pointshape=21, fill = "#E7B800", repel = TRUE # Avoid text overlap" ) # Color and pointsize by cos2 values win.graph(width=7.0, height=5.5) fviz_pca_ind(res.pca, col.ind="cos2", pointsize="cos2", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE # Avoid text overlap" ) # Barchart of quality of representation win.graph(width=7.0, height=5.5) fviz_cos2(res.pca, choice="ind") # Total contribution on PC1 and PC2 win.graph(width=7.0, height=5.5) fviz_contrib(res.pca, choice="ind", axes=1:2) # Color by custom continuous variable set.seed(123) my.cont.var <- rnorm(23) win.graph(width=7.0, height=5.5) fviz_pca_ind(res.pca, col.ind=my.cont.var, gradient.cols = c("blue", "yellow", "red"), legend.title = "Cont. Var." )