# LPGA 2008 data: AIC-based model selection and residual diagnostics for a
# linear model of `logprz` on driving, accuracy, putting and sand statistics,
# fitted on a 100-row random "training" subset.

# Read the fixed-width file: a 30-character golfer field followed by eight
# 8-character fields.
lpga1 <- read.fwf(
  "http://www.stat.ufl.edu/~winner/data/lpga2008.dat",
  widths = c(30, 8, 8, 8, 8, 8, 8, 8, 8),
  col.names = c("golfer", "drive", "fairway", "green", "putts", "sandshot",
                "sandsave", "prz", "logprz")
)

# Drop rows containing missing values before any sampling or modelling.
lpga <- na.exclude(lpga1)

# NOTE(review): attach() is discouraged. Nothing below needs it any more
# (every model call passes `data =`); it is kept only in case code outside
# this section refers to bare column names. Remove once that is confirmed.
attach(lpga)

##### Obtain "training" and "validation" sets
set.seed(1480)  # fixed seed so the split is reproducible
# Draw 100 distinct row indices; the remaining rows form the validation set.
lpga.cv.samp <- sample(seq_len(nrow(lpga)), 100, replace = FALSE)
lpga.cv.in <- lpga[lpga.cv.samp, ]
lpga.cv.out <- lpga[-lpga.cv.samp, ]

######### Perform Backward Elimination, Forward Selection, and Stepwise Regression
######### Based on Model AIC (not individual regression coefficients)
######### fit1 and fit2 represent "extreme" models
library(MASS)

# fit1: all six candidate predictors; fit2: intercept only.
fit1 <- lm(logprz ~ drive + fairway + green + putts + sandshot + sandsave,
           data = lpga.cv.in)
fit2 <- lm(logprz ~ 1, data = lpga.cv.in)

# Each call prints its selection path and final model at top level.
stepAIC(fit1, direction = "backward")
stepAIC(fit2, direction = "forward", scope = list(upper = fit1, lower = fit2))
stepAIC(fit2, direction = "both", scope = list(upper = fit1, lower = fit2))

############### Fit best model
# Same predictors as fit1 except `fairway`.
lpga.mod1 <- lm(logprz ~ green + putts + sandshot + sandsave + drive,
                data = lpga.cv.in)
summary(lpga.mod1)
anova(lpga.mod1)
drop1(lpga.mod1)

# Residuals and fitted values on the training rows, used in the plots and
# the normality test below.
e <- resid(lpga.mod1)
yhat <- predict(lpga.mod1)

# Case diagnostics: studentized residuals and influence measures.
rstudent(lpga.mod1)
influence.measures(lpga.mod1)

# Write both diagnostic plots to a PDF file.
# NOTE(review): the output path is machine-specific (Windows E: drive).
pdf("E:\\blue_drive\\Rmisc\\graphs\\lpga1a.pdf")
plot(yhat, e)           #### Run one plot at a time if running in R
qqnorm(e); qqline(e)    #### Run one plot at a time if running in R
dev.off()

shapiro.test(e)   ### Test for Normal errors

#### Breusch-Pagan test for Constant error variance
# install.packages("lmtest")  # You must have already Set CRAN Mirror under Packages Tab
library(lmtest)
# studentize = FALSE is passed explicitly rather than relying on bptest's
# default.
bptest(logprz ~ green + putts + sandshot + sandsave + drive,
       data = lpga.cv.in, studentize = FALSE)