# Read the fixed-width LPGA 2008 file: one 30-character golfer field followed
# by eight 8-character fields. The argument is spelled out as `widths` instead
# of relying on partial matching of `width`.
lpga1 <- read.fwf(
  "http://www.stat.ufl.edu/~winner/data/lpga2008.dat",
  widths = c(30, 8, 8, 8, 8, 8, 8, 8, 8),
  col.names = c(
    "golfer", "drive", "fairway", "green", "putts", "sandshot",
    "sandsave", "prz", "logprz"
  )
)

# Keep only the rows that have no missing value in any column.
lpga <- na.exclude(lpga1)

# Only the NA-free frame is attached. The raw frame has exactly the same
# column names, so attaching it first was fully masked by this call and only
# produced masking messages.
# NOTE(review): attach() is kept for any code that refers to columns by bare
# name; prefer `data = lpga` / `lpga$col` and drop this once nothing needs it.
attach(lpga)

##### Obtain "training" and "validation" sets
# Fix the RNG seed so the same split is drawn on every run.
set.seed(1480)

# Draw 100 distinct row indices of lpga for the training set. The row count
# comes from the data frame itself, not from a column looked up on the search
# path, and seq_len() stays well-defined for an empty frame.
lpga.cv.samp <- sample(seq_len(nrow(lpga)), 100, replace = FALSE)

# Sampled rows form the training set; every remaining row is held out.
lpga.cv.in <- lpga[lpga.cv.samp, ]
lpga.cv.out <- lpga[-lpga.cv.samp, ]


######### Model search by AIC: backward elimination, forward selection and
######### stepwise regression. Terms are judged by whole-model AIC, not by
######### tests on individual regression coefficients.
######### fit1 (all six predictors) and fit2 (intercept only) are the
######### "extreme" models that bound the search.

library(MASS)

fit1 <- lm(
  logprz ~ drive + fairway + green + putts + sandshot + sandsave,
  data = lpga.cv.in
)
fit2 <- lm(logprz ~ 1, data = lpga.cv.in)

# Backward: start from the full model and drop terms.
stepAIC(fit1, direction = "backward")

# Forward: start from the intercept-only model and add terms up to fit1.
stepAIC(
  fit2,
  direction = "forward",
  scope = list(upper = fit1, lower = fit2)
)

# Stepwise: start from the intercept-only model; terms may enter or leave.
stepAIC(
  fit2,
  direction = "both",
  scope = list(upper = fit1, lower = fit2)
)

############### Fit best model
# Five-predictor model fitted on the training rows.
# NOTE(review): presumably the model chosen by the stepAIC runs -- confirm.

lpga.mod1 <- lm(
  logprz ~ green + putts + sandshot + sandsave + drive,
  data = lpga.cv.in
)

summary(lpga.mod1)   # coefficient table and overall fit
anova(lpga.mod1)     # sequential analysis-of-variance table
drop1(lpga.mod1)     # effect of deleting each term on its own

# Residuals and fitted values, reused by the diagnostics that follow.
e <- residuals(lpga.mod1)
yhat <- predict(lpga.mod1)

# Case-level diagnostics: studentized residuals and influence measures.
rstudent(lpga.mod1)
influence.measures(lpga.mod1)

# Write both residual plots to a PDF file.
# NOTE(review): absolute Windows path -- the target directory must exist.
pdf("E:\\blue_drive\\Rmisc\\graphs\\lpga1a.pdf")

#### Run one plot at a time if running in R
# Residuals against fitted values.
plot(yhat, e)

# Normal Q-Q plot of the residuals with its reference line.
qqnorm(e)
qqline(e)

dev.off()

### Test for Normal errors
shapiro.test(e)

#### Breusch-Pagan test for constant error variance
# install.packages("lmtest")   # a CRAN mirror must already be set (Packages tab)
library(lmtest)

# Same formula and training data as lpga.mod1; studentize = FALSE asks for the
# original, non-studentized form of the statistic.
bptest(
  logprz ~ green + putts + sandshot + sandsave + drive,
  data = lpga.cv.in,
  studentize = FALSE
)