R Markdown
nba1 <- read.csv("http://www.stat.ufl.edu/~winner/data/nba_ht_wt.csv",header=T)
attach(nba1); names(nba1)
## [1] "Player" "Pos" "Height" "Weight" "Age"
nba.mod1 <- lm(Weight ~ Height)
summary(nba.mod1)
##
## Call:
## lm(formula = Weight ~ Height)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.583 -9.937 -0.260 9.417 56.079
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -279.8693 15.5512 -18.00 <2e-16 ***
## Height 6.3307 0.1965 32.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.24 on 503 degrees of freedom
## Multiple R-squared: 0.6736, Adjusted R-squared: 0.6729
## F-statistic: 1038 on 1 and 503 DF, p-value: < 2.2e-16
e <- resid(nba.mod1)
shapiro.test(e)
##
## Shapiro-Wilk normality test
##
## data: e
## W = 0.9948, p-value = 0.08593
plot(nba.mod1)




plot(Weight ~ Height, pch=16, cex=0.65,main=("NBA Y=Weight, X=Height"))
abline(nba.mod1, col="red", lwd=2)
### Breusch-Pagan Test
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric

bptest(Weight ~ Height,studentize=FALSE)
##
## Breusch-Pagan test
##
## data: Weight ~ Height
## BP = 9.0097, df = 1, p-value = 0.002686
### F-test for Lack of Fit
nba.mod1a <- lm(Weight ~ factor(Height))
anova(nba.mod1,nba.mod1a)
## Analysis of Variance Table
##
## Model 1: Weight ~ Height
## Model 2: Weight ~ factor(Height)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 503 116782
## 2 487 111756 16 5026.5 1.369 0.1521
library(MASS)
bc.mod1 <- boxcox(nba.mod1,plot=T) # Runs series of power transforms and plots

# print(cbind(bc.mod1$x,bc.mod1$y)) # Print out results (lambda,log-like)
print(bc.mod1$x[which.max(bc.mod1$y)]) # Print out "best" lambda
## [1] -0.1818182
ci.bc <- max(bc.mod1$y)-0.5*qchisq(0.95,1) # Obtain cut-off for 95% CI (in log-like)
print(bc.mod1$x[bc.mod1$y>= ci.bc]) # Print Values of lambda in 95% CI
## [1] -0.66666667 -0.62626263 -0.58585859 -0.54545455 -0.50505051 -0.46464646
## [7] -0.42424242 -0.38383838 -0.34343434 -0.30303030 -0.26262626 -0.22222222
## [13] -0.18181818 -0.14141414 -0.10101010 -0.06060606 -0.02020202 0.02020202
## [19] 0.06060606 0.10101010 0.14141414 0.18181818 0.22222222 0.26262626
## [25] 0.30303030
nba.mod2 <- lm(log(Weight) ~ Height)
summary(nba.mod2)
##
## Call:
## lm(formula = log(Weight) ~ Height)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.209443 -0.045771 0.003882 0.044487 0.220542
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0781342 0.0696329 44.20 <2e-16 ***
## Height 0.0292315 0.0008799 33.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06823 on 503 degrees of freedom
## Multiple R-squared: 0.6869, Adjusted R-squared: 0.6863
## F-statistic: 1104 on 1 and 503 DF, p-value: < 2.2e-16
e2 <- resid(nba.mod2)
shapiro.test(e2)
##
## Shapiro-Wilk normality test
##
## data: e2
## W = 0.99757, p-value = 0.679
library(lmtest)
bptest(log(Weight) ~ Height,studentize=FALSE)
##
## Breusch-Pagan test
##
## data: log(Weight) ~ Height
## BP = 0.47107, df = 1, p-value = 0.4925
nba.mod3 <- lm(log(Weight) ~ factor(Height))
anova(nba.mod2,nba.mod3)
## Analysis of Variance Table
##
## Model 1: log(Weight) ~ Height
## Model 2: log(Weight) ~ factor(Height)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 503 2.3414
## 2 487 2.2478 16 0.093642 1.268 0.2131