R Markdown

nba1 <- read.csv("http://www.stat.ufl.edu/~winner/data/nba_ht_wt.csv",header=T)
attach(nba1); names(nba1)
## [1] "Player" "Pos"    "Height" "Weight" "Age"
nba.mod1 <- lm(Weight ~ Height)
summary(nba.mod1)
## 
## Call:
## lm(formula = Weight ~ Height)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -45.583  -9.937  -0.260   9.417  56.079 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -279.8693    15.5512  -18.00   <2e-16 ***
## Height         6.3307     0.1965   32.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.24 on 503 degrees of freedom
## Multiple R-squared:  0.6736, Adjusted R-squared:  0.6729 
## F-statistic:  1038 on 1 and 503 DF,  p-value: < 2.2e-16
e <- resid(nba.mod1)
shapiro.test(e)
## 
##  Shapiro-Wilk normality test
## 
## data:  e
## W = 0.9948, p-value = 0.08593
plot(nba.mod1)

plot(Weight ~ Height, pch=16, cex=0.65,main=("NBA Y=Weight, X=Height"))
abline(nba.mod1, col="red", lwd=2)



### Breusch-Pagan Test
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

bptest(Weight ~ Height,studentize=FALSE)
## 
##  Breusch-Pagan test
## 
## data:  Weight ~ Height
## BP = 9.0097, df = 1, p-value = 0.002686
### F-test for Lack of Fit
nba.mod1a <- lm(Weight ~ factor(Height))
anova(nba.mod1,nba.mod1a)
## Analysis of Variance Table
## 
## Model 1: Weight ~ Height
## Model 2: Weight ~ factor(Height)
##   Res.Df    RSS Df Sum of Sq     F Pr(>F)
## 1    503 116782                          
## 2    487 111756 16    5026.5 1.369 0.1521
library(MASS)

bc.mod1 <- boxcox(nba.mod1,plot=T)   # Runs series of power transforms and plots

# print(cbind(bc.mod1$x,bc.mod1$y))      # Print out results (lambda,log-like)
print(bc.mod1$x[which.max(bc.mod1$y)])     # Print out "best" lambda
## [1] -0.1818182
ci.bc <- max(bc.mod1$y)-0.5*qchisq(0.95,1)   # Obtain cut-off for 95% CI (in log-like)
print(bc.mod1$x[bc.mod1$y>= ci.bc])    # Print Values of lambda in 95% CI
##  [1] -0.66666667 -0.62626263 -0.58585859 -0.54545455 -0.50505051 -0.46464646
##  [7] -0.42424242 -0.38383838 -0.34343434 -0.30303030 -0.26262626 -0.22222222
## [13] -0.18181818 -0.14141414 -0.10101010 -0.06060606 -0.02020202  0.02020202
## [19]  0.06060606  0.10101010  0.14141414  0.18181818  0.22222222  0.26262626
## [25]  0.30303030
nba.mod2 <- lm(log(Weight) ~ Height)
summary(nba.mod2)
## 
## Call:
## lm(formula = log(Weight) ~ Height)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.209443 -0.045771  0.003882  0.044487  0.220542 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.0781342  0.0696329   44.20   <2e-16 ***
## Height      0.0292315  0.0008799   33.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06823 on 503 degrees of freedom
## Multiple R-squared:  0.6869, Adjusted R-squared:  0.6863 
## F-statistic:  1104 on 1 and 503 DF,  p-value: < 2.2e-16
e2 <- resid(nba.mod2)
shapiro.test(e2)
## 
##  Shapiro-Wilk normality test
## 
## data:  e2
## W = 0.99757, p-value = 0.679
library(lmtest)
bptest(log(Weight) ~ Height,studentize=FALSE)
## 
##  Breusch-Pagan test
## 
## data:  log(Weight) ~ Height
## BP = 0.47107, df = 1, p-value = 0.4925
nba.mod3 <- lm(log(Weight) ~ factor(Height))
anova(nba.mod2,nba.mod3)
## Analysis of Variance Table
## 
## Model 1: log(Weight) ~ Height
## Model 2: log(Weight) ~ factor(Height)
##   Res.Df    RSS Df Sum of Sq     F Pr(>F)
## 1    503 2.3414                          
## 2    487 2.2478 16  0.093642 1.268 0.2131