# Simple linear regression of Weight on Height, computed "by hand" from sums
# of squares and then cross-checked against lm().
#
# Expects numeric vectors `Height` and `Weight` to already be in scope; they
# are not created in this section.
#
# Lines starting with `#>` reproduce the output recorded in the original
# console session for the statement directly above them.

x <- Height # short aliases used to simplify typing
y <- Weight
n <- length(y)

# Fail early on inputs for which the formulas below are undefined
# (df.error = n - 2 must be positive, and x/y must pair up).
stopifnot(is.numeric(x), is.numeric(y), length(x) == n, n > 2)

# Sums of squares and cross-products, OLS estimates, fitted values ----

(ss.xx <- sum((x - mean(x))^2))
#> [1] 1623.237
(ss.yy <- sum((y - mean(y))^2))
#> [1] 81007.54
(ss.xy <- sum((x - mean(x)) * (y - mean(y))))
#> [1] 8634.835

# Least-squares estimates of slope and intercept
(b1 <- ss.xy / ss.xx)
#> [1] 5.319514
(b0 <- mean(y) - b1 * mean(x))
#> [1] -212.0523

# Fitted values and residuals
yhat <- b0 + b1 * x
e <- y - yhat

# Analysis of variance, r, r-square, error variance, F-test ----

# Total (corrected) sum of squares and its df
(ss.total <- ss.yy)
#> [1] 81007.54
(df.total <- n - 1)
#> [1] 138

# Error (residual) sum of squares and its df
(ss.error <- sum(e^2))
#> [1] 35074.41
(df.error <- n - 2)
#> [1] 137

# Regression sum of squares and its df
(ss.reg <- sum((yhat - mean(y))^2))
#> [1] 45933.13
(df.reg <- 1)
#> [1] 1

# Correlation coefficient
(r <- ss.xy / sqrt(ss.xx * ss.yy))
#> [1] 0.7530092

# Coefficient of determination
(r.square <- ss.reg / ss.total)
#> [1] 0.5670228

# Estimate of the error variance (sigma^2)
(s2 <- ss.error / df.error)
#> [1] 256.0176

# Standard error of estimate (estimate of sigma)
(se.est <- sqrt(s2))
#> [1] 16.00055

# F-statistic and its critical value at alpha = 0.05
(f.stat <- (ss.reg / df.reg) / (ss.error / df.error))
#> [1] 179.4139
(f.crit <- qf(0.95, df.reg, df.error))
#> [1] 3.910234

# P-value of the F-test. The upper tail is requested directly: the earlier
# `1 - pf(...)` form cancelled to exactly 0 in the recorded session, whereas
# summary(lm) reports `< 2.2e-16` for the same test.
(f.pval <- pf(f.stat, df.reg, df.error, lower.tail = FALSE))

# Variances, covariance, standard errors, t-tests and CIs ----

# Variance and standard error of b1
(s2.b1 <- s2 / ss.xx)
#> [1] 0.1577204
(s.b1 <- sqrt(s2.b1))
#> [1] 0.3971403

# Variance and standard error of b0
(s2.b0 <- s2 * ((1 / n) + (mean(x)^2 / ss.xx)))
#> [1] 828.4757
(s.b0 <- sqrt(s2.b0))
#> [1] 28.78325

# Covariance of b0 and b1
(s.b0.b1 <- -s2 * mean(x) / ss.xx)
#> [1] -11.41827

# t-statistics for H0: beta1 = 0 and H0: beta0 = 0
(t.b1 <- b1 / s.b1)
#> [1] 13.39455
(t.b0 <- b0 / s.b0)
#> [1] -7.367212

# Two-sided critical t-value (alpha = 0.05)
(t.crit <- qt(0.975, df.error))
#> [1] 1.977431

# Two-sided P-values, again taken from the upper tail to avoid cancellation.
# The earlier `2 * (1 - pt(...))` form printed 0 for beta1 and 1.48046e-11
# for beta0; summary(lm) reports `< 2e-16` and `1.48e-11` respectively.
(t.pval.b1 <- 2 * pt(abs(t.b1), df.error, lower.tail = FALSE))
(t.pval.b0 <- 2 * pt(abs(t.b0), df.error, lower.tail = FALSE))

# 95% confidence intervals for beta1 and beta0
(b1.ci <- b1 + qt(c(0.025, 0.975), df.error) * s.b1)
#> [1] 4.534197 6.104832
(b0.ci <- b0 + qt(c(0.025, 0.975), df.error) * s.b0)
#> [1] -268.9692 -155.1354

# Plot: data, smoother, CI for the mean, PI for an individual ----

# The smoother drawn is lowess(); the plot title refers to it as "Loess".
plot(
  x, y,
  xlim = c(60, 85), ylim = c(75, 300),
  main = "WNBA Data - Conf and Pred Bands and Loess"
)
lines(lowess(x, y))

# Grid of heights at which the bands are evaluated
xh <- 60:85

# Fitted mean and the two standard errors, computed once and reused:
# se.mean.h is for the mean response at xh, se.pred.h adds the extra
# `1 +` term for a single new observation.
yhat.h <- b0 + b1 * xh
se.mean.h <- sqrt(s2 * ((1 / n) + ((xh - mean(x))^2 / ss.xx)))
se.pred.h <- sqrt(s2 * (1 + (1 / n) + ((xh - mean(x))^2 / ss.xx)))

lines(xh, yhat.h - t.crit * se.mean.h)
lines(xh, yhat.h + t.crit * se.mean.h)
lines(xh, yhat.h - t.crit * se.pred.h)
lines(xh, yhat.h + t.crit * se.pred.h)

# Built-in procedure ----

wnba.ols <- lm(Weight ~ Height)

summary(wnba.ols)
#> Call:
#> lm(formula = Weight ~ Height)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -55.231  -9.994  -1.592   8.728  62.728
#>
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -212.0523    28.7833  -7.367 1.48e-11 ***
#> Height         5.3195     0.3971  13.395  < 2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
#> Residual standard error: 16 on 137 degrees of freedom
#> Multiple R-squared:  0.567, Adjusted R-squared:  0.5639
#> F-statistic: 179.4 on 1 and 137 DF,  p-value: < 2.2e-16

anova(wnba.ols)
#> Analysis of Variance Table
#>
#> Response: Weight
#>            Df Sum Sq Mean Sq F value    Pr(>F)
#> Height      1  45933   45933  179.41 < 2.2e-16 ***
#> Residuals 137  35074     256
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

confint(wnba.ols)
#>                   2.5 %      97.5 %
#> (Intercept) -268.969219 -155.135413
#> Height         4.534197    6.104832

# Same plot built from the fitted model object
plot(
  Height, Weight,
  xlim = c(60, 85), ylim = c(75, 300),
  main = "WNBA Data - Conf and Pred Bands and Loess"
)
abline(wnba.ols)
lines(lowess(Height, Weight))

xh <- 60:85
new.heights <- data.frame(Height = xh)

# Argument name and value are spelled out in full rather than relying on
# partial matching (`int = "c"` / `int = "p"`).
ycm <- predict(wnba.ols, newdata = new.heights, interval = "confidence")
ycp <- predict(wnba.ols, newdata = new.heights, interval = "prediction")

# Dashed lines: confidence band for the mean; dotted: prediction band
lines(xh, ycm[, "lwr"], lty = 2)
lines(xh, ycm[, "upr"], lty = 2)
lines(xh, ycp[, "lwr"], lty = 3)
lines(xh, ycp[, "upr"], lty = 3)