wnba1 <- read.csv("http://www.stat.ufl.edu/~winner/data/wnba_spread.csv") attach(wnba1); names(wnba1) par(mfrow=c(1,2)) qqnorm(hmSpDev, main="Normal QQ Plot - Home Spread Deviation"); qqline(hmSpDev) qqnorm(OUDev, main="Normal QQ Plot - Over/Under Deviation"); qqline(OUDev) savePlot("wnba_spread1.png", type="png") par(mfrow=c(1,1)) summary(cbind(hmSpDev,OUDev)) (N <- length(OUDev)) (Sigma <- ((N-1)/N) * cov(cbind(hmSpDev,OUDev))) (mu <- colMeans(cbind(hmSpDev,OUDev))) (cor(cbind(hmSpDev,OUDev))) #### Points a constant distance from the origin: ### Home Spread / Over-Under Deviations p <- 2 alpha <- 0.05 crit.dist <- sqrt(qchisq(1-alpha,p)) A <- Sigma ctr <- c(mean(hmSpDev),mean(OUDev)) angles <- seq(0, 2*pi, length.out=200) eigVal <- eigen(A)$values eigVec <- eigen(A)$vectors eigScl <- eigVec %*% diag(sqrt(eigVal)) # scale eigenvectors to length = square-root xMat <- rbind(ctr[1] + eigScl[1, ]*crit.dist, ctr[1] - eigScl[1, ]*crit.dist) yMat <- rbind(ctr[2] + eigScl[2, ]*crit.dist, ctr[2] - eigScl[2, ]*crit.dist) ellBase <- cbind(sqrt(eigVal[1])*crit.dist*cos(angles), sqrt(eigVal[2])*crit.dist*sin(angles)) # normal ellipse ellRot <- eigVec %*% t(ellBase) # rotated ellipse win.graph(width=7.0,height=5.5) plot((ellRot+ctr)[1, ], (ellRot+ctr)[2, ], asp=1, type="l", lwd=2, main="WNBA Home Spread/Over-Under 95% Probability Ellipsoid", xlab="x1", ylab="x2") matlines(xMat, yMat, lty=1, lwd=2, col="blue") points(ctr[1], ctr[2], pch=4, col="orange", lwd=3) points(jitter(hmSpDev,3), jitter(OUDev,3), pch=16, cex=.50) savePlot("wnba_spread2.png", type="png") ### Begin sampling set.seed(12345) num.sample <- 100000 n <- 50 num.var <- p^2 wnba.dev <- cbind(hmSpDev, OUDev) dbar <- matrix(rep(0,p*num.sample),ncol=p) Sd <- matrix(rep(0,num.var*num.sample),ncol=num.var) T2 <- matrix(rep(0,num.sample), ncol=1) I.n <- diag(n) J.n <- matrix(rep(1,n^2), ncol=n) one.n <- matrix(rep(1,n),ncol=1) for (i1 in 1:num.sample) { games <- sample(N,n) X <- wnba.dev[games,] dbar.s <- as.matrix(t((1/n) * t(X) %*% one.n)) Sd.s <- (1/(n-1)) * t(X) %*% (I.n - (1/n)*J.n) %*% X T2[i1] <- n * dbar.s %*% solve(Sd.s) %*% t(dbar.s) dbar[i1,] <- t(dbar.s) var.count <- 0 for (i2 in 1:p) { for (i3 in 1:p) { var.count <- var.count + 1 Sd[i1,var.count] <- Sd.s[i2,i3] } } } CV.T2 <- (p*(n-1)/(n-p)) * qf(.95,p,n-p) sum(T2 >= CV.T2) / num.sample CV.Bon <- qt(1-.05/(2*p),n-1) dbar.1 <- dbar[,1]; dbar.2 <- dbar[,2] Sd.11 <- Sd[,1]; Sd.12 <- Sd[,2]; Sd.22 <- Sd[,4] d1.LB <- dbar.1 - sqrt(CV.T2) * sqrt(Sd.11/n) d1.UB <- dbar.1 + sqrt(CV.T2) * sqrt(Sd.11/n) d2.LB <- dbar.2 - sqrt(CV.T2) * sqrt(Sd.22/n) d2.UB <- dbar.2 + sqrt(CV.T2) * sqrt(Sd.22/n) sum((d1.LB <= mu[1]) & (d1.UB >= mu[1])) / num.sample sum((d2.LB <= mu[2]) & (d2.UB >= mu[2])) / num.sample d1.LB.B <- dbar.1 - CV.Bon * sqrt(Sd.11/n) d1.UB.B <- dbar.1 + CV.Bon * sqrt(Sd.11/n) d2.LB.B <- dbar.2 - CV.Bon * sqrt(Sd.22/n) d2.UB.B <- dbar.2 + CV.Bon * sqrt(Sd.22/n) sum((d1.LB.B <= mu[1]) & (d1.UB.B >= mu[1])) / num.sample sum((d2.LB.B <= mu[2]) & (d2.UB.B >= mu[2])) / num.sample sum((d1.LB.B <= mu[1]) & (d1.UB.B >= mu[1]) & (d2.LB.B <= mu[2]) & (d2.UB.B >= mu[2])) / num.sample mean(dbar.1); mean(dbar.2) var(dbar.1); var(dbar.2); cov(cbind(dbar.1,dbar.2)) mu; Sigma/n mean(Sd.11); mean(Sd.12); mean(Sd.22) mean((n-1)*Sd.11); mean((n-1)*Sd.12); mean((n-1)*Sd.22) (n-1)*Sigma[1,1] (n-1)*Sigma[1,2] (n-1)*Sigma[2,2] var((n-1)*Sd.11); var((n-1)*Sd.12); var((n-1)*Sd.22) (n-1)*2*Sigma[1,1]^2 (n-1)*(Sigma[1,2]^2 + Sigma[1,1]*Sigma[2,2]) (n-1)*2*Sigma[2,2]^2 ## Plots based on empirical means xv <- seq(-10,10,0.1) par(mfrow=c(2,1)) # win.graph(width=7.0,height=5.5) hist(dbar.1, breaks=seq(-10,10,0.1), main="Histogram of Mean Home Spread Dev") lines(xv,num.sample*0.1*dnorm(xv,mu[1],sqrt(Sigma[1,1]/n)), col="blue", lwd=3) hist(dbar.2, breaks=seq(-10,10,0.1), main="Histogram of Over/Under Dev") lines(xv,num.sample*0.1*dnorm(xv,mu[2],sqrt(Sigma[2,2]/n)), col="blue", lwd=3) savePlot("wnba_spread3.png", type="png") par(mfrow=c(1,1)) p <- 2 alpha <- 0.05 crit.dist <- sqrt(qchisq(1-alpha,p)) A <- Sigma/n ctr <- c(mu[1],mu[2]) angles <- seq(0, 2*pi, length.out=200) eigVal <- eigen(A)$values eigVec <- eigen(A)$vectors eigScl <- eigVec %*% diag(sqrt(eigVal)) # scale eigenvectors to length = square-root xMat <- rbind(ctr[1] + eigScl[1, ]*crit.dist, ctr[1] - eigScl[1, ]*crit.dist) yMat <- rbind(ctr[2] + eigScl[2, ]*crit.dist, ctr[2] - eigScl[2, ]*crit.dist) ellBase <- cbind(sqrt(eigVal[1])*crit.dist*cos(angles), sqrt(eigVal[2])*crit.dist*sin(angles)) # normal ellipse ellRot <- eigVec %*% t(ellBase) # rotated ellipse win.graph(width=7.0,height=5.5) plot((ellRot+ctr)[1, ], (ellRot+ctr)[2, ], asp=1, type="l", lwd=4, col="red", main="WNBA Emprical Means & 95% Probability Ellipsoid", xlab="Home Spread Dev Mean", ylab="Over/Under Dev Mean") matlines(xMat, yMat, lty=1, lwd=2, col="blue") points(ctr[1], ctr[2], pch=4, col="orange", lwd=4) points(jitter(dbar.1,3), jitter(dbar.2,3), pch=".") savePlot("wnba_spread4.png", type="png") T2s <- ((n-p)/(p*(n-1))) * T2 xv <- seq(0,8,.02) win.graph(width=7.0,height=5.5) hist(T2s[T2s <= 8.0], breaks=seq(0,8,.02), xlab="T2*(n-p)/(p*(n-1))", main="Histogram of Scaled T2 Statistic & F(2,48) Density") lines(xv,num.sample*0.02*df(xv,p,n-p),col="green",lwd=4) savePlot("wnba_spread5.png", type="png") ## Plots based on empirical (scaled) variances xv <- 0:80 Sd.11s <- (n-1)*Sd.11/Sigma[1,1] Sd.22s <- (n-1)*Sd.22/Sigma[2,2] ys2lim <- 1.1*100000*max(dchisq(0:80,n-1)) par(mfrow=c(2,1)) # win.graph(width=7.0,height=5.5) hist(Sd.11s[Sd.11s <= 80], breaks=0:80, ylim=c(0,ys2lim), main="Histogram of Scaled Variances of Home Spread Dev") lines(xv,num.sample*1*dchisq(xv,n-1), col="purple", lwd=3) hist(Sd.22s[Sd.22s <= 80], breaks=0:80, ylim=c(0,ys2lim), main="Histogram of Scaled Variances of Over/Under Dev") lines(xv,num.sample*1*dchisq(xv,n-1), col="purple", lwd=3) savePlot("wnba_spread6.png", type="png") par(mfrow=c(1,1)) r.12 <- Sd.12 / sqrt(Sd.11 * Sd.22) hist(r.12, breaks=seq(-0.6,0.6,.005)) r.12s <- r.12*sqrt((n-2)/(1-r.12^2)) xv <- seq(-3,3,.05) win.graph(width=7.0,height=5.5) hist(r.12s[(r.12s >= -3) & (r.12s <= 3)], breaks=seq(-3,3,.05), xlab="r*sqrt((n-2)/(1-r^2))", main="Histogram of Scaled Correlation Coefficient & t(n-2) Density") lines(xv,num.sample*0.05*dt(xv,n-2),col="brown",lwd=4) savePlot("wnba_spread7.png", type="png")