## One-Way Random Effects Model -- NBA 2016/17 player game logs
## Response: efficiency per 36 minutes (eff36); players are the random factor

nba2016 <- read.csv("http://www.stat.ufl.edu/~winner/data/nba_player_201617a1.csv")
attach(nba2016); names(nba2016)

pts36 <- 36*PTS/Minutes
# pts36 <- log(36*PTS/Minutes+1)

(n.play <- max(Player_ID1))          # Number of players in the population
(n.play.game <- length(PTS))         # Total number of player-games
game.play <- as.vector(tapply(PTS, Player_ID1, length))    # Games per player
## mean.play <- as.vector(tapply(pts36, Player_ID1, mean))
eff36 <- 36*efficiency/Minutes
mean.play <- as.vector(tapply(eff36, Player_ID1, mean))    # Player means
(mu <- mean(mean.play))              # Overall population mean

## Player effects and within-player errors
alpha.play <- mean.play - mu
var.e <- rep(0, n.play)
eps.play.game <- rep(0, n.play.game)
count <- 0
for (i1 in 1:n.play) {
  count1 <- count + 1
  count2 <- count + game.play[i1]
  eps.play.game[count1:count2] <- eff36[Player_ID1 == i1] - mean.play[i1]
  count <- count2
  var.e[i1] <- var(eff36[Player_ID1 == i1] - mean.play[i1])
}
sum(eps.play.game[1:game.play[1]])   # Errors sum to 0 within a player

par(mfrow=c(1,2))
hist(alpha.play, breaks=30, prob=T, main="Distribution of Player Effects")
lines(density(alpha.play))
hist(eps.play.game, breaks=100, prob=T, main="Distribution of Within Player Errors")
lines(density(eps.play.game))
par(mfrow=c(1,1))

## Is the within-player variance related to the player effect?
plot(var.e ~ alpha.play)
abline(lm(var.e ~ alpha.play))
summary(lm(var.e ~ alpha.play))

## Population variance components and intraclass correlation
(sigma2a <- var(alpha.play))
(sigma2e <- mean(var.e))
(rho_I <- sigma2a/(sigma2a+sigma2e))

## Set up and Take Random Samples
n.sample <- 10000          # Number of simulated samples
n.play.s <- 15             # Players sampled per sample
n.game.play.s <- 6         # Games sampled per player
set.seed(2468)
mean.out <- matrix(rep(0, n.sample*n.play.s), ncol=n.play.s)
var.out <- matrix(rep(0, n.sample*n.play.s), ncol=n.play.s)
MS_TRT.out <- rep(0, n.sample)
MS_ERR.out <- rep(0, n.sample)

for (i in 1:n.sample) {
  play.s <- sample(1:n.play, n.play.s)                            # Sample players
  for (i1 in 1:n.play.s) {
    play.s1 <- eff36[Player_ID1 == play.s[i1]]
    play.g.s <- sample(1:game.play[play.s[i1]], n.game.play.s)    # Sample games within player
    mean.out[i,i1] <- mean(play.s1[play.g.s])
    var.out[i,i1] <- var(play.s1[play.g.s])
  }
  MS_TRT.out[i] <- n.game.play.s * sum((mean.out[i,] - mean(mean.out[i,]))^2)/(n.play.s-1)
  MS_ERR.out[i] <- mean(var.out[i,])
}

## Inference for Mean
(sigma2_muhat <- sigma2a/n.play.s + sigma2e/(n.play.s*n.game.play.s))
muhat <- rowMeans(mean.out)
summary(muhat)
var(muhat)

t.025 <- qt(.975, n.play.s-1)
mu_L <- muhat - t.025 * sqrt(MS_TRT.out/(n.play.s*n.game.play.s))
mu_U <- muhat + t.025 * sqrt(MS_TRT.out/(n.play.s*n.game.play.s))
sum((mu_L <= mu) & (mu_U >= mu)) / n.sample     # Empirical coverage rate

# Histogram of mu-hat with Normal super-imposed
# Frequency histogram, so the normal density is multiplied by
# n.sample * binwidth = 10000 * 0.05
hist(muhat, breaks=seq(10,25,.05), xlab="muhat")
lines(seq(10,25,.05), 0.05*n.sample*dnorm(seq(10,25,.05), mu, sqrt(sigma2_muhat)),
      lwd=4, col="green")
abline(v=mu, lwd=4, col="brown")

## Inference for Error Variance
s2e <- rowSums(var.out)/n.play.s     # Same as MS_ERR.out
df.e <- n.play.s*(n.game.play.s-1)
summary(s2e)
var(s2e)

X2.025 <- qchisq(.025, df.e)
X2.975 <- qchisq(.975, df.e)
sigma2e_L <- df.e * s2e / X2.975
sigma2e_U <- df.e * s2e / X2.025
sum((sigma2e_L <= sigma2e) & (sigma2e_U >= sigma2e)) / n.sample   # Empirical coverage rate

s2e_scaled <- df.e * s2e / sigma2e

### Histogram of scaled variance
### Using s2e_scaled[s2e_scaled <= 130] leaves out very
### large values that "stretch the plot out"
yxlim <- 1.1*n.sample*max(dchisq(0:130, df.e))
hist(s2e_scaled[s2e_scaled <= 130], breaks=30:130, ylim=c(0,yxlim),
     xlab="Scaled Error Variance",
     main=expression(paste("Sampling Distribution of Scaled Error Variance - ", X[N-g]^2)))
lines(30:130, 1*n.sample*dchisq(30:130, df.e), lwd=4, col="purple")   # binwidth = 1
abline(v=df.e, lwd=4, col="red")
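### Optional check (a sketch, not part of the original analysis): if
### df.e*s2e/sigma2e is approximately chi-square with df.e degrees of freedom,
### then E(s2e) = sigma2e and Var(s2e) = 2*sigma2e^2/df.e. Comparing these
### normal-theory moments with the simulation moments above gives a quick sense
### of how well the chi-square approximation holds for this non-normal population.
c(theoretical.mean = sigma2e,           empirical.mean = mean(s2e))
c(theoretical.var  = 2*sigma2e^2/df.e,  empirical.var  = var(s2e))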
## Inference for Between Player Variance
s2a <- (MS_TRT.out - MS_ERR.out)/n.game.play.s
df.trt <- n.play.s - 1
summary(s2a)
var(s2a)

## Satterthwaite approximation to the degrees of freedom for s2a
df.a1 <- s2a^2
df.a2 <- ((MS_TRT.out/n.game.play.s)^2/df.trt) + ((MS_ERR.out/n.game.play.s)^2/df.e)
df.a <- df.a1 / df.a2
sigma2a_L <- df.a * s2a / qchisq(.975, df.a)
sigma2a_U <- df.a * s2a / qchisq(.025, df.a)
sum((sigma2a_L <= sigma2a) & (sigma2a_U >= sigma2a)) / n.sample   # Empirical coverage rate

hist(s2a, breaks=80)
abline(v=sigma2a, lwd=4, col="aquamarine4")

## Inference for the Intraclass Correlation
rho_I_hat <- s2a / (s2a + s2e)
summary(rho_I_hat)
var(rho_I_hat)
hist(rho_I_hat, breaks=80)
abline(v=rho_I, lwd=4, col="blue")

F_L <- qf(.025, df.trt, df.e)
F_U <- qf(.975, df.trt, df.e)
F_obs <- MS_TRT.out / MS_ERR.out
rho_L <- 1/(n.game.play.s*(1/(F_obs/F_U - 1)) + 1)
rho_U <- 1/(n.game.play.s*(1/(F_obs/F_L - 1)) + 1)
sum((rho_L <= rho_I) & (rho_U >= rho_I)) / n.sample   # Empirical coverage rate

## F-test of H0: sigma2a = 0 at alpha = 0.05 (F_obs computed above)
F.05 <- qf(.95, df.trt, df.e)
sum(F_obs >= F.05) / n.sample        # Empirical rejection rate

## Dividing each mean square by its expectation gives a statistic with an
## (approximate) F(df.trt, df.e) sampling distribution
F.theor <- (MS_TRT.out / (n.game.play.s*sigma2a + sigma2e)) / (MS_ERR.out / sigma2e)

### Histogram of F.theor
### Multiplies the F density by binwidth*n.sample = 0.05*10000
hist(F.theor, breaks=seq(0,6,0.05), main="Sampling Distribution of F")
lines(seq(0,6,0.05), 0.05*n.sample*df(seq(0,6,0.05), df.trt, df.e), lwd=4, col="red")
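### Optional check (a sketch, not part of the original analysis): under the
### normal-theory random effects model, F_obs = MS_TRT/MS_ERR is distributed as
### ((n.game.play.s*sigma2a + sigma2e)/sigma2e) times an F(df.trt, df.e) random
### variable -- the same scaling used to construct F.theor above. The implied
### rejection rate of H0: sigma2a = 0 at alpha = 0.05 is then
### P( F(df.trt, df.e) >= F.05 * sigma2e/(n.game.play.s*sigma2a + sigma2e) ),
### which can be compared with the empirical rate sum(F_obs >= F.05)/n.sample.
### Because the sampling here is from a finite, non-normal population, this
### theoretical rate is only an approximation.
1 - pf(F.05 * sigma2e / (n.game.play.s*sigma2a + sigma2e), df.trt, df.e)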