epl_nhl_bmi <- read.csv("http://www.stat.ufl.edu/~winner/data/epl_nhl_bmi.csv", header=T) attach(epl_nhl_bmi); names(epl_nhl_bmi) ### Obtain population sizes, means, and variances (N1 <- length(BMI[League==1])) (N2 <- length(BMI[League==2])) (MU1 <- mean(BMI[League==1])) (MU2 <- mean(BMI[League==2])) (VAR1 <- var(BMI[League==1])) (VAR2 <- var(BMI[League==2])) (MU1_MU2 <- MU1-MU2) (VAR_Ybar1_Ybar2 <- (VAR1+VAR2)/10) par(mfrow=c(2,1)) hist(BMI[League==1], breaks=50) hist(BMI[League==2], breaks=50) par(mfrow=c(1,1)) set.seed(12345) mean.nhl <- numeric(100000) var.nhl <- numeric(100000) # 10000 Random Samples of size n=30 (Sampling without replacement) for (i in 1:100000) { ransamp <- sample(BMI[League==2],30) mean.nhl[i] <- mean(ransamp) var.nhl[i] <- var(ransamp) } # Summary statistics for sample Mean and Variance mean(mean.nhl); var(mean.nhl); sd(mean.nhl); min(mean.nhl); max(mean.nhl) mean(var.nhl); var(var.nhl); sd(var.nhl); min(var.nhl); max(var.nhl) t.samp <- (mean.nhl-MU2)/sqrt(var.nhl/30) mean(t.samp); var(t.samp); sd(t.samp); min(t.samp); max(t.samp) # Histogram of Sampling Distribution of Y-bar with Normal Super-Imposed hist(mean.nhl,breaks=seq(25,28,.01),ylim=c(0,1600),xlab="ybar", main="Sampling Distribution of Y-bar - NHL",col="red") lines(seq(25,28,0.01),(100000/100)* dnorm(seq(25,28,0.01),MU2,sqrt(VAR2/30)),col="blue",lty=1,lwd=4) # Histogram of (n-1)S^2/SIGMA^2 with Chi-squared Super-Imposed nu <- 29 ys2lim <- 1.1*(100000/10)*max(dchisq(0:81,nu)) hist(nu*var.nhl/VAR2,breaks=seq(0,81,.1),ylim=c(0,ys2lim),xlab="S2", main="Sampling Distribution of Scaled S2",col="red") lines(seq(0,81,.1),(100000/10)*dchisq(seq(0,81,.1),nu), col="blue",lty=1,lwd=4) # Histogram of t-statistics hist(t.samp,breaks=seq(-5.7,5.7,.05),ylim=c(0,2500),xlab="t-statistic", main="Sampling Distribution of t-statistic",col="red") lines(seq(-5.7,5.7,.05),(100000/20)*dt(seq(-5.7,5.7,.05),nu), col="blue",lty=1,lwd=4) set.seed(98765) ybar1_ybar2 <- numeric(100000) var1 <- numeric(100000) var2 <- numeric(100000) # 100000 Random Samples of size n1=10, n2=10 (Sampling without replacement) for (i in 1:100000) { ransamp1 <- sample(BMI[League==1],10) ransamp2 <- sample(BMI[League==2],10) ybar1_ybar2[i] <- mean(ransamp1)-mean(ransamp2) var1[i] <- var(ransamp1) var2[i] <- var(ransamp2) } mean(ybar1_ybar2); var(ybar1_ybar2); min(ybar1_ybar2); max(ybar1_ybar2); F12 <- (var1/VAR1)/(var2/VAR2) mean(F12); var(F12); min(F12); max(F12) # Histogram of Sampling Distribution of Ybar1-Ybar2 with Normal Super-Imposed hist(ybar1_ybar2,breaks=seq(-6.60,-0.30,.025),ylim=c(0,1600), xlab="ybar",main="Sampling Distribution of Ybar1-Ybar2",col="red") lines(seq(-6.60,-0.30,.025),(100000/40)* dnorm(seq(-6.60,-0.30,.025),MU1_MU2,sqrt(VAR_Ybar1_Ybar2)),col="blue", lty=1,lwd=4) # Histogram of sampling distribution of F-Ratio nu1 <- 9; nu2 <- 9 yflim <- 1.1*50000*max(df(0:60,nu1,nu2)) hist(F12,breaks=seq(0,60,.5),ylim=c(0,yflim),xlab="F", main="Sampling Distribution of F-Ratio",col="red") lines(seq(0,60,.5),(100000/2)*df(seq(0,60,.5),nu1,nu2),col="blue", lty=1) # Truncated (<=10) Histogram of sampling distribution of F-Ratio nu1 <- 9; nu2 <- 9 yflim <- 1.1*(100000/100)*max(df(seq(0,10,.01),nu1,nu2)) hist(F12[F12<=10],breaks=seq(0,10,.01),ylim=c(0,yflim),xlab="F", main="Sampling Distribution of F-Ratio",col="red") lines(seq(0,10,.01),(100000/100)*df(seq(0,10,.01),nu1,nu2),col="blue", lty=1,lwd=4)