bmi.sim <- read.csv("http://www.stat.ufl.edu/~winner/data/nhl_nba_ebl_bmi.csv", header=TRUE) attach(bmi.sim); names(bmi.sim) N <- rep(0,3) N[1] <- 717 N[2] <- 505 N[3] <- 526 NHL_BMI <- NHL_BMI[1:N[1]] NBA_BMI <- NBA_BMI[1:N[2]] EPL_BMI <- EPL_BMI[1:N[3]] par(mfrow=c(3,1)) hist(NHL_BMI,breaks=25) hist(NBA_BMI,breaks=25) hist(EPL_BMI,breaks=25) par(mfrow=c(1,1)) #NHL_RAN <- runif(N[1],-0.5,0.5) #NBA_RAN <- runif(N[2],-0.5,0.5) #EPL_RAN <- runif(N[3],-0.5,0.5) sigma <- rep(0,3) (sigma[1] <- sd(NHL_BMI)) (sigma[2] <- sd(NBA_BMI)) (sigma[3] <- sd(EPL_BMI)) mu <- rep(0,3) (mu[1] <- mean(NHL_BMI)) (mu[2] <- mean(NBA_BMI)) (mu[3] <- mean(EPL_BMI)) sampsz <- rep(3,0) sampsz[1] <- 10 sampsz[2] <- 6 sampsz[3] <- 8 sumssz <- sum(sampsz) num.trt <- length(sampsz) n.all <- sampsz[1]+sampsz[2]+sampsz[3] N.sim <- 1 set.seed(6529) ybar1 <- numeric(N.sim) sd1 <- numeric(N.sim) ybar2 <- numeric(N.sim) sd2 <- numeric(N.sim) ybar3 <- numeric(N.sim) sd3 <- numeric(N.sim) ybar <- numeric(N.sim) for (i in 1:N.sim) { samp1 <- sample(1:N[1],sampsz[1],replace=F) samp2 <- sample(1:N[2],sampsz[2],replace=F) samp3 <- sample(1:N[3],sampsz[3],replace=F) y1 <- NHL_BMI[samp1] y2 <- NBA_BMI[samp2] y3 <- EPL_BMI[samp3] y.all <- c(y1,y2,y3) ybar1[i] <- mean(y1); sd1[i] <- sd(y1) ybar2[i] <- mean(y2); sd2[i] <- sd(y2) ybar3[i] <- mean(y3); sd3[i] <- sd(y3) ybar[i] <- (sum(y1)+sum(y2)+sum(y3))/n.all } SST <- sampsz[1]*(ybar1-ybar)^2 + sampsz[2]*(ybar2-ybar)^2 + sampsz[3]*(ybar3-ybar)^2 SSE <- (sampsz[1]-1)*sd1^2 + (sampsz[2]-1)*sd2^2 + (sampsz[3]-1)*sd3^2 MST <- SST/(num.trt-1) MSE <- SSE/(sumssz-num.trt) F <- MST/MSE (f.alpha <- qf(.95,num.trt-1,sumssz-num.trt)) sum(F >= f.alpha)/N.sim ## Matrix with Cell means model (Y <- matrix(c(y1,y2,y3),ncol=1)) n.Y <- nrow(Y) X.1 <- matrix(c(rep(1,sampsz[1]),rep(0,sampsz[2]+sampsz[3]+sampsz[1]), rep(1,sampsz[2]),rep(0,sampsz[3]+sampsz[1]+sampsz[2]),rep(1,sampsz[3])), ncol=3) X.1 g.X <- ncol(X.1) (beta.1 <- solve(t(X.1) %*% X.1) %*% t(X.1) %*% Y) PX.1 <- X.1 %*% solve(t(X.1) %*% X.1) %*% t(X.1) I_n <- diag(n.Y) J_n_n <- matrix(rep((1/n.Y),n.Y*n.Y),ncol=n.Y) (ssto <- t(Y) %*% (I_n - J_n_n) %*% Y); (sse <- t(Y) %*% (I_n - PX.1) %*% Y); (mse <- sse[1,1]/(n.Y-g.X)) (sstr <- t(Y) %*% (PX.1 - J_n_n) %*% Y); (mstr <- sstr[1,1]/(g.X-1)) (F_obs <- mstr/mse); (P_F <- 1-pf(F_obs,g.X-1,n.Y-g.X)) s2_beta.1 <- mse * solve(t(X.1) %*% X.1) se_beta.1 <- sqrt(diag(s2_beta.1)) t_beta.1 <- beta.1/se_beta.1 p_t_beta.1 <- 2*(1-pt(abs(t_beta.1),n.Y-g.X)) beta.1_lo <- beta.1 - qt(.975,n.Y-g.X) beta.1_hi <- beta.1 + qt(.975,n.Y-g.X) cm.out <- cbind(beta.1,se_beta.1,t_beta.1,p_t_beta.1,beta.1_lo,beta.1_hi) colnames(cm.out) <- c("beta", "SE", "t", "Pr>|t|","LB","UB") round(cm.out,4) ### alpha1*=0 mu*=mu+alpha X.2 <- matrix(c(rep(1,n.Y),rep(0,sampsz[1]),rep(1,sampsz[2]), rep(0,sampsz[3]+sampsz[1]+sampsz[2]),rep(1,sampsz[3])), ncol=3) X.2 g.X <- ncol(X.2) (beta.2 <- solve(t(X.2) %*% X.2) %*% t(X.2) %*% Y) PX.2 <- X.2 %*% solve(t(X.2) %*% X.2) %*% t(X.2) I_n <- diag(n.Y) J_n_n <- matrix(rep((1/n.Y),n.Y*n.Y),ncol=n.Y) (ssto <- t(Y) %*% (I_n - J_n_n) %*% Y); (sse <- t(Y) %*% (I_n - PX.3) %*% Y); (mse <- sse[1,1]/(n.Y-g.X)) (sstr <- t(Y) %*% (PX.3 - J_n_n) %*% Y); (mstr <- sstr[1,1]/(g.X-1)) (F_obs <- mstr/mse); (P_F <- 1-pf(F_obs,g.X-1,n.Y-g.X)) s2_beta.2 <- mse * solve(t(X.2) %*% X.2) se_beta.2 <- sqrt(diag(s2_beta.2)) t_beta.2 <- beta.2/se_beta.2 p_t_beta.2 <- 2*(1-pt(abs(t_beta.2),n.Y-g.X)) beta.2_lo <- beta.2 - qt(.975,n.Y-g.X) beta.2_hi <- beta.2 + qt(.975,n.Y-g.X) a1.out <- cbind(beta.2,se_beta.2,t_beta.2,p_t_beta.2,beta.2_lo,beta.2_hi) colnames(a1.out) <- c("beta", "SE", "t", "Pr>|t|","LB","UB") round(a1.out,4) ### sum(a_i)=0 X.3 <- matrix(c(rep(1,n.Y),rep(1,sampsz[1]),rep(0,sampsz[2]),rep(-1,sampsz[3]), rep(0,sampsz[1]),rep(1,sampsz[2]),rep(-1,sampsz[3])), ncol=3) X.3 g.X <- ncol(X.3) (beta.3 <- solve(t(X.3) %*% X.3) %*% t(X.3) %*% Y) PX.3 <- X.3 %*% solve(t(X.3) %*% X.3) %*% t(X.3) I_n <- diag(n.Y) J_n_n <- matrix(rep((1/n.Y),n.Y*n.Y),ncol=n.Y) (ssto <- t(Y) %*% (I_n - J_n_n) %*% Y); (sse <- t(Y) %*% (I_n - PX.3) %*% Y); (mse <- sse[1,1]/(n.Y-g.X)) (sstr <- t(Y) %*% (PX.3 - J_n_n) %*% Y); (mstr <- sstr[1,1]/(g.X-1)) (F_obs <- mstr/mse); (P_F <- 1-pf(F_obs,g.X-1,n.Y-g.X)) s2_beta.3 <- mse * solve(t(X.3) %*% X.3) se_beta.3 <- sqrt(diag(s2_beta.3)) t_beta.3 <- beta.3/se_beta.3 p_t_beta.3 <- 2*(1-pt(abs(t_beta.3),n.Y-g.X)) beta.3_lo <- beta.3 - qt(.975,n.Y-g.X) beta.3_hi <- beta.3 + qt(.975,n.Y-g.X) sum_a.out <- cbind(beta.3,se_beta.3,t_beta.3,p_t_beta.3,beta.3_lo,beta.3_hi) colnames(sum_a.out) <- c("beta", "SE", "t", "Pr>|t|","LB","UB") round(sum_a.out,4) ### sum(n_ia_i) = 0 X.4 <- matrix(c(rep(1,n.Y),rep(1,sampsz[1]),rep(0,sampsz[2]), rep(-sampsz[1]/sampsz[3],sampsz[3]), rep(0,sampsz[1]),rep(1,sampsz[2]), rep(-sampsz[2]/sampsz[3],sampsz[3])), ncol=3) X.4 g.X <- ncol(X.4) (beta.4 <- solve(t(X.4) %*% X.4) %*% t(X.4) %*% Y) PX.4 <- X.4 %*% solve(t(X.4) %*% X.4) %*% t(X.4) I_n <- diag(n.Y) J_n_n <- matrix(rep((1/n.Y),n.Y*n.Y),ncol=n.Y) (ssto <- t(Y) %*% (I_n - J_n_n) %*% Y); (sse <- t(Y) %*% (I_n - PX.4) %*% Y); (mse <- sse[1,1]/(n.Y-g.X)) (sstr <- t(Y) %*% (PX.4 - J_n_n) %*% Y); (mstr <- sstr[1,1]/(g.X-1)) (F_obs <- mstr/mse); (P_F <- 1-pf(F_obs,g.X-1,n.Y-g.X)) s2_beta.4 <- mse * solve(t(X.4) %*% X.4) se_beta.4 <- sqrt(diag(s2_beta.4)) t_beta.4 <- beta.4/se_beta.4 p_t_beta.4 <- 2*(1-pt(abs(t_beta.4),n.Y-g.X)) beta.4_lo <- beta.4 - qt(.975,n.Y-g.X) beta.4_hi <- beta.4 + qt(.975,n.Y-g.X) sum_na.out <- cbind(beta.4,se_beta.4,t_beta.4,p_t_beta.4,beta.4_lo,beta.4_hi) colnames(sum_na.out) <- c("beta", "SE", "t", "Pr>|t|","LB","UB") round(sum_na.out,4) ### Helmert contrast X.5 <- matrix(c(rep(1,n.Y),rep(-1,sampsz[1]),rep(1,sampsz[2]), rep(0,sampsz[3]),rep(-1,sampsz[1]),rep(-1,sampsz[2]), rep(2,sampsz[3])), ncol=3) X.5 g.X <- ncol(X.5) (beta.5 <- solve(t(X.5) %*% X.5) %*% t(X.5) %*% Y) PX.5 <- X.5 %*% solve(t(X.5) %*% X.5) %*% t(X.5) I_n <- diag(n.Y) J_n_n <- matrix(rep((1/n.Y),n.Y*n.Y),ncol=n.Y) (ssto <- t(Y) %*% (I_n - J_n_n) %*% Y); (sse <- t(Y) %*% (I_n - PX.5) %*% Y); (mse <- sse[1,1]/(n.Y-g.X)) (sstr <- t(Y) %*% (PX.5 - J_n_n) %*% Y); (mstr <- sstr[1,1]/(g.X-1)) (F_obs <- mstr/mse); (P_F <- 1-pf(F_obs,g.X-1,n.Y-g.X)) s2_beta.5 <- mse * solve(t(X.5) %*% X.5) se_beta.5 <- sqrt(diag(s2_beta.5)) t_beta.5 <- beta.5/se_beta.5 p_t_beta.5 <- 2*(1-pt(abs(t_beta.5),n.Y-g.X)) beta.5_lo <- beta.5 - qt(.975,n.Y-g.X) beta.5_hi <- beta.5 + qt(.975,n.Y-g.X) helmert.out <- cbind(beta.5,se_beta.5,t_beta.5,p_t_beta.5,beta.5_lo,beta.5_hi) colnames(helmert.out) <- c("beta", "SE", "t", "Pr>|t|","LB","UB") round(helmert.out,4) ### aov function league <- c(rep(1,sampsz[1]),rep(2,sampsz[2]),rep(3,sampsz[3])) league <- factor(league) ### alpha1*=0 mu*=mu+alpha1 (default) bmi.aov1 <- aov(y.all ~ league) summary.lm(bmi.aov1) anova(bmi.aov1) ### Sum(alpha_i) = 0 options(contrasts=c("contr.sum","contr.poly")) bmi.aov2 <- aov(y.all ~ league) summary.lm(bmi.aov2) anova(bmi.aov2) ### Helmert options(contrasts=c("contr.helmert","contr.poly")) bmi.aov3 <- aov(y.all ~ league) summary.lm(bmi.aov3) anova(bmi.aov3) ## Cell Means options(contrasts=c("contr.treatment","contr.poly")) bmi.aov4 <- aov(y.all ~ league - 1) summary.lm(bmi.aov4) anova(bmi.aov4)