###########################################################################
###  Lec1.r   STA 4211  Script file with code for lecture notes pp. 1 - 105
###########################################################################
## Normal pdf
dnorm(x=seq(-3,3),mean=0,sd=1)
# Note the trend in the values.  Where does the max occur?
# Draw the curve.  Default is standard normal.
curve(dnorm(x),from=-3,to=3)
##
##  Distribution of number of aces in two cards
x <- c(0,1,2)
p <- c(0.8507, 0.1448, 0.0045)
### Check P(X = 0)
48/52 * 47/51
###
ev <- sum(x*p); ev
var <- sum((x-ev)^2 * p); sd <- sqrt(var); var; sd
## p. 23, t quantiles
qt(.975,df=c(5,10,20,30))
# To find the value with area .025 to the right, get the .975 quantile.
##
## p. 24, One-sample t CI
x <- c(149,151,150)
t.test(x)
# Just ignore the hypothesis test; read the line with the CI only.
#  Better:  Can you figure out a way to output just the CI.
#  Perhaps:  t.test(x)$conf.int
##
## p. 32, Two-sample t test and CI
S <- c(2,6,4,6,4,5,5,6,5,6,7,5,4,8)
NS <- c(1,5,3,5,3,4,4,5,4,5,6,4,3,7,4)
t.test(S,NS)
t.test(S,NS,var.equal=TRUE)
##
## p. 53 Obtain the randomization for a CRD.
set.seed(12)
sample(1:8, replace=FALSE)
##
## Ex. Kenton Food Company
##  Use software to obtain SS, df, and F test
kenton <- read.table("Datasets/CH16TA01.txt",
                     col.names=c("Y","package","id"))
# The predictor variable must be a factor in R, so
# convert the numeric variable package to a factor.
kenton$package <- as.factor(kenton$package)
kenton$id <- as.factor(kenton$id) # not necessary
# Get the line plot of factor level means, and the comparative
#  boxplots.
plot.design(Y ~ package, data=kenton)
plot(Y ~ package, data=kenton)
# Dotplots are better for small samples than boxplots
# Get comparative dotplots
stripchart(kenton$Y ~ kenton$package,pch=1)
# p. 74 Summary statistics.  Fcn aggregate() is useful.
cellmeans <- aggregate(kenton$Y, by=list(package=kenton$package), FUN=mean)
cellsds <- aggregate(kenton$Y, by=list(package=kenton$package), FUN=sd)
cellmeans; cellsds
# Fit the one-way ANOVA model
fit <- aov(Y ~ package, data=kenton)
# Obtain the SS, df, F test and ANOVA table.
summary(fit)
# Here's how you would use the null distribution to obtain the
# P-value and/or the level .05 critical constant
1-pf(18.591,df1=3,df2=15)
qf(.95,df1=3,df2=15) #3.2874
##
## Set up dummy variables to fit the factor-effects model using
##  the regression function lm()
x1 <- c(rep(1,5),rep(0,9),rep(-1,5))
x2 <- c(rep(0,5),rep(1,5),rep(0,4),rep(-1,5))
x3 <- c(rep(0,10),rep(1,4),rep(-1,5))
lm3 <- lm(Y ~ x1 + x2 + x3, data=kenton); summary(lm3)
## 
##
##### Rust data
rust <- read.table("Datasets/CH17TA02.txt", header=FALSE)
names(rust) <- c("Y","brand","obs")
rust$brand <- factor(rust$brand,labels=c("A","B","C","D"))
rust[1:3,] # Print the first three observations.
## Obtain the plot of factor level means, and
##  the comparative boxplots.
plot.design(Y ~ brand, data=rust)
plot(Y ~ brand, data=rust)
##  Fit the one-way ANOVA model
##   and print the ANOVA table.
rust.aov <- aov(Y~ brand, data=rust)
anova(rust.aov)
##  Also get dotplots by group
stripchart(rust$Y~rust$brand, method="jitter",jitter=0.05,pch=1)
## Reminder of how to obtain summary statistics in R
ybar <- aggregate(x=rust$Y, by=data.frame(rust$brand), mean)
##
 ##  Obtain Tukey's HSD CIs for all pairwise differences.
TukeyHSD(rust.aov, "brand")
## plot(TukeyHSD(rust.aov)) # This line gives just the plot
##
## Below we obtain the critical constant for Tukey's HSD using qtukey():
##  This is for by-hand calculations.
qtukey(.95,nmeans=4,df=36)/sqrt(2)
## p. 102 Find the Scheffe critical constant for all possible contrasts.
S <- sqrt(3*qf(.95,3,36)); S
##
## p. 105 Find the Bonferroni critical constant for all pairwise comparisons (k=6).
B <- qt(1-.05/(2*6), df=36); B
## Clean up from rust before moving on:
rm(rust,rust.aov,ybar,S)