perf <- read.csv("http://users.stat.ufl.edu/~winner/data/perfume_fakeorig.csv") attach(perf); names(perf) summary(ClMn[Original==0]); summary(ClMn[Original==1]) summary(NaMn[Original==0]); summary(NaMn[Original==1]) summary(KMn[Original==0]); summary(KMn[Original==1]) summary(EthnolMn[Original==0]); summary(EthnolMn[Original==1]) mod1 <- glm(Original ~ NaMn + KMn, binomial("logit")) summary(mod1) p.Original <- ifelse(predict(mod1, type="response") <= 0.5, 0, 1) cbind(Original, predict(mod1, type="response"), p.Original) table(Original, p.Original) ### Matrix Form (Note: all n_i=1, m=n) n <- length(Original) X <- matrix(c(rep(1, n), NaMn, KMn), ncol=3) # Set beta0 = logit(P(Orig)) = log(P(Orig) / (1 - P(Orig))) = log(0.5 / 0.5)=0 beta.old <- matrix(c(0,0,0), ncol=1) # Create nx1 vector of pi-hat values pi.hat <- exp(X %*% beta.old) / (1 + exp(X %*% beta.old)) # Create the W matrix, first with vector of pi * (1 - pi), then diagonal matrix w.v <- pi.hat * (1 - pi.hat) W.M <- matrix(rep(0, n^2), ncol=n) for (i in 1:n) W.M[i,i] <- w.v[i,1] # Create vector of predicted values pi-hat mu.Y <- pi.hat num.iter <- 0 # Count number of iterations beta.diff <- 1000 # Set beta.diff as high value Y <- matrix(Original, ncol=1) # Data Y as matrix (1=Original, 0=Fake) while(beta.diff > 0.0000001) { num.iter <- num.iter + 1 g.beta <- t(X) %*% (Y - mu.Y) # dl/dB G.beta <- -t(X) %*% W.M %*% X # d2l/(dBdB') beta.new <- beta.old - solve(G.beta) %*% g.beta # beta.new pi.hat <- exp(X %*% beta.new) / (1 + exp(X %*% beta.new)) w.v <- pi.hat * (1 - pi.hat) W.M <- matrix(rep(0, n^2), ncol=n) for (i in 1:n) W.M[i,i] <- w.v[i,1] mu.Y <- pi.hat beta.diff <- sum((beta.new - beta.old)^2) # sum of squared differences beta.old <- beta.new # Update beta.old } SE.beta <- sqrt(diag(solve(t(X) %*% W.M %*% X))) # SE's = diag(INV(X'WX)) z.beta <- beta.new / SE.beta p.beta <- 2 * (1 - pnorm(abs(z.beta))) LB.beta <- beta.new - 1.96 * SE.beta UB.beta <- beta.new + 1.96 * SE.beta beta.out <- cbind(beta.new, SE.beta, z.beta, p.beta, LB.beta, UB.beta) colnames(beta.out) <- c("Estimate", "Std Err", "z", "Pr>|z|", "LB", "UB") rownames(beta.out) <- c("Intercept", "Na", "K") round(beta.out, 4)