> crabs <- read.table("crab.dat",header=T)
> crabs
    color spine width satellites weight
1       3     3  28.3          8   3050
2       4     3  22.5          0   1550
3       2     1  26.0          9   2300
4       4     3  24.8          0   2100
5       4     3  26.0          4   2600
6       3     3  23.8          0   2100
....
173     3     2  24.5          0   2000
> attach(crabs)
> y <- ifelse(satellites>0, 1, 0)   # Y = a binary indicator of satellites
> weight <- weight/1000             # weight in kilograms rather than grams
> fit <- glm(y ~ weight, family=binomial(link=logit))
> summary(fit)
Coefficients:
                Value Std. Error   t value
(Intercept) -3.694725  0.8799734 -4.198678
     weight  1.815144  0.3765799  4.820077
Null Deviance: 225.7585 on 172 degrees of freedom
Residual Deviance: 195.7371 on 171 degrees of freedom
Number of Fisher Scoring Iterations: 4
> openlook()
> plot(weight, fitted(fit))
> gam.fit <- gam(y ~ s(weight), family=binomial(link=logit))
> plot(weight, fitted(gam.fit))
> fit.probit <- glm(y ~ weight, family=binomial(link=probit))
> summary(fit.probit)
Coefficients:
                Value Std. Error   t value
(Intercept) -2.238245  0.5114850 -4.375974
     weight  1.099017  0.2150722  5.109989
Residual Deviance: 195.4621 on 171 degrees of freedom
Number of Fisher Scoring Iterations: 4
> fit.linear <- glm(y ~ weight, family=gaussian())
> summary(fit.linear)
Coefficients:
                 Value Std. Error   t value
(Intercept) -0.1448709 0.14715447 -0.984482
     weight  0.3227033 0.05876347  5.491563
Null Deviance: 39.78035 on 172 degrees of freedom
Residual Deviance: 33.81652 on 171 degrees of freedom
Number of Fisher Scoring Iterations: 1
> color <- color - 1        # color now takes values 1,2,3,4
> color <- factor(color)    # treat color as a factor
> options(contrasts=c("contr.treatment"))
> fit2 <- glm(y ~ weight + color, family=binomial(link=logit))
> summary(fit2)
Coefficients:
                 Value Std. Error    t value
(Intercept) -3.2571667  1.1980822 -2.7186505
     weight  1.6928265  0.3886584  4.3555638
     color2  0.1448319  0.7363872  0.1966789
     color3 -0.1861351  0.7748596 -0.2402179
     color4 -1.2694283  0.8487345 -1.4956718
(Dispersion Parameter for Binomial family taken to be 1 )
Null Deviance: 225.7585 on 172 degrees of freedom
Residual Deviance: 188.5423 on 168 degrees of freedom
Number of Fisher Scoring Iterations: 4
> linear <- codes(color)    # convert back to integer levels
> fit3 <- glm(y ~ weight + linear, family=binomial(link=logit))
> summary(fit3)
Coefficients:
                 Value Std. Error   t value
(Intercept) -2.0315755  1.1159103 -1.820555
     weight  1.6530509  0.3823248  4.323682
     linear -0.5141799  0.2233187 -2.302449
(Dispersion Parameter for Binomial family taken to be 1 )
Residual Deviance: 190.2688 on 170 degrees of freedom
Number of Fisher Scoring Iterations: 4
> wt <- c(1.60, 1.90, 2.15, 2.40, 2.65, 2.90, 3.20)
> yes <- c(6,14,16,15,14,20,26)
> no <- c(10,17,14,7,10,1,3)
> data <- matrix(append(yes,no),ncol=2)    # matrix of binomial counts
> fit.group <- glm(data ~ wt, family=binomial(link=logit))
> summary(fit.group)
Coefficients:
                Value Std. Error   t value
(Intercept) -3.541075   0.873955 -4.051781
         wt  1.748317   0.372877  4.688723
Null Deviance: 32.99474 on 6 degrees of freedom
Residual Deviance: 6.727199 on 5 degrees of freedom
Number of Fisher Scoring Iterations: 3
> wt; yes/(yes + no); fitted(fit.group)
[1] 1.60 1.90 2.15 2.40 2.65 2.90 3.20
[1] 0.3750000 0.4516129 0.5333333 0.6818182 0.5833333 0.9523810 0.8965517
        1         2         3         4         5         6         7
0.3221809 0.4454006 0.5542376 0.6581108 0.7487517 0.8218666 0.8863049
> level <- factor(c(1:7))
> fit.saturated <- glm(data ~ level, family=binomial(link=logit))
> summary(fit.saturated)
Coefficients:
                 Value Std. Error    t value
(Intercept) -0.5108256  0.5163978 -0.9892096
     level2  0.3166696  0.6300149  0.5026383
     level3  0.6443570  0.6329259  1.0180607
     level4  1.2729657  0.6900656  1.8447025
     level5  0.8472979  0.6618876  1.2801234
     level6  3.5065579  1.1474604  3.0559292
     level7  2.6703099  0.7990379  3.3419065
Null Deviance: 32.99474 on 6 degrees of freedom
Residual Deviance: 0 on 0 degrees of freedom
Number of Fisher Scoring Iterations: 4
> fit.null <- glm(data ~ 1, family=binomial(link=logit))
> summary(fit.null)
Coefficients:
                Value Std. Error  t value
(Intercept) 0.5823958   0.432232 1.347415
Null Deviance: 32.99474 on 6 degrees of freedom
Residual Deviance: 32.99474 on 6 degrees of freedom
Number of Fisher Scoring Iterations: 0
> n <- yes + no
> expected <- fitted(fit.null)*n    # estimated expected numbers of "yes"
> predict <- fitted(fit.null)       # predicted proportions of "yes"
> yes/n; predict                    # sample and predicted proportions
[1] 0.3750000 0.4516129 0.5333333 0.6818182 0.5833333 0.9523810 0.8965517
[1] 0.6416185 0.6416185 0.6416185 0.6416185 0.6416185 0.6416185 0.6416185
> pearson <- (yes - expected)/sqrt(n*predict*(1-predict))    # Pearson residual
> yes; expected; pearson
[1]  6 14 16 15 14 20 26
[1] 10.26590 19.89017 19.24855 14.11561 15.39884 13.47399 18.60694
[1] -2.2240218 -2.2061549 -1.2368538  0.3932084 -0.5954598  2.9697983  2.8629530
> q()