> crabs <- read.table("crab.dat",header=T)
> crabs
color spine width satellites weight
1 3 3 28.3 8 3050
2 4 3 22.5 0 1550
3 2 1 26.0 9 2300
4 4 3 24.8 0 2100
5 4 3 26.0 4 2600
6 3 3 23.8 0 2100
....
173 3 2 24.5 0 2000
> attach(crabs)
> y <- ifelse(satellites>0, 1, 0) # Y = a binary indicator of satellites
> weight <- weight/1000 # weight in kilograms rather than grams
> fit <- glm(y ~ weight, family=binomial(link=logit))
> summary(fit)
Coefficients:
Value Std. Error t value
(Intercept) -3.694725 0.8799734 -4.198678
weight 1.815144 0.3765799 4.820077
Null Deviance: 225.7585 on 172 degrees of freedom
Residual Deviance: 195.7371 on 171 degrees of freedom
Number of Fisher Scoring Iterations: 4
> openlook()
> plot(weight, fitted(fit))
> gam.fit <- gam(y ~ s(weight), family=binomial(link=logit))
> plot(weight, fitted(gam.fit))
> fit.probit <- glm(y ~ weight, family=binomial(link=probit))
> summary(fit.probit)
Coefficients:
Value Std. Error t value
(Intercept) -2.238245 0.5114850 -4.375974
weight 1.099017 0.2150722 5.109989
Residual Deviance: 195.4621 on 171 degrees of freedom
Number of Fisher Scoring Iterations: 4
> fit.linear <- glm(y ~ weight, family=gaussian())
> summary(fit.linear)
Coefficients:
Value Std. Error t value
(Intercept) -0.1448709 0.14715447 -0.984482
weight 0.3227033 0.05876347 5.491563
Null Deviance: 39.78035 on 172 degrees of freedom
Residual Deviance: 33.81652 on 171 degrees of freedom
Number of Fisher Scoring Iterations: 1
> color <- color - 1 # color now takes values 1,2,3,4
> color <- factor(color) # treat color as a factor
> options(contrasts=c("contr.treatment"))
> fit2 <- glm(y ~ weight + color, family=binomial(link=logit))
> summary(fit2)
Coefficients:
Value Std. Error t value
(Intercept) -3.2571667 1.1980822 -2.7186505
weight 1.6928265 0.3886584 4.3555638
color2 0.1448319 0.7363872 0.1966789
color3 -0.1861351 0.7748596 -0.2402179
color4 -1.2694283 0.8487345 -1.4956718
(Dispersion Parameter for Binomial family taken to be 1 )
Null Deviance: 225.7585 on 172 degrees of freedom
Residual Deviance: 188.5423 on 168 degrees of freedom
Number of Fisher Scoring Iterations: 4
> linear <- codes(color) # convert back to integer levels
> fit3 <- glm(y ~ weight + linear, family=binomial(link=logit))
> summary(fit3)
Coefficients:
Value Std. Error t value
(Intercept) -2.0315755 1.1159103 -1.820555
weight 1.6530509 0.3823248 4.323682
linear -0.5141799 0.2233187 -2.302449
(Dispersion Parameter for Binomial family taken to be 1 )
Residual Deviance: 190.2688 on 170 degrees of freedom
Number of Fisher Scoring Iterations: 4
> wt <- c(1.60, 1.90, 2.15, 2.40, 2.65, 2.90, 3.20)
> yes <- c(6,14,16,15,14,20,26)
> no <- c(10,17,14,7,10,1,3)
> data <- matrix(append(yes,no),ncol=2) # matrix of binomial counts
> fit.group <- glm(data ~ wt, family=binomial(link=logit))
> summary(fit.group)
Coefficients:
Value Std. Error t value
(Intercept) -3.541075 0.873955 -4.051781
wt 1.748317 0.372877 4.688723
Null Deviance: 32.99474 on 6 degrees of freedom
Residual Deviance: 6.727199 on 5 degrees of freedom
Number of Fisher Scoring Iterations: 3
> wt; yes/(yes + no); fitted(fit.group)
[1] 1.60 1.90 2.15 2.40 2.65 2.90 3.20
[1] 0.3750000 0.4516129 0.5333333 0.6818182 0.5833333 0.9523810 0.8965517
1 2 3 4 5 6 7
0.3221809 0.4454006 0.5542376 0.6581108 0.7487517 0.8218666 0.8863049
> level <- factor(c(1:7))
> fit.saturated <- glm(data ~ level, family=binomial(link=logit))
> summary(fit.saturated)
Coefficients:
Value Std. Error t value
(Intercept) -0.5108256 0.5163978 -0.9892096
level2 0.3166696 0.6300149 0.5026383
level3 0.6443570 0.6329259 1.0180607
level4 1.2729657 0.6900656 1.8447025
level5 0.8472979 0.6618876 1.2801234
level6 3.5065579 1.1474604 3.0559292
level7 2.6703099 0.7990379 3.3419065
Null Deviance: 32.99474 on 6 degrees of freedom
Residual Deviance: 0 on 0 degrees of freedom
Number of Fisher Scoring Iterations: 4
> fit.null <- glm(data ~ 1, family=binomial(link=logit))
> summary(fit.null)
Coefficients:
Value Std. Error t value
(Intercept) 0.5823958 0.432232 1.347415
Null Deviance: 32.99474 on 6 degrees of freedom
Residual Deviance: 32.99474 on 6 degrees of freedom
Number of Fisher Scoring Iterations: 0
> n <- yes + no
> expected <- fitted(fit.null)*n # estimated expected numbers of "yes"
> predict <- fitted(fit.null) # predicted proportions of "yes"
> yes/n; predict # sample and predicted proportions
[1] 0.3750000 0.4516129 0.5333333 0.6818182 0.5833333 0.9523810 0.8965517
[1] 0.6416185 0.6416185 0.6416185 0.6416185 0.6416185 0.6416185 0.6416185
> pearson <- (yes - expected)/sqrt(n*predict*(1-predict)) # Pearson residual
> yes; expected; pearson
[1] 6 14 16 15 14 20 26
[1] 10.26590 19.89017 19.24855 14.11561 15.39884 13.47399 18.60694
[1] -2.2240218 -2.2061549 -1.2368538 0.3932084 -0.5954598 2.9697983 2.8629530
> q()