> crabs <- read.table("crab.dat",header=T)
> crabs
color spine width satellites weight
1 3 3 28.3 8 3050
2 4 3 22.5 0 1550
3 2 1 26.0 9 2300
4 4 3 24.8 0 2100
5 4 3 26.0 4 2600
6 3 3 23.8 0 2100
....
173 3 2 24.5 0 2000
> attach(crabs)
> y <- ifelse(satellites>0, 1, 0) # Y = a binary indicator of satellites
> weight <- weight/1000 # weight in kilograms rather than grams
> fit <- glm(y ~ weight, family=binomial(link=logit))
> summary(fit)
Coefficients:
Value Std. Error t value
(Intercept) -3.694725 0.8799734 -4.198678
weight 1.815144 0.3765799 4.820077
Null Deviance: 225.7585 on 172 degrees of freedom
Residual Deviance: 195.7371 on 171 degrees of freedom
Number of Fisher Scoring Iterations: 4
> openlook()
> plot(weight, fitted(fit))
> gam.fit <- gam(y ~ s(weight), family=binomial(link=logit))
> plot(weight, fitted(gam.fit))
> fit.probit <- glm(y ~ weight, family=binomial(link=probit))
> summary(fit.probit)
Coefficients:
Value Std. Error t value
(Intercept) -2.238245 0.5114850 -4.375974
weight 1.099017 0.2150722 5.109989
Residual Deviance: 195.4621 on 171 degrees of freedom
Number of Fisher Scoring Iterations: 4
> fit.linear <- glm(y ~ weight, family=gaussian())
> summary(fit.linear)
Coefficients:
Value Std. Error t value
(Intercept) -0.1448709 0.14715447 -0.984482
weight 0.3227033 0.05876347 5.491563
Null Deviance: 39.78035 on 172 degrees of freedom
Residual Deviance: 33.81652 on 171 degrees of freedom
Number of Fisher Scoring Iterations: 1
> color <- color - 1 # color now takes values 1,2,3,4
> color <- factor(color) # treat color as a factor
> options(contrasts=c("contr.treatment"))
> fit2 <- glm(y ~ weight + color, family=binomial(link=logit))
> summary(fit2)
Coefficients:
Value Std. Error t value
(Intercept) -3.2571667 1.1980822 -2.7186505
weight 1.6928265 0.3886584 4.3555638
color2 0.1448319 0.7363872 0.1966789
color3 -0.1861351 0.7748596 -0.2402179
color4 -1.2694283 0.8487345 -1.4956718
(Dispersion Parameter for Binomial family taken to be 1 )
Null Deviance: 225.7585 on 172 degrees of freedom
Residual Deviance: 188.5423 on 168 degrees of freedom
Number of Fisher Scoring Iterations: 4
> linear <- codes(color) # convert back to integer levels
> fit3 <- glm(y ~ weight + linear, family=binomial(link=logit))
> summary(fit3)
Coefficients:
Value Std. Error t value
(Intercept) -2.0315755 1.1159103 -1.820555
weight 1.6530509 0.3823248 4.323682
linear -0.5141799 0.2233187 -2.302449
(Dispersion Parameter for Binomial family taken to be 1 )
Residual Deviance: 190.2688 on 170 degrees of freedom
Number of Fisher Scoring Iterations: 4
> wt <- c(1.60, 1.90, 2.15, 2.40, 2.65, 2.90, 3.20)
> yes <- c(6,14,16,15,14,20,26)
> no <- c(10,17,14,7,10,1,3)
> data <- matrix(append(yes,no),ncol=2) # matrix of binomial counts
> fit.group <- glm(data ~ wt, family=binomial(link=logit))
> summary(fit.group)
Coefficients:
Value Std. Error t value
(Intercept) -3.541075 0.873955 -4.051781
wt 1.748317 0.372877 4.688723
Null Deviance: 32.99474 on 6 degrees of freedom
Residual Deviance: 6.727199 on 5 degrees of freedom
Number of Fisher Scoring Iterations: 3
> wt; yes/(yes + no); fitted(fit.group)
[1] 1.60 1.90 2.15 2.40 2.65 2.90 3.20
[1] 0.3750000 0.4516129 0.5333333 0.6818182 0.5833333 0.9523810 0.8965517
1 2 3 4 5 6 7
0.3221809 0.4454006 0.5542376 0.6581108 0.7487517 0.8218666 0.8863049
> level <- factor(c(1:7))
> fit.saturated <- glm(data ~ level, family=binomial(link=logit))
> summary(fit.saturated)
Coefficients:
Value Std. Error t value
(Intercept) -0.5108256 0.5163978 -0.9892096
level2 0.3166696 0.6300149 0.5026383
level3 0.6443570 0.6329259 1.0180607
level4 1.2729657 0.6900656 1.8447025
level5 0.8472979 0.6618876 1.2801234
level6 3.5065579 1.1474604 3.0559292
level7 2.6703099 0.7990379 3.3419065
Null Deviance: 32.99474 on 6 degrees of freedom
Residual Deviance: 0 on 0 degrees of freedom
Number of Fisher Scoring Iterations: 4
> fit.null <- glm(data ~ 1, family=binomial(link=logit))
> summary(fit.null)
Coefficients:
Value Std. Error t value
(Intercept) 0.5823958 0.432232 1.347415
Null Deviance: 32.99474 on 6 degrees of freedom
Residual Deviance: 32.99474 on 6 degrees of freedom
Number of Fisher Scoring Iterations: 0
> n <- yes + no
> expected <- fitted(fit.null)*n # estimated expected numbers of "yes"
> predict <- fitted(fit.null) # predicted proportions of "yes"
> yes/n; predict # sample and predicted proportions
[1] 0.3750000 0.4516129 0.5333333 0.6818182 0.5833333 0.9523810 0.8965517
[1] 0.6416185 0.6416185 0.6416185 0.6416185 0.6416185 0.6416185 0.6416185
> pearson <- (yes - expected)/sqrt(n*predict*(1-predict)) # Pearson residual
> yes; expected; pearson
[1] 6 14 16 15 14 20 26
[1] 10.26590 19.89017 19.24855 14.11561 15.39884 13.47399 18.60694
[1] -2.2240218 -2.2061549 -1.2368538 0.3932084 -0.5954598 2.9697983 2.8629530
> q()