/*--------------------------------------------------------------------
  Pseudo lack-of-fit check for the regression of ave_fare on distance.

  Steps:
    1. Read city pair, average fare and distance from the raw file and
       bin each record into one of 25 distance groups (distgrp).
    2. Fit ave_fare = distance with PROC REG and keep the predicted
       value (yhat) for every record.
    3. Compute, per distance group, the mean fare, mean predicted fare
       and mean distance.
    4. For every record, split its deviation from the group's mean
       predicted fare (res) into a within-group part (pe) and a
       group-mean-versus-fit part (lf); print the squared parts with
       their totals.
    5. Plot group mean predicted fare (joined line) and group mean
       fare (symbols) against group mean distance.
--------------------------------------------------------------------*/
options ps=54 ls=76;

/* 1. Read the raw data and assign distance groups. */
data q4_2002;
    infile 'A:\q4_2002.out';
    input city1 $ city2 $ ave_fare distance;

    /* SAS orders a missing numeric below every number, so a record with
       missing distance would satisfy "distance <= 250" and land in
       group 1, and its missing yhat (or a missing ave_fare) would turn
       that group's sums into missing. Such records cannot contribute to
       the fit, so they are dropped here. */
    if missing(distance) or missing(ave_fare) then delete;

    /* Group k (k = 1..24) has upper bound 150 + 100*k, i.e.
       250, 350, ..., 2550; everything above 2550 is group 25. */
    distgrp = 1;
    do while (distgrp < 25 and distance > 150 + 100 * distgrp);
        distgrp = distgrp + 1;
    end;
run;

/* 2. Simple linear regression; airout = input records plus yhat. */
proc reg data=q4_2002;
    model ave_fare = distance;
    output out=airout p=yhat;
run;
quit;

proc sort data=airout;
    by distgrp;
run;

/* 3. One row per distance group holding the group means. */
data q4_2002a;
    set airout;
    by distgrp;
    retain sumgrpy sumgrpyhat sumgrpx numgrp;

    /* Restart the accumulators at the start of each group. */
    if first.distgrp then do;
        sumgrpy    = 0;
        sumgrpyhat = 0;
        sumgrpx    = 0;
        numgrp     = 0;
    end;

    sumgrpy    = sumgrpy    + ave_fare;
    sumgrpyhat = sumgrpyhat + yhat;
    sumgrpx    = sumgrpx    + distance;
    numgrp     = numgrp     + 1;

    if last.distgrp then do;
        meangrpy    = sumgrpy    / numgrp;
        meangrpyhat = sumgrpyhat / numgrp;
        meangrpx    = sumgrpx    / numgrp;
        output;
    end;

    keep distgrp meangrpy meangrpyhat meangrpx;
run;

/* 4. Attach the group means to every record by an explicit key match
      (one-to-many on distgrp) instead of relying on row position, then
      compute the deviation components. */
data q4_2002b;
    merge q4_2002a airout;
    by distgrp;

    res = ave_fare - meangrpyhat;   /* record vs. group mean fitted value   */
    pe  = ave_fare - meangrpy;      /* record vs. group mean fare           */
    lf  = meangrpy - meangrpyhat;   /* group mean fare vs. group mean fit   */
    pe2 = pe**2;
    lf2 = lf**2;
run;

proc print data=q4_2002b;
    var res pe2 lf2;
    sum pe2 lf2;
run;

/* 5. One plotting point per group. q4_2002b is already in distgrp order
      because it was built with a BY-merge, so no extra sort is needed. */
data distgrpplt;
    set q4_2002b;
    by distgrp;
    if first.distgrp;
run;

symbol1 c=black l=1 i=join;
symbol2 c=black v=:;

proc gplot data=distgrpplt;
    plot meangrpyhat*meangrpx=1
         meangrpy*meangrpx=2 / overlay frame;
    title 'Pseudo-Test for Lack-of-Fit';
run;
quit;