### Example 1 - Pharmaceutical Suspension Experiment # Read in data, view the first 6 rows data.pharm <- read.csv( "https://www.stat.ufl.edu/~winner/data/std_intensity.csv") head(data.pharm) # plot Y=peakInt (Peak Intensity) vs X=susCon (Suspension Concentration) plot(data.pharm$peakInt ~ data.pharm$susCon, pch=16, cex=0.7, col="red", xlab="Suspension Concentration", ylab="Peak Intensity") # Fit simple linear regression model: peakInt = b0 + b1*susCon + e mod.pharm <- lm(peakInt ~ susCon, data=data.pharm) summary(mod.pharm) # Add fitted equation to plot abline(mod.pharm, col="blue", lwd=1.5) # Obtain grid of susCon levels for plotting CI and PI for fitted values susCon.grid <- 99:175 # Obtain Confidence Interval for population mean peakInt at each susCon.grid CI.grid <- predict(mod.pharm, list(susCon=susCon.grid), interval="confidence") # Obtain Prediction Interval for individual peakInt at each susCon.grid PI.grid <- predict(mod.pharm, list(susCon=susCon.grid), interval="prediction") # Print out head of CI.grid head(CI.grid) # Lower Bound in column 2, upper bound in column 3 (same for PI) # Add CI for the mean response at each X as lines lines(susCon.grid, CI.grid[,2], col="green3", lwd=1.5) lines(susCon.grid, CI.grid[,3], col="green3", lwd=1.5) # Add PI for individual respose at each X as lines lines(susCon.grid, PI.grid[,2], col="purple", lwd=1.5) lines(susCon.grid, PI.grid[,3], col="purple", lwd=1.5) # Add legend in topleft corner legend("topleft", c("data", "yhat", "95% Conf Int", "95% Pred Int"), col=c("red", "blue", "green3", "purple"), lwd=c(NA,1.5,1.5,1.5), pch=c(16,NA,NA,NA)) ## Get residual plots for regression fit par(mfrow=c(2,2)) plot(mod.pharm) par(mfrow=c(1,1)) rm(list=ls(all=TRUE)) ## Example 2 - Airline Stocks ## Read in monthy airline stock orices and adjusted jet fuel price 6/07-7/22 ajf <- read.csv("https://www.stat.ufl.edu/~winner/data/airline_jetfuel.csv") attach(ajf); names(ajf) # Create simple returns for airline stocks for months 2:n and remove first JF Usrs months 1:(n-1) in denominator of simple returns n.month <- nrow(ajf) JF.pc <- JF.adj[-1] AAL.pc <- 100*diff(AAL.AC)/AAL.AC[-n.month] DAL.pc <- 100*diff(DAL.AC)/DAL.AC[-n.month] UAL.pc <- 100*diff(UAL.AC)/UAL.AC[-n.month] # Create data frame, plot the pairs of variables, and compute correlations ajf.pc <- data.frame(JF.pc, AAL.pc, DAL.pc, UAL.pc) pairs(ajf.pc) cor(ajf.pc) detach(ajf) rm(list=ls(all=TRUE)) ### Example 4 - WNBA Team Points and Vegas Prediction wnba <- read.csv( "http://users.stat.ufl.edu/~winner/data/wnba_20102019_ATS_OU.csv") attach(wnba); names(wnba) teamPred <- (OU-tmSprd)/2 ## tmSprd is negative if favored N.games <- length(teamPred) mod.wnba <- lm(teamPts ~ teamPred) summary(mod.wnba) # win.graph(height=5.5, width=7.0) plot(jitter(teamPts,0.5) ~ teamPred, pch=16, cex=.6) abline(mod.wnba, col="red", lwd=2) abline(0,1, col="blue", lwd=2) legend("bottomright", c("Data", "OLS", "E{Y}=X"), pch=c(16,NA,NA), col=c("black", "red", "blue"), lwd=2) detach(wnba) rm(list=ls(all=TRUE)) ### Example 6 - Temperatures Trends in Time ### Read in all city data data_all <- read.csv( "http://www.stat.ufl.edu/~winner/data/weather_NOAA/allcities_annual_19602020.csv") head(data_all) ### You will have 6 cities (These are read in alphabetical order) faa <- c("BOS","DFW","LAX","MIA", "ORD","SEA") ### Select the cities in your sample city_sample <- data_all[data_all$city_FAA %in% faa,] head(city_sample); tail(city_sample) attach(city_sample) year_seq <- rep(1960:2020, times=6) city_seq <- rep(1:6, each=61) city_names <- c("Boston","Dallas","Los Angeles", "Miami","Chicago","Seattle") ## Make plots for cities 1-3 with ols (trend) lines added # win.graph(height=5.5, width=7.0) # quartz(height=5.5, width=7.0) par(mfrow=c(2,3)) # Plot will have 2 rows and 3 columns # This strange sequence plots cities from East-West and North to South for (i1 in c(6,5,1,3,2,4)) { plot(aveTemp[city_seq == i1] ~ year_seq[city_seq == i1], main=paste("City = ", city_names[i1]), xlab="year", ylab="average annual temperature", col=(i1+1), pch=16) abline(lm(aveTemp[city_seq == i1] ~ year_seq[city_seq == i1]), col=(i1+1), lwd=2) } par(mfrow=c(1,1)) # Reset to 1 plot per page detach(city_sample) rm(list=ls(all=TRUE))