## Bollywood box-office regression exercise (Q.3 - Q.10).
## Reads the box-office data, compares four raw/log scalings of Gross vs
## Budget graphically, then fits and summarises the log-log simple linear
## regression of Gross on Budget.

bolly <- read.csv(
  "http://www.stat.ufl.edu/~winner/data/bollywood_boxoffice.csv"
)
names(bolly)

## Pull out the two columns used below explicitly rather than attach()-ing
## the data frame: attach() puts every column on the search path, where it
## can silently mask (or be masked by) other objects of the same name.
Gross <- bolly$Gross
Budget <- bolly$Budget

## Plot (G vs B), (logG vs B), (G vs logB), (logG vs logB)
## Goal: Linear (straight line) relation with
##       approx constant spread around line
## Plot 4 plots on 1 screen (2 rows, 2 cols); keep the previous graphics
## settings so the layout can be restored once the four panels are drawn.
old_par <- par(mfrow = c(2, 2))

plot(Gross ~ Budget, main = "Y=Gross, X=Budget")
abline(lm(Gross ~ Budget))

plot(log(Gross) ~ Budget, main = "Y=log(Gross), X=Budget")
abline(lm(log(Gross) ~ Budget))

plot(Gross ~ log(Budget), main = "Y=Gross, X=log(Budget)")
abline(lm(Gross ~ log(Budget)))

plot(log(Gross) ~ log(Budget), main = "Y=log(Gross), X=log(Budget)")
abline(lm(log(Gross) ~ log(Budget)))

## Restore the graphics layout that was in effect before the 2x2 grid.
par(old_par)

## Define Y=log(Gross) and X=log(Budget)
Y <- log(Gross)
X <- log(Budget)

## Q.3. Fit regression and obtain summary and save fitted values and resids
bolly.reg <- lm(Y ~ X)
summary(bolly.reg)
yhat <- predict(bolly.reg)
e <- resid(bolly.reg)

## Q.4. Obtain fitted value for "Race 2" which is the 54th film in the dataset
## Print side-by-side using "cbind": yhat_54, e_54, exp(yhat_54)
cbind(yhat[54], e[54], exp(yhat[54]))

## Q.5. Correlation for: raw data, log data, spearman's rank correlation
## Spearman's Correlation obtains correlation of ranks, less affected
## by outlying observations
cor(Budget, Gross)
cor(Y, X)
cor(Budget, Gross, method = "spearman")
cor(Y, X, method = "spearman")

## Q.6. Computed "manually" by definitions (SSR and SSE also obtained from
## anova function), see Q.10.
## The outer parentheses make each assignment also print its value.
(TSS <- sum((Y - mean(Y))^2))     ## sum((y - ybar)^2)
(SSR <- sum((yhat - mean(Y))^2))  ## sum((yhat - ybar)^2)
(SSE <- sum((Y - yhat)^2))        ## equivalent to sum(e^2)

## Q.7. Computed "manually", but also see summary from Q.3.
(rsq1 <- SSR / TSS)
(rsq2 <- 1 - (SSE / TSS))

## Q.8. see summary from Q.3.

## Q.9. be sure to select correct parameter
confint(bolly.reg)

## Q.10. Note: R doesn't print the "Total" line
anova(bolly.reg)