# Father/son height regression: simple linear fit, then a quadratic term.
#
# This file was a console transcript (prompts, commands and printed output
# interleaved). It is laid out here as a runnable script: commands are plain
# R statements, and the output recorded in the original session is kept
# verbatim in `#>` comments. Those `#>` values were captured in the original
# session; they are not regenerated by this script.

# Get the data from a file ----
# Presumably this defines the data frame `father.son` used below (columns
# `fheight` and `sheight`, per the summary output) -- confirm against the file.
source("Father_Son.txt")

# Summarize the data set ----
print(summary(father.son))
#>     fheight         sheight
#>  Min.   :59.01   Min.   :58.51
#>  1st Qu.:65.79   1st Qu.:66.93
#>  Median :67.77   Median :68.62
#>  Mean   :67.69   Mean   :68.68
#>  3rd Qu.:69.60   3rd Qu.:70.47
#>  Max.   :75.43   Max.   :78.36

print(dim(father.son))
#> [1] 1078    2

# NOTE(review): attach() is kept so that the bare names `fheight` and
# `sheight` used below (and possibly elsewhere) keep resolving. Passing
# `data = father.son` to lm() would be more robust; confirm nothing else
# depends on the attached names before switching.
attach(father.son)

# Fit a simple linear regression model ----
fit.1 <- lm(sheight ~ fheight)
print(fit.1)
#> Call:
#> lm(formula = sheight ~ fheight)
#>
#> Coefficients:
#> (Intercept)      fheight
#>     33.8866       0.5141

print(summary(fit.1))
#> Call:
#> lm(formula = sheight ~ fheight)
#>
#> Residuals:
#>       Min        1Q    Median        3Q       Max
#> -8.877151 -1.514415 -0.007896  1.628512  8.968479
#>
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 33.88660    1.83235   18.49   <2e-16 ***
#> fheight      0.51409    0.02705   19.01   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 2.437 on 1076 degrees of freedom
#> Multiple R-Squared: 0.2513,     Adjusted R-squared: 0.2506
#> F-statistic: 361.2 on 1 and 1076 DF,  p-value: < 2.2e-16

# Compute correlations ----
print(cor(father.son))
#>           fheight   sheight
#> fheight 1.0000000 0.5013383
#> sheight 0.5013383 1.0000000

# Add a quadratic term ----
f2 <- fheight^2
fit.2 <- lm(sheight ~ fheight + f2)
print(summary(fit.2))
#> Call:
#> lm(formula = sheight ~ fheight + f2)
#>
#> Residuals:
#>      Min       1Q   Median       3Q      Max
#> -8.86743 -1.50728 -0.01277  1.62595  8.97530
#>
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 39.779150  33.264006   1.196    0.232
#> fheight      0.339381   0.985137   0.345    0.731
#> f2           0.001293   0.007287   0.177    0.859
#>
#> Residual standard error: 2.438 on 1075 degrees of freedom
#> Multiple R-Squared: 0.2514,     Adjusted R-squared:  0.25
#> F-statistic: 180.5 on 2 and 1075 DF,  p-value: < 2.2e-16

# Notice that none of the individual coefficients (intercept, fheight, f2)
# is significant, BUT the overall F test is overwhelmingly significant.
# This happens when the predictors are nearly collinear: the coefficient
# estimates become highly correlated and their standard errors are inflated
# (compare the fheight standard error above with the one in fit.1).
# In fact the variables fheight and f2 = fheight^2 are highly correlated;
# see cor(fheight, f2) at the end of this script.

print(anova(fit.2))
#> Analysis of Variance Table
#>
#> Response: sheight
#>             Df Sum Sq Mean Sq  F value Pr(>F)
#> fheight      1 2144.6  2144.6 360.9096 <2e-16 ***
#> f2           1    0.2     0.2   0.0315 0.8592
#> Residuals 1075 6387.8     5.9
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# The ANOVA table is sequential: each row tests the term given the terms
# listed above it. It says f2 adds almost nothing once fheight is already
# in the model, so you probably don't need f2.

print(cor(fheight, f2))
#> [1] 0.9996227