SlideShare a Scribd company logo
Linear Regression
      with
      2: Model selection

     2012-12-10 @HSPH
    Kazuki Yoshida, M.D.
      MPH-CLE student

                           FREEDOM
                           TO	
  KNOW
Group Website is at:
http://rpubs.com/kaz_yos/useR_at_HSPH
Previously in this group
n   Introduction               n   Graphics

n   Reading Data into R (1)    n   Groupwise, continuous

n   Reading Data into R (2)    n   Linear regression

n   Descriptive, continuous

n   Descriptive, categorical

n   Deducer
Menu


n   Linear regression: Model selection
Ingredients
         Statistics                Programming
n   Selection methods      n   step()

                            n   drop1()

                            n   add1()

                            n   leaps::regsubsets()
Open
R Studio
Open the saved
   script that we
 created last time.
See also Linear Regression with R 1 slides
Create full & null models

lm.full <- lm(bwt ~ age + lwt + smoke + ht + ui + ftv.cat +
              race.cat + preterm, data = lbw)

lm.null <- lm(bwt ~ 1, data = lbw)



                 Intercept-only
Compare two models


anova(lm.full, lm.null)

      Model 1   Model 2
Models
             Partial F-test
               Difference in residual SS




    Residual sum of squares
Residual degrees of freedom     Significant
Backward elimination
                    Specify full model



lm.step.bw <- step(lm.full, direction = "backward")




     Final model object
Initial
  AIC       Removing ftv.cat
for full   makes AIC smallest
 model


             Removing age
           makes AIC smallest



             Doing nothing
           makes AIC smallest
Forward selection
           Final model object
                                     Specify null model

lm.step.fw <- step(lm.null,
    scope = ~ age + lwt + smoke + ht + ui + ftv.cat +
            race.cat + preterm,
  direction = "forward")

                                formula for possible
                                     variables
Initial
 AIC
 for                              Adding ui
 null                         makes AIC smallest
model


                               Adding race.cat
                              makes AIC smallest



                                Adding smoke
                              makes AIC smallest

          Still goes on ...
Stepwise selection/elimination
           Final model object
                                     Specify null model

lm.step.both <- step(lm.null,
    scope = ~ age + lwt + smoke + ht + ui + ftv.cat +
            race.cat + preterm,
  direction = "both")

                                formula for possible
                                     variables
Initial
   AIC                                    Adding ui
   for                                makes AIC smallest
   null
  model

                                       Adding race.cat
  Removing is                         makes AIC smallest
also considered


                                        Adding smoke
  Removing is                         makes AIC smallest
also considered

                  Still goes on ...
F-test using drop1()
## age is the least significant by partial F test
drop1(lm.full, test = "F")

## After elimination, ftv.cat is the least significant
drop1(update(lm.full, ~ . -age), test = "F")

## After elimination, preterm is the least significant at p = 0.12.
drop1(update(lm.full, ~ . -age -ftv.cat), test = "F")

## After elimination, all variables are significant at p < 0.1
drop1(update(lm.full, ~ . -age -ftv.cat -preterm), test = "F")

## Show summary for final model
summary(update(lm.full, ~ . -age -ftv.cat -preterm))
Updating models
## Remove age from full model
lm.age.less <- update(lm.full, ~ . -age)


                     all variables(.) minus age

## Adding ui to null model
lm.ui.only <- update(lm.null, ~ . +ui)



                       all variables (.) plus ui
test full model


age least significant
F-test comparing age-in
model to age-out model


remove age, and test




ftv.cat least significant


remove age, ftv.cat
F-test using add1()
## ui is the most significant variable
add1(lm.null, scope = ~ age + lwt + race.cat + smoke + preterm +
ht + ui + ftv.cat, test = "F")

## After inclusion, race.cat is the most significant
add1(update(lm.null, ~ . +ui), scope = ~ age + lwt + race.cat +
smoke + preterm + ht + ui + ftv.cat, test = "F")

## After inclusion, smoke is the most significant
add1(update(lm.null, ~ . +ui +race.cat), scope = ~ age + lwt +
race.cat + smoke + preterm + ht + ui + ftv.cat, test = "F")

## After inclusion, ht is the most significant
add1(update(lm.null, ~ . +ui +race.cat +smoke), scope = ~ age + lwt +
race.cat + smoke + preterm + ht + ui + ftv.cat, test = "F")
...
test null model



 ui most significant
F-test comparing ui-out
 model to ui-in model

        add ui, and test


race.cat most significant


  add ui and race.cat
All-subset regression
using leaps package
library(leaps)

regsubsets.out <-
  regsubsets(bwt ~ age + lwt + smoke + ht + ui + ftv.cat +
             race.cat + preterm,
        data = lbw,
        nbest = 1,
        nvmax = NULL,
        force.in = NULL, force.out = NULL,
        method = "exhaustive")

summary(regsubsets.out)
library(leaps)          Result object


 regsubsets.out <-
   regsubsets(bwt ~ age + lwt + smoke + ht + ui + ftv.cat +
                race.cat + preterm,            Full model
           data = lbw,           How many best models?
  Max
model size nbest = 1,               Forced variables
           nvmax = NULL,
           force.in = NULL, force.out = NULL,
           method = "exhaustive")

summary(regsubsets.out)
Forced variables


Variable combination    Best 1
                       predictor
                        model

                        Best 7
                       predictor
                        model
                        Best 10
                       predictor
                        model
plot(regsubsets.out, scale = "adjr2", main = "Adjusted R^2")
             the higher the better


                                     ~ lwt + smoke + ht + ui
                                      + race.cat + preterm


                                     ~ smoke + ht + ui + race


                                     ~ ui
library(car)
subsets(regsubsets.out, statistic="adjr2", legend = FALSE,
        min.size = 5, main = "Adjusted R^2")




             ~ lwt + smoke + ht + ui
              + race.cat + preterm
subsets(regsubsets.out, statistic="cp", legend = FALSE,
        min.size = 5, main = "Mallow Cp")




       First model for which Mallows'
       Cp is less than the number of
               regressors + 1




                                      ~ lwt + smoke + ht + ui
                                       + race.cat + preterm
Linear regression with R 2

More Related Content

PDF
Linear regression with R 1
PDF
The Ring programming language version 1.3 book - Part 26 of 88
PDF
Monad presentation scala as a category
PDF
List-based Monadic Computations for Dynamically Typed Languages (Python version)
PDF
The Ring programming language version 1.5.4 book - Part 35 of 185
PPTX
FP Day 2011 - Turning to the Functional Side (using C# & F#)
ODP
Type Parameterization
PPTX
Formal methods 5 - Pi calculus
Linear regression with R 1
The Ring programming language version 1.3 book - Part 26 of 88
Monad presentation scala as a category
List-based Monadic Computations for Dynamically Typed Languages (Python version)
The Ring programming language version 1.5.4 book - Part 35 of 185
FP Day 2011 - Turning to the Functional Side (using C# & F#)
Type Parameterization
Formal methods 5 - Pi calculus

What's hot (20)

PDF
The Ring programming language version 1.10 book - Part 45 of 212
PDF
Sequence and Traverse - Part 2
PDF
Real World Haskell: Lecture 5
PPTX
Cs1123 9 strings
PDF
Scala. Introduction to FP. Monads
PDF
The Ring programming language version 1.6 book - Part 37 of 189
PPT
Strings In OOP(Object oriented programming)
PDF
Real World Haskell: Lecture 4
PDF
Real World Haskell: Lecture 2
PPT
(Ai lisp)
PDF
Real World Haskell: Lecture 1
PPT
16 Linear data structures
PPT
Headerfiles
PDF
Real World Haskell: Lecture 6
PPTX
Столпы функционального программирования для адептов ООП, Николай Мозговой
PPT
Lect6 csp
PDF
High-Performance Haskell
PPTX
Lambda Calculus
PDF
The Ring programming language version 1.5.3 book - Part 35 of 184
The Ring programming language version 1.10 book - Part 45 of 212
Sequence and Traverse - Part 2
Real World Haskell: Lecture 5
Cs1123 9 strings
Scala. Introduction to FP. Monads
The Ring programming language version 1.6 book - Part 37 of 189
Strings In OOP(Object oriented programming)
Real World Haskell: Lecture 4
Real World Haskell: Lecture 2
(Ai lisp)
Real World Haskell: Lecture 1
16 Linear data structures
Headerfiles
Real World Haskell: Lecture 6
Столпы функционального программирования для адептов ООП, Николай Мозговой
Lect6 csp
High-Performance Haskell
Lambda Calculus
The Ring programming language version 1.5.3 book - Part 35 of 184
Ad

Viewers also liked (7)

PDF
20130215 Reading data into R
PPTX
Linear Regression using R
PDF
Variable selection for classification and regression using R
PDF
Varianzanalyse
PDF
Weather forecasting technology
PDF
Data analysis of weather forecasting
PDF
Introduction to R for Data Science :: Session 7 [Multiple Linear Regression i...
20130215 Reading data into R
Linear Regression using R
Variable selection for classification and regression using R
Varianzanalyse
Weather forecasting technology
Data analysis of weather forecasting
Introduction to R for Data Science :: Session 7 [Multiple Linear Regression i...
Ad

Similar to Linear regression with R 2 (8)

PDF
R Regression Models with Zelig
PDF
Mixed Effects Models - Fixed Effect Interactions
PDF
Linear Model Selection and Regularization (Article 6 - Practical exercises)
PDF
Stata cheat sheet analysis
PDF
Linear regression in R
PDF
Prospect Identification from a Credit Database using Regression, Decision Tre...
PPTX
Introduction to MARS (1999)
PPTX
Linear regression by Kodebay
R Regression Models with Zelig
Mixed Effects Models - Fixed Effect Interactions
Linear Model Selection and Regularization (Article 6 - Practical exercises)
Stata cheat sheet analysis
Linear regression in R
Prospect Identification from a Credit Database using Regression, Decision Tre...
Introduction to MARS (1999)
Linear regression by Kodebay

More from Kazuki Yoshida (20)

PDF
Graphical explanation of causal mediation analysis
PPTX
Pharmacoepidemiology Lecture: Designing Observational CER to Emulate an RCT
PDF
What is the Expectation Maximization (EM) Algorithm?
PDF
Propensity Score Methods for Comparative Effectiveness Research with Multiple...
PDF
Emacs Key Bindings
PDF
Visual Explanation of Ridge Regression and LASSO
PDF
ENAR 2018 Matching Weights to Simultaneously Compare Three Treatment Groups: ...
PDF
Search and Replacement Techniques in Emacs: avy, swiper, multiple-cursor, ag,...
PDF
Comparison of Privacy-Protecting Analytic and Data-sharing Methods: a Simulat...
PDF
Spacemacs: emacs user's first impression
PDF
Matching Weights to Simultaneously Compare Three Treatment Groups: a Simulati...
PDF
Multiple Imputation: Joint and Conditional Modeling of Missing Data
PDF
20130222 Data structures and manipulation in R
PDF
(Very) Basic graphing with R
PDF
Introduction to Deducer
PDF
Groupwise comparison of continuous data
PDF
Categorical data with R
PDF
Install and Configure R and RStudio
PDF
Reading Data into R REVISED
PDF
Descriptive Statistics with R
Graphical explanation of causal mediation analysis
Pharmacoepidemiology Lecture: Designing Observational CER to Emulate an RCT
What is the Expectation Maximization (EM) Algorithm?
Propensity Score Methods for Comparative Effectiveness Research with Multiple...
Emacs Key Bindings
Visual Explanation of Ridge Regression and LASSO
ENAR 2018 Matching Weights to Simultaneously Compare Three Treatment Groups: ...
Search and Replacement Techniques in Emacs: avy, swiper, multiple-cursor, ag,...
Comparison of Privacy-Protecting Analytic and Data-sharing Methods: a Simulat...
Spacemacs: emacs user's first impression
Matching Weights to Simultaneously Compare Three Treatment Groups: a Simulati...
Multiple Imputation: Joint and Conditional Modeling of Missing Data
20130222 Data structures and manipulation in R
(Very) Basic graphing with R
Introduction to Deducer
Groupwise comparison of continuous data
Categorical data with R
Install and Configure R and RStudio
Reading Data into R REVISED
Descriptive Statistics with R

Recently uploaded (20)

PPTX
GDM (1) (1).pptx small presentation for students
PPTX
Pharma ospi slides which help in ospi learning
PPTX
202450812 BayCHI UCSC-SV 20250812 v17.pptx
PPTX
IMMUNITY IMMUNITY refers to protection against infection, and the immune syst...
PPTX
Introduction-to-Literarature-and-Literary-Studies-week-Prelim-coverage.pptx
PDF
Complications of Minimal Access Surgery at WLH
PDF
FourierSeries-QuestionsWithAnswers(Part-A).pdf
PDF
OBE - B.A.(HON'S) IN INTERIOR ARCHITECTURE -Ar.MOHIUDDIN.pdf
PDF
Classroom Observation Tools for Teachers
PDF
RTP_AR_KS1_Tutor's Guide_English [FOR REPRODUCTION].pdf
PDF
The Lost Whites of Pakistan by Jahanzaib Mughal.pdf
PPTX
Pharmacology of Heart Failure /Pharmacotherapy of CHF
PDF
O7-L3 Supply Chain Operations - ICLT Program
PPTX
school management -TNTEU- B.Ed., Semester II Unit 1.pptx
PDF
grade 11-chemistry_fetena_net_5883.pdf teacher guide for all student
PPTX
Cell Structure & Organelles in detailed.
PPTX
Orientation - ARALprogram of Deped to the Parents.pptx
PDF
Black Hat USA 2025 - Micro ICS Summit - ICS/OT Threat Landscape
PDF
Weekly quiz Compilation Jan -July 25.pdf
PDF
Computing-Curriculum for Schools in Ghana
GDM (1) (1).pptx small presentation for students
Pharma ospi slides which help in ospi learning
202450812 BayCHI UCSC-SV 20250812 v17.pptx
IMMUNITY IMMUNITY refers to protection against infection, and the immune syst...
Introduction-to-Literarature-and-Literary-Studies-week-Prelim-coverage.pptx
Complications of Minimal Access Surgery at WLH
FourierSeries-QuestionsWithAnswers(Part-A).pdf
OBE - B.A.(HON'S) IN INTERIOR ARCHITECTURE -Ar.MOHIUDDIN.pdf
Classroom Observation Tools for Teachers
RTP_AR_KS1_Tutor's Guide_English [FOR REPRODUCTION].pdf
The Lost Whites of Pakistan by Jahanzaib Mughal.pdf
Pharmacology of Heart Failure /Pharmacotherapy of CHF
O7-L3 Supply Chain Operations - ICLT Program
school management -TNTEU- B.Ed., Semester II Unit 1.pptx
grade 11-chemistry_fetena_net_5883.pdf teacher guide for all student
Cell Structure & Organelles in detailed.
Orientation - ARALprogram of Deped to the Parents.pptx
Black Hat USA 2025 - Micro ICS Summit - ICS/OT Threat Landscape
Weekly quiz Compilation Jan -July 25.pdf
Computing-Curriculum for Schools in Ghana

Linear regression with R 2

  • 1. Linear Regression with 2: Model selection 2012-12-10 @HSPH Kazuki Yoshida, M.D. MPH-CLE student FREEDOM TO  KNOW
  • 2. Group Website is at: http://rpubs.com/kaz_yos/useR_at_HSPH
  • 3. Previously in this group n Introduction n Graphics n Reading Data into R (1) n Groupwise, continuous n Reading Data into R (2) n Linear regression n Descriptive, continuous n Descriptive, categorical n Deducer
  • 4. Menu n Linear regression: Model selection
  • 5. Ingredients Statistics Programming n Selection methods n step() n drop1() n add1() n leaps::regsubsets()
  • 7. Open the saved script that we created last time. See also Linear Regression with R 1 slides
  • 8. Create full & null models lm.full <- lm(bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, data = lbw) lm.null <- lm(bwt ~ 1, data = lbw) Intercept-only
  • 9. Compare two models anova(lm.full, lm.null) Model 1 Model 2
  • 10. Models Partial F-test Difference in residual SS Residual sum of squares Residual degree of freedom Significant
  • 11. Backward elimination Specify full model lm.step.bw <- step(lm.full, direction = "backward") Final model object
  • 12. Initial AIC Removing ftv.cat for full makes AIC smallest model Removing age makes AIC smallest Doing nothing makes AIC smallest
  • 13. Forward selection Final model object Specify null model lm.step.fw <- step(lm.null, scope = ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, direction = "forward") formula for possible variables
  • 14. Initial AIC for Adding ui null makes AIC smallest model Adding race.cat makes AIC smallest Adding smoke makes AIC smallest Still goes on ...
  • 15. Stepwise selection/elimination Final model object Specify null model lm.step.both <- step(lm.null, scope = ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, direction = "both") formula for possible variables
  • 16. Initial AIC Adding ui for makes AIC smallest null model Adding race.cat Removing is makes AIC smallest also considered Adding smoke Removing is makes AIC smallest also considered Still goes on ...
  • 17. F-test using drop1() ## age is the least significant by partial F test drop1(lm.full, test = "F") ## After elimination, ftv.cat is the least significant drop1(update(lm.full, ~ . -age), test = "F") ## After elimination, preterm is the least significant at p = 0.12. drop1(update(lm.full, ~ . -age -ftv.cat), test = "F") ## After elimination, all variables are significant at p < 0.1 drop1(update(lm.full, ~ . -age -ftv.cat -preterm), test = "F") ## Show summary for final model summary(update(lm.full, ~ . -age -ftv.cat -preterm))
  • 18. Updating models ## Remove age from full model lm.age.less <- update(lm.full, ~ . -age) all variables(.) minus age ## Adding ui to null model lm.ui.only <- update(lm.null, ~ . +ui) all variables (.) plus ui
  • 19. test full model age least significant F-test comparing age-in model to age-out model remove age, and test ftv.cat least significant remove age, ftv.cat
  • 20. F-test using add1() ## ui is the most significant variable add1(lm.null, scope = ~ age + lwt + race.cat + smoke + preterm + ht + ui + ftv.cat, test = "F") ## After inclusion, race.cat is the most significant add1(update(lm.null, ~ . +ui), scope = ~ age + lwt + race.cat + smoke + preterm + ht + ui + ftv.cat, test = "F") ## After inclusion, smoke is the most significant add1(update(lm.null, ~ . +ui +race.cat), scope = ~ age + lwt + race.cat + smoke + preterm + ht + ui + ftv.cat, test = "F") ## After inclusion, ht is the most significant add1(update(lm.null, ~ . +ui +race.cat +smoke), scope = ~ age + lwt + race.cat + smoke + preterm + ht + ui + ftv.cat, test = "F") ...
  • 21. test null model ui most significant F-test comparing ui-out model to ui-in model add ui, and test race.cat most significant add ui and race.cat
  • 23. library(leaps) regsubsets.out <- regsubsets(bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, data = lbw, nbest = 1, nvmax = NULL, force.in = NULL, force.out = NULL, method = "exhaustive") summary(regsubsets.out)
  • 24. library(leaps) Result object regsubsets.out <- regsubsets(bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, Full model data = lbw, How many best models? Max model size nbest = 1, Forced variables nvmax = NULL, force.in = NULL, force.out = NULL, method = "exhaustive") summary(regsubsets.out)
  • 25. Forced variables Variable combination Best 1 predictor model Best 7 predictor model Best 10 predictor model
  • 26. plot(regsubsets.out, scale = "adjr2", main = "Adjusted R^2") the higher the better ~ lwt + smoke + ht + ui + race.cat + preterm ~ smoke + ht + ui + race ~ ui
  • 27. library(car) subsets(regsubsets.out, statistic="adjr2", legend = FALSE, min.size = 5, main = "Adjusted R^2") ~ lwt + smoke + ht + ui + race.cat + preterm
  • 28. subsets(regsubsets.out, statistic="cp", legend = FALSE, min.size = 5, main = "Mallow Cp") First model for which Mallow Cp is less than number of regressors + 1 ~ lwt + smoke + ht + ui + race.cat + preterm