# Multiple possible models for predicting a dependent variable: which one should we choose?
# This is a problem for linear regression. Here are three models for Hinton's study time data.
# Hinton's study time data: one row per student, with study time, exam
# score, and IQ.
# NOTE(review): the `studytimedata <- data.frame(` opener was missing from
# this line; it is restored because the unbalanced closing parenthesis and
# the `data = studytimedata` argument below both require it.
studytimedata <- data.frame(
  study.time = c(40, 43, 18, 10, 25, 33, 27, 17, 30, 47),
  exam.score = c(58, 73, 56, 47, 58, 54, 45, 32, 68, 69),
  iq = c(118, 128, 110, 114, 138, 120, 106, 124, 132, 130)
)

# Three candidate linear models for predicting the exam score.
ml1 <- lm(exam.score ~ study.time, data = studytimedata)
# NOTE(review): ml2 and ml3 are used by the model comparisons later in this
# worksheet (anova(ml1, ml3), AIC(ml2), AIC(ml3)), but their definitions
# were lost. The formulas below are reconstructions: ml3 has to contain ml1,
# since the two are compared as nested models, and ml2 is presumably the
# IQ-only model -- confirm against the original worksheet.
ml2 <- lm(exam.score ~ iq, data = studytimedata)
ml3 <- lm(exam.score ~ study.time + iq, data = studytimedata)
# We can inspect the models using "summary". But what do we conclude from
# this inspection?

# We have the same problem for logistic regression, demonstrated here for
# the problem of predicting the dative alternation -- what influences
# whether we will see "John gave Mary the book" or
# "John gave the book to Mary"?
library(rms)
library(languageR)

# Six candidate logistic regression models for the realization of the
# recipient, fitted on the `dative` data set.
md1 <- lrm(RealizationOfRecipient ~ AnimacyOfRec, data = dative)
md2 <- lrm(RealizationOfRecipient ~ DefinOfRec, data = dative)
md3 <- lrm(RealizationOfRecipient ~ AnimacyOfRec + DefinOfRec, data = dative)
md4 <- lrm(RealizationOfRecipient ~ SemanticClass, data = dative)
md5 <- lrm(
  RealizationOfRecipient ~ SemanticClass + AnimacyOfRec + DefinOfRec,
  data = dative
)
md6 <- lrm(RealizationOfRecipient ~ LengthOfTheme, data = dative)

# Model comparison: linear regression, nested models. Use F-test (ANOVA).
anova(ml1, ml3)

# Model comparison: logistic regression, nested models. Here, we can use
# the likelihood ratio.
# lrm() returns the model deviance in the "deviance" entry.
# This is a vector with two members: deviance for the model with only the
# intercept, and deviance for the model with all its parameters.
# We are interested in the latter.
dev.1 <- md1$deviance[2]
dev.5 <- md5$deviance[2]

# Log likelihood ratio: deviance of simpler model - deviance of more
# complex model.
# Approximately chi-square distributed.
# Null hypothesis: no difference between the two models.
# lrtest() is a function of the library rms that does likelihood ratio tests.
lrtest(md1, md3)
lrtest(md1, md5)

# In non-nested cases, we can use the Akaike Information Criterion, AIC.
#
# >>> Lower AIC is better <<<
#
# Here it is for linear regression
AIC(ml1)
AIC(ml2)
AIC(ml3)

# And here it is for logistic regression
AIC(md1)
AIC(md2)
AIC(md3)
AIC(md4)
AIC(md5)
AIC(md6)
# Courses > R worksheets