### R code: more linear regression

 ################## Multiple predictors
# Toy data: study time, exam score and IQ for ten participants.
studytimedata <- data.frame(
  participant = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  study.time = c(40, 43, 18, 10, 25, 33, 27, 17, 30, 47),
  exam.score = c(58, 73, 56, 47, 58, 54, 45, 32, 68, 69),
  iq = c(118, 128, 110, 114, 138, 120, 106, 124, 132, 130)
)

# Partial correlation:
# First work out whether IQ is correlated with both study time and exam score
# (which it is).
cor(studytimedata$study.time, studytimedata$iq)
cor(studytimedata$exam.score, studytimedata$iq)

# To work out the amount of variance in exam score that is predicted by study
# time after IQ has been taken out of the picture, first take IQ out of the
# picture by predicting both study time and exam score from IQ and using the
# residuals: we want the variance in exam score and study time that is not
# predicted by IQ.
lm.iq.st <- lm(study.time ~ iq, data = studytimedata)
lm.iq.es <- lm(exam.score ~ iq, data = studytimedata)
res.iq.st <- residuals(lm.iq.st)
res.iq.es <- residuals(lm.iq.es)

# The correlation of the two residual vectors is the partial correlation of
# study time and exam score, controlling for IQ.
cor(res.iq.st, res.iq.es)

###################### Categorical predictors
# What we do *not* want to do is to encode each category
# (red, blue, yellow, green cereal package) by a consecutive number,
# as that suggests a linear relation between package types that does not exist.
# Note: `sold` is drawn at random, so the numbers differ from run to run.
cereal <- data.frame(
  package = rep(c(1, 2, 3, 4), each = 5),
  sold = round(runif(20, min = 10, max = 30))
)
plot(cereal$package, cereal$sold)

# Hallucinated linear relation based on progression of package types.
# Fit the model once and reuse it for both the summary and the plot;
# abline() draws the regression line itself, no lines() wrapper is needed.
lm.package <- lm(sold ~ package, data = cereal)
summary(lm.package)
abline(lm.package)

# Instead, code a categorical variable as categorical to get informative
# results:
cereal.color <- data.frame(
  package = c(1, 2, 3, 4),
  color = c("red", "blue", "green", "yellow")
)
cereal <- merge(cereal, cereal.color, by = "package")
summary(lm(sold ~ color, data = cereal))
# Note on how to read this: One category's mean value becomes the intercept
# (here: blue, because R uses the alphabetically first level as the reference).
# The "slope" on all other categories is the difference from the mean value
# for blue packages.

# Study time data with fake categorical values
# Two categories: the intercept is the mean score of the "no" group, the
# coefficient for "yes" is the difference between the two group means.
studytimedata.cat <- data.frame(
  participant = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  studied = c("yes", "yes", "no", "no", "no", "yes", "yes", "no", "yes", "no"),
  exam.score = c(58, 73, 36, 27, 48, 64, 85, 32, 68, 49)
)
summary(lm(exam.score ~ studied, data = studytimedata.cat))

# Three categories: "always" is the reference level; the other two
# coefficients are differences from the mean score of the "always" group.
studytimedata.3cat <- data.frame(
  participant = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  absencefromclass = c(
    "never", "never", "always", "always", "occasionally",
    "never", "occasionally", "never", "occasionally", "never"
  ),
  exam.score = c(58, 73, 36, 27, 48, 64, 85, 32, 68, 49)
)
summary(lm(exam.score ~ absencefromclass, data = studytimedata.3cat))