# R code: Correlation and linear regression

#############
# Baayen's demo of different degrees of correlation
# Each call below passes a different correlation value r to mvrnormplot.fnc():
# a moderate negative value, a value near zero, a moderate positive value and
# a very strong positive value.
# NOTE(review): mvrnormplot.fnc is presumably provided by the languageR package
# loaded here and presumably draws a scatterplot of simulated data with the
# requested correlation -- confirm against the languageR documentation.
library(languageR)
mvrnormplot.fnc(r = -0.4)
mvrnormplot.fnc(r = 0.1)
mvrnormplot.fnc(r = 0.6)
mvrnormplot.fnc(r = 0.95)

###############
# Hinton's study time data p.265
# Ten paired observations of study time and exam score.

studytimedata <- data.frame(
  study.time = c(40, 43, 18, 10, 25, 33, 27, 17, 30, 47),
  exam.score = c(58, 73, 56, 47, 58, 54, 45, 32, 68, 69)
)

# Scatterplot of the raw scores (study time on x, exam score on y)
plot(studytimedata[, 1:2])

# Calculating Pearson's r via z-scores:
#   r = sum(z_x * z_y) / n

n <- length(studytimedata$exam.score)

# Mean study time
mean.studytime <- mean(studytimedata$study.time)
# Standard deviation: use the population SD (divide by n), not the sample SD
# (divide by n - 1, which is what sd() returns), because the z-score formula
# for r below also divides by n.
sd.studytime <- sqrt(sum((studytimedata$study.time - mean.studytime)^2) / n)

studytimedata$study.time.z <-
  (studytimedata$study.time - mean.studytime) / sd.studytime

# Mean and population SD for exam score
mean.examscore <- mean(studytimedata$exam.score)
sd.examscore <- sqrt(sum((studytimedata$exam.score - mean.examscore)^2) / n)

studytimedata$exam.score.z <-
  (studytimedata$exam.score - mean.examscore) / sd.examscore

# Stack the raw-score plot above the z-score plot: standardising changes the
# axes but not the shape of the point cloud.
# par() returns the previous settings, so the layout is restored afterwards
# and later plots are not silently squeezed into a 2 x 1 grid.
old.par <- par(mfrow = c(2, 1))
plot(studytimedata[, c("study.time", "exam.score")])
plot(studytimedata[, c("study.time.z", "exam.score.z")])
par(old.par)

# Pearson's r: the mean cross-product of the paired z-scores
r <- sum(studytimedata$study.time.z * studytimedata$exam.score.z) / n

# n - 2: degrees of freedom for a significance test of Pearson's r
df <- n - 2

# Built-in alternative: cor() computes Pearson's r directly from the raw scores

cor(x = studytimedata$study.time, y = studytimedata$exam.score)

# Significance test: does the correlation differ from zero?
cor.test(x = studytimedata$study.time, y = studytimedata$exam.score)


# Practical significance: r squared is the proportion of the variance in the
# data that the correlation accounts for (multiply by 100 for a percentage).

r^2

# When you don't have interval data,
# use Spearman's rho, which is
# Pearson's r on the ranks of the values
# rather than on the values themselves

# Say we have a subjective attention score given by the teacher
studytimedata$subjectivescore <- c(90, 95, 50, 50, 60, 65, 68, 45, 70, 90)

# exact = FALSE asks for the asymptotic p-value. Both variables contain tied
# values (58 twice in exam.score; 50 and 90 twice in subjectivescore), and R
# cannot compute an exact p-value in the presence of ties.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
cor.test(studytimedata$exam.score, studytimedata$subjectivescore,
         method = "spearman", exact = FALSE)

# Recorded output of the call above:
#
#    Spearman's rank correlation rho
#
# data:  studytimedata$exam.score and studytimedata$subjectivescore
# S = 35.3205, p-value = 0.007031
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
#     rho
# 0.7859364


# Regression model Y = A + B*X
# Y is exam.score, X is study.time; A is the intercept and B the slope.
# The columns are named in the formula and the data frame is passed via
# `data =` rather than writing studytimedata$... inside the formula. The
# estimates are the same, but the coefficients get clean labels and the fitted
# model can be used with predict(newdata = ...).

lm(exam.score ~ study.time, data = studytimedata)


# Comments