# Correlation and simple regression demo ----
#
# Walks through Pearson's r (by hand via z-scores, then with cor() and
# cor.test()), Spearman's rho for non-interval data, and a simple linear
# regression, using a small study-time / exam-score data set.

library(languageR)

# Baayen's demo of different degrees of correlation ----
mvrnormplot.fnc(r = -0.4)
mvrnormplot.fnc(r = 0.1)
mvrnormplot.fnc(r = 0.6)
mvrnormplot.fnc(r = 0.95)

# Hinton's study time data, p. 265 ----
studytimedata <- data.frame(
  study.time = c(40, 43, 18, 10, 25, 33, 27, 17, 30, 47),
  exam.score = c(58, 73, 56, 47, 58, 54, 45, 32, 68, 69)
)
plot(studytimedata[, c("study.time", "exam.score")])

# Calculating Pearson's r via z-scores ----
n <- length(studytimedata$exam.score)

# Mean and SD for study time.
# Use the *population* standard deviation (divide by n), not the sample
# standard deviation that sd() returns (divide by n - 1): averaging the
# z-score products over n only yields Pearson's r with this choice.
mean.studytime <- mean(studytimedata$study.time)
sd.studytime <- sqrt(sum((studytimedata$study.time - mean.studytime)^2) / n)
studytimedata$study.time.z <-
  (studytimedata$study.time - mean.studytime) / sd.studytime

# Mean and (population) SD for exam score.
mean.examscore <- mean(studytimedata$exam.score)
sd.examscore <- sqrt(sum((studytimedata$exam.score - mean.examscore)^2) / n)
studytimedata$exam.score.z <-
  (studytimedata$exam.score - mean.examscore) / sd.examscore

# Raw values above, z-scores below: standardising rescales the axes but
# leaves the shape of the point cloud unchanged.
# par() returns the previous settings, so the layout can be restored once
# the two stacked plots have been drawn.
old_par <- par(mfrow = c(2, 1))
plot(studytimedata[, c("study.time", "exam.score")])
plot(studytimedata[, c("study.time.z", "exam.score.z")])
par(old_par)

# Pearson's r is the mean product of the paired z-scores.
r <- sum(studytimedata$study.time.z * studytimedata$exam.score.z) / n
# Degrees of freedom for testing r against zero.
df <- n - 2

# Or use the cor command
cor(studytimedata$study.time, studytimedata$exam.score)

# Is this correlation significantly different from zero?
cor.test(studytimedata$study.time, studytimedata$exam.score)

# Practical significance: what proportion of the variance in the data is
# explained by the correlation?
r^2

# Spearman's rho ----
# When you don't have interval data, use Spearman's rho, which is
# Pearson's r on the ranks of the values rather than on the values
# themselves.
# Say we have a subjective attention score by the teacher:
studytimedata$subjectivescore <- c(90, 95, 50, 50, 60, 65, 68, 45, 70, 90)

# The subjective scores contain ties (50 and 90 each occur twice), so the
# exact p-value is not requested.
cor.test(
  studytimedata$exam.score,
  studytimedata$subjectivescore,
  method = "spearman",
  exact = FALSE
)
# 	Spearman's rank correlation rho
#
# data:  studytimedata$exam.score and studytimedata$subjectivescore
# S = 35.3205, p-value = 0.007031
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
#       rho
# 0.7859364

# Regression model Y = A + B*X ----
lm(exam.score ~ study.time, data = studytimedata)
# Courses > Analyzing linguistic data: an introductory statistics course > Schedule: words in a haystack