Courses‎ > ‎R worksheets‎ > ‎

### R code: ANOVA

`# Hinton's general ANOVA example (chapter 11):`
`# three conditions with six scores each, stored one column per condition.`

`h = data.frame(`
`cond.1 = c(5, 6, 7, 5, 3, 4),`
`cond.2 = c(11, 10, 9, 11, 9, 10),`
`cond.3 = c(14, 15, 17, 13, 17, 14))`

`# Pool every score into a single vector (column by column) and print the`
`# overall mean, which is 10.`
`alldata = unlist(h, use.names = FALSE)`
`mean(alldata)`
`# Overall sum of squares: squared deviations of every score from the overall`
`# mean, added up. The result is 328.`
`all.ssq = sum((alldata - mean(alldata))^2)`

`# Sum of squared deviations from its own mean, printed for each condition in turn`
`sum((h[["cond.1"]] - mean(h[["cond.1"]]))^2)`
`#  10`
`sum((h[["cond.2"]] - mean(h[["cond.2"]]))^2)`
`#  4`
`sum((h[["cond.3"]] - mean(h[["cond.3"]]))^2)`
`#  14`

`# Within-conditions sum of squares: the per-condition sums of squares shown`
`# above, computed column by column and added together`
`within.ssq = sum(vapply(h, function(scores) sum((scores - mean(scores))^2),`
`numeric(1)))`
`# 28`

`# Between-conditions sum of squares: whatever is left of the overall sum of squares`
`between.ssq = all.ssq - within.ssq`

`# Or we can compute it directly from the condition means.`
`# First take the mean of the condition means (colMeans gives one mean per`
`# condition column, so this works for any number of conditions),`
`means.mean = mean(colMeans(h))`

`# then add up the squared differences between each condition mean and the`
`# mean-of-means, and multiply by the number of data points in each condition.`
`# nrow(h) is that number; it replaces a hard-coded 6 so the result follows the data.`
`# This shortcut relies on every condition having the same number of data points.`
`between.ssq = sum((colMeans(h) - means.mean)^2) * nrow(h)`

`# Degrees of freedom are divided up in the same way as the sums of squares.`
`# Total degrees of freedom: number of datapoints - 1`
`total.df = length(alldata) - 1`
`# Between conditions: number of conditions - 1`
`# (ncol(h) counts the condition columns instead of hard-coding 3)`
`between.df = ncol(h) - 1`
`# Within conditions:`
`# (number of datapoints in each condition - 1) * number of conditions`
`# Either take what is left of the total degrees of freedom ...`
`within.df = total.df - between.df`
`# ... or count it directly from the data; both assignments give the same value.`
`within.df = (nrow(h) - 1) * ncol(h)`

`# within conditions: individual differences between participants, and random error`
`# between conditions: systematic difference between conditions plus individual differences between participants plus random error`
`#`
`# F-value: (systematic difference + error variance) / (error variance)`
`#        = (between_conditions_variance) / (within_conditions_variance)`
`#        = (between.ssq / between.df) / (within.ssq / within.df)`

### A worked full example of one factor, independent measures ANOVA

`# Anagram solution times in long format: one row per participant, with the`
`# condition in its own column. The condition is stored explicitly as a factor:`
`# since R 4.0 data.frame() leaves strings as character vectors, and the`
`# TukeyHSD() call further down only works on factor terms.`
`hinton.anagram.altformat = data.frame(`
`time = c(15,20,14,13,18,16,13,12,18,11,`
`21,25,29,18,26,22,26,24,28,21,`
`28,30,32,28,26,30,25,36,20,25),`
`condition = factor(rep(c("first", "last", "none"), each = 10)))`

`# bwplot() comes from the lattice package, so load it before plotting`
`library(lattice)`
`bwplot(time ~ condition, data = hinton.anagram.altformat)`

`# To work through this by hand we'll use a slightly different (wide) format:`
`# one column of times per condition.`

`hinton.anagram = data.frame(`
`first.letter = c(15,20,14,13,18,16,13,12,18,11),`
`last.letter = c(21,25,29,18,26,22,26,24,28,21),`
`no.letter = c(28,30,32,28,26,30,25,36,20,25))`

`# The counts are read off the data frame rather than typed in by hand,`
`# so they cannot drift out of step with the data (here: 3, 10 and 30).`
`numberofconditions = ncol(hinton.anagram)`
`numberofdatapointspercondition = nrow(hinton.anagram)`
`numberofdatapoints = numberofconditions * numberofdatapointspercondition`

`# First column in the ANOVA table: degrees of freedom.`
`# total, between conditions, and within conditions`

`df.total = numberofdatapoints - 1`
`df.between = numberofconditions - 1`
`df.within = df.total - df.between`

`# Second column: sums of squares`
`# total, between conditions, and within conditions`

`# Overall mean of all scores, then the squared deviations of every score from it.`
`# ssq.total must use mean.total (the name assigned on the line before it);`
`# mean.ana is not defined anywhere in this worksheet.`
`mean.total = sum(hinton.anagram) / numberofdatapoints`
`ssq.total = sum((hinton.anagram - mean.total)^2)`

`# Within conditions: squared deviations of each score from its own condition`
`# mean, computed column by column and added together`
`ssq.within = sum(vapply(hinton.anagram,`
`function(scores) sum((scores - mean(scores))^2), numeric(1)))`

`# Between conditions: what remains of the total`
`ssq.between = ssq.total - ssq.within`

`# Third column: mean squares (each sum of squares divided by its degrees of freedom)`
`MS.between = ssq.between / df.between`
`MS.error = ssq.within / df.within`

`# F ratio and its p-value. Note that a variable called F masks R's built-in`
`# shorthand F for FALSE for the rest of the session; the name is kept here so`
`# that anything referring to it still works.`
`# lower.tail = FALSE asks pf() for the upper-tail probability directly, which`
`# is the quantity 1 - pf(...) computes but without the subtraction.`
`F = MS.between / MS.error`
`pf(F, df.between, df.within, lower.tail = FALSE)`

`# R can produce the whole ANOVA table without the intermediate steps`

`anagram.lm = lm(time ~ condition, data = hinton.anagram.altformat)`
`anova(anagram.lm)`

`# An alternative syntax for the same analysis`

`anagram.aov = aov(time ~ condition, data = hinton.anagram.altformat)`
`summary(anagram.aov)`

`# ANOVA tells you that the factor explains more variance than expected by chance, but`
`# it doesn't tell you where the differences lie. For that we need other tests.`

`# Pairwise t-tests with Bonferroni correction. The argument name is written out`
`# in full (p.adjust.method) rather than relying on partial matching of "p.adj".`
`pairwise.t.test(hinton.anagram.altformat\$time, hinton.anagram.altformat\$condition,`
`p.adjust.method = "bonferroni")`

`# Tukey's honest significant differences for the same pairwise comparisons.`
`# NOTE: TukeyHSD() works on factor terms; if condition is a character column`
`# (what data.frame() produces from strings since R 4.0), convert it with`
`# factor() before fitting the model.`
`TukeyHSD(anagram.aov)`