# R code: ANOVA
# Hinton's general ANOVA example, chapter 11
# Partition the total sum of squares of a small data set into a
# within-conditions part and a between-conditions part, and do the same for
# the degrees of freedom. Three conditions, six observations each.
h = data.frame(
  cond.1 = c(5, 6, 7, 5, 3, 4),
  cond.2 = c(11, 10, 9, 11, 9, 10),
  cond.3 = c(14, 15, 17, 13, 17, 14)
)
# overall mean: 10
alldata = c(h$cond.1, h$cond.2, h$cond.3)
mean(alldata)
# overall sum of squares
all.ssq = sum((alldata - mean(alldata))^2)
# 328
# sum of squares for each condition separately
sum((h$cond.1 - mean(h$cond.1))^2)
# [1] 10
sum((h$cond.2 - mean(h$cond.2))^2)
# [1] 4
sum((h$cond.3 - mean(h$cond.3))^2)
# [1] 14
# Within-conditions sum of squares: sum of the above three sums of squares
# for each condition separately
within.ssq = sum((h$cond.1 - mean(h$cond.1))^2) +
  sum((h$cond.2 - mean(h$cond.2))^2) +
  sum((h$cond.3 - mean(h$cond.3))^2)
# 28
# Rest of sum of squares: between conditions
between.ssq = all.ssq - within.ssq
# 300
# Or we can compute it as follows: compute the mean of the 3 condition means,
means.mean = mean(c(mean(h$cond.1), mean(h$cond.2), mean(h$cond.3)))
# then add up the squared differences between the condition means and the
# mean-of-means, and multiply the sum by the number of data points in each
# condition. nrow(h) is that number (6) because every column of h is one
# condition. This shortcut relies on all conditions having the same size.
between.ssq = ((mean(h$cond.1) - means.mean)^2 +
  (mean(h$cond.2) - means.mean)^2 +
  (mean(h$cond.3) - means.mean)^2) * nrow(h)
# dividing up degrees of freedom in the same way:
# total degrees of freedom = number of datapoints - 1
total.df = length(alldata) - 1
# between conditions: number of conditions - 1 (one column of h per condition)
between.df = ncol(h) - 1
# within conditions: whatever is left of the total ...
within.df = total.df - between.df
# ... which is the same as
# (number of datapoints in each condition - 1) * number of conditions
within.df = length(h$cond.1) - 1 + length(h$cond.2) - 1 + length(h$cond.3) - 1
# within conditions: individual differences between participants, and random
# error
# between conditions: systematic difference between conditions plus individual
# differences between participants plus random error
#
# F-value: (systematic difference + error variance) / (error variance)
#   = (between_conditions_variance) / (within_conditions_variance)
#   = (between.ssq / between.df) / (within.ssq / within.df)
# A worked full example of a one-factor, independent-measures ANOVA
# bwplot() comes from the lattice package.
library(lattice)
# Long format: one row per observation. `condition` is made a factor
# explicitly: since R 4.0 data.frame() keeps strings as character, and
# TukeyHSD() at the end of this section needs a factor in the fitted model.
hinton.anagram.altformat = data.frame(
  time = c(
    15, 20, 14, 13, 18, 16, 13, 12, 18, 11,
    21, 25, 29, 18, 26, 22, 26, 24, 28, 21,
    28, 30, 32, 28, 26, 30, 25, 36, 20, 25
  ),
  condition = factor(rep(c("first", "last", "none"), each = 10))
)
bwplot(time ~ condition, data = hinton.anagram.altformat)
# To work through this we'll use a slightly different format:
# one column per condition (same 30 values as above).
hinton.anagram = data.frame(
  first.letter = c(15, 20, 14, 13, 18, 16, 13, 12, 18, 11),
  last.letter = c(21, 25, 29, 18, 26, 22, 26, 24, 28, 21),
  no.letter = c(28, 30, 32, 28, 26, 30, 25, 36, 20, 25)
)
# Derived from the data: 3 conditions, 10 data points each, 30 in total.
numberofconditions = ncol(hinton.anagram)
numberofdatapointspercondition = nrow(hinton.anagram)
numberofdatapoints = numberofconditions * numberofdatapointspercondition
# First column in the ANOVA table: degrees of freedom.
# total, between conditions, and within conditions
df.total = numberofdatapoints - 1
df.between = numberofconditions - 1
df.within = df.total - df.between
# Second column: sums of squares
# total, between conditions, and within conditions
mean.total = sum(hinton.anagram) / numberofdatapoints
# Squared deviations of every data point from the grand mean.
ssq.total = sum((hinton.anagram - mean.total)^2)
# Squared deviations of every data point from its own condition mean.
ssq.within = sum((hinton.anagram$first.letter - mean(hinton.anagram$first.letter))^2) +
  sum((hinton.anagram$last.letter - mean(hinton.anagram$last.letter))^2) +
  sum((hinton.anagram$no.letter - mean(hinton.anagram$no.letter))^2)
ssq.between = ssq.total - ssq.within
# Mean squares: sums of squares divided by their degrees of freedom.
MS.between = ssq.between / df.between
MS.error = ssq.within / df.within
# NOTE: the name `F` shadows R's built-in alias for FALSE from here on; it is
# kept because it is this script's name for the F statistic. Always spell out
# FALSE after this point.
F = MS.between / MS.error
# p-value: upper tail of the F distribution. lower.tail = FALSE is equivalent
# to 1 - pf(...) but stays accurate when the p-value is very small.
pf(F, df.between, df.within, lower.tail = FALSE)
# We can do this in R without all the steps
anagram.lm = lm(time ~ condition, data = hinton.anagram.altformat)
anova(anagram.lm)
# An alternative syntax
anagram.aov = aov(time ~ condition, data = hinton.anagram.altformat)
summary(anagram.aov)
# Anova tells you that the factor explains more variance than expected by
# chance, but it doesn't tell you where the differences lie. For that we need
# other tests.
pairwise.t.test(
  hinton.anagram.altformat$time,
  hinton.anagram.altformat$condition,
  p.adjust.method = "bonferroni"
)
TukeyHSD(anagram.aov)