# R code: ANOVA

# Hinton's general ANOVA example, chapter 11
#
# One-way, independent-measures ANOVA worked by hand: the total sum of
# squares is split into a within-conditions part and a between-conditions
# part, and the degrees of freedom are split up in the same way.

# One column per condition, one row per observation (equal group sizes).
h <- data.frame(
  cond.1 = c(5, 6, 7, 5, 3, 4),
  cond.2 = c(11, 10, 9, 11, 9, 10),
  cond.3 = c(14, 15, 17, 13, 17, 14)
)

# Sum of squared deviations of a numeric vector from its own mean.
ssq.around.mean <- function(x) sum((x - mean(x))^2)

# overall mean: 10
alldata <- c(h$cond.1, h$cond.2, h$cond.3)
mean(alldata)

# overall sum of squares: 328
all.ssq <- ssq.around.mean(alldata)

# sum of squares for each condition separately
ssq.around.mean(h$cond.1)
# [1] 10
ssq.around.mean(h$cond.2)
# [1] 4
ssq.around.mean(h$cond.3)
# [1] 14

# Within-conditions sum of squares: sum of the above three sums of squares
# for each condition separately
within.ssq <- sum(vapply(h, ssq.around.mean, numeric(1)))
# 28

# Rest of sum of squares: between conditions
between.ssq <- all.ssq - within.ssq
# 300

# Or we can compute it as follows: compute the mean of the condition means,
cond.means <- colMeans(h)
means.mean <- mean(cond.means)

# then add up the squared differences between the condition means and the
# mean-of-means, and multiply by the number of data points in each condition
# (nrow(h), because every condition has the same number of observations).
between.ssq <- sum((cond.means - means.mean)^2) * nrow(h)

# dividing up degrees of freedom in the same way:
# total degrees of freedom = number of datapoints - 1
total.df <- length(alldata) - 1

# between conditions: number of conditions - 1
between.df <- ncol(h) - 1

# within conditions: whatever is left of the total ...
within.df <- total.df - between.df

# ... which is the same as
# (number of datapoints in each condition - 1) * number of conditions
within.df <- (nrow(h) - 1) * ncol(h)

# within conditions: individual differences between participants, and random error
# between conditions: systematic difference between conditions plus individual
# differences between participants plus random error
#
# F-value: (systematic difference + error variance) / (error variance)
#   = (between_conditions_variance) / (within_conditions_variance)
#   = (between.ssq / between.df) / (within.ssq / within.df)

# A worked full example of a one-factor, independent-measures ANOVA

# Long ("alternative") format: one row per observation, with the measured
# value in `time` and the label of its condition in `condition`.
hinton.anagram.altformat <- data.frame(
  time = c(
    15, 20, 14, 13, 18, 16, 13, 12, 18, 11,  # condition "first"
    21, 25, 29, 18, 26, 22, 26, 24, 28, 21,  # condition "last"
    28, 30, 32, 28, 26, 30, 25, 36, 20, 25   # condition "none"
  ),
  # ten consecutive observations per condition, in the order listed above
  condition = rep(c("first", "last", "none"), each = 10)
)

# bwplot() is provided by the lattice package, so attach it before plotting.
library(lattice)

# Box-and-whisker plot of time, one box per condition.
bwplot(time ~ condition, data = hinton.anagram.altformat)

# To work through this by hand we'll use a slightly different format:
# one column per condition, one row per observation.
hinton.anagram <- data.frame(
  first.letter = c(15, 20, 14, 13, 18, 16, 13, 12, 18, 11),
  last.letter  = c(21, 25, 29, 18, 26, 22, 26, 24, 28, 21),
  no.letter    = c(28, 30, 32, 28, 26, 30, 25, 36, 20, 25)
)

# Counts are read off the data frame instead of being typed in by hand.
numberofconditions <- ncol(hinton.anagram)              # 3
numberofdatapointspercondition <- nrow(hinton.anagram)  # 10
numberofdatapoints <- numberofconditions * numberofdatapointspercondition  # 30

# First column in the ANOVA table: degrees of freedom.
# total, between conditions, and within conditions
df.total <- numberofdatapoints - 1
df.between <- numberofconditions - 1
df.within <- df.total - df.between

# Second column: sums of squares
# total, between conditions, and within conditions
mean.total <- sum(hinton.anagram) / numberofdatapoints

# Squared deviations of every observation from the grand mean.
# (The original referred to an undefined `mean.ana` here.)
ssq.total <- sum((hinton.anagram - mean.total)^2)

# Squared deviations of each observation from its own condition mean,
# computed column by column and then added up.
ssq.within <- sum(vapply(
  hinton.anagram,
  function(x) sum((x - mean(x))^2),
  numeric(1)
))

ssq.between <- ssq.total - ssq.within

# Third column: mean squares = sum of squares / degrees of freedom
MS.between <- ssq.between / df.between
MS.error <- ssq.within / df.within

# F ratio. NOTE: the name `F` is kept for compatibility with the rest of the
# script, but it shadows R's built-in shorthand for FALSE from here on.
F <- MS.between / MS.error

# p-value: upper tail of the F distribution. lower.tail = FALSE avoids the
# loss of precision that 1 - pf(...) suffers for very small p-values.
pf(F, df.between, df.within, lower.tail = FALSE)

# We can do this in R without all the steps: fit a linear model of time on
# condition and ask for its ANOVA table.
anagram.lm <- lm(time ~ condition, data = hinton.anagram.altformat)
anova(anagram.lm)

# An alternative syntax
anagram.aov <- aov(time ~ condition, data = hinton.anagram.altformat)
summary(anagram.aov)

# Anova tells you that the factor explains more variance than expected by chance, but
# it doesn't tell you where the differences lie. For that we need other tests.

# Pairwise t tests between conditions with Bonferroni-adjusted p-values.
# The argument name is spelled out in full rather than relying on partial
# matching of `p.adj`.
pairwise.t.test(
  hinton.anagram.altformat$time,
  hinton.anagram.altformat$condition,
  p.adjust.method = "bonferroni"
)

# Tukey's honest significant differences, computed from the aov fit.
TukeyHSD(anagram.aov)