# Mann-Whitney U test walk-through on attention ratings of ten children.
#
# The script builds the data frame `hinton.attention`, plots the raw scores
# by gender, ranks the ratings, derives the U statistic for each gender in
# two different ways (pairwise counting and summed ranks), and finally runs
# the equivalent two-sample Wilcoxon rank sum test.
#
# Objects left in the workspace:
#   hinton.attention      data frame: child, rating, initial, girl, rank, above
#   maxrating.girls/boys  largest rank sum used as reference for each group
#   actualrating.*        observed rank sum of each group
#   mannwhitney.U.*       reference rank sum minus observed rank sum

# Data ----

hinton.attention <- data.frame(
  child = c(
    "susan", "linda", "john", "mary", "peter",
    "ian", "trevor", "andrew", "helen", "christine"
  ),
  rating = c(67, 55, 26, 70, 36, 57, 32, 65, 59, 24)
)

# add column with initials
hinton.attention$initial <- toupper(substring(hinton.attention$child, 1, 1))

# code boy / girl (TRUE = girl), in the same row order as `child`
hinton.attention$girl <- c(
  TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE
)

# logical row selector reused below; group sizes are derived from it rather
# than hard-coded
is.girl <- hinton.attention$girl
n.girls <- sum(is.girl)
n.boys <- sum(!is.girl)

# Plot ----

# plot as in the Hinton book: actual scores
# type = "n" only sets up the axes; the initials are drawn with text() below
plot(
  hinton.attention$rating, rep(2, nrow(hinton.attention)),
  xlab = "attention", ylab = "gender", type = "n", yaxt = "n"
)
# girls on the upper row (y = 1.5), boys on the lower row (y = 1.3)
text(
  hinton.attention$rating[is.girl], rep(1.5, n.girls),
  hinton.attention$initial[is.girl]
)
text(
  hinton.attention$rating[!is.girl], rep(1.3, n.boys),
  hinton.attention$initial[!is.girl]
)

# Ranking ----

# add ranking to data frame
# note the function 'rank'.
# it can also handle ties
hinton.attention$rank <- rank(hinton.attention$rating)

# demo'ing 'rank' (results are printed when run at top level)
rank(c(3, 1, 4, 5, 2, 2, 2, 2))
rank(c(3, 1, 4, 5, 2, 2, 2, 2), ties.method = "first")
rank(c(3, 1, 4, 5, 2, 2, 2, 2), ties.method = "average")

# U by pairwise counting ----

# how many boys above each girl's rank
# and vice versa
# ranks.above[i, j] is TRUE when child j has a strictly higher rank than i;
# other.gender[i, j] is TRUE when children i and j differ in gender.
ranks.above <- outer(hinton.attention$rank, hinton.attention$rank, "<")
other.gender <- outer(is.girl, is.girl, "!=")
hinton.attention$above <- rowSums(ranks.above & other.gender)

# summed 'above' values for girls
sum(hinton.attention$above[is.girl])
# and for boys
sum(hinton.attention$above[!is.girl])

# U by summed ranks ----

# another way of getting at the same numbers:
# compare actual summed ranks of girls to optimum number possible
# (n1 * n2 + n1 * (n1 + 1) / 2, with n1 the size of the group in question)
maxrating.girls <- n.girls * n.boys + (n.girls * (n.girls + 1)) / 2
actualrating.girls <- sum(hinton.attention$rank[is.girl])
mannwhitney.U.girls <- maxrating.girls - actualrating.girls

maxrating.boys <- n.boys * n.girls + (n.boys * (n.boys + 1)) / 2
actualrating.boys <- sum(hinton.attention$rank[!is.girl])
mannwhitney.U.boys <- maxrating.boys - actualrating.boys

# The test ----

# doing the test: same as two-sample Wilcoxon rank sum test
# one-sided: alternative = "greater" with the girls' ratings as first sample
wilcox.test(
  hinton.attention$rating[is.girl],
  hinton.attention$rating[!is.girl],
  alternative = "greater"
)
# Courses > Analyzing linguistic data: an introductory statistics course > Schedule: words in a haystack