### What is "logistic" about logistic regression?

`# Logit: logarithm of odds,`

`# where the odds of a probability p is p/(1-p)`

`#`

`# The logit is`

`# f(p) = log( p / (1-p) )`

`logit = function(p) { log(p/(1-p)) }`

`# Note the sigmoid shape of the plot`

`xvals = seq(0.001,.999,.001)`

`plot(xvals,logit(xvals), type="l")`

`# The inverse logit function (aka logistic function, aka sigmoid`

`# function) maps any value into a value between 0 and 1.`

`# Note: exp(x) is e (the Euler number) to the power of x`

`#`

`# The logistic function is`

`# f(x) = exp(x) / (exp(x) + 1) = 1 / (1 + exp(-x))`

`invlogit = function(x) { 1/(1+exp(-x)) }`

`# logit and invlogit are inverse functions:`

`logit(0.8)`

`invlogit(1.386294)`

`# again, note the sigmoid shape of the plot`

`xvals = -6:6`

`plot(xvals,invlogit(xvals),type="l") `

### Why we need something different than linear regression when the dependent variable (the variable we want to predict) is categorial

`# Now remember linear regression: We model a dependent variable Y as`

`# linearly dependent on a predictor X:`

`#`

`# Y = Beta_0 + Beta_1 X`

`#`

`# Beta_0 is the intercept, and Beta_1 is the slope.`

`#`

`# When we do logistic regression, we predict Y to be`

`# Y = invlogit(Beta_0 + Beta_1 X)`

`#`

`# where invlogit is the logistic function. `

`# So Y will be a value between 0 and 1. `

`## An example using a languageR dataset.`

`# regularity: languageR dataset containing information on`

`# regular and irregular Dutch verbs.`

`# regularity$Regularity is a factor with values "regular", "irregular".`

`# We map it to a number, either 0 (for "irregular") or 1 (for "regular").`

`library(languageR)`

`reg = regularity`

`reg$Regularity = as.numeric(regularity$Regularity=="regular")`

`# Relation between regularity and written frequency of a verb:`

`# irregular verbs tend to be more frequent`

`plot(Regularity~WrittenFrequency,data=reg)`

`# Regularity has two values, 0 or 1. First, we try to fit a linear model:`

`reg.lm = lm(Regularity~WrittenFrequency,data=reg)`

`summary(reg.lm)`

`# We plot regularity and written frequency again, and`

`# add the fitted model as a line.`

`# Because this is linear regression, `

`# we predict regularity values below 0 and above 1`

`# (which makes no sense)`

`plot(Regularity~WrittenFrequency,data=reg)`

`abline(reg.lm)`

`# Now we use logistic regression.`

`# NEW: using library rms instead of the outdated library Design`

`# used in the Baayen book`

`library(rms)`

`reg.dd = datadist(reg)`

`options(datadist="reg.dd")`

`reg.lrm = lrm(Regularity~WrittenFrequency,data=reg)`

`# inspect by just typing reg.lrm`

`# We see that the coefficient for WrittenFrequency is negative,`

`# which means that the higher WrittenFrequency, the more likely Regularity is going to be`

`# near zero (that is, the more likely Regularity is going to be "irregular")`

`# We use this model to get predictions for Regularity`

`# for the WrittenFrequency values in the dataset`

`reg.predicted = predict(reg.lrm)`

`# When we inspect the results, we see that what we get is`

`# Beta_0 + Beta_1 X, rather than invlogit(Beta_0 + Beta_1 X)`

`range(reg.predicted)`

`# Compare:`

`# > reg.predicted[1]`

`# 1 `

`# 4.278845 `

`# > coef(reg.lrm)[1] + coef(reg.lrm)[2] * reg[1,]$WrittenFrequency`

`# Intercept `

`# 4.278845 `

`# > range(invlogit(reg.predicted))`

`# [1] 0.1029522 0.9944239`

`# Now let's plot the invlogit() of these points`

`points(reg$WrittenFrequency, invlogit(predict(reg.lrm)), col="orange")`

### Another logistic regression example: The Dative Alternation

`library(languageR)`

`head(verb)`

`# we would like to predict the realization of the receiver: `

`# NP: "John gave Mary the book"`

`# PP: "John gave the book to Mary"`

`# How does the length of the Theme ("book" versus "the book that I read last week") `

# affect the realization

`of the Recipient?`

`# As we would expect, longer Themes tend to come at the end:`

`aggregate(verbs$LengthOfTheme, list(verbs$RealizationOfRec), mean)`

`# Now we do logistic regression to predict Realization of Recipient from Length of Theme.`

`# As above, we first have to "register" the dataset with the rms package`

`v.dd = datadist(verbs)`

`options(datadist="v.dd")`

`# Read the output of lrm: What does the coefficient for LengthOfTheme say about the influence of this predictor?`

`lrm(RealizationOfRec ~ LengthOfTheme, data = verbs)`

`# Another possible predictor is the animacy of the Recipient: `

# "send the book to Mary" versus "send the book to the office"

`# Note that this is a categorial predictor! What would you expect this predictor to do?`

`# We first check by hand what we can expect to see`

`xtabs(~RealizationOfRec + AnimacyOfRec, data = verbs)`

`# Then we do the regression.`

`lrm(RealizationOfRec ~ AnimacyOfRec, data = verbs)`

`# Putting multiple predictors together`

`lrm(RealizationOfRec ~ LengthOfTheme + AnimacyOfRec + AnimacyOfTheme, data = verbs)`

`# The 'dative' dataset contains the data in 'verbs', plus additional information`

head(dative)

# We use it to predict the realization of the Recipient from the semantic class of the verb:

# abstract, communication, future transfer of possession, prevention of possession,

# transfer of possession

d.dd = datadist(dative)

options(datadist = "d.dd")

lrm(RealizationOfRecipient ~ SemanticClass, data = dative)

# We can now build a wide variety of models:

m1 = lrm(RealizationOfRecipient ~ AnimacyOfRec, data = dative)

m2 = lrm(RealizationOfRecipient ~ DefinOfRec, data = dative)

m3 = lrm(RealizationOfRecipient ~ AnimacyOfRec + DefinOfRec, data = dative)

m4 = lrm(RealizationOfRecipient ~ SemanticClass, data = dative)

m5 = lrm(RealizationOfRecipient ~ SemanticClass + AnimacyOfRec + DefinOfRec, data = dative)

m6 = lrm(RealizationOfRecipient ~ LengthOfTheme, data = dative)

# Which one should we choose?