# Using Python dictionaries: mapping strings to lists

# For the prepositions "in", "on", "up", we collect the words
# that precede them.
# Ideally, we would collect the verbs that form particle verbs
# with these prepositions, like "check in", "take on", "look up",
# but we assume we don't have part-of-speech tags available,
# so collecting preceding words is the next best thing.

import nltk
import sys
import string

# The prepositions whose preceding words we want to collect.
prepositions = ["in", "on", "up"]

# we ask the user for a filename from which we can read  text
print "Please enter a filename"
filename = raw_input()

# we try to open the file,
# but are prepared for the case
# that the user may have mistyped
try:
    f = open(filename)
except IOError:
    print "sorry, could not open", filename
    sys.exit(0) # this leaves the program

# we have successfully opened the file, now we read it
contents = f.read()
f.close()

# Split the text on whitespace, then normalise every token by removing
# leading/trailing punctuation and lowercasing it.
tokens = (w.strip(string.punctuation).lower() for w in contents.split())

# A token that consists solely of punctuation (e.g. "--" or "...") is
# reduced to the empty string by the stripping above. Drop those, so that
# an empty string is never recorded as the word preceding a preposition.
words = [w for w in tokens if w]

# Pairs of adjacent words: (words[0], words[1]), (words[1], words[2]), ...
bigrams = nltk.bigrams(words)

# Maps each preposition that occurs in the text to the list of words
# found directly before it, in order of occurrence (duplicates are kept).
prepositions_preceding = {}

for before, word in bigrams:
    # Only bigrams whose second word is one of our prepositions matter.
    if word not in prepositions:
        continue
    # setdefault() creates the empty list the first time this preposition
    # is seen and returns the existing list on every later occurrence.
    prepositions_preceding.setdefault(word, []).append(before)

for preposition, preceding in prepositions_preceding.items():
    print preposition, preceding

# Comments