Using Python dictionaries: mapping strings to lists
The following code collects, for each part-of-speech tag, the list of words that have been observed to occur with it.
# Obtain the list of (word, tag) pairs from the Brown corpus,
# restricted to the "news" category.
import nltk

brown_news = nltk.corpus.brown.tagged_words(categories="news")

# This dictionary maps each tag to the list of words that have been
# observed with it. A word is appended once per occurrence, so the
# lists can contain the same word several times.
tag_wordlist = {}

# Fill the dictionary. setdefault() returns the list already stored
# under `tag`, inserting a fresh empty list first if the tag has not
# been seen before, so no separate "is the key there yet?" test is needed.
for word, tag in brown_news:
    tag_wordlist.setdefault(tag, []).append(word)

# Which words have we observed as nouns?
print(tag_wordlist["NN"])
The following code maps prepositions to the words that precede them:
# For the prepositions "in", "on", "up", we collect the words
# that precede them.
# Ideally, we would collect the verbs that form particle verbs
# with these prepositions, like "check in", "take on", "look up",
# but we assume we don't have part-of-speech tags available,
# so collecting preceding words is the next best thing.
import nltk
import sys
import string

prepositions = ["in", "on", "up"]

# Ask the user for a filename from which we can read text.
print("Please enter a filename")
filename = input()

# Try to open the file, but be prepared for the case
# that the user may have mistyped. Only the open() call is guarded,
# so errors raised later while reading are not reported as
# "could not open".
try:
    f = open(filename)
except IOError:
    print("sorry, could not open", filename)
    sys.exit(0)  # this leaves the program

# We have successfully opened the file, now we read it.
# The with-statement closes the file even if reading fails.
with f:
    contents = f.read()

# Split on whitespace, strip punctuation from both ends of every token,
# and lowercase it. NOTE: a token made up of punctuation only (e.g. "-")
# becomes the empty string and still takes part in the bigrams below.
words = [w.strip(string.punctuation).lower() for w in contents.split()]
bigrams = nltk.bigrams(words)

# Maps each preposition to the list of words that directly preceded it,
# in order of occurrence (duplicates are kept).
prepositions_preceding = {}
for w1, w2 in bigrams:
    if w2 in prepositions:
        # Store w1 as a word that preceded the preposition w2;
        # setdefault() creates the list on the first occurrence.
        prepositions_preceding.setdefault(w2, []).append(w1)

for preposition, preceding in prepositions_preceding.items():
    print(preposition, preceding)