Using Python dictionaries: mapping strings to lists

The following code collects, for each part-of-speech tag, the list of words that have been observed to occur with it.

# We obtain a list of (word, tag) pairs from the Brown corpus,
# using news data only.
import nltk

brown_news = nltk.corpus.brown.tagged_words(categories="news")

# This dictionary maps each tag to a list of words that have been
# observed with it. A word is appended once per occurrence, so the
# lists may contain the same word many times.
tag_wordlist = {}

# Now we fill the dictionary.
for word, tag in brown_news:
    # The first time we meet a tag, give it an empty list to append to.
    if tag not in tag_wordlist:
        tag_wordlist[tag] = []
    # This runs for every pair (not only the first one per tag), so it
    # belongs inside the loop but outside the "if".
    tag_wordlist[tag].append(word)

# What words have we observed as nouns?
print(tag_wordlist["NN"])

The following code maps prepositions to the words that precede them:

# For the prepositions "in", "on", "up", we collect the words
# that precede them.
# Ideally, we would collect the verbs that form particle verbs
# with these prepositions, like "check in", "take on", "look up",
# but we assume we don't have part-of-speech tags available,
# so collecting preceding words is the next best thing.
import string
import sys

import nltk

prepositions = ["in", "on", "up"]

# We ask the user for a filename from which we can read text.
print("Please enter a filename")
filename = input()

# We try to open the file, but are prepared for the case
# that the user may have mistyped. Only the open() call is guarded,
# so errors from later steps are not misreported as "could not open".
try:
    f = open(filename)
except OSError:
    print("sorry, could not open", filename)
    # This leaves the program; the non-zero status tells the caller
    # (e.g. a shell script) that the run failed.
    sys.exit(1)

# We have successfully opened the file, now we read it.
# The "with" block closes the file even if reading raises an error.
with f:
    contents = f.read()

# Split on whitespace, then strip surrounding punctuation and lowercase
# each token so that "In," and "in" are treated as the same word.
words = [w.strip(string.punctuation).lower() for w in contents.split()]

# Pairs of adjacent words: (words[0], words[1]), (words[1], words[2]), ...
bigrams = nltk.bigrams(words)

# This dictionary maps each preposition to the list of words
# that were seen directly before it.
prepositions_preceding = {}

for w1, w2 in bigrams:
    if w2 in prepositions:
        # Store w1 as a word that preceded a preposition.
        if w2 not in prepositions_preceding:
            prepositions_preceding[w2] = []
        prepositions_preceding[w2].append(w1)

for preposition, preceding in prepositions_preceding.items():
    print(preposition, preceding)