Courses >

### Demo: a very simple distributional model

 `import nltkdef compute_space(context_size, corpus):    space = nltk.ConditionalFreqDist()    #    for index in range(len(corpus)):        # current word        current = corpus[ index ]        #             # context before the current word: count each item        # but no preceding context for index 0        if index > 0:            # don't start from a cxword_index < 0 in case index < context_size            for cxword_index in range(max(index - context_size, 0), index):                cxword = corpus[ cxword_index ]                # In a ConditionalFreqDist, if 'current' is not a condition yet,                # then accessing it creates a new empty FreqDist for 'current'                # The FreqDist method inc() increments the count for the given item by one.                space[ current ].update([cxword])        #        # context after the current word: count each item        # but no succeeding context for the last item (index len(corpus - 1))        if index < len(corpus) - 1:            # don't run until a cxword_index > len(corpus) in case            # index + context_size > len(corpus)            for cxword_index in range(index + 1, min(index + context_size + 1, len(corpus))):                cxword = corpus[ cxword_index ]                # In a ConditionalFreqDist, if 'current' is not a condition yet,                # then accessing it creates a new empty FreqDist for 'current'                # The FreqDist method inc() increments the count for the given item by one.                
space[ current ].update([cxword])    #    return space###############print( "reading Brown corpus...")brown_words = list(nltk.corpus.brown.words())print( "computing space...")sp = compute_space(2, brown_words)# 10 most frequent context words: similar across many items# (what can we do about that?)print("election:\n", sp["election"].most_common(10))print("love:\n", sp["love"].most_common(10))print("car:", sp["car"].most_common(10))# 100 most frequent context words: now we are starting to see differencesprint("election:\n", sp["election"].most_common(100))print("love:\n", sp["love"].most_common(100))print("car:\n", sp["car"].most_common(100))# some ambiguous wordsprint("bat:\n", sp["bat"].most_common(100))print("bank:\n", sp["bank"].most_common(100))print("bar:\n", sp["bar"].most_common(100))print("leave:\n", sp["leave"].most_common(100))`