# Topic modeling example

# demo of topic modeling with LDA

# We first use a tiny document collection that comes with gensim

import gensim

from gensim.test.utils import common_texts

# Show the header line followed by the raw tokenized documents.
print("This is the tiny corpus we use:", common_texts, sep="\n")

# Transformation into Gensim's internal format.

# Build the mapping from words to integer IDs over the whole collection.
gensim_dictionary = gensim.corpora.Dictionary(common_texts)

# Let gensim convert each document to its internal (bag-of-words) format.
# The result holds one entry per document, in the same order as common_texts.
gensim_corpus = [
    gensim_dictionary.doc2bow(document) for document in common_texts
]

# Now comes the topic modeling: fit an LDA model with 3 topics on the
# converted corpus; the dictionary supplies the word/ID mapping.
ldamodel = gensim.models.ldamodel.LdaModel(
    corpus=gensim_corpus,
    id2word=gensim_dictionary,
    num_topics=3,
)

# Let's inspect the topics, showing at most 8 words per topic.
print("\n\nThese are the topics that LDA found:")

for topic in ldamodel.print_topics(num_words=8):
    print(topic)

# We can also see the degree to which each document draws on each topic.
# common_texts and gensim_corpus hold the same documents in the same order,
# so walk them in lockstep instead of indexing by position.
for document, bag_of_words in zip(common_texts, gensim_corpus):
    print("Document:")
    print(document)

    # Keep the topic/probability pairs on the same output line as the label.
    print("Topics:", end=" ")
    for topicnumber, prob in ldamodel.get_document_topics(bag_of_words):
        print(topicnumber, ":", prob, end=", ")

    # Finish the "Topics:" line and leave a blank line before the next document.
    print("\n")