# Demo of topic modeling with LDA.
#
# The script trains a 3-topic model on the tiny document collection that
# comes with gensim, prints the topics it found, and then prints, for every
# document, the topics it draws on together with their probabilities.

import gensim
from gensim.test.utils import common_texts

print("This is the tiny corpus we use:")
print(common_texts)

# Transformation into gensim's internal format.
# First make a mapping from words to IDs.
gensim_dictionary = gensim.corpora.Dictionary(common_texts)

# Then let gensim transform each document of our corpus into its internal
# (bag-of-words) format.
gensim_corpus = [
    gensim_dictionary.doc2bow(document) for document in common_texts
]

# Now comes the topic modeling.
# NOTE: no random_state is passed, so the topics may differ between runs.
ldamodel = gensim.models.ldamodel.LdaModel(
    gensim_corpus,
    num_topics=3,
    id2word=gensim_dictionary,
)

# Let's inspect the topics.
print("\n\nThese are the topics that LDA found:")
for topic in ldamodel.print_topics(num_words=8):
    print(topic)

# We can also see the degree to which each document draws on each topic.
# common_texts and gensim_corpus are parallel lists, so walk them together.
for document, bag_of_words in zip(common_texts, gensim_corpus):
    print("Document:")
    print(document)
    print("Topics:", end=" ")
    for topicnumber, prob in ldamodel.get_document_topics(bag_of_words):
        print(topicnumber, ":", prob, end=", ")
    # Ends the "Topics:" line and leaves a blank line between documents.
    print("\n")
# Courses > Python worksheets