python - 我需要创建一个 pandas 数据框,显示推文所代表的主题
问题描述
使用潜在狄利克雷分配(Latent Dirichlet Allocation,LDA)确定主题:
# Input documents: the 'initial' column of `tweets`
# (presumably one token list per tweet -- confirm against the preprocessing step).
tokenized = tweets['initial']

# Token <-> id mapping, pruned by document frequency.
dictionary = corpora.Dictionary(tokenized)
dictionary.filter_extremes(no_below=1, no_above=0.8)

# Bag-of-words vector for every document.
corpus = list(map(dictionary.doc2bow, tokenized))

# Train a 5-topic LDA model and persist it to disk.
ldamodel = gensim.models.ldamodel.LdaModel(
    corpus=corpus,
    num_topics=5,
    id2word=dictionary,
    passes=15,
)
ldamodel.save('mOdel.gensim')

# Print each topic with its 10 highest-weighted words.
topics = ldamodel.print_topics(num_words=10)
for topic in topics:
    print(topic)
下面的函数本应生成一个数据框,显示每条推文所对应的主题,但实际结果并非如此:
def dominant_topic(ldamodel, corpus, content):
    """Return a table with the dominant topic of every document in ``corpus``.

    Args:
        ldamodel: Topic model. It must support ``ldamodel[corpus]`` (yielding,
            for each document, ``(topic_id, probability)`` pairs) and
            ``show_topic(topic_id, topn=...)`` (returning ``(word, weight)``
            pairs).
        corpus: The documents, in whatever form ``ldamodel[...]`` accepts.
        content: The original texts, one per document, in corpus order.

    Returns:
        A ``pandas.DataFrame`` with one row per document and the columns
        ``Dominant_Topic``, ``Perc_Contribution`` (rounded to 4 decimals) and
        ``Topic_Keywords`` (the topic's top 20 words, comma separated),
        followed by one column holding ``content``.
    """
    column_names = ['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords']
    keywords_by_topic = {}  # show_topic() is invariant per topic; compute once
    records = []

    for doc_topics in ldamodel[corpus]:
        doc_topics = list(doc_topics)
        if not doc_topics:
            # Keep a placeholder so row i still describes document i.
            records.append((None, None, None))
            continue

        # max() returns the first maximal pair, the same element a stable
        # descending sort would put first.
        topic_num, prop_topic = max(doc_topics, key=lambda pair: pair[1])
        topic_num = int(topic_num)
        if topic_num not in keywords_by_topic:
            top_words = ldamodel.show_topic(topic_num, topn=20)
            keywords_by_topic[topic_num] = ", ".join(word for word, _ in top_words)
        records.append(
            (topic_num, round(float(prop_topic), 4), keywords_by_topic[topic_num])
        )

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call.
    sent_topics_df = pd.DataFrame(records, columns=column_names)

    # concat(axis=1) aligns on index labels; drop the caller's index so the
    # texts line up with the topic rows by position.
    contents = pd.Series(content).reset_index(drop=True)
    return pd.concat([sent_topics_df, contents], axis=1)
# Compute the dominant topic for each entry of tweets['initial'],
# then preview the first five rows.
df_dominant_topic = dominant_topic(ldamodel, corpus, tweets['initial'])
df_dominant_topic.head()
我只能看到第一个。这就是问题。