#python #topic-modeling
Вопрос:
Я новичок в python, и у меня проблемы с динамическими тематическими моделями.
Я следую инструкциям в https://github.com/JiaxiangBU/dynamic_topic_modeling/blob/master/dtm.ipynb
Все в порядке, я создаю модели и Эволюцию слов, но у меня ошибка «Ошибка индекса: индекс списка вне диапазона» в теме Эволюция
Вот мой код:
import seaborn as sns
sns.set()
num_topics = 5
def document_influence_dim(num_topics, DtmModel, time_seq = []):
doc, topicId, period, distributions=[], [], [], []
for topic in range(num_topics):
for t in range(len(time_seq)):
for document in range(time_seq[t]):
distribution = round(DtmModel.influences_time[t][document][topic], 4)
period.append(t)
doc.append(document)
topicId.append(topic)
distributions.append(distribution)
return pd.DataFrame(list(zip(doc, topicId, period, distributions)), columns=['document','topicId', 'period','distribution'])
def topic_distribution(num_topics, DtmModel, time_seq = []):
"""
function to compute the topical distribution in a document
:param num_topics: number of topics
"""
doc, topicId, distributions=[], [], []
df_dim = document_influence_dim(num_topics = num_topics, DtmModel = DtmModel, time_seq = time_seq)
for document in range(0, sum(time_seq)):
for topic in range(0, num_topics):
distribution = round(DtmModel.gamma_[document][topic], 4)
doc.append(document)
topicId.append(topic)
distributions.append(distribution)
return pd.DataFrame(list(zip(doc, topicId, distributions, df_dim.period)), columns=['document','topicId', 'distribution', 'period'])
def visualize_topics(df):
"""
function to vizualise mean topic distribution over defined periods.
the topic distribution is defined by the average level by documents.
:param num_topics: number of topics
"""
fig, ax = plt.subplots(figsize=(30,10))
df.groupby(['period', 'topicId'], sort=False).mean()['distribution'].unstack().plot(ax=ax,grid=True, linewidth =3.0, sharex=True)
plt.ylabel("Topic Distribution", fontsize=16)
plt.xlabel("Period", fontsize=16)
plt.title("Topic evolution")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), title = "Topics", fontsize='large', labelspacing=0.6, fancybox = True)
Это точно такая же инструкция. Когда я запускаю этот код, у него возникает проблема:
topic_df = topic_distribution(num_topics=num_topics, DtmModel=DtmModel, time_seq=time_slice)
Ошибка:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-77-138aff41df8b> in <module>
----> 1 topic_df = topic_distribution(num_topics=num_topics, DtmModel=DtmModel, time_seq=time_slice)
<ipython-input-76-279f84a320c9> in topic_distribution(num_topics, DtmModel, time_seq)
35 """
36 doc, topicId, distributions=[], [], []
---> 37 df_dim = document_influence_dim(num_topics = num_topics, DtmModel = DtmModel, time_seq = time_seq)
38 for document in range(0, sum(time_seq)):
39 for topic in range(0, num_topics):
<ipython-input-76-279f84a320c9> in document_influence_dim(num_topics, DtmModel, time_seq)
15 for t in range(len(time_seq)):
16 for document in range(time_seq[t]):
---> 17 distribution = round(DtmModel.influences_time[t][document][topic], 4)
18 # print(len(model.influences_time))
19 # print(len(model.influences_time[0]))
IndexError: list index out of range
Can anyone help me solving this error?