Ошибка индекса: список индексов вне диапазона Динамических тематических моделей

#python #topic-modeling

Вопрос:

Я новичок в python, и у меня проблемы с динамическими тематическими моделями.

Я следую инструкциям в https://github.com/JiaxiangBU/dynamic_topic_modeling/blob/master/dtm.ipynb

Все в порядке, я создаю модели и Эволюцию слов, но у меня ошибка «Ошибка индекса: индекс списка вне диапазона» в теме Эволюция

Вот мой код:

 import seaborn as sns
sns.set()
num_topics = 5

def document_influence_dim(num_topics, DtmModel, time_seq = []):
    doc, topicId, period, distributions=[], [], [], []
        for topic in range(num_topics):
            for t in range(len(time_seq)):
                for document in range(time_seq[t]):
                    distribution = round(DtmModel.influences_time[t][document][topic], 4)
                    period.append(t)
                    doc.append(document)
                    topicId.append(topic)
                    distributions.append(distribution)
        return pd.DataFrame(list(zip(doc, topicId, period, distributions)), columns=['document','topicId', 'period','distribution'])
    
    
    def topic_distribution(num_topics, DtmModel, time_seq = []):
        
        """
        function to compute the topical distribution in a document
        :param num_topics: number of topics
        
        """
        doc, topicId, distributions=[], [], []
        df_dim = document_influence_dim(num_topics = num_topics, DtmModel = DtmModel, time_seq = time_seq)
        for document in range(0, sum(time_seq)):
            for topic in range(0, num_topics):
                distribution = round(DtmModel.gamma_[document][topic], 4)
                doc.append(document)
                topicId.append(topic)
                distributions.append(distribution)
        return pd.DataFrame(list(zip(doc, topicId, distributions, df_dim.period)), columns=['document','topicId', 'distribution', 'period'])
    
    
    
    def visualize_topics(df):
        
        """
        function to vizualise mean topic distribution over defined periods.
        the topic distribution is defined by the average level by documents.
        :param num_topics: number of topics
        
        """
        fig, ax = plt.subplots(figsize=(30,10))
        df.groupby(['period', 'topicId'], sort=False).mean()['distribution'].unstack().plot(ax=ax,grid=True, linewidth =3.0, sharex=True)
        plt.ylabel("Topic Distribution", fontsize=16) 
        plt.xlabel("Period", fontsize=16) 
        plt.title("Topic evolution")
        plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), title = "Topics", fontsize='large', labelspacing=0.6, fancybox = True)
 

Это точно такая же инструкция. Когда я запускаю этот код, у него возникает проблема:

 topic_df = topic_distribution(num_topics=num_topics, DtmModel=DtmModel, time_seq=time_slice)
 

Ошибка:

 ---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-77-138aff41df8b> in <module>
----> 1 topic_df = topic_distribution(num_topics=num_topics, DtmModel=DtmModel, time_seq=time_slice)

<ipython-input-76-279f84a320c9> in topic_distribution(num_topics, DtmModel, time_seq)
     35     """
     36     doc, topicId, distributions=[], [], []
---> 37     df_dim = document_influence_dim(num_topics = num_topics, DtmModel = DtmModel, time_seq = time_seq)
     38     for document in range(0, sum(time_seq)):
     39         for topic in range(0, num_topics):

<ipython-input-76-279f84a320c9> in document_influence_dim(num_topics, DtmModel, time_seq)
     15         for t in range(len(time_seq)):
     16             for document in range(time_seq[t]):
---> 17                 distribution = round(DtmModel.influences_time[t][document][topic], 4)
     18                 # print(len(model.influences_time))
     19                 # print(len(model.influences_time[0]))

IndexError: list index out of range
 

Can anyone help me solving this error?