#python #python-3.x #nltk
Вопрос:
Ниже приведен код, который выдает ошибку значения Python: слишком много значений для распаковки (ожидаемое решение 2)
import math
import os
import random
import re
import sys
import zipfile
os.environ['NLTK_DATA'] = os.getcwd() "/nltk_data"
import nltk
from nltk import word_tokenize, ConditionalFreqDist, Text
from nltk.tokenize import regexp_tokenize
from nltk.corpus import stopwords
def performBigramsAndCollocations(textcontent, word):
# Write your code here
tokenizedwords = regexp_tokenize(textcontent, "[w] ")
tokenizedwords = [x.lower() for x in tokenizedwords if x != '']
tokenizedwordsbigrams = list(nltk.bigrams(tokenizedwords))
stop_words = stopwords.words('english')
lc_stop_words = [word.lower() for word in stop_words]
tokenizednonstopwordsbigrams = [(n1,n2) for n1, n2 in tokenizedwords if n1 not in lc_stop_words and n2 not in lc_stop_words]
cfd_bigrams = ConditionalFreqDist(tokenizednonstopwordsbigrams)
mostfrequentwordafter = cfd_bigrams[word].most_common(3)
words = Text(tokenizedwords)
collectionwords = words.collection_list()
return mostfrequentwordafter, collectionwords
if __name__ == '__main__':
textcontent = input()
word = input()
if not os.path.exists(os.getcwd() "/nltk_data"):
with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
zip_ref.extractall(os.getcwd())
mostfrequentwordafter, collocationwords = performBigramsAndCollocations(textcontent, word)
print(sorted(mostfrequentwordafter, key=lambda element: (element[1], element[0]), reverse=True))
print(sorted(collocationwords))
Сообщение об ошибке для данного кода
Произошла ошибка для следующей строки
, обозначенной как nonstopwordsbigrams = [(n1,n2) для n1, n2 в обозначенных словах, если n1 не в lc_stop_words и n2 не в lc_stop_words]
Traceback (most recent call last):
File "Solution.py", line 48, in <module>
mostfrequentwordafter, collocationwords = performBigramsAndCollocations(textcontent, word)
File "Solution.py", line 31, in performBigramsAndCollocations
tokenizednonstopwordsbigrams = [(n1,n2) for n1, n2 in tokenizedwords if n1 not in lc_stop_words and n2 not in lc_stop_words]
File "Solution.py", line 31, in <listcomp>
tokenizednonstopwordsbigrams = [(n1,n2) for n1, n2 in tokenizedwords if n1 not in lc_stop_words and n2 not in lc_stop_words]
ValueError: too many values to unpack (expected 2)
Комментарии:
1. В строке 31 вы хотели использовать
tokenizedwordsbigrams
вместоtokenizedwords
2. да, я думаю, в строке 31, использование
tokenizedwordsbigrams
вместоtokenizedwords
может сработать