#python #python-3.x #apache-spark #pyspark #py4j
Вопрос:
Когда я выполнял метод наименьших квадратов (ALS) для факторизации матрицы, я столкнулся с ошибкой Java-сервера в Spark. Я не понимаю, почему это происходит; ниже приведено сообщение об ошибке.
Exception happened during processing of request from ('127.0.0.1', 61711)
Traceback (most recent call last):
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/socketserver.py", line 316, in _handle_request_noblock
self.process_request(request, client_address)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/socketserver.py", line 347, in process_request
self.finish_request(request, client_address)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/socketserver.py", line 360, in finish_request
self.RequestHandlerClass(request, client_address, self)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/socketserver.py", line 720, in __init__
self.handle()
File "/usr/local/opt/apache-spark/libexec/python/pyspark/accumulators.py", line 262, in handle
poll(accum_updates)
File "/usr/local/opt/apache-spark/libexec/python/pyspark/accumulators.py", line 235, in poll
if func():
File "/usr/local/opt/apache-spark/libexec/python/pyspark/accumulators.py", line 239, in accum_updates
num_updates = read_int(self.rfile)
File "/usr/local/opt/apache-spark/libexec/python/pyspark/serializers.py", line 564, in read_int
raise EOFError
EOFError
----------------------------------------
ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:61698)
Traceback (most recent call last):
File "/Users/zhengyangzhang/Library/Python/3.8/lib/python/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-3-deb1baad55f8>", line 15, in <module>
MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
File "/usr/local/opt/apache-spark/libexec/python/pyspark/rdd.py", line 1386, in mean
return self.stats().mean()
File "/usr/local/opt/apache-spark/libexec/python/pyspark/rdd.py", line 1245, in stats
return self.mapPartitions(lambda i: [StatCounter(i)]).reduce(redFunc)
File "/usr/local/opt/apache-spark/libexec/python/pyspark/rdd.py", line 997, in reduce
vals = self.mapPartitions(func).collect()
File "/usr/local/opt/apache-spark/libexec/python/pyspark/rdd.py", line 949, in collect
sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1304, in __call__
return_value = get_return_value(
File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 326, in get_return_value
raise Py4JJavaError(
py4j.protocol.Py4JJavaError: <unprintable Py4JJavaError object>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/zhengyangzhang/Library/Python/3.8/lib/python/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
stb = value._render_traceback_()
AttributeError: 'Py4JJavaError' object has no attribute '_render_traceback_'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1207, in send_command
raise Py4JNetworkError("Answer from Java side is empty")
py4j.protocol.Py4JNetworkError: Answer from Java side is empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1033, in send_command
response = connection.send_command(command)
File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1211, in send_command
raise Py4JNetworkError(
py4j.protocol.Py4JNetworkError: Error while receiving
Приведенное ниже сообщение повторялось сотни раз в моем блокноте Jupyter.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 977, in _get_connection
connection = self.deque.pop()
IndexError: pop from an empty deque
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1115, in start
self.socket.connect((self.address, self.port))
ConnectionRefusedError: [Errno 61] Connection refused
Я задаю этот вопрос, потому что просто не понимаю, почему это происходит. Если кто-то может объяснить мне причину и предложить возможное решение этой проблемы, я буду очень признателен.
Ниже приведен список моих пакетов импорта и настроек:
import findspark
findspark.init()
from pyspark import SparkContext
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating
Я прочитал в Интернете, что пакет findspark может помочь решить эту проблему, поэтому я установил apache-spark через Homebrew и поставил этот пакет, но проблема всё равно возникает.