Коннектор MongoDB для Spark на Python в ноутбуке Jupyter

#python #mongodb #apache-spark #pyspark #jupyter-notebook

#python #mongodb #apache-spark #pyspark #jupyter-ноутбук

Вопрос:

Я столкнулся со следующей проблемой:

Код

 import findspark
import os
# Tell findspark where the local Spark and Java installations live.
# Raw strings keep the Windows backslashes literal instead of letting
# Python interpret them as escape sequences.
os.environ["SPARK_HOME"] = r"C:\Spark\spark-3.0.0-bin-hadoop2.7"
os.environ["JAVA_HOME"] = r"C:\Program Files\Java\java-1.8.0-openjdk"
findspark.init()

# pyspark is imported only after findspark.init(), same order as the original
# script.
from pyspark.sql import SparkSession
from pyspark import SparkConf, SparkContext

# Parentheses replace the lost backslash continuations so the chained calls
# parse as a single expression.
conf = (
    SparkConf()
    .setAppName("mongoconnect")
    .setMaster("spark://192.168.1.102:7077")
)

# Query parameters in the MongoDB URIs are separated by "&" (the scraped text
# had the HTML-entity residue "amp;" in its place).
spark = (
    SparkSession.builder
    .config(conf=conf)
    .config("spark.mongodb.input.uri", "mongodb://localhost:27017/?readPreference=primary&appname=MongoDB Compass&ssl=false")
    .config("spark.mongodb.input.database", "test")
    .config("spark.mongodb.output.uri", "mongodb://localhost:27017/?readPreference=primary&appname=MongoDB Compass&ssl=false")
    .config("spark.mongodb.output.database", "test")
    .config("spark.jars.packages", "org.mongodb.spark:mongo-spark-connector_2.12:3.0.0")
    .getOrCreate()
)
    
  

Ошибка

     Exception                                 Traceback (most recent call last)
    <ipython-input-3-8471339e4966> in <module>
          6         .config("spark.mongodb.output.uri", "mongodb://localhost:27017/?readPreference=primary&appname=MongoDB Compass&ssl=false") \
          7         .config("spark.mongodb.output.database", "test") \
    ----> 8         .config("spark.jars.packages", "org.mongodb.spark:mongo-spark-connector_2.12:3.0.0") \
          9         .getOrCreate()
    
    C:\Spark\spark-3.0.0-bin-hadoop2.7\python\pyspark\sql\session.py in getOrCreate(self)
        184                             sparkConf.set(key, value)
        185                         # This SparkContext may be an existing one.
    --> 186                         sc = SparkContext.getOrCreate(sparkConf)
        187                     # Do not update `SparkConf` for existing `SparkContext`, as it's shared
        188                     # by all sessions.
    
    C:\Spark\spark-3.0.0-bin-hadoop2.7\python\pyspark\context.py in getOrCreate(cls, conf)
        369         with SparkContext._lock:
        370             if SparkContext._active_spark_context is None:
    --> 371                 SparkContext(conf=conf or SparkConf())
        372             return SparkContext._active_spark_context
        373 
    
    C:\Spark\spark-3.0.0-bin-hadoop2.7\python\pyspark\context.py in __init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
        126                 " is not allowed as it is a security risk.")
        127 
    --> 128         SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
        129         try:
        130             self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
    
    C:\Spark\spark-3.0.0-bin-hadoop2.7\python\pyspark\context.py in _ensure_initialized(cls, instance, gateway, conf)
        318         with SparkContext._lock:
        319             if not SparkContext._gateway:
    --> 320                 SparkContext._gateway = gateway or launch_gateway(conf)
        321                 SparkContext._jvm = SparkContext._gateway.jvm
        322 
    
    C:\Spark\spark-3.0.0-bin-hadoop2.7\python\pyspark\java_gateway.py in launch_gateway(conf, popen_kwargs)
        103 
        104             if not os.path.isfile(conn_info_file):
    --> 105                 raise Exception("Java gateway process exited before sending its port number")
        106 
        107             with open(conn_info_file, "rb") as info:
    
    Exception: Java gateway process exited before sending its port number
    
        enter code here