Error on collect() in Spark 2.0 standalone mode

#scala #apache-spark #rdd #apache-spark-standalone

Question:

I am working with Spark 2.0 (Scala) and the Play framework. I run the application in standalone mode from IntelliJ IDEA. My application works fine with a local master:

 object Utilities {

  val master = "local"
  //-------machine learning algorithm
  val modelFolder = "Model"    
  val linearModel = "LinearRegression"
  val logisticModel = "LogisticRegression"
  val kmeansModel = "Kmeans"
  val decisionTreeModel = "DecisionTree"
}
 

but when I switch to standalone cluster mode, it does not work:

 object Utilities {

  val master = "spark://ubuntu:7077"
  //-------machine learning algorithm
  val modelFolder = "Model"
  val linearModel = "LinearRegression"
  val logisticModel = "LogisticRegression"
  val kmeansModel = "Kmeans"
  val decisionTreeModel = "DecisionTree"
}
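
The question never shows how this master string is consumed; presumably the session is built from it along these lines (a minimal sketch only, assuming a standard SparkSession setup; the appName is hypothetical):

 import org.apache.spark.sql.SparkSession

 // Sketch, not the app's real code: the question does not show the builder,
 // but the master string would typically be passed in here.
 val spark = SparkSession.builder()
   .appName("PlaySparkApp")     // hypothetical application name
   .master(Utilities.master)    // "local" locally, "spark://ubuntu:7077" on the cluster
   .getOrCreate()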
 

I ran all of my code through the spark-shell in cluster mode, and everything was fine.
I tried to find what is wrong with my code and found that every rdd.map(...).collect action simply fails.

This is where my application crashes:

SimpleCompute.scala

 var tick = convertDF.rdd.map(row => row.getDouble(0))
 var mean = tick.collect()
 

Regression.scala

 val residualArr = residuals.rdd.map(r => Math.round(r(0).toString.toDouble * 1000.0) / 1000.0).collect
 val resStr = "[" + residualArr.mkString(",") + "]"

 val predictArr = trainingSummary.predictions.select("prediction").rdd.map(r => Math.round(r(0).toString.toDouble * 1000.0) / 1000.0).collect
 val predStr = "[" + predictArr.mkString(",") + "]"

 val labelArr = trainingSummary.predictions.select("label").rdd.map(r => r(0)).collect()
 var labelStr = "[" + labelArr.mkString(",") + "]"
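
Each r => ... lambda above compiles to an anonymous class such as controllers.Regression$$anonfun$..., which is exactly what the executors fail to load in the traces below. For comparison, the residuals line could be written without shipping any user closure, using the built-in round column function (a sketch only, assuming the summary's residuals column is named "residuals"):

 import org.apache.spark.sql.functions.{col, round}

 // Sketch: round() is a built-in column function, so no application class
 // has to be shipped to the executors; the map over the collected Rows
 // runs on the driver only. Column name "residuals" is an assumption.
 val residualArr = residuals
   .select(round(col("residuals"), 3))
   .collect()
   .map(_.getDouble(0))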
 

I get this ERROR message:

 [error] o.a.s.s.TaskSetManager - Task 0 in stage 20.0 failed 4 times; aborting job
(org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 20.0 failed 4 times, most recent failure: Lost task 0.3 in stage 20.0 (TID 24, 10.211.55.8): java.lang.ClassNotFoundException: controllers.util.SimpleCompute$$anonfun$4
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1620)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1521)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1781)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:373)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:85)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
 

and this:

 [error] o.a.s.s.TaskSetManager - Task 0 in stage 24.0 failed 4 times; aborting job
error in Linear Regression: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 24.0 failed 4 times, most recent failure: Lost task 0.3 in stage 24.0 (TID 34, 10.211.55.8): java.lang.ClassNotFoundException: controllers.Regression$$anonfun$callRegression$1$$anonfun$apply$2$$anonfun$1
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1620)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1521)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1781)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:373)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:85)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)

Driver stacktrace:
 

Please help me with this problem.

spark: 2.0.0

scala: 2.11.8

Play: 2.5.x
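
From the traces, the worker JVMs cannot load the application's own classes, which usually means no application jar is shipped to the executors when the app is launched from the IDE. A candidate fix, as a sketch only and not a confirmed answer: package the application classes into a jar (e.g. with sbt package or an assembly) and list it in spark.jars so the executors can load the $$anonfun classes. The jar path below is illustrative.

 import org.apache.spark.sql.SparkSession

 // Sketch of a possible fix: ship the packaged application jar to the
 // executors. The path is illustrative; it must point at a jar that
 // actually contains the controllers.* classes.
 val spark = SparkSession.builder()
   .appName("PlaySparkApp")                            // assumed name
   .master("spark://ubuntu:7077")
   .config("spark.jars", "/path/to/app-classes.jar")   // illustrative path
   .getOrCreate()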

Comments:

1. Did you ever solve this problem?

2. Still working on it.