Stratio/Spark-MongoDB

Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times

odigetti opened this issue · 0 comments

I'm working on a Spark ML sentiment analysis platform that uses a mobile feedback app as its data source. The data (feedback) is stored in MongoDB, and I want to read it with Spark. I tried to connect Spark to MongoDB, but with no luck; I keep getting this error:
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): java.lang.NoSuchMethodError: org.apache.spark.sql.catalyst.analysis.TypeCoercion$.findTightestCommonTypeOfTwo()Lscala/Function2;
at com.stratio.datasource.mongodb.schema.MongodbSchema.com$stratio$datasource$mongodb$schema$MongodbSchema$$compatibleType(MongodbSchema.scala:85)
at com.stratio.datasource.mongodb.schema.MongodbSchema$$anonfun$3.apply(MongodbSchema.scala:46)
at com.stratio.datasource.mongodb.schema.MongodbSchema$$anonfun$3.apply(MongodbSchema.scala:46)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:189)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:188)
at org.apache.spark.util.collection.AppendOnlyMap.changeValue(AppendOnlyMap.scala:150)
at org.apache.spark.util.collection.SizeTrackingAppendOnlyMap.changeValue(SizeTrackingAppendOnlyMap.scala:32)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:194)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:63)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1587)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1586)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1586)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1820)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2027)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2124)
at org.apache.spark.rdd.RDD$$anonfun$aggregate$1.apply(RDD.scala:1118)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.aggregate(RDD.scala:1111)
at com.stratio.datasource.mongodb.schema.MongodbSchema.schema(MongodbSchema.scala:46)
at com.stratio.datasource.mongodb.MongodbRelation.com$stratio$datasource$mongodb$MongodbRelation$$lazySchema$lzycompute(MongodbRelation.scala:63)
at com.stratio.datasource.mongodb.MongodbRelation.com$stratio$datasource$mongodb$MongodbRelation$$lazySchema(MongodbRelation.scala:60)
at com.stratio.datasource.mongodb.MongodbRelation$$anonfun$1.apply(MongodbRelation.scala:65)
at com.stratio.datasource.mongodb.MongodbRelation$$anonfun$1.apply(MongodbRelation.scala:65)
at scala.Option.getOrElse(Option.scala:121)
at com.stratio.datasource.mongodb.MongodbRelation.<init>(MongodbRelation.scala:65)
at com.stratio.datasource.mongodb.DefaultSource.createRelation(DefaultSource.scala:36)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:340)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:239)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:227)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:164)
at com.spml.MongoDbSparkCon.main(MongoDbSparkCon.java:36)
Caused by: java.lang.NoSuchMethodError: org.apache.spark.sql.catalyst.analysis.TypeCoercion$.findTightestCommonTypeOfTwo()Lscala/Function2;
at com.stratio.datasource.mongodb.schema.MongodbSchema.com$stratio$datasource$mongodb$schema$MongodbSchema$$compatibleType(MongodbSchema.scala:85)
at com.stratio.datasource.mongodb.schema.MongodbSchema$$anonfun$3.apply(MongodbSchema.scala:46)
at com.stratio.datasource.mongodb.schema.MongodbSchema$$anonfun$3.apply(MongodbSchema.scala:46)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:189)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:188)
at org.apache.spark.util.collection.AppendOnlyMap.changeValue(AppendOnlyMap.scala:150)
at org.apache.spark.util.collection.SizeTrackingAppendOnlyMap.changeValue(SizeTrackingAppendOnlyMap.scala:32)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:194)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:63)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
2018-05-28 18:54:24 INFO DAGScheduler:54 - Job 0 failed: aggregate at MongodbSchema.scala:46, took 0,855790 s

My Spark code (I'm using Java):

import java.util.HashMap;
import java.util.Map;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

public class MongoDbSparkCon {

    public static void main(String[] args) {

        JavaSparkContext sc = new JavaSparkContext("local[*]", "test spark-mongodb java");
        SQLContext sqlContext = new SQLContext(sc);

        Map<String, String> options = new HashMap<>();
        options.put("host", "localhost:27017");
        options.put("database", "feedbackuib");
        options.put("collection", "Feedback");

        // The job fails inside load(), while the Stratio data source is inferring the schema.
        Dataset<Row> df = sqlContext.read().format("com.stratio.datasource.mongodb").options(options).load();
        df.registerTempTable("Feedback");
        sqlContext.sql("SELECT * FROM Feedback");
        df.show();
    }
}
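
If I read the stack trace correctly, this looks like a binary-compatibility problem rather than a bug in the code itself: the latest Stratio release appears to target Spark 1.x, and TypeCoercion$.findTightestCommonTypeOfTwo no longer exists in Spark 2.2, hence the NoSuchMethodError during schema inference. As a possible workaround, below is a minimal sketch of the same read done through the official mongo-spark-connector that is already in my pom. The class name MongoSparkConnectorRead is just a placeholder of mine, and the sketch assumes Spark 2.2.0 with the _2.11 build of the connector (see the dependency note after the pom):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class MongoSparkConnectorRead {

    public static void main(String[] args) {

        // The input URI carries the host, database, and collection in one setting.
        SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("test mongo-spark-connector java")
                .config("spark.mongodb.input.uri", "mongodb://localhost:27017/feedbackuib.Feedback")
                .getOrCreate();

        // Fully qualified data source name of the official MongoDB connector.
        Dataset<Row> df = spark.read().format("com.mongodb.spark.sql.DefaultSource").load();

        df.createOrReplaceTempView("Feedback");
        spark.sql("SELECT * FROM Feedback").show();

        spark.stop();
    }
}

Because the URI already names the database and collection, no per-read options are needed here.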

pom.xml:

<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-mllib_2.11</artifactId>
    <version>2.3.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>2.3.0</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.mongodb.spark</groupId>
    <artifactId>mongo-spark-connector_2.10</artifactId>
    <version>2.2.2</version>
</dependency>

<!-- https://mvnrepository.com/artifact/com.stratio.datasource/spark-mongodb -->
<dependency>
    <groupId>com.stratio.datasource</groupId>
    <artifactId>spark-mongodb_2.11</artifactId>
    <version>0.12.0</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.mongodb/mongo-java-driver -->
<dependency>
    <groupId>org.mongodb</groupId>
    <artifactId>mongo-java-driver</artifactId>
    <version>3.6.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.mongodb/casbah-core -->
<dependency>
    <groupId>org.mongodb</groupId>
    <artifactId>casbah-core_2.11</artifactId>
    <version>3.1.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.mongodb/casbah-commons -->
<dependency>
    <groupId>org.mongodb</groupId>
    <artifactId>casbah-commons_2.11</artifactId>
    <version>3.1.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.mongodb/casbah-query -->
<dependency>
    <groupId>org.mongodb</groupId>
    <artifactId>casbah-query_2.11</artifactId>
    <version>3.1.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.scala-lang/scala-library -->
<dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-library</artifactId>
    <version>2.11.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.typesafe.akka/akka-actor -->
<dependency>
    <groupId>com.typesafe.akka</groupId>
    <artifactId>akka-actor_2.11</artifactId>
    <version>2.5.12</version>
</dependency>
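
Two version mismatches in the list above look suspicious to me. First, the Spark artifacts mix 2.2.0 (spark-core, spark-sql) with 2.3.0 (spark-mllib, spark-streaming); pinning all four to a single version seems safer. Second, mongo-spark-connector is pulled in as the _2.10 (Scala 2.10) build while every other Scala artifact is _2.11, which mixes Scala binary versions on one classpath. A sketch of the corrected connector dependency, assuming I standardize on Scala 2.11 and keep connector 2.2.2:

<!-- assumption: align the official connector with the Scala 2.11 builds above -->
<dependency>
    <groupId>org.mongodb.spark</groupId>
    <artifactId>mongo-spark-connector_2.11</artifactId>
    <version>2.2.2</version>
</dependency>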

Any help would be appreciated!