harsha2010/magellan

Indexing issue on spatial joins

khajaasmath786 opened this issue · 1 comment

I used the code below to create indexes:

import sqlCont.implicits._
import org.apache.spark.sql.magellan.dsl.expressions._
import org.apache.spark.sql.types._
import magellan.Utils.injectRules

// Index the points at precision 30, then join against the road polygons.
val filteredRoadDS = geoFenceWithPointDF.index(30)
  .join(roadsPolygonDS.select("polygon", "metadata", "metadata.road"),
        $"point" within $"polygon")

The error I am getting is:

Exception in thread "main" java.lang.AssertionError: assertion failed: No plan for SpatialJoinHint Map(magellan.index.precision -> 30)
+- Project [vin#148, providerdesc#149, utc_time#150, longitude#151, latitude#152, route_index#153, dist_mi#154, speed_mph#155, accel_mphps#156, bearing#157, dt_sec#158, night_day#159, elevation_m#160, is_raining_ratio#161, hourlyprecip_avg#162, drybulbfarenheit_avg#163, windspeed_avg#164, winddirection_avg#165, stationpressure_avg#166, pointconverter(cast(longitude#151 as double), cast(latitude#152 as double)) AS point#232]
+- LogicalRDD [vin#148, providerdesc#149, utc_time#150, longitude#151, latitude#152, route_index#153, dist_mi#154, speed_mph#155, accel_mphps#156, bearing#157, dt_sec#158, night_day#159, elevation_m#160, is_raining_ratio#161, hourlyprecip_avg#162, drybulbfarenheit_avg#163, windspeed_avg#164, winddirection_avg#165, stationpressure_avg#166]

at scala.Predef$.assert(Predef.scala:170)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:74)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:66)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:74)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:66)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:74)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:66)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:79)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:75)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:84)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:84)
at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2791)
at org.apache.spark.sql.Dataset.head(Dataset.scala:2112)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2327)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:248)
at org.apache.spark.sql.Dataset.show(Dataset.scala:636)
at org.apache.spark.sql.Dataset.show(Dataset.scala:595)
at org.apache.spark.sql.Dataset.show(Dataset.scala:604)
at com.navistar.telematics.datascience.drivers.GeoLocationMLDriverIndex$.joinDataPointWithRoads(GeoLocationMLDriverIndex.scala:272)
at com.navistar.telematics.datascience.drivers.GeoLocationMLDriverIndex$.apply(GeoLocationMLDriverIndex.scala:204)
at com.navistar.telematics.datascience.drivers.GeoLocationMLDriverIndex$$anonfun$main$1.apply$mcV$sp(GeoLocationMLDriverIndex.scala:124)
at com.navistar.telematics.datascience.drivers.GeoLocationMLDriverIndex$$anonfun$main$1.apply(GeoLocationMLDriverIndex.scala:124)
at com.navistar.telematics.datascience.drivers.GeoLocationMLDriverIndex$$anonfun$main$1.apply(GeoLocationMLDriverIndex.scala:124)
at scala.util.Try$.apply(Try.scala:192)
at com.navistar.telematics.utils.TimeTracker$.apply(TimeTracker.scala:12)
at com.navistar.telematics.datascience.drivers.GeoLocationMLDriverIndex$.main(GeoLocationMLDriverIndex.scala:124)
at com.navistar.telematics.datascience.drivers.GeoLocationMLDriverIndex.main(GeoLocationMLDriverIndex.scala)

Can anyone guide me on how to resolve this?

You should call Utils.injectRules(sparkSession)
before using indexes. I see you importing injectRules, but you never actually call it.
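
For example, a minimal sketch, assuming `spark` is your active SparkSession and the DataFrames are named as in your snippet:

import org.apache.spark.sql.magellan.dsl.expressions._
import magellan.Utils.injectRules

// Register Magellan's spatial join rules with the session. Without this,
// the planner has no strategy for the SpatialJoinHint node and fails with
// "No plan for SpatialJoinHint" as in your stack trace.
injectRules(spark)

// With the rules injected, the indexed spatial join can now be planned.
val filteredRoadDS = geoFenceWithPointDF.index(30)
  .join(roadsPolygonDS.select("polygon", "metadata", "metadata.road"),
        $"point" within $"polygon")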