apache/submarine

URI is not absolute

Closed this issue · 1 comment

I have encountered two errors.

  • First, URI is not absolute

When I use beeline to connect, it returns the error log below, but this does not affect usage.

21/03/02 22:31:22 INFO policyengine.RangerPolicyRepository: This policy engine contains 1 policy evaluators
21/03/02 22:31:23 ERROR contextenricher.RangerTagEnricher$RangerTagRefresher: Encountered unexpected exception. Ignoring
submarine_spark_ranger_project.com.sun.jersey.api.client.ClientHandlerException: java.lang.IllegalArgumentException: URI is not absolute
        at submarine_spark_ranger_project.com.sun.jersey.client.urlconnection.URLConnectionClientHandler.handle(URLConnectionClientHandler.java:155)
        at submarine_spark_ranger_project.com.sun.jersey.api.client.Client.handle(Client.java:652)
        at submarine_spark_ranger_project.com.sun.jersey.api.client.WebResource.handle(WebResource.java:682)
        at submarine_spark_ranger_project.com.sun.jersey.api.client.WebResource.access$200(WebResource.java:74)
        at submarine_spark_ranger_project.com.sun.jersey.api.client.WebResource$Builder.get(WebResource.java:509)
        at org.apache.ranger.admin.client.RangerAdminRESTClient.getServiceTagsIfUpdated(RangerAdminRESTClient.java:311)
        at org.apache.ranger.plugin.contextenricher.RangerAdminTagRetriever.retrieveTags(RangerAdminTagRetriever.java:57)
        at org.apache.ranger.plugin.contextenricher.RangerTagEnricher$RangerTagRefresher.populateTags(RangerTagEnricher.java:606)
        at org.apache.ranger.plugin.contextenricher.RangerTagEnricher$RangerTagRefresher.access$000(RangerTagEnricher.java:524)
        at org.apache.ranger.plugin.contextenricher.RangerTagEnricher.init(RangerTagEnricher.java:127)
        at org.apache.ranger.plugin.policyengine.RangerPolicyRepository.buildContextEnricher(RangerPolicyRepository.java:783)
        at org.apache.ranger.plugin.policyengine.RangerPolicyRepository.init(RangerPolicyRepository.java:712)
        at org.apache.ranger.plugin.policyengine.RangerPolicyRepository.<init>(RangerPolicyRepository.java:187)
        at org.apache.ranger.plugin.policyengine.RangerPolicyEngineImpl.<init>(RangerPolicyEngineImpl.java:128)
        at org.apache.ranger.plugin.service.RangerBasePlugin.setPolicies(RangerBasePlugin.java:264)
        at org.apache.ranger.plugin.util.PolicyRefresher.loadPolicy(PolicyRefresher.java:222)
        at org.apache.ranger.plugin.util.PolicyRefresher.startRefresher(PolicyRefresher.java:149)
        at org.apache.ranger.plugin.service.RangerBasePlugin.init(RangerBasePlugin.java:222)
        at org.apache.submarine.spark.security.RangerSparkPlugin$.init(RangerSparkPlugin.scala:42)
        at org.apache.submarine.spark.security.RangerSparkPlugin$.<init>(RangerSparkPlugin.scala:57)
        at org.apache.submarine.spark.security.RangerSparkPlugin$.<clinit>(RangerSparkPlugin.scala)
        at org.apache.submarine.spark.security.RangerSparkAuthorizer$.org$apache$submarine$spark$security$RangerSparkAuthorizer$$getSparkResource(RangerSparkAuthorizer.scala:257)
        at org.apache.submarine.spark.security.RangerSparkAuthorizer$$anonfun$addAccessRequest$1$1.apply(RangerSparkAuthorizer.scala:75)
        at org.apache.submarine.spark.security.RangerSparkAuthorizer$$anonfun$addAccessRequest$1$1.apply(RangerSparkAuthorizer.scala:74)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
        at org.apache.submarine.spark.security.RangerSparkAuthorizer$.addAccessRequest$1(RangerSparkAuthorizer.scala:74)
        at org.apache.submarine.spark.security.RangerSparkAuthorizer$.checkPrivileges(RangerSparkAuthorizer.scala:98)
        at org.apache.spark.sql.catalyst.optimizer.SubmarineSparkRangerAuthorizationExtension.apply(SubmarineSparkRangerAuthorizationExtension.scala:65)
        at org.apache.spark.sql.catalyst.optimizer.SubmarineSparkRangerAuthorizationExtension.apply(SubmarineSparkRangerAuthorizationExtension.scala:40)
        at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
        at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
        at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
        at scala.collection.immutable.List.foldLeft(List.scala:84)
        at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
        at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
        at scala.collection.immutable.List.foreach(List.scala:392)
        at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
        at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:67)
        at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:67)
        at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:73)
        at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:69)
        at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:78)
        at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:78)
        at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withAction(Dataset.scala:3365)
        at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
        at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
        at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:643)
        at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:694)
        at org.apache.spark.sql.hive.thriftserver.SparkSQLSessionManager.openSession(SparkSQLSessionManager.scala:68)
        at org.apache.hive.service.cli.CLIService.openSessionWithImpersonation(CLIService.java:202)
        at org.apache.hive.service.cli.thrift.ThriftCLIService.getSessionHandle(ThriftCLIService.java:351)
        at org.apache.hive.service.cli.thrift.ThriftCLIService.OpenSession(ThriftCLIService.java:246)
        at org.apache.hive.service.cli.thrift.TCLIService$Processor$OpenSession.getResult(TCLIService.java:1253)
        at org.apache.hive.service.cli.thrift.TCLIService$Processor$OpenSession.getResult(TCLIService.java:1238)
        at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
        at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
        at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:53)
        at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: URI is not absolute
        at java.net.URI.toURL(URI.java:1088)
        at submarine_spark_ranger_project.com.sun.jersey.client.urlconnection.URLConnectionClientHandler._invoke(URLConnectionClientHandler.java:163)
        at submarine_spark_ranger_project.com.sun.jersey.client.urlconnection.URLConnectionClientHandler.handle(URLConnectionClientHandler.java:153)
        ... 61 more
21/03/02 22:31:23 INFO util.RangerResourceTrie: builderThreadCount is set to [1]
21/03/02 22:31:23 INFO resourcetrie.init: builderThreadCount is set to [1]
21/03/02 22:31:23 INFO service.RangerBasePlugin: Policies will NOT be reordered based on number of evaluations
21/03/02 22:31:23 INFO security.RangerSparkPlugin$: Policy cache directory successfully set to /opt/spark/spark-2.4.7-bin-hadoop2.6/policycache
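
For context, the "URI is not absolute" message is thrown by java.net.URI.toURL(), which only accepts absolute URIs (ones that carry a scheme such as http://). In the trace above, URLConnectionClientHandler._invoke calls toURL() on the request URI, so if the configured Ranger policy REST URL is empty or relative, the call fails exactly this way. A minimal sketch (not Submarine or Ranger code) reproducing the exception:

    import java.net.URI;

    public class UriNotAbsoluteDemo {
        public static void main(String[] args) throws Exception {
            // An absolute URI has a scheme, so toURL() succeeds
            // (ranger-host:6080 is a placeholder, as elsewhere in this thread):
            URI absolute = new URI("http://ranger-host:6080/service/plugins");
            System.out.println(absolute.toURL());

            // An empty string parses as a *relative* URI; toURL() then throws
            // IllegalArgumentException: URI is not absolute, matching the
            // "URI.toURL(URI.java:1088)" frame in the stack trace above.
            URI relative = new URI("");
            relative.toURL();
        }
    }
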
  • Second, java.lang.ClassNotFoundException: org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory
    When I execute a Spark SQL statement such as select * from table limit 3, it returns an error. But when I ran it a second time, it succeeded and returned the correct result:
21/03/02 22:34:14 ERROR session.SessionState: Error setting up authorization: java.lang.ClassNotFoundException: org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.ClassNotFoundException: org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory
        at org.apache.hadoop.hive.ql.metadata.HiveUtils.getAuthorizeProviderManager(HiveUtils.java:391)
        at org.apache.hadoop.hive.ql.session.SessionState.setupAuth(SessionState.java:720)
        at org.apache.hadoop.hive.ql.session.SessionState.getAuthenticator(SessionState.java:1391)
        at org.apache.hadoop.hive.ql.session.SessionState.getUserFromAuthenticator(SessionState.java:984)
        at org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(Table.java:177)
        at org.apache.hadoop.hive.ql.metadata.Table.<init>(Table.java:119)
        at org.apache.spark.sql.hive.client.HiveClientImpl$.toHiveTable(HiveClientImpl.scala:922)
        at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getPartitions$1.apply(HiveClientImpl.scala:665)
        at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getPartitions$1.apply(HiveClientImpl.scala:664)
        at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:277)
        at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:215)
        at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:214)
        at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:260)
        at org.apache.spark.sql.hive.client.HiveClientImpl.getPartitions(HiveClientImpl.scala:664)
        at org.apache.spark.sql.hive.client.HiveClient$class.getPartitions(HiveClient.scala:210)
        at org.apache.spark.sql.hive.client.HiveClientImpl.getPartitions(HiveClientImpl.scala:84)
        at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$listPartitions$1.apply(HiveExternalCatalog.scala:1195)
        at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$listPartitions$1.apply(HiveExternalCatalog.scala:1193)
        at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
        at org.apache.spark.sql.hive.HiveExternalCatalog.listPartitions(HiveExternalCatalog.scala:1193)
        at org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.listPartitions(ExternalCatalogWithListener.scala:246)
        at org.apache.spark.sql.catalyst.catalog.SessionCatalog.listPartitions(SessionCatalog.scala:948)
        at org.apache.spark.sql.hive.execution.HiveTableScanExec.rawPartitions$lzycompute(HiveTableScanExec.scala:178)
        at org.apache.spark.sql.hive.execution.HiveTableScanExec.rawPartitions(HiveTableScanExec.scala:166)
        at org.apache.spark.sql.hive.execution.HiveTableScanExec$$anonfun$11.apply(HiveTableScanExec.scala:192)
        at org.apache.spark.sql.hive.execution.HiveTableScanExec$$anonfun$11.apply(HiveTableScanExec.scala:192)
        at org.apache.spark.util.Utils$.withDummyCallSite(Utils.scala:2470)
        at org.apache.spark.sql.hive.execution.HiveTableScanExec.doExecute(HiveTableScanExec.scala:191)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
        at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247)
        at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:339)
        at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
        at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3389)
        at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2788)
        at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2788)
        at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
        at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
        at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
        at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withAction(Dataset.scala:3369)
        at org.apache.spark.sql.Dataset.collect(Dataset.scala:2788)
        at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:246)
        at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:175)
        at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:171)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1692)
        at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1.run(SparkExecuteStatementOperation.scala:185)
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException: org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory
        at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
        at java.lang.Class.forName0(Native Method)
        at java.lang.Class.forName(Class.java:348)
        at org.apache.hadoop.hive.ql.metadata.HiveUtils.getAuthorizeProviderManager(HiveUtils.java:381)
        ... 57 more
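
For reference, HiveUtils.getAuthorizeProviderManager (visible in the trace above) loads the class named by the hive.security.authorization.manager property. A plausible cause, assuming the hive-site.xml on Spark's classpath was copied from a Ranger-enabled Hive installation, is an entry like the following while the Ranger Hive plugin jars are absent (a hypothetical config, not taken from this cluster):

    <property>
        <name>hive.security.authorization.manager</name>
        <value>org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory</value>
    </property>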

This happens when the plugin tries to refresh tags.

From the log below, you can see the plugin creating a second RangerBasePlugin instance with plugin type ranger.plugin.hive instead of ranger.plugin.spark:

21/03/06 05:00:23 DEBUG RangerPolicyRepository: ==> RangerPolicyRepository.buildContextEnricher(RangerContextEnricherDef={itemId={1} name={TagEnricher} enricher={org.apache.ranger.plugin.contextenricher.RangerTagEnricher} enricherOptions={{tagRetrieverClassName=org.apache.ranger.plugin.contextenricher.RangerAdminTagRetriever, tagRefresherPollingInterval=60000}} })
21/03/06 05:00:23 DEBUG RangerTagEnricher: ==> RangerTagEnricher.init()
21/03/06 05:00:23 DEBUG RangerAbstractContextEnricher: ==> RangerAbstractContextEnricher.init(RangerContextEnricherDef={itemId={1} name={TagEnricher} enricher={org.apache.ranger.plugin.contextenricher.RangerTagEnricher} enricherOptions={{tagRetrieverClassName=org.apache.ranger.plugin.contextenricher.RangerAdminTagRetriever, tagRefresherPollingInterval=60000}} })
21/03/06 05:00:23 DEBUG RangerAbstractContextEnricher: <== RangerAbstractContextEnricher.init(RangerContextEnricherDef={itemId={1} name={TagEnricher} enricher={org.apache.ranger.plugin.contextenricher.RangerTagEnricher} enricherOptions={{tagRetrieverClassName=org.apache.ranger.plugin.contextenricher.RangerAdminTagRetriever, tagRefresherPollingInterval=60000}} })
21/03/06 05:00:23 DEBUG RangerBasePlugin: ==> RangerBasePlugin.createAdminClient(spark_service, sparkSql, ranger.plugin.hive)
21/03/06 05:00:23 DEBUG RangerBasePlugin: Value for property[ranger.plugin.hive.policy.source.impl] was null or empty. Unexpected! Will use policy source of type[org.apache.ranger.admin.client.RangerAdminRESTClient]
21/03/06 05:00:23 DEBUG RangerAdminRESTClient: ==> RangerAdminRESTClient.init(, null)
21/03/06 05:00:23 DEBUG RangerAdminRESTClient: <== RangerAdminRESTClient.init(, null)
21/03/06 05:00:23 DEBUG RangerBasePlugin: <== RangerBasePlugin.createAdminClient(spark_service, sparkSql, ranger.plugin.hive): policySourceImpl=null, client=org.apache.ranger.admin.client.RangerAdminRESTClient@2e1e0032
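
Ranger resolves plugin configuration keys from the service type a plugin was initialized with, so a plugin created under ranger.plugin.hive only reads ranger.plugin.hive.* properties and never sees the ranger.plugin.spark.* entries defined in ranger-spark-security.xml. A rough illustration of this key-prefix convention (an illustrative helper, not Ranger source):

    public class RangerKeyPrefixDemo {
        // Hypothetical helper mirroring the "ranger.plugin.<serviceType>." convention
        static String key(String serviceType, String suffix) {
            return "ranger.plugin." + serviceType + "." + suffix;
        }

        public static void main(String[] args) {
            // What ranger-spark-security.xml defines:
            System.out.println(key("spark", "policy.rest.url"));
            // What the tag refresher above actually looks up:
            System.out.println(key("hive", "policy.rest.url"));
        }
    }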

So I was able to fix/mitigate the issue by adding the following entries to ranger-spark-security.xml:

    <property>
        <name>ranger.plugin.hive.policy.source.impl</name>
        <value>org.apache.ranger.admin.client.RangerAdminRESTClient</value>
    </property>
    <property>
        <name>ranger.plugin.hive.policy.rest.url</name>
        <value>http://ranger-host:6080</value>
    </property>
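
With these two hive-prefixed entries in place, the admin client created by the tag refresher should resolve an absolute policy REST URL instead of an empty one, so URI.toURL() no longer fails. Note that http://ranger-host:6080 is a placeholder; it should point at the same Ranger Admin instance used for the ranger.plugin.spark.* settings.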