JohnSnowLabs/spark-nlp-workshop

Py4JJavaError : An error occurred while calling z:com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.downloadModel. : java.lang.UnsupportedOperationException: empty collection

pyturn opened this issue · 1 comment

Hi,

I am facing this error when I try to load the Clinical Word Embeddings model. The same code used to run earlier with no errors in the same environment.

import pandas as pd
from pyspark.ml import Pipeline
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
from pyspark.sql.types import *
import sparknlp
from sparknlp.annotator import *
from sparknlp_jsl.annotator import *
from sparknlp.base import *
import sparknlp_jsl

word_embeddings = WordEmbeddingsModel.pretrained('embeddings_healthcare', 'en', 'clinical/models') \ .setInputCols(['sentence', 'token']) \ .setOutputCol('embeddings')

PySpark Version - 2.4.4
SparkNLP Version - 2.6.4
Java Version - "1.8.0_45"

Here are the complete details of the error:

embeddings_clinical download started this may take some time.
Approximate size to download 1.6 GB
[OK!]

Py4JJavaError Traceback (most recent call last)
in
15 # .setOutputCol('embeddings')
16
---> 17 word_embeddings = WordEmbeddingsModel.pretrained('embeddings_clinical', 'en', 'clinical/models')
18 .setInputCols(['sentence', 'token'])
19 .setOutputCol('embeddings')

~/anaconda3/envs/pytorch_p36/lib/python3.7/site-packages/sparknlp/annotator.py in pretrained(name, lang, remote_loc)
1745 def pretrained(name="glove_100d", lang="en", remote_loc=None):
1746 from sparknlp.pretrained import ResourceDownloader
-> 1747 return ResourceDownloader.downloadModel(WordEmbeddingsModel, name, lang, remote_loc)
1748
1749 @staticmethod

~/anaconda3/envs/pytorch_p36/lib/python3.7/site-packages/sparknlp/pretrained.py in downloadModel(reader, name, language, remote_loc, j_dwn)
39 t1.start()
40 try:
---> 41 j_obj = _internal._DownloadModel(reader.name, name, language, remote_loc, j_dwn).apply()
42 finally:
43 stop_threads = True

~/anaconda3/envs/pytorch_p36/lib/python3.7/site-packages/sparknlp/internal.py in init(self, reader, name, language, remote_loc, validator)
174 class _DownloadModel(ExtendedJavaWrapper):
175 def init(self, reader, name, language, remote_loc, validator):
--> 176 super(_DownloadModel, self).init("com.johnsnowlabs.nlp.pretrained."+validator+".downloadModel", reader, name, language, remote_loc)
177
178

~/anaconda3/envs/pytorch_p36/lib/python3.7/site-packages/sparknlp/internal.py in init(self, java_obj, *args)
127 super(ExtendedJavaWrapper, self).init(java_obj)
128 self.sc = SparkContext._active_spark_context
--> 129 self._java_obj = self.new_java_obj(java_obj, *args)
130 self.java_obj = self._java_obj
131

~/anaconda3/envs/pytorch_p36/lib/python3.7/site-packages/sparknlp/internal.py in new_java_obj(self, java_class, *args)
137
138 def new_java_obj(self, java_class, *args):
--> 139 return self._new_java_obj(java_class, *args)
140
141 def new_java_array(self, pylist, java_class):

~/anaconda3/envs/pytorch_p36/lib/python3.7/site-packages/pyspark/ml/wrapper.py in _new_java_obj(java_class, *args)
65 java_obj = getattr(java_obj, name)
66 java_args = [_py2java(sc, arg) for arg in args]
---> 67 return java_obj(*java_args)
68
69 @staticmethod

~/anaconda3/envs/pytorch_p36/lib/python3.7/site-packages/py4j/java_gateway.py in call(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:

~/anaconda3/envs/pytorch_p36/lib/python3.7/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
61 def deco(*a, **kw):
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
65 s = e.java_exception.toString()

~/anaconda3/envs/pytorch_p36/lib/python3.7/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(

Py4JJavaError: An error occurred while calling z:com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.downloadModel.
: java.lang.UnsupportedOperationException: empty collection
at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1380)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.first(RDD.scala:1377)
at org.apache.spark.ml.util.DefaultParamsReader$.loadMetadata(ReadWrite.scala:615)
at org.apache.spark.ml.util.DefaultParamsReader.load(ReadWrite.scala:493)
at com.johnsnowlabs.nlp.FeaturesReader.load(ParamsAndFeaturesReadable.scala:12)
at com.johnsnowlabs.nlp.FeaturesReader.load(ParamsAndFeaturesReadable.scala:8)
at com.johnsnowlabs.nlp.pretrained.ResourceDownloader$.downloadModel(ResourceDownloader.scala:361)
at com.johnsnowlabs.nlp.pretrained.ResourceDownloader$.downloadModel(ResourceDownloader.scala:355)
at com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader$.downloadModel(ResourceDownloader.scala:469)
at com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.downloadModel(ResourceDownloader.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:745)

The issue was resolved by downloading the model and then loading it from the saved copy.