huggingface/notebooks

Separate model and inference script tar.gz not working.

siddharthsahu opened this issue

In reference to: https://github.com/huggingface/notebooks/blob/main/sagemaker/17_custom_inference_script/sagemaker-notebook.ipynb

For the HF diffusers library we need to upload a custom inference script. The issue with bundling the model and code together is that it takes a lot of time to create and push model.tar.gz every time the code changes.

When I bundle my model and code together (like below), things work fine.

huggingface_model = HuggingFaceModel(
   model_data="s3://abc/xyz/model.tar.gz",      # s3 path having both model and code
   role=role,                    # iam role with permissions to create an Endpoint
   transformers_version="4.17",  # transformers version used
   pytorch_version="1.10",       # pytorch version used
   py_version='py38',            # python version used
)
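
For reference, the endpoint is then deployed the same way in both setups; the call below mirrors the one shown in the traceback further down.

# deploy the endpoint
predictor = huggingface_model.deploy(
   initial_instance_count=1,
   instance_type="ml.g4dn.xlarge"
)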

On extracting model.tar.gz, the folder structure looks like this:

model.tar.gz
  |- stable-diffusion
  |- controlnet
  |- code

As mentioned before, this works fine.
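
For context, the combined archive is produced roughly like this; a minimal sketch assuming the stable-diffusion, controlnet and code directories sit under a local model/ folder (the local paths are hypothetical).

import tarfile

# Bundle model weights and inference code into a single model.tar.gz,
# adding each directory at the archive root to match the layout above.
with tarfile.open("model.tar.gz", "w:gz") as tar:
    for directory in ["stable-diffusion", "controlnet", "code"]:
        tar.add(f"model/{directory}", arcname=directory)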

Since bundling the code and model into one archive makes archiving and uploading very slow, I want to decouple the model and code, and tried the below.

huggingface_model = HuggingFaceModel(
   model_data="s3://abc/xyz/model.tar.gz",      # path to your model
   source_dir="s3://abc/xyz/sourcedir.tar.gz",       # path to you script
   entry_point="inference.py",
   role=role,                    # iam role with permissions to create an Endpoint
   transformers_version="4.17",  # transformers version used
   pytorch_version="1.10",       # pytorch version used
   py_version='py38',            # python version used
)

Here on unzipping, model.tar.gz will give you stable-diffusion and controlnet. And unzipping sourcedir.tar.gz will give you inference.py and requirements.txt.

This gives the following error:

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
Cell In[13], line 16
      5 huggingface_model = HuggingFaceModel(
      6    model_data=s3_model_uri,      # path to your model
      7    source_dir=s3_code_uri,       # parth to you script
   (...)
     12    py_version='py38',            # python version used
     13 )
     15 # deploy the endpoint endpoint
---> 16 predictor = huggingface_model.deploy(
     17     initial_instance_count=1,
     18     instance_type="ml.g4dn.xlarge"
     19     )

File /opt/conda/lib/python3.10/site-packages/sagemaker/huggingface/model.py:313, in HuggingFaceModel.deploy(self, initial_instance_count, instance_type, serializer, deserializer, accelerator_type, endpoint_name, tags, kms_key, wait, data_capture_config, async_inference_config, serverless_inference_config, volume_size, model_data_download_timeout, container_startup_health_check_timeout, inference_recommendation_id, explainer_config, **kwargs)
    306     inference_tool = "neuron" if instance_type.startswith("ml.inf1") else "neuronx"
    307     self.image_uri = self.serving_image_uri(
    308         region_name=self.sagemaker_session.boto_session.region_name,
    309         instance_type=instance_type,
    310         inference_tool=inference_tool,
    311     )
--> 313 return super(HuggingFaceModel, self).deploy(
    314     initial_instance_count,
    315     instance_type,
    316     serializer,
    317     deserializer,
    318     accelerator_type,
    319     endpoint_name,
    320     tags,
    321     kms_key,
    322     wait,
    323     data_capture_config,
    324     async_inference_config,
    325     serverless_inference_config,
    326     volume_size=volume_size,
    327     model_data_download_timeout=model_data_download_timeout,
    328     container_startup_health_check_timeout=container_startup_health_check_timeout,
    329     inference_recommendation_id=inference_recommendation_id,
    330     explainer_config=explainer_config,
    331 )

File /opt/conda/lib/python3.10/site-packages/sagemaker/model.py:1406, in Model.deploy(self, initial_instance_count, instance_type, serializer, deserializer, accelerator_type, endpoint_name, tags, kms_key, wait, data_capture_config, async_inference_config, serverless_inference_config, volume_size, model_data_download_timeout, container_startup_health_check_timeout, inference_recommendation_id, explainer_config, **kwargs)
   1403     if self._base_name is not None:
   1404         self._base_name = "-".join((self._base_name, compiled_model_suffix))
-> 1406 self._create_sagemaker_model(
   1407     instance_type=instance_type,
   1408     accelerator_type=accelerator_type,
   1409     tags=tags,
   1410     serverless_inference_config=serverless_inference_config,
   1411 )
   1413 serverless_inference_config_dict = (
   1414     serverless_inference_config._to_request_dict() if is_serverless else None
   1415 )
   1416 production_variant = sagemaker.production_variant(
   1417     self.name,
   1418     instance_type,
   (...)
   1424     container_startup_health_check_timeout=container_startup_health_check_timeout,
   1425 )

File /opt/conda/lib/python3.10/site-packages/sagemaker/model.py:794, in Model._create_sagemaker_model(self, instance_type, accelerator_type, tags, serverless_inference_config)
    768 def _create_sagemaker_model(
    769     self,
    770     instance_type=None,
   (...)
    773     serverless_inference_config=None,
    774 ):
    775     """Create a SageMaker Model Entity
    776 
    777     Args:
   (...)
    792             not provided in serverless inference. So this is used to find image URIs.
    793     """
--> 794     container_def = self.prepare_container_def(
    795         instance_type,
    796         accelerator_type=accelerator_type,
    797         serverless_inference_config=serverless_inference_config,
    798     )
    800     if not isinstance(self.sagemaker_session, PipelineSession):
    801         # _base_name, model_name are not needed under PipelineSession.
    802         # the model_data may be Pipeline variable
    803         # which may break the _base_name generation
    804         model_uri = None

File /opt/conda/lib/python3.10/site-packages/sagemaker/huggingface/model.py:498, in HuggingFaceModel.prepare_container_def(self, instance_type, accelerator_type, serverless_inference_config, inference_tool)
    489     deploy_image = self.serving_image_uri(
    490         region_name,
    491         instance_type,
   (...)
    494         inference_tool=inference_tool,
    495     )
    497 deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
--> 498 self._upload_code(deploy_key_prefix, repack=True)
    499 deploy_env = dict(self.env)
    500 deploy_env.update(self._script_mode_env_vars())

File /opt/conda/lib/python3.10/site-packages/sagemaker/model.py:723, in Model._upload_code(self, key_prefix, repack)
    707     self.uploaded_code = fw_utils.UploadedCode(
    708         s3_prefix=repacked_model_data,
    709         script_name=os.path.basename(self.entry_point),
    710     )
    712 LOGGER.info(
    713     "Repacking model artifact (%s), script artifact "
    714     "(%s), and dependencies (%s) "
   (...)
    720     repacked_model_data,
    721 )
--> 723 utils.repack_model(
    724     inference_script=self.entry_point,
    725     source_directory=self.source_dir,
    726     dependencies=self.dependencies,
    727     model_uri=self.model_data,
    728     repacked_model_uri=repacked_model_data,
    729     sagemaker_session=self.sagemaker_session,
    730     kms_key=self.model_kms_key,
    731 )
    733 self.repacked_model_data = repacked_model_data

File /opt/conda/lib/python3.10/site-packages/sagemaker/utils.py:517, in repack_model(inference_script, source_directory, dependencies, model_uri, repacked_model_uri, sagemaker_session, kms_key)
    510 local_download_dir = (
    511     None
    512     if sagemaker_session.settings is None
    513     or sagemaker_session.settings.local_download_dir is None
    514     else sagemaker_session.settings.local_download_dir
    515 )
    516 with _tmpdir(directory=local_download_dir) as tmp:
--> 517     model_dir = _extract_model(model_uri, sagemaker_session, tmp)
    519     _create_or_update_code_dir(
    520         model_dir,
    521         inference_script,
   (...)
    525         tmp,
    526     )
    528     tmp_model_path = os.path.join(tmp, "temp-model.tar.gz")

File /opt/conda/lib/python3.10/site-packages/sagemaker/utils.py:607, in _extract_model(model_uri, sagemaker_session, tmp)
    605     local_model_path = model_uri.replace("file://", "")
    606 with tarfile.open(name=local_model_path, mode="r:gz") as t:
--> 607     t.extractall(path=tmp_model_dir)
    608 return tmp_model_dir

File /opt/conda/lib/python3.10/tarfile.py:2059, in TarFile.extractall(self, path, members, numeric_owner)
   2057         tarinfo.mode = 0o700
   2058     # Do not set_attrs directories, as we will do that further down
-> 2059     self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
   2060                  numeric_owner=numeric_owner)
   2062 # Reverse sort directories.
   2063 directories.sort(key=lambda a: a.name)

File /opt/conda/lib/python3.10/tarfile.py:2100, in TarFile.extract(self, member, path, set_attrs, numeric_owner)
   2097     tarinfo._link_target = os.path.join(path, tarinfo.linkname)
   2099 try:
-> 2100     self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
   2101                          set_attrs=set_attrs,
   2102                          numeric_owner=numeric_owner)
   2103 except OSError as e:
   2104     if self.errorlevel > 0:

File /opt/conda/lib/python3.10/tarfile.py:2173, in TarFile._extract_member(self, tarinfo, targetpath, set_attrs, numeric_owner)
   2170     self._dbg(1, tarinfo.name)
   2172 if tarinfo.isreg():
-> 2173     self.makefile(tarinfo, targetpath)
   2174 elif tarinfo.isdir():
   2175     self.makedir(tarinfo, targetpath)

File /opt/conda/lib/python3.10/tarfile.py:2222, in TarFile.makefile(self, tarinfo, targetpath)
   2220     target.truncate()
   2221 else:
-> 2222     copyfileobj(source, target, tarinfo.size, ReadError, bufsize)

File /opt/conda/lib/python3.10/tarfile.py:251, in copyfileobj(src, dst, length, exception, bufsize)
    249     if len(buf) < bufsize:
    250         raise exception("unexpected end of data")
--> 251     dst.write(buf)
    253 if remainder != 0:
    254     buf = src.read(remainder)

OSError: [Errno 28] No space left on device

I have confirmed that this is not a space issue by trying this on a freshly set-up SageMaker domain and on bigger machines.
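
For what it's worth, this is how I check free space on the notebook instance right before calling deploy(); a minimal sketch using only the standard library.

import shutil

# Check free space on the filesystem hosting /tmp, which is where temporary
# files typically go while the SDK repacks the model archive (values in GB).
total, used, free = shutil.disk_usage("/tmp")
print(f"free: {free / 1e9:.1f} GB of {total / 1e9:.1f} GB")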