Separate model and inference script tar.gz not working.
In reference to: https://github.com/huggingface/notebooks/blob/main/sagemaker/17_custom_inference_script/sagemaker-notebook.ipynb
For the HF diffusers library we need to upload a custom inference script. The issue with bundling model and code together is that creating and pushing model.tar.gz takes a lot of time every time the code changes.
When I zip my model and code together (like below), things work fine:
```python
huggingface_model = HuggingFaceModel(
    model_data="s3://abc/xyz/model.tar.gz",  # s3 path having both model and code
    role=role,                               # iam role with permissions to create an Endpoint
    transformers_version="4.17",             # transformers version used
    pytorch_version="1.10",                  # pytorch version used
    py_version="py38",                       # python version used
)
```
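It is then deployed with the same call that appears in the traceback further below:

```python
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
)
```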
On unzipping, the folder structure looks like this:

```
model.tar.gz
├── stable-diffusion
├── controlnet
└── code
```
As mentioned before, this works fine.
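(For reference, a minimal sketch of how such a combined archive can be built with Python's `tarfile` module; the directory names follow the layout above:)

```python
import tarfile

# Bundle the model weights and the inference code into one archive,
# using the directory layout shown above.
with tarfile.open("model.tar.gz", "w:gz") as tar:
    for name in ("stable-diffusion", "controlnet", "code"):
        tar.add(name)
```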
Since zipping code and model together makes the archive very slow to create and upload, I want to decouple the model from the code, so I tried the following:
```python
huggingface_model = HuggingFaceModel(
    model_data="s3://abc/xyz/model.tar.gz",      # path to your model
    source_dir="s3://abc/xyz/sourcedir.tar.gz",  # path to your script
    entry_point="inference.py",
    role=role,                                   # iam role with permissions to create an Endpoint
    transformers_version="4.17",                 # transformers version used
    pytorch_version="1.10",                      # pytorch version used
    py_version="py38",                           # python version used
)
```
Here, unzipping `model.tar.gz` gives `stable-diffusion` and `controlnet`, and unzipping `sourcedir.tar.gz` gives `inference.py` and `requirements.txt`.
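(Again only a sketch: the code archive is built the same way, with both files at the archive root:)

```python
import tarfile

# Code-only archive; inference.py and requirements.txt sit at the root.
with tarfile.open("sourcedir.tar.gz", "w:gz") as tar:
    tar.add("inference.py")
    tar.add("requirements.txt")
```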
This gives the following error:
```
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
Cell In[13], line 16
5 huggingface_model = HuggingFaceModel(
6 model_data=s3_model_uri, # path to your model
7 source_dir=s3_code_uri, # parth to you script
(...)
12 py_version='py38', # python version used
13 )
15 # deploy the endpoint endpoint
---> 16 predictor = huggingface_model.deploy(
17 initial_instance_count=1,
18 instance_type="ml.g4dn.xlarge"
19 )
File /opt/conda/lib/python3.10/site-packages/sagemaker/huggingface/model.py:313, in HuggingFaceModel.deploy(self, initial_instance_count, instance_type, serializer, deserializer, accelerator_type, endpoint_name, tags, kms_key, wait, data_capture_config, async_inference_config, serverless_inference_config, volume_size, model_data_download_timeout, container_startup_health_check_timeout, inference_recommendation_id, explainer_config, **kwargs)
306 inference_tool = "neuron" if instance_type.startswith("ml.inf1") else "neuronx"
307 self.image_uri = self.serving_image_uri(
308 region_name=self.sagemaker_session.boto_session.region_name,
309 instance_type=instance_type,
310 inference_tool=inference_tool,
311 )
--> 313 return super(HuggingFaceModel, self).deploy(
314 initial_instance_count,
315 instance_type,
316 serializer,
317 deserializer,
318 accelerator_type,
319 endpoint_name,
320 tags,
321 kms_key,
322 wait,
323 data_capture_config,
324 async_inference_config,
325 serverless_inference_config,
326 volume_size=volume_size,
327 model_data_download_timeout=model_data_download_timeout,
328 container_startup_health_check_timeout=container_startup_health_check_timeout,
329 inference_recommendation_id=inference_recommendation_id,
330 explainer_config=explainer_config,
331 )
File /opt/conda/lib/python3.10/site-packages/sagemaker/model.py:1406, in Model.deploy(self, initial_instance_count, instance_type, serializer, deserializer, accelerator_type, endpoint_name, tags, kms_key, wait, data_capture_config, async_inference_config, serverless_inference_config, volume_size, model_data_download_timeout, container_startup_health_check_timeout, inference_recommendation_id, explainer_config, **kwargs)
1403 if self._base_name is not None:
1404 self._base_name = "-".join((self._base_name, compiled_model_suffix))
-> 1406 self._create_sagemaker_model(
1407 instance_type=instance_type,
1408 accelerator_type=accelerator_type,
1409 tags=tags,
1410 serverless_inference_config=serverless_inference_config,
1411 )
1413 serverless_inference_config_dict = (
1414 serverless_inference_config._to_request_dict() if is_serverless else None
1415 )
1416 production_variant = sagemaker.production_variant(
1417 self.name,
1418 instance_type,
(...)
1424 container_startup_health_check_timeout=container_startup_health_check_timeout,
1425 )
File /opt/conda/lib/python3.10/site-packages/sagemaker/model.py:794, in Model._create_sagemaker_model(self, instance_type, accelerator_type, tags, serverless_inference_config)
768 def _create_sagemaker_model(
769 self,
770 instance_type=None,
(...)
773 serverless_inference_config=None,
774 ):
775 """Create a SageMaker Model Entity
776
777 Args:
(...)
792 not provided in serverless inference. So this is used to find image URIs.
793 """
--> 794 container_def = self.prepare_container_def(
795 instance_type,
796 accelerator_type=accelerator_type,
797 serverless_inference_config=serverless_inference_config,
798 )
800 if not isinstance(self.sagemaker_session, PipelineSession):
801 # _base_name, model_name are not needed under PipelineSession.
802 # the model_data may be Pipeline variable
803 # which may break the _base_name generation
804 model_uri = None
File /opt/conda/lib/python3.10/site-packages/sagemaker/huggingface/model.py:498, in HuggingFaceModel.prepare_container_def(self, instance_type, accelerator_type, serverless_inference_config, inference_tool)
489 deploy_image = self.serving_image_uri(
490 region_name,
491 instance_type,
(...)
494 inference_tool=inference_tool,
495 )
497 deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
--> 498 self._upload_code(deploy_key_prefix, repack=True)
499 deploy_env = dict(self.env)
500 deploy_env.update(self._script_mode_env_vars())
File /opt/conda/lib/python3.10/site-packages/sagemaker/model.py:723, in Model._upload_code(self, key_prefix, repack)
707 self.uploaded_code = fw_utils.UploadedCode(
708 s3_prefix=repacked_model_data,
709 script_name=os.path.basename(self.entry_point),
710 )
712 LOGGER.info(
713 "Repacking model artifact (%s), script artifact "
714 "(%s), and dependencies (%s) "
(...)
720 repacked_model_data,
721 )
--> 723 utils.repack_model(
724 inference_script=self.entry_point,
725 source_directory=self.source_dir,
726 dependencies=self.dependencies,
727 model_uri=self.model_data,
728 repacked_model_uri=repacked_model_data,
729 sagemaker_session=self.sagemaker_session,
730 kms_key=self.model_kms_key,
731 )
733 self.repacked_model_data = repacked_model_data
File /opt/conda/lib/python3.10/site-packages/sagemaker/utils.py:517, in repack_model(inference_script, source_directory, dependencies, model_uri, repacked_model_uri, sagemaker_session, kms_key)
510 local_download_dir = (
511 None
512 if sagemaker_session.settings is None
513 or sagemaker_session.settings.local_download_dir is None
514 else sagemaker_session.settings.local_download_dir
515 )
516 with _tmpdir(directory=local_download_dir) as tmp:
--> 517 model_dir = _extract_model(model_uri, sagemaker_session, tmp)
519 _create_or_update_code_dir(
520 model_dir,
521 inference_script,
(...)
525 tmp,
526 )
528 tmp_model_path = os.path.join(tmp, "temp-model.tar.gz")
File /opt/conda/lib/python3.10/site-packages/sagemaker/utils.py:607, in _extract_model(model_uri, sagemaker_session, tmp)
605 local_model_path = model_uri.replace("file://", "")
606 with tarfile.open(name=local_model_path, mode="r:gz") as t:
--> 607 t.extractall(path=tmp_model_dir)
608 return tmp_model_dir
File /opt/conda/lib/python3.10/tarfile.py:2059, in TarFile.extractall(self, path, members, numeric_owner)
2057 tarinfo.mode = 0o700
2058 # Do not set_attrs directories, as we will do that further down
-> 2059 self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
2060 numeric_owner=numeric_owner)
2062 # Reverse sort directories.
2063 directories.sort(key=lambda a: a.name)
File /opt/conda/lib/python3.10/tarfile.py:2100, in TarFile.extract(self, member, path, set_attrs, numeric_owner)
2097 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2099 try:
-> 2100 self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2101 set_attrs=set_attrs,
2102 numeric_owner=numeric_owner)
2103 except OSError as e:
2104 if self.errorlevel > 0:
File /opt/conda/lib/python3.10/tarfile.py:2173, in TarFile._extract_member(self, tarinfo, targetpath, set_attrs, numeric_owner)
2170 self._dbg(1, tarinfo.name)
2172 if tarinfo.isreg():
-> 2173 self.makefile(tarinfo, targetpath)
2174 elif tarinfo.isdir():
2175 self.makedir(tarinfo, targetpath)
File /opt/conda/lib/python3.10/tarfile.py:2222, in TarFile.makefile(self, tarinfo, targetpath)
2220 target.truncate()
2221 else:
-> 2222 copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
File /opt/conda/lib/python3.10/tarfile.py:251, in copyfileobj(src, dst, length, exception, bufsize)
249 if len(buf) < bufsize:
250 raise exception("unexpected end of data")
--> 251 dst.write(buf)
253 if remainder != 0:
254 buf = src.read(remainder)
OSError: [Errno 28] No space left on device
```
I have confirmed that this is not a space issue on the instance itself by trying this on a freshly set up SageMaker domain and on larger instance types.
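From the traceback, one possible reading (I have not confirmed this): when `source_dir`/`entry_point` are passed, `deploy()` repacks the model. `utils.repack_model` downloads `model.tar.gz` into a local temp directory, extracts it, injects the code, and re-uploads it, so a multi-GB diffusers model may fill up that temp location even when the instance's main volume has plenty of space. The traceback also shows `repack_model` reading `sagemaker_session.settings.local_download_dir`, so a possible (untested) workaround is to point that setting at a directory on a volume with enough free space:

```python
# Untested sketch: redirect the SDK's local download/extract directory
# (seen in the traceback as sagemaker_session.settings.local_download_dir)
# to a location with enough free space. The directory must already exist.
import sagemaker
from sagemaker.session_settings import SessionSettings

session = sagemaker.Session(
    settings=SessionSettings(local_download_dir="/home/ec2-user/SageMaker/tmp"),
)

huggingface_model = HuggingFaceModel(
    model_data="s3://abc/xyz/model.tar.gz",
    source_dir="s3://abc/xyz/sourcedir.tar.gz",
    entry_point="inference.py",
    role=role,
    transformers_version="4.17",
    pytorch_version="1.10",
    py_version="py38",
    sagemaker_session=session,  # session with the custom download dir
)
```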