Broken ManagedOnlineDeployment class
fschlz opened this issue · 5 comments
- Package Name: azure-ai-ml
- Package Version: 1.15.0
- Operating System: macOS Sonoma
- Python Version: 3.10.13
Describe the bug
I am trying to create a managed endpoint and deployment.
I am able to create the endpoint, but there seems to be an issue with the parent class of the ManagedOnlineDeployment
as I get a type error even when following the examples here: https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-online-endpoints?view=azureml-api-2&tabs=python#configure-a-deployment-1
I cannot see where I might have deviated from the example; please let me know if I am missing something.
To Reproduce
When I run this code
deployment = ManagedOnlineDeployment(
name="deployment-wkszv",
endpoint_name="endoint-wkszv",
model=amlhandler.get_model(AML_MODEL_NAME, AML_MODEL_VERSION), # this return a valid Model object
env=amlhandler.get_env(AML_ENV_NAME, AML_ENV_VERION), # this return a valid Environment object
instance_type="Standard_DS3_v2",
instance_count=1,
with_package=True,
)
I get the following error:
{
"name": "TypeError",
"message": "object.__init__() takes exactly one argument (the instance to initialize)",
"stack": "---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[10], line 1
----> 1 deployment = ManagedOnlineDeployment(
2 name=\"fs-aml-wkszv\",
3 endpoint_name=\"fs-aml-wkszv\",
4 model=amlhandler.get_model(AML_MODEL_NAME, AML_MODEL_VERSION),
5 env=amlhandler.get_env(AML_ENV_NAME, AML_ENV_VERION),
6 instance_type=\"Standard_DS3_v2\",
7 instance_count=1
8 )
File ~/dev/amlhandler/.venv/lib/python3.10/site-packages/azure/ai/ml/entities/_deployment/online_deployment.py:732, in ManagedOnlineDeployment.__init__(self, name, endpoint_name, tags, properties, description, model, code_configuration, environment, app_insights_enabled, scale_settings, request_settings, liveness_probe, readiness_probe, environment_variables, instance_type, instance_count, egress_public_network_access, code_path, scoring_script, data_collector, **kwargs)
729 self.private_network_connection = kwargs.pop(\"private_network_connection\", None)
730 self.package_model = kwargs.pop(\"package_model\", False)
--> 732 super(ManagedOnlineDeployment, self).__init__(
733 name=name,
734 endpoint_name=endpoint_name,
735 tags=tags,
736 properties=properties,
737 description=description,
738 model=model,
739 code_configuration=code_configuration,
740 environment=environment,
741 environment_variables=environment_variables,
742 app_insights_enabled=app_insights_enabled,
743 scale_settings=scale_settings,
744 request_settings=request_settings,
745 liveness_probe=liveness_probe,
746 readiness_probe=readiness_probe,
747 instance_count=instance_count,
748 instance_type=instance_type,
749 code_path=code_path,
750 scoring_script=scoring_script,
751 data_collector=data_collector,
752 **kwargs,
753 )
755 self.readiness_probe = readiness_probe
756 self.egress_public_network_access = egress_public_network_access
File ~/dev/amlhandler/.venv/lib/python3.10/site-packages/azure/ai/ml/entities/_deployment/online_deployment.py:178, in OnlineDeployment.__init__(self, name, endpoint_name, tags, properties, description, model, data_collector, code_configuration, environment, app_insights_enabled, scale_settings, request_settings, liveness_probe, readiness_probe, environment_variables, instance_count, instance_type, model_mount_path, code_path, scoring_script, **kwargs)
131 \"\"\"Online endpoint deployment entity.
132
133 Constructor for Online endpoint deployment entity
(...)
174 :paramtype scoring_script: typing.Optional[typing.Union[str, os.PathLike]]
175 \"\"\"
176 self._provisioning_state: Optional[str] = kwargs.pop(\"provisioning_state\", None)
--> 178 super(OnlineDeployment, self).__init__(
179 name=name,
180 endpoint_name=endpoint_name,
181 tags=tags,
182 properties=properties,
183 description=description,
184 model=model,
185 code_configuration=code_configuration,
186 environment=environment,
187 environment_variables=environment_variables,
188 code_path=code_path,
189 scoring_script=scoring_script,
190 **kwargs,
191 )
193 self.app_insights_enabled = app_insights_enabled
194 self.scale_settings = scale_settings
File ~/dev/amlhandler/.venv/lib/python3.10/site-packages/azure/ai/ml/entities/_deployment/deployment.py:128, in Deployment.__init__(self, name, endpoint_name, description, tags, properties, model, code_configuration, environment, environment_variables, code_path, scoring_script, **kwargs)
119 msg = \"code_path and scoring_script are not allowed if code_configuration is provided.\"
120 raise ValidationException(
121 message=msg,
122 target=ErrorTarget.DEPLOYMENT,
(...)
125 error_type=ValidationErrorType.INVALID_VALUE,
126 )
--> 128 super().__init__(name, description, tags, properties, **kwargs)
130 self.model = model
131 self.code_configuration = code_configuration
File ~/dev/amlhandler/.venv/lib/python3.10/site-packages/azure/ai/ml/entities/_resource.py:67, in Resource.__init__(self, name, description, tags, properties, **kwargs)
65 self._serialize = Serializer(client_models)
66 self._serialize.client_side_validation = False
---> 67 super().__init__(**kwargs)
TypeError: object.__init__() takes exactly one argument (the instance to initialize)"
}
The above is just me trying to isolate the issue. In the end, I want to handle this via the AMLHandler class I wrote (here are the relevant methods):
class AMLHandler():
...
def create_endpoint(self, endpoint_name: str) -> Any:
self.endpoint = ManagedOnlineEndpoint(name=endpoint_name)
return self.ml_client.online_endpoints.begin_create_or_update(endpoint=self.endpoint)
def create_deployment(
self,
model: Model,
instance_type: str = "Standard_DS3_v2",
instance_count: int = 2,
env: Optional[Environment] = None,
code_path: Optional[str] = None,
scoring_script: Optional[str] = None,
) -> Any:
self.deployment = ManagedOnlineDeployment(
name=self.endpoint.name,
endpoint_name=self.endpoint.name,
model=model,
environment=self.env if env is None else env,
code_configuration=(
None if scoring_script is None else CodeConfiguration(code=code_path, scoring_script=scoring_script)
),
instance_type=instance_type,
instance_count=instance_count,
)
return self.ml_client.online_deployments.begin_create_or_update(deployment=self.deployment)
def deploy_endpoint(
self,
model: Model,
instance_type: str = "Standard_DS3_v2",
instance_count: int = 2,
env: Optional[Environment] = None,
code_path: Optional[str] = None,
scoring_script: Optional[str] = None,
) -> bool:
"""this method creates an endpoint, waits until it's created on the Azure backend, then creates the deployment
"""
# endpoint
logger.info("creating the endpoint")
endpoint_name = "-".join([self.workspace, "".join(random_choices(string.ascii_letters, k=5))])
self.endpoint_poller = self.create_endpoint(endpoint_name)
endpoint_created = False
while endpoint_created is False:
logger.info("still creating ...")
sleep(3)
endpoint_created = self.endpoint_poller.done()
# deployment
logger.info("creating the deployment")
self.deployment_poller = self.create_deployment(
model, instance_type, instance_count, env, code_path, scoring_script
)
deployment_created = False
while deployment_created is False:
logger.info("still creating ...")
sleep(3)
deployment_created = self.deployment_poller.done()
BUT, calling the deploy_endpoint() method gives me another nondescript error.
This error goes away when I pass a CodeConfiguration into the method. However, the function definition tells me it's okay for it to be None, and when deploying in the UI it's also not necessary to provide a scoring script. Therefore, I'd assume it should work either way.
Plus, when I use the method with the CodeConfiguration, the deployment eventually fails on the backend anyway, because I am missing the azureml-inference-server-http package. And to install that, I will have to downgrade from mlflow 2.X to mlflow 1.X ...
{
"name": "HttpResponseError",
"message": "(BadRequest) The request is invalid.
Code: BadRequest
Message: The request is invalid.
Exception Details:\t(MissingDriverProgram) Could not find driver program in the request.
\tCode: MissingDriverProgram
\tMessage: Could not find driver program in the request.
Additional Information:Type: ComponentName
Info: {
\"value\": \"managementfrontend\"
}Type: Correlation
Info: {
\"value\": {
\"operation\": \"6fc66668a41384c6a488a57733728914\",
\"request\": \"df2c34ebcb1bccb3\"
}
}Type: Environment
Info: {
\"value\": \"westeurope\"
}Type: Location
Info: {
\"value\": \"westeurope\"
}Type: Time
Info: {
\"value\": \"2024-04-26T20:00:35.6369301+00:00\"
}",
"stack": "---------------------------------------------------------------------------
HttpResponseError Traceback (most recent call last)
Cell In[5], line 1
----> 1 amlhandler.deploy_endpoint(
2 model=amlhandler.get_model(AML_MODEL_NAME, AML_MODEL_VERSION),
3 env=amlhandler.get_env(AML_ENV_NAME, AML_ENV_VERSION),
4 instance_count=1,
5 )
File ~/dev/amlhandler/src/amlhandler/azure_ml_handler.py:508, in AMLHandler.deploy_endpoint(self, model, instance_type, instance_count, env, code_path, scoring_script)
506 # deployment
507 logger.info(\"creating the deployment\")
--> 508 self.deployment_poller = self.create_deployment(
509 model, instance_type, instance_count, env, code_path, scoring_script
510 )
512 deployment_created = False
513 while deployment_created is False:
File ~/dev/amlhandler/src/amlhandler/azure_ml_handler.py:472, in AMLHandler.create_deployment(self, model, instance_type, instance_count, env, code_path, scoring_script)
451 def create_deployment(
452 self,
453 model: Model,
(...)
458 scoring_script: Optional[str] = None,
459 ) -> Any:
461 self.deployment = ManagedOnlineDeployment(
462 name=self.endpoint.name,
463 endpoint_name=self.endpoint.name,
(...)
470 instance_count=instance_count,
471 )
--> 472 return self.ml_client.online_deployments.begin_create_or_update(deployment=self.deployment)
File ~/dev/amlhandler/.venv/lib/python3.10/site-packages/azure/core/tracing/decorator.py:78, in distributed_trace.<locals>.decorator.<locals>.wrapper_use_tracer(*args, **kwargs)
76 span_impl_type = settings.tracing_implementation()
77 if span_impl_type is None:
---> 78 return func(*args, **kwargs)
80 # Merge span is parameter is set, but only if no explicit parent are passed
81 if merge_span and not passed_in_parent:
File ~/dev/amlhandler/.venv/lib/python3.10/site-packages/azure/ai/ml/_telemetry/activity.py:285, in monitor_with_activity.<locals>.monitor.<locals>.wrapper(*args, **kwargs)
281 with tracer.span():
282 with log_activity(
283 logger.package_logger, activity_name or f.__name__, activity_type, custom_dimensions
284 ):
--> 285 return f(*args, **kwargs)
286 elif hasattr(logger, \"package_logger\"):
287 with log_activity(logger.package_logger, activity_name or f.__name__, activity_type, custom_dimensions):
File ~/dev/amlhandler/.venv/lib/python3.10/site-packages/azure/ai/ml/operations/_online_deployment_operations.py:216, in OnlineDeploymentOperations.begin_create_or_update(self, deployment, local, vscode_debug, skip_script_validation, local_enable_gpu, **kwargs)
214 log_and_raise_error(ex)
215 else:
--> 216 raise ex
File ~/dev/amlhandler/.venv/lib/python3.10/site-packages/azure/ai/ml/operations/_online_deployment_operations.py:211, in OnlineDeploymentOperations.begin_create_or_update(self, deployment, local, vscode_debug, skip_script_validation, local_enable_gpu, **kwargs)
209 return poller
210 except Exception as ex:
--> 211 raise ex
212 except Exception as ex: # pylint: disable=broad-except
213 if isinstance(ex, (ValidationException, SchemaValidationError)):
File ~/dev/amlhandler/.venv/lib/python3.10/site-packages/azure/ai/ml/operations/_online_deployment_operations.py:194, in OnlineDeploymentOperations.begin_create_or_update(self, deployment, local, vscode_debug, skip_script_validation, local_enable_gpu, **kwargs)
190 module_logger.info(\"\
Starting deployment\")
192 deployment_rest = deployment._to_rest_object(location=location) # type: ignore
--> 194 poller = self._online_deployment.begin_create_or_update(
195 resource_group_name=self._resource_group_name,
196 workspace_name=self._workspace_name,
197 endpoint_name=deployment.endpoint_name,
198 deployment_name=deployment.name,
199 body=deployment_rest,
200 polling=AzureMLPolling(
201 LROConfigurations.POLL_INTERVAL,
202 path_format_arguments=path_format_arguments,
203 **self._init_kwargs,
204 ),
205 polling_interval=LROConfigurations.POLL_INTERVAL,
206 **self._init_kwargs,
207 cls=lambda response, deserialized, headers: OnlineDeployment._from_rest_object(deserialized),
208 )
209 return poller
210 except Exception as ex:
File ~/dev/amlhandler/.venv/lib/python3.10/site-packages/azure/core/tracing/decorator.py:78, in distributed_trace.<locals>.decorator.<locals>.wrapper_use_tracer(*args, **kwargs)
76 span_impl_type = settings.tracing_implementation()
77 if span_impl_type is None:
---> 78 return func(*args, **kwargs)
80 # Merge span is parameter is set, but only if no explicit parent are passed
81 if merge_span and not passed_in_parent:
File ~/dev/amlhandler/.venv/lib/python3.10/site-packages/azure/ai/ml/_restclient/v2023_04_01_preview/operations/_online_deployments_operations.py:933, in OnlineDeploymentsOperations.begin_create_or_update(self, resource_group_name, workspace_name, endpoint_name, deployment_name, body, **kwargs)
931 cont_token = kwargs.pop('continuation_token', None) # type: Optional[str]
932 if cont_token is None:
--> 933 raw_result = self._create_or_update_initial(
934 resource_group_name=resource_group_name,
935 workspace_name=workspace_name,
936 endpoint_name=endpoint_name,
937 deployment_name=deployment_name,
938 body=body,
939 api_version=api_version,
940 content_type=content_type,
941 cls=lambda x,y,z: x,
942 **kwargs
943 )
944 kwargs.pop('error_map', None)
946 def get_long_running_output(pipeline_response):
File ~/dev/amlhandler/.venv/lib/python3.10/site-packages/azure/ai/ml/_restclient/v2023_04_01_preview/operations/_online_deployments_operations.py:864, in OnlineDeploymentsOperations._create_or_update_initial(self, resource_group_name, workspace_name, endpoint_name, deployment_name, body, **kwargs)
862 if response.status_code not in [200, 201]:
863 map_error(status_code=response.status_code, response=response, error_map=error_map)
--> 864 raise HttpResponseError(response=response, error_format=ARMErrorFormat)
866 response_headers = {}
867 if response.status_code == 200:
HttpResponseError: (BadRequest) The request is invalid.
Code: BadRequest
Message: The request is invalid.
Exception Details:\t(MissingDriverProgram) Could not find driver program in the request.
\tCode: MissingDriverProgram
\tMessage: Could not find driver program in the request.
Additional Information:Type: ComponentName
Info: {
\"value\": \"managementfrontend\"
}Type: Correlation
Info: {
\"value\": {
\"operation\": \"6fc66668a41384c6a488a57733728914\",
\"request\": \"df2c34ebcb1bccb3\"
}
}Type: Environment
Info: {
\"value\": \"westeurope\"
}Type: Location
Info: {
\"value\": \"westeurope\"
}Type: Time
Info: {
\"value\": \"2024-04-26T20:00:35.6369301+00:00\"
}"
}
Expected behavior
It would be nice if the examples in the docs would be correct and simply work.
There seem to be many ways to achieve any type of outcome in AzureML. For me, this seems to create more issues than if there were only one or two ways. But perhaps I am just using it wrong.
Screenshots
If applicable, add screenshots to help explain your problem.
Additional context
Add any other context about the problem here.
The same issue occurs with the CLI, by the way, when trying to deploy with the following YAML configs:
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
name: test-1234
auth_mode: key
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: test-1234
endpoint_name: test-1234
model: azureml:<MODEL_NAME>:<MODEL_VERSION>
environment: azureml:<ENV_NAME>:<ENV_VERSION>
instance_type: Standard_DS3_v2
instance_count: 1
az ml online-deployment create --subscription xxx --resource-group xxx --workspace-name xxx --file "./aml-model-deployment.yml"
Check: endpoint test-1234 exists
(BadRequest) The request is invalid.
Code: BadRequest
Message: The request is invalid.
Exception Details: (MissingDriverProgram) Could not find driver program in the request.
Code: MissingDriverProgram
Message: Could not find driver program in the request.
Additional Information:Type: ComponentName
Info: {
"value": "managementfrontend"
}Type: Correlation
Info: {
"value": {
"operation": "87e29e5d5c6a3e930c42430e734c3c59",
"request": "6fedce6468d52574"
}
}Type: Environment
Info: {
"value": "westeurope"
}Type: Location
Info: {
"value": "westeurope"
}Type: Time
Info: {
"value": "2024-04-26T21:08:30.7893691+00:00"
}
There's perhaps something wrong with the Dockerfile I used to create the training environment, so I'll add that too.
FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04:latest
WORKDIR /app
ENV PYTHONDONTWRITEBYTECODE=true
ENV PYTHONUNBUFFERED=true
ENV PIP_CACHE_DIR=.cache/pip
RUN apt-get update --fix-missing && \
apt-get install -y software-properties-common && \
add-apt-repository ppa:deadsnakes/ppa -y && \
apt-get update && \
apt-get install -y python3.10 python3.10-distutils python3.10-venv && \
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python3 get-pip.py && \
rm get-pip.py && \
apt-get install -y curl git bash build-essential libffi-dev libssl-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
COPY requirements.txt .
RUN pip install -r requirements.txt && rm requirements.txt
EXPOSE 8080
Hi @fschlz, thank you for opening an issue! I'll get this routed to the ML team so they can take a look as soon as possible. cc @Azure/azure-ml-sdk @azureml-github.
Thanks for the feedback! We are routing this to the appropriate team for follow-up. cc @Azure/azure-ml-sdk @azureml-github.
I'm having the exact same issue. It does not matter whether I remove the supposedly optional parameters like code_path; I still get this error.
I have tried different approaches, such as passing a code configuration object to the code_configuration parameter without including the other params, and I still get an error.
I also tried passing only the scoring script path via the scoring_script parameter, without the other params, and got the same error. It seems there is an issue in the class itself?
File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/azure/ai/ml/entities/_deployment/deployment.py:124, in Deployment.__init__(self, name, endpoint_name, description, tags, properties, model, code_configuration, environment, environment_variables, code_path, scoring_script, **kwargs)
115 msg = "code_path and scoring_script are not allowed if code_configuration is provided."
116 raise ValidationException(
117 message=msg,
118 target=ErrorTarget.DEPLOYMENT,
(...)
121 error_type=ValidationErrorType.INVALID_VALUE,
122 )
--> 124 super().__init__(name, description, tags, properties, **kwargs)
126 self.model = model
127 self.code_configuration = code_configuration
File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/azure/ai/ml/entities/_resource.py:81, in Resource.__init__(self, name, description, tags, properties, **kwargs)
79 self._serialize = Serializer(client_models)
80 self._serialize.client_side_validation = False
---> 81 super().__init__(**kwargs)
TypeError: object.__init__() takes exactly one argument (the instance to initialize)
+1 for this error.
In my case, I only get it if I specify the "environment" field.
(It seems to me that I need to specify it because I am using a custom python package hosted privately, but I might be wrong as I am new to Azure ML).