Error querying SageMaker endpoint
lshang0311 opened this issue · 2 comments
Link to the notebook
[Retrieval-Augmented Generation: Question Answering based on Custom Dataset with Open-sourced LangChain Library](https://sagemaker-examples.readthedocs.io/en/latest/introduction_to_amazon_algorithms/jumpstart-foundation-models/question_answering_retrieval_augmented_generation/question_answering_langchain_jumpstart.html)
Describe the bug
Query the endpoint
payload = {
    "text_inputs": question,
    "max_length": 100,
    "num_return_sequences": 1,
    "top_k": 50,
    "top_p": 0.95,
    "do_sample": True,
}

list_of_LLMs = list(_MODEL_CONFIG_.keys())
list_of_LLMs.remove("huggingface-textembedding-gpt-j-6b")  # remove the embedding model

for model_id in list_of_LLMs:
    endpoint_name = _MODEL_CONFIG_[model_id]["endpoint_name"]
    query_response = query_endpoint_with_json_payload(
        json.dumps(payload).encode("utf-8"), endpoint_name=endpoint_name
    )
    generated_texts = _MODEL_CONFIG_[model_id]["parse_function"](query_response)
    print(f"For model: {model_id}, the generated output is: {generated_texts[0]}\n")
Running the query loop above gives the following error:

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from primary with message "{
    "code": 400,
    "type": "InternalServerException",
    "message": "model_fn() takes 1 positional argument but 2 were given"
}
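The "model_fn() takes 1 positional argument but 2 were given" message usually indicates a mismatch between the inference script and the serving container: newer SageMaker inference toolkits invoke model_fn with an extra context argument, so a script that defines model_fn(model_dir) fails with this 400. A minimal sketch of a tolerant signature (an assumed fix for custom inference scripts, not the notebook's code):

# Hedged sketch: accept the optional context argument that newer
# SageMaker inference toolkits pass as a second positional parameter.
def model_fn(model_dir, context=None):
    # load and return the model from model_dir as before
    ...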
To reproduce
Dependencies:
!pip install sagemaker==2.181
!pip install ipywidgets==7.0.0 --quiet
!pip install langchain==0.0.148 --quiet
!pip install faiss-cpu --quiet
Logs
Hi @lshang0311, I have changed the deployment to the JumpStart approach. Please find my code below:
from sagemaker import Session
from sagemaker.utils import name_from_base
from sagemaker.jumpstart.model import JumpStartModel

sagemaker_session = Session()

_MODEL_CONFIG_ = {
    "huggingface-text2text-flan-t5-xxl": {
        "model_version": "2.*",
        "instance type": "ml.g5.12xlarge",
    },
    # Got "DeprecatedJumpStartModelError: This model is no longer
    # available. Please try another model." for the GPT-J embedding
    # model at the time of testing:
    # "huggingface-textembedding-gpt-j-6b": {
    #     "model_version": "1.*",
    #     "instance type": "ml.g5.24xlarge"
    # },
    "huggingface-textembedding-all-MiniLM-L6-v2": {
        "model_version": "1.*",
        "instance type": "ml.g5.24xlarge",
    },
    # "huggingface-textembedding-all-MiniLM-L6-v2": {
    #     "model_version": "3.*",
    #     "instance type": "ml.g5.12xlarge"
    # },
    # "huggingface-text2text-flan-ul2-bf16": {
    #     "model_version": "2.*",
    #     "instance type": "ml.g5.24xlarge"
    # },
}
for model_id in _MODEL_CONFIG_:
    endpoint_name = name_from_base(f"jumpstart-example-raglc-{model_id}")
    inference_instance_type = _MODEL_CONFIG_[model_id]["instance type"]
    model_version = _MODEL_CONFIG_[model_id]["model_version"]
    print(f"Deploying {model_id}...")
    model = JumpStartModel(
        model_id=model_id,
        model_version=model_version,
    )
    try:
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type=inference_instance_type,
            endpoint_name=endpoint_name,  # reuse the name generated above
        )
        print(f"Deployed endpoint: {predictor.endpoint_name}")
        _MODEL_CONFIG_[model_id]["predictor"] = predictor
    except Exception as e:
        print(f"Error deploying {model_id}: {str(e)}")

print("Deployment process completed.")
question = "Which instances can I use with Managed Spot Training in SageMaker?"
list_of_LLMs = list(_MODEL_CONFIG_.keys())
list_of_LLMs = [model for model in list_of_LLMs if "textembedding" not in model]

for model_id in list_of_LLMs:
    predictor = _MODEL_CONFIG_[model_id]["predictor"]
    response = predictor.predict({"inputs": question})
    print(f"For model: {model_id}, the generated output is:\n")
    print(f"{response[0]['generated_text']}\n")
I'm happy to create a pull request to improve the notebook. Can someone more experienced check whether this is OK?