alb |
Map of values passed to ALB module definition. See the ALB module for full list of supported arguments |
any |
{} |
no |
alb_https_default_action |
Default action for ALB HTTPS listener |
any |
{ "forward": { "target_group_key": "text_generation_inference" } } |
no |
alb_security_group_id |
ID of an existing security group to attach to the ALB. Required if create_alb is false |
string |
"" |
no |
alb_subnets |
A list of subnets in which the ALB will be deployed. Required if create_alb is true |
list(string) |
[] |
no |
alb_target_group_arn |
ARN of an existing ALB target group that will be used to route traffic to the Text Generation Inference service. Required if create_alb is false |
string |
"" |
no |
autoscaling |
Map of values passed to ECS autoscaling module definition. See the ECS autoscaling module for full list of arguments supported |
any |
{} |
no |
availability_zones |
List of availability zones in which to create resources. |
list(string) |
n/a |
yes |
certificate_arn |
ARN of an existing ACM certificate to use with the ALB. If not provided, a new certificate will be created. Required if create_alb is true and create_certificate is false |
string |
"" |
no |
certificate_domain_name |
Route53 domain name to use for ACM certificate. Route53 zone for this domain should be created in advance. |
string |
"" |
no |
cluster |
Map of values passed to ECS cluster module definition. See the ECS cluster module for full list of supported arguments |
any |
{} |
no |
cluster_agent_log_level |
Log level for ECS cluster agent |
string |
"info" |
no |
cluster_arn |
ARN of the ECS cluster in which the service will be deployed. Required if create_cluster is false |
string |
null |
no |
create_alb |
Determines whether to create an Application Load Balancer for the ECS service |
bool |
true |
no |
create_certificate |
Determines whether to create an ACM certificate for the ALB |
bool |
true |
no |
create_cluster |
Determines whether to create an ECS cluster for the service |
bool |
true |
no |
create_route53_records |
Determines whether to create Route53 records for the ALB |
bool |
true |
no |
create_ui |
Determines whether to create the Open WebUI |
bool |
true |
no |
dtype |
Data type to use for model weights. The default is to use the default data type for the model. This is not compatible with quantization |
string |
null |
no |
efs |
Map of values passed to EFS module definition. See the EFS module for full list of arguments supported |
any |
{} |
no |
enable_efs |
Determines whether to create an EFS volume for the ECS service. If enabled, the volume is used for model storage |
bool |
false |
no |
hugging_face_hub_token |
Hugging Face Hub API token |
string |
"" |
no |
init_nginx |
Map of values passed to nginx init container definition. See the ECS container definition module for full list of arguments supported |
any |
{} |
no |
instance_type |
Instance type to use for the ECS autoscaling group. Note that currently only g4dn.* instance types are supported. When using models larger than 7B, be sure to select an instance type that has at least 20GB of RAM for initial loading, unless using a non-default QUANTIZE setting, which reduces RAM usage |
string |
"g4dn.xlarge" |
no |
model_name |
The name of the model to load. Can be a MODEL_ID as listed on https://hf.co/models, like gpt2 or OpenAssistant/oasst-sft-1-pythia-12b, or a local directory containing the necessary files as saved by the save_pretrained(...) methods of transformers |
string |
"teknium/OpenHermes-2.5-Mistral-7B" |
no |
name |
Common name to use on all resources |
string |
"text_generation_inference" |
no |
nginx |
Map of values passed to nginx container definition. See the ECS container definition module for full list of arguments supported |
any |
{} |
no |
quantize |
Quantize the model to reduce memory usage. This can be useful for large models that don't fit in memory. The default is to not quantize the model |
string |
null |
no |
route53_record_name |
Name of the Route53 record to use for the ACM certificate and the main A-record. If not specified, var.name will be used. Required if create_route53_records is true |
string |
null |
no |
route53_zone_id |
ID of the Route53 zone in which to create records. Required if create_route53_records is true |
string |
"" |
no |
route53_zone_name |
Name of the Route53 zone in which to create records. Required if create_route53_records is true |
string |
"" |
no |
service |
Map of values passed to ECS service module definition. See the ECS service module for full list of arguments supported |
any |
{} |
no |
service_subnets |
A list of subnets in which the ECS service will be deployed |
list(string) |
n/a |
yes |
tags |
A map of tags to add to all resources |
map(string) |
{} |
no |
text_generation_inference |
Configuration for the text generation inference |
object({ mount_points = optional(list(any), []) command = optional(list(string), []) cpu = optional(number, null) dependencies = optional(list(map(string)), []) # depends_on is a reserved word disable_networking = optional(bool, null) dns_search_domains = optional(list(string), []) dns_servers = optional(list(string), []) docker_labels = optional(map(string), {}) docker_security_options = optional(list(string), []) enable_execute_command = optional(bool, false) entrypoint = optional(list(string), []) environment = optional(list(object({ name = string, value = string })), []) environment_files = optional(list(object({ value = string, type = string })), []) essential = optional(bool, true) extra_hosts = optional(list(object({ hostname = string, ipAddress = string })), []) firelens_configuration = optional(map(string), {}) health_check = optional(map(string), {}) hostname = optional(string, null) image_repo = optional(string, "ghcr.io/huggingface/text-generation-inference") image_version = optional(string, "latest") interactive = optional(bool, false) links = optional(list(string), []) linux_parameters = optional(any, {}) log_configuration = optional(map(string), {}) memory = optional(number, null) memory_reservation = optional(number, null) privileged = optional(bool, false) pseudo_terminal = optional(bool, false) readonly_root_filesystem = optional(bool, false) repository_credentials = optional(map(string), {}) resource_requirements = optional(list(object({ type = string, value = string })), []) secrets = optional(list(object({ name = string, valueFrom = string })), []) start_timeout = optional(number, 30) stop_timeout = optional(number, 120) system_controls = optional(list(map(string)), []) ulimits = optional(list(map(string)), []) user = optional(string, null) volumes_from = optional(list(map(string)), []) working_directory = optional(string, null) enable_cloudwatch_logging = optional(bool, true) create_cloudwatch_log_group = optional(bool, true) 
cloudwatch_log_group_use_name_prefix = optional(bool, true) cloudwatch_log_group_retention_in_days = optional(number, 14) cloudwatch_log_group_kms_key_id = optional(string, null) port = optional(number, 11434) }) |
n/a |
yes |
text_generation_inference_discovery_name |
Name of text-generation-inference used for service discovery |
string |
"" |
no |
text_generation_inference_discovery_namespace |
Namespace of text-generation-inference used for service discovery |
string |
"" |
no |
text_generation_inference_gid |
GID of the text_generation_inference user |
number |
1000 |
no |
text_generation_inference_uid |
UID of the text_generation_inference user |
number |
100 |
no |
use_spot_instances |
Determines whether to use spot instances for the ECS autoscaling group |
bool |
false |
no |
use_ssl_ui |
Determines whether to create a domain and certificates for SSL connections to the UI. |
bool |
true |
no |
validate_certificate |
Determines whether to validate ACM certificate using Route53 DNS. If false, certificate will be created but not validated |
bool |
true |
no |
vpc_id |
ID of the VPC in which the resources will be created |
string |
n/a |
yes |