Failed Healthcheck
moos3 opened this issue · 3 comments
So using the latest version of the module in the following:
// Client internal client load balancer IP address
resource "google_compute_address" "es_client_ilb" {
name = "${var.cluster_name}-client-ilb"
address_type = "INTERNAL"
subnetwork = data.google_compute_subnetwork.default.self_link
project = var.project
}
// Client internal load balancer
module "es_client_ilb" {
source = "GoogleCloudPlatform/lb-internal/google"
version = "~> 2.0"
project = var.project
region = var.region
name = "${var.cluster_name}-client-ilb"
ip_address = google_compute_address.es_client_ilb.address
ports = ["9200", "9300"]
health_check = {
type = "http"
check_interval_sec = 1
healthy_threshold = 4
timeout_sec = 1
unhealthy_threshold = 5
proxy_header = "NONE"
port = 9200
port_name = "health-check-port"
request_path = "/"
}
source_tags = ["${var.cluster_name}-kibana", "${var.cluster_name}-external"]
target_tags = ["${var.cluster_name}-client"]
network = "default"
subnetwork = "default"
backends = [
{
group = module.es_client.instance_group
description = "elasticsearch-clients"
},
]
}
When I go into the console it's showing that no healthly nodes. My MIG looks like this
module "es_client" {
source = "../../../modules/terraform-elasticsearch"
cluster_name = var.cluster_name
name = "${var.cluster_name}-client"
region = var.region
zones = var.zones
num_nodes = var.client_num_nodes
machine_type = var.client_machine_type
heap_size = var.client_heap_size
masters_count = format("%d", floor(var.master_num_nodes / 2 + 1))
master_node = false
data_node = false
access_config = []
network = data.google_compute_network.default.self_link
subnetwork = data.google_compute_subnetwork.default.self_link
subnetwork_project = var.subnetwork_project_id
project = var.project
node_tags = [var.cluster_name]
hostname = "es-client"
source_image_family = "debian-9"
source_image_project = "debian-cloud"
node_labels = {
environment = "staging"
department = "engineering"
application = "elasticsearch"
terraform_created = "true"
}
named_ports = local.named_ports
service_account = {
email = "1048047287745-compute@developer.gserviceaccount.com"
scopes = ["https://www.googleapis.com/auth/cloud-platform"]
}
}
Which is calling in the terraform-elasticsearch module.
data "template_file" "node-startup-script" {
template = file("${path.module}/config/user_data.sh")
vars = {
project_id = var.project
zones = join(",", var.zones)
elasticsearch_data_dir = var.elasticsearch_data_dir
elasticsearch_logs_dir = var.elasticsearch_logs_dir
heap_size = var.heap_size
cluster_name = var.cluster_name
minimum_master_nodes = var.masters_count
master = var.master_node ? "true" : "false"
data = var.data_node ? "true" : "false"
ingest = var.ingest_node ? "true" : "false"
http_enabled = var.http_enabled ? "true" : "false"
security_enabled = var.security_enabled ? "true" : "false"
monitoring_enabled = var.monitoring_enabled ? "true" : "false"
}
}
module "instance_template" {
source = "terraform-google-modules/vm/google//modules/instance_template"
version = "1.3.0"
project_id = var.project
machine_type = var.machine_type
tags = var.node_tags
labels = var.node_labels
startup_script = data.template_file.node-startup-script.rendered
/* network */
network = var.network
subnetwork = var.subnetwork
subnetwork_project = var.subnetwork_project
can_ip_forward = var.can_ip_forward
/* image */
source_image = var.source_image
source_image_family = var.source_image_family
source_image_project = var.source_image_project
/* disks */
disk_size_gb = var.disk_size_gb
disk_type = var.disk_type
auto_delete = var.auto_delete
additional_disks = var.additional_disks
service_account = var.service_account
}
module "node" {
source = "terraform-google-modules/vm/google//modules/mig"
version = "1.3.0"
project_id = var.project
network = var.network
/* subnetwork = var.subnetwork
subnetwork_project = var.subnetwork_project */
hostname = var.hostname
region = var.region
instance_template = module.instance_template.self_link
target_size = var.num_nodes
target_pools = var.target_pools
distribution_policy_zones = var.distribution_policy_zones
update_policy = var.update_policy
named_ports = var.named_ports
min_replicas = var.num_nodes
}
data "google_compute_region_instance_group" "default" {
self_link = module.node.self_link
}
Cluster firewall rule is as such
// Cluster firewall
resource "google_compute_firewall" "cluster" {
name = var.cluster_name
network = data.google_compute_network.default.self_link
project = var.project
allow {
protocol = "tcp"
ports = ["9200", "9300"]
}
source_tags = [var.cluster_name, "${var.cluster_name}-external",var.k8s_cluster_tag,"elasticsearch"]
target_tags = [var.cluster_name, "elasticsearch"]
}
I'm at a lost on why this wouldn't work. I have confirmed that the nodes are recieving traffic if call them by IP address from other nodes in my project.
This makes me wonder if my firewall rule isn't correct or my MIG needs its own health check.
Transferring this to the lb-internal repo since it looks like that's what you're using.
In the console, can you check if the health check looks to be correctly configured?
Looking at your config in more detail, I think I see a potential issue with your firewall config:
In the LB config, you specify network tags as:
module "es_client_ilb" {
source = "GoogleCloudPlatform/lb-internal/google"
...
target_tags = ["${var.cluster_name}-client"]
}
But in the MIG config, they're set as:
module "es_client" {
source = "../../../modules/terraform-elasticsearch"
...
node_tags = [var.cluster_name]
}