EMQX k8s replicant can't connect to Core node deployed on VM
vflorescu1 opened this issue · 17 comments
Describe the bug
Trying to create a hybrid architecture using core nodes deployed on CentOS VMs and replicant nodes deployed in a k8s cluster.
The VMs register successfully in the EMQX cluster, but registering a replicant node deployed via the emqx-operator fails.
All the required ports are mapped on the listeners service as well.
Checked the network connectivity: the pod is able to communicate with the VM via TCP.
Also, the pod is visible for a few seconds when running emqx_ctl cluster status, but then it is immediately evicted.
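For reference, the connectivity check amounted to something along these lines (hostname and port selection are illustrative, taken from the listeners mapping below; any TCP probe works the same way):

# from inside the replicant pod, probe the core VM's relevant ports
nc -vz core-node-name 1883   # MQTT listener
nc -vz core-node-name 4370   # Erlang distribution
nc -vz core-node-name 5369   # RPC server port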
emqx.conf on the core node:
node {
name = "emqx@core-node-name"
cookie = "emqx"
data_dir = "/var/lib/emqx"
etc_dir = "/etc"
db_role = core
}
cluster {
name = en1-qa-emqx
discovery_strategy = static
core_nodes = ["emqx@core-node-name"]
autoheal = true
static {
seeds = ["emqx@core-node-name", "emqx@pod_ip"]
}
}
dashboard {
listeners.http {
bind = 18083
}
default_username = admin
default_password = public
}
listeners.tcp.default {
bind = "0.0.0.0:1883"
max_connections = 1024000
}
emqx.conf on the replicant node (modified via bootstrapConfig):
node {
name = "emqx@pod_ip"
cookie = "emqx"
data_dir = "/data"
etc_dir = "/etc"
db_role = replicant
}
cluster {
name = en1-qa-emqx
discovery_strategy = static
core_nodes = ["emqx@core-node-name"]
autoheal = true
static {
seeds = ["emqx@core-node-name", "emqx@pod_ip"]
}
}
dashboard {
listeners.http {
bind = 18083
}
default_username = admin
default_password = public
}
listeners.tcp.default {
bind = "0.0.0.0:1883"
max_connections = 1024000
}
To Reproduce
Deploy a core node by installing EMQX on a VM.
Deploy a replicant node along with the dashboard and listeners services in k8s using the emqx-operator (apply the config below).
Check the pod's logs, and check the cluster status on the VM (see the commands sketched below).
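A minimal sketch of those checks (the pod name and namespace are placeholders matching the manifest below):

# on the k8s side
kubectl logs emqx-replicant-0 -n emqx-operator-system

# on the VM (core node)
emqx_ctl cluster status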
apiVersion: apps.emqx.io/v2alpha1
kind: EMQX
metadata:
generation: 4
labels:
apps.emqx.io/instance: emqx
apps.emqx.io/managed-by: emqx-operator
k8slens-edit-resource-version: v2alpha1
name: emqx
namespace: emqx-operator-system
selfLink: /apis/apps.emqx.io/v2alpha1/namespaces/emqx-operator-system/emqxes/emqx
status:
conditions:
- lastTransitionTime: '2023-07-06T14:40:06Z'
lastUpdateTime: '2023-07-06T14:40:06Z'
message: Core nodes is ready
reason: ClusterCoreReady
status: 'True'
type: CoreNodesReady
- lastTransitionTime: '2023-07-06T14:40:01Z'
lastUpdateTime: '2023-07-06T14:40:01Z'
message: Updating core nodes in cluster
reason: ClusterCoreUpdating
status: 'True'
type: CoreNodesUpdating
- lastTransitionTime: '2023-07-06T14:40:01Z'
lastUpdateTime: '2023-07-06T14:40:01Z'
message: Creating EMQX cluster
reason: ClusterCreating
status: 'True'
type: Creating
currentImage: emqx:5.0
replicantNodeReplicas: 1
spec:
bootstrapConfig: >-
node {
name = "emqx@replicant_node_name"
cookie = "emqx"
etc_dir = "etc"
data_dir = "data"
db_role = replicant
}
cluster {
name = emqx
discovery_strategy = static
core_nodes = ["emqx@core_node_name"]
autoheal = true
static {
seeds = ["emqx@core_node_name", "emqx@replicant_node_name"]
}
}
coreTemplate:
metadata:
labels:
apps.emqx.io/db-role: core
apps.emqx.io/instance: emqx
apps.emqx.io/managed-by: emqx-operator
name: emqx-core
spec:
livenessProbe:
failureThreshold: 3
httpGet:
path: /status
port: 18083
scheme: HTTP
initialDelaySeconds: 60
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 1
readinessProbe:
failureThreshold: 12
httpGet:
path: /status
port: 18083
scheme: HTTP
initialDelaySeconds: 10
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 1
replicas: 0
resources:
requests:
cpu: '1'
ephemeral-storage: 1Gi
memory: 1Gi
volumeClaimTemplates:
resources: {}
dashboardServiceTemplate:
metadata:
annotations:
service: private
labels:
apps.emqx.io/instance: emqx
apps.emqx.io/managed-by: emqx-operator
name: emqx-dashboard
spec:
ports:
- name: dashboard-listeners-http-bind
port: 18083
protocol: TCP
targetPort: 18083
selector:
apps.emqx.io/db-role: core
apps.emqx.io/instance: emqx
apps.emqx.io/managed-by: emqx-operator
type: LoadBalancer
status:
loadBalancer: {}
image: emqx:5.0
imagePullPolicy: IfNotPresent
listenersServiceTemplate:
metadata:
annotations:
service: private
labels:
apps.emqx.io/instance: emqx
apps.emqx.io/managed-by: emqx-operator
name: emqx-listeners
spec:
ports:
- name: listeners-tcp-bind
port: 1883
protocol: TCP
targetPort: 1883
- name: listeners-tcp-bind-3
port: 8083
protocol: TCP
targetPort: 8083
- name: listeners-tcp-bind-4
port: 8084
protocol: TCP
targetPort: 8084
- name: listeners-tcp-bind-5
port: 8883
protocol: TCP
targetPort: 8883
- name: listeners-tcp-bind-10
port: 8081
protocol: TCP
targetPort: 8081
- name: listeners-tcp-bind-6
port: 4370
protocol: TCP
targetPort: 4370
- name: listeners-tcp-bind-9
port: 5370
protocol: TCP
targetPort: 5370
type: LoadBalancer
status:
loadBalancer: {}
replicantTemplate:
metadata:
labels:
apps.emqx.io/db-role: replicant
apps.emqx.io/instance: emqx
apps.emqx.io/managed-by: emqx-operator
annotations:
service: private
name: emqx-replicant
spec:
env:
- name: EMQX_NODE__COOKIE
value: emqx
livenessProbe:
exec:
command:
- cat
- /opt/emqx/etc/emqx.conf
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
exec:
command:
- cat
- /opt/emqx/etc/emqx.conf
initialDelaySeconds: 5
periodSeconds: 5
replicas: 1
resources:
requests:
cpu: '1'
ephemeral-storage: 1Gi
memory: 1Gi
Expected behavior
The k8s replicant registers successfully in the cluster together with the core nodes on VMs.
Anything else we need to know?:
Environment details: dev
- Kubernetes version: 1.24
- Cloud-provider/provisioner: on-prem
- emqx-operator version:
- Install method: e.g. helm/static manifests
Sorry, the EMQX Operator is designed to deploy and manage EMQX clusters running in k8s. I'm not sure it can meet your needs.
Could you please format your issue content? It looks too messy, and I can't provide useful help as it is.
Sorry for that, reformatted.
The thing is that I want to test a hybrid architecture consisting of one static core node on a VM and a replicant node in k8s, with the possibility to scale the replicants. The issue is basically that the replicant does not register properly. I've also tested with a VM as the replicant, and with the same configs it works fine. The k8s replicant throws econnrefused, and I guess the VM is actually rejecting it.
Also, I've created a CentOS pod and manually installed EMQX on it, but when the emqx service tries to start it gets stuck at the starting step. I did a telnet from this pod to the VM on a couple of the ports EMQX uses (1883, 5370, etc.); it connects for a couple of seconds but then errors with "Connection closed by foreign host".
Hello, could you please try this:
- make sure the Pod can access the VM's address
- make sure the VM can access the Pod's address
- make sure they can reach all ports between them without any extra firewall policies
- set EMQX_RPC__PORT_DISCOVERY = manual in the VM's env (see the sketch below)
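For reference, the env var above is equivalent to the following rpc block in the VM's emqx.conf (a minimal sketch; pinning tcp_server_port is optional here, and 5369 is its default):

rpc {
  port_discovery = manual
  tcp_server_port = 5369
}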
This is a brand new attempt, and we have never done it before.
Thanks, I'll try it out. Points 1-3 are already met. I guess that when setting manual port discovery I'll also have to set the ports explicitly in the VM's emqx.conf, right?
Tried the above, but it still does not connect properly to the cluster. I got the following errors:
2023-07-13T12:03:14.396314+00:00 [error] State machine '$mria_meta_shard' terminating. Reason: {timeout,{gen_server,call,[mria_lb,{probe,','$mria_meta_shard'}]}}. Stack: [{gen_server,call,2,[{file,"gen_server.erl"},{line,239}]},{mria_rlog,subscribe,4,[{file,"mria_rlog.erl"},{line,167}]},{mria_rlog_replica,try_connect,3,[{file,"mria_rlog_replica.erl"},{line,395}]},{mria_rlog_replica,handle_reconnect,1,[{file,"mria_rlog_replica.erl"},{line,341}]},{gen_statem,loop_state_callback,11,[{file,"gen_statem.erl"},{line,1205}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,226}]}]. Last event: {state_timeout,reconnect}. State: {disconnected,{d,'$mria_meta_shard',<0.1979.0>,undefined,undefined,undefined,0,undefined,undefined,false}}.
2023-07-13T12:03:14.396737+00:00 [error] crasher: initial call: mria_rlog_replica:init/1, pid: <0.1980.0>, registered_name: '$mria_meta_shard', exit: {{timeout,{gen_server,call,[mria_lb,{probe,'emqx@en1-qa1a-emqx01.en1.whitepj.net','$mria_meta_shard'}]}},[{gen_server,call,2,[{file,"gen_server.erl"},{line,239}]},{mria_rlog,subscribe,4,[{file,"mria_rlog.erl"},{line,167}]},{mria_rlog_replica,try_connect,3,[{file,"mria_rlog_replica.erl"},{line,395}]},{mria_rlog_replica,handle_reconnect,1,[{file,"mria_rlog_replica.erl"},{line,341}]},{gen_statem,loop_state_callback,11,[{file,"gen_statem.erl"},{line,1205}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,226}]}]}, ancestors: [<0.1979.0>,mria_shards_sup,mria_rlog_sup,mria_sup,<0.1898.0>], message_queue_len: 0, messages: [], links: [<0.1979.0>], dictionary: [{'$logger_metadata$',#{domain => [mria,rlog,replica],shard => '$mria_meta_shard'}},{rand_seed,{#{bits => 58,jump => #Fun<rand.3.92093067>,next => #Fun<rand.0.92093067>,type => exsss,uniform => #Fun<rand.1.92093067>,uniform_n => #Fun<rand.2.92093067>},[5142563473631579|282317591639871924]}}], trap_exit: true, status: running, heap_size: 6772, stack_size: 29, reductions: 12019; neighbours:
2023-07-13T12:03:14.397233+00:00 [error] Supervisor: {<0.1979.0>,mria_replicant_shard_sup}. Context: child_terminated. Reason: {timeout,{gen_server,call,[mria_lb,{probe,'','$mria_meta_shard'}]}}. Offender: id=replica,pid=<0.1980.0>.
2023-07-13T12:03:14.397383+00:00 [error] Supervisor: {<0.1979.0>,mria_replicant_shard_sup}. Context: shutdown. Reason: reached_max_restart_intensity. Offender: id=replica,pid=<0.1980.0>.
2023-07-13T12:03:14.397562+00:00 [error] Supervisor: {local,mria_shards_sup}. Context: child_terminated. Reason: shutdown. Offender: id='$mria_meta_shard',pid=<0.1979.0>.
2023-07-13T12:11:24.704108+00:00 [warning] msg: Dashboard monitor error, mfa: emqx_dashboard_monitor:current_rate/1, line: 144, reason: {noproc,{gen_server,call,[emqx_dashboard_monitor,current_rate,5000]}}
However, the replicant appears as connected when running emqx_ctl cluster status, while inside the pod the emqx service is stuck in the starting state (screenshots below).
@ieQu1 @SergeTupchiy Could you please take a look
@vflorescu1 EMQX uses two different TCP ports for the backplane communication. https://www.emqx.io/docs/en/v5.1/configuration/configuration-manual.html#rpc
Can you check that the value of the rpc.tcp_server_port configuration parameter is the same in both k8s and the VM, and make sure that the port is open in both environments?
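Concretely, pinning the same port on both sides looks roughly like this (a sketch; 5369 is the default):

# VM side, in emqx.conf
rpc.port_discovery = manual
rpc.tcp_server_port = 5369

# k8s side, as container env vars
EMQX_RPC__PORT_DISCOVERY=manual
EMQX_RPC__TCP_SERVER_PORT=5369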
@ieQu1 I've set tcp_server_port explicitly to 5369 in the VM's emqx.conf and the same value as an env var in k8s. Ports are open and connectivity is checked. Error below; it seems the same:
Also, in the VM's emqx.conf I've set the discovery mechanism to static and added the pod IP to the seeds (emqx@198.x.x.x), not the listeners-service IP. Is that the right config? I see that telnet to the pod IP from the VM does not work (expected), but when checking with the service external IP it connects and after a few seconds throws "Connection closed by foreign host".
I am not a kubernetes expert, but I doubt that one can access a pod IP externally. Also, in general we don't recommend using IPs as node names:
https://www.emqx.io/docs/en/v5.1/deploy/cluster/create-cluster.html#node-names
EMQX node names are immutable, as they are baked into the database schema and data files. Therefore, it is recommended to use static FQDNs for EMQX node names.
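For example, a node section using a stable FQDN rather than an IP might look like this (the hostname is a placeholder):

node {
  # a resolvable, stable FQDN, not a pod IP, which changes on reschedule
  name = "emqx@emqx-core-0.example.internal"
}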
Regarding "service ext IP": could you clarify this part? Do you create a service for each EMQX node in the k8s cluster? Please note that the EMQX backplane network operates in a peer-to-peer fashion: every node should be able to communicate with every other node directly.
Indeed the pod IP can't be accessed externally, but when creating the replicant node via the emqx-operator it forces node.name to be emqx@pod_ip, even if I change node.name in the bootstrapConfig to something else in order to modify the pod's emqx.conf.
I've also tried modifying the env var for node.name. That does change it, but when emqx starts it reports 'emqx@changed_node_name' not responding to pings (via emqx_ctl status).
I have created two services, emqx-listeners and emqx-dashboard, for the replicant pod to use. emqx-listeners also has all the needed ports mapped.
- name: listeners-tcp-bind-9
port: 5370
protocol: TCP
targetPort: 5370
Can you add TCP port 5369 to the list as well?
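That would be an entry along these lines in the listenersServiceTemplate ports (the name field is arbitrary; this matches what the updated manifest below ends up containing):

- name: listeners-tcp-bind-11
  port: 5369
  protocol: TCP
  targetPort: 5369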
But generally speaking, an EMQX cluster assumes that all the nodes reside in the same network: there is a direct mapping between the host FQDN (or IP) and the node name. If for some reason that doesn't hold (e.g. when nodes are deployed in different private networks), RPC either won't work at all or, in rare cases, will work halfway and lead to all kinds of strange behaviors.
I've already added that port to the listeners.
Yeah, that seems to be the case: the VM and the k8s cluster are indeed in different networks, and it looks like RPC is struggling to establish a proper connection.
I've modified the config a bit to change the node name in k8s, and I've also used env vars to set the EMQX properties. Now these are the errors I get when starting:
2023-07-14T13:52:04.471451+00:00 [error] State machine '$mria_meta_shard' terminating. Reason: {timeout,{gen_server,call,[mria_lb,core_nodes,30000]}}. Stack: [{gen_server,call,3,[{file,"gen_server.erl"},{line,247}]},{mria_rlog_replica,try_connect,2,[{file,"mria_rlog_replica.erl"},{line,378}]},{mria_rlog_replica,handle_reconnect,1,[{file,"mria_rlog_replica.erl"},{line,341}]},{gen_statem,loop_state_callback,11,[{file,"gen_statem.erl"},{line,1205}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,226}]}]. Last event: {state_timeout,reconnect}. State: {disconnected,{d,'$mria_meta_shard',<0.1979.0>,undefined,undefined,undefined,0,undefined,undefined,false}}.
2023-07-14T13:52:04.471802+00:00 [error] crasher: initial call: mria_rlog_replica:init/1, pid: <0.1980.0>, registered_name: '$mria_meta_shard', exit: {{timeout,{gen_server,call,[mria_lb,core_nodes,30000]}},[{gen_server,call,3,[{file,"gen_server.erl"},{line,247}]},{mria_rlog_replica,try_connect,2,[{file,"mria_rlog_replica.erl"},{line,378}]},{mria_rlog_replica,handle_reconnect,1,[{file,"mria_rlog_replica.erl"},{line,341}]},{gen_statem,loop_state_callback,11,[{file,"gen_statem.erl"},{line,1205}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,226}]}]}, ancestors: [<0.1979.0>,mria_shards_sup,mria_rlog_sup,mria_sup,<0.1898.0>], message_queue_len: 0, messages: [], links: [<0.1979.0>], dictionary: [{'$logger_metadata$',#{domain => [mria,rlog,replica],shard => '$mria_meta_shard'}}], trap_exit: true, status: running, heap_size: 4185, stack_size: 29, reductions: 10709; neighbours:
2023-07-14T13:52:04.472218+00:00 [error] Supervisor: {<0.1979.0>,mria_replicant_shard_sup}. Context: child_terminated. Reason: {timeout,{gen_server,call,[mria_lb,core_nodes,30000]}}. Offender: id=replica,pid=<0.1980.0>.
2023-07-14T13:52:04.472381+00:00 [error] Supervisor: {<0.1979.0>,mria_replicant_shard_sup}. Context: shutdown. Reason: reached_max_restart_intensity. Offender: id=replica,pid=<0.1980.0>.
2023-07-14T13:52:04.472519+00:00 [error] Supervisor: {local,mria_shards_sup}. Context: child_terminated. Reason: shutdown. Offender: id='$mria_meta_shard',pid=<0.1979.0>.
When running emqx_ctl status inside the pod I get: Node 'emqx@node_name' not responding to pings.
When running emqx_ctl cluster status on the VM I can see the correct name of the replicant pod added to the list.
Current config in k8s:
apiVersion: apps.emqx.io/v2alpha1
kind: EMQX
metadata:
generation: 4
labels:
apps.emqx.io/instance: emqx
apps.emqx.io/managed-by: emqx-operator
k8slens-edit-resource-version: v2alpha1
name: emqx
namespace: emqx-operator-system
selfLink: /apis/apps.emqx.io/v2alpha1/namespaces/emqx-operator-system/emqxes/emqx
status:
conditions:
- lastTransitionTime: '2023-07-06T14:40:06Z'
lastUpdateTime: '2023-07-06T14:40:06Z'
message: Core nodes is ready
reason: ClusterCoreReady
status: 'True'
type: CoreNodesReady
- lastTransitionTime: '2023-07-06T14:40:01Z'
lastUpdateTime: '2023-07-06T14:40:01Z'
message: Updating core nodes in cluster
reason: ClusterCoreUpdating
status: 'True'
type: CoreNodesUpdating
- lastTransitionTime: '2023-07-06T14:40:01Z'
lastUpdateTime: '2023-07-06T14:40:01Z'
message: Creating EMQX cluster
reason: ClusterCreating
status: 'True'
type: Creating
currentImage: emqx:5.0
replicantNodeReplicas: 1
spec:
coreTemplate:
metadata:
labels:
apps.emqx.io/db-role: core
apps.emqx.io/instance: emqx
apps.emqx.io/managed-by: emqx-operator
name: emqx-core
spec:
livenessProbe:
failureThreshold: 3
httpGet:
path: /status
port: 18083
scheme: HTTP
initialDelaySeconds: 60
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 1
readinessProbe:
failureThreshold: 12
httpGet:
path: /status
port: 18083
scheme: HTTP
initialDelaySeconds: 10
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 1
replicas: 0
resources:
requests:
cpu: '1'
ephemeral-storage: 1Gi
memory: 1Gi
volumeClaimTemplates:
resources: {}
dashboardServiceTemplate:
metadata:
labels:
apps.emqx.io/instance: emqx
apps.emqx.io/managed-by: emqx-operator
name: emqx-dashboard
spec:
ports:
- name: dashboard-listeners-http-bind
port: 18083
protocol: TCP
targetPort: 18083
selector:
apps.emqx.io/db-role: core
apps.emqx.io/instance: emqx
apps.emqx.io/managed-by: emqx-operator
type: LoadBalancer
status:
loadBalancer: {}
image: emqx:5.0
imagePullPolicy: IfNotPresent
listenersServiceTemplate:
metadata:
labels:
apps.emqx.io/instance: emqx
apps.emqx.io/managed-by: emqx-operator
name: emqx-listeners
spec:
ports:
- name: listeners-tcp-bind
port: 1883
protocol: TCP
targetPort: 1883
- name: listeners-tcp-bind-3
port: 8083
protocol: TCP
targetPort: 8083
- name: listeners-tcp-bind-4
port: 8084
protocol: TCP
targetPort: 8084
- name: listeners-tcp-bind-5
port: 8883
protocol: TCP
targetPort: 8883
- name: listeners-tcp-bind-10
port: 8081
protocol: TCP
targetPort: 8081
- name: listeners-tcp-bind-6
port: 4370
protocol: TCP
targetPort: 4370
- name: listeners-tcp-bind-9
port: 5370
protocol: TCP
targetPort: 5370
- name: listeners-tcp-bind-11
port: 5369
protocol: TCP
targetPort: 5369
type: LoadBalancer
status:
loadBalancer: {}
replicantTemplate:
metadata:
labels:
apps.emqx.io/db-role: replicant
apps.emqx.io/instance: emqx
apps.emqx.io/managed-by: emqx-operator
name: emqx-replicant
spec:
env:
- name: EMQX_NODE__COOKIE
value: en1-qa-emqx
- name: EMQX_RPC__PORT_DISCOVERY
value: manual
- name: EMQX_RPC__TCP_SERVER_PORT
value: "5369"
- name: EMQX_NODE_NAME
value: "emqx@replicant_name"
- name: EMQX_NODE__DB_ROLE
value: replicant
- name : EMQX_NODE__DATA_DIR
value: "/opt/emqx/var/lib/emqx"
- name : EMQX_NODE__ETC_DIR
value: "/opt/emqx/etc/emqx"
- name: EMQX_CLUSTER__NAME
value: en1-qa-emqx
- name: EMQX_CLUSTER__DISCOVERY_STRATEGY
value: static
- name: EMQX_CLUSTER__CORE_NODES
value: "emqx@core_node_name"
- name: EMQX_CLUSTER__STATIC__SEEDS
value: "emqx@core_node_name,emqx@replicant_name"
livenessProbe:
exec:
command:
- cat
- /opt/emqx/etc/emqx.conf
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
exec:
command:
- cat
- /opt/emqx/etc/emqx.conf
initialDelaySeconds: 5
periodSeconds: 5
replicas: 1
resources:
requests:
cpu: '1'
ephemeral-storage: 1Gi
memory: 1Gi
I have some doubts. @vflorescu1 Can you please double-check your network?
In your example, please make sure that the VM node (EMQX core node) can ping core_node_name, replicant_name, and the Pod IP, and make sure the Pod (EMQX replicant node) can ping core_node_name and replicant_name. A sketch of those checks follows.
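For example (reusing the placeholder names from the manifest above):

# from the VM (core node)
ping core_node_name
ping replicant_name
ping <pod IP>

# from inside the replicant pod
ping core_node_name
ping replicant_name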
@vflorescu1 In your example, does replicant_name refer to the listeners service address?
This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.