components stuck in init
Closed this issue · 5 comments
karmab commented
Hello, after deploying the operator and an LVMCluster, the pods fail to come up:
[root@cnf10-worker-0 ~]# oc get pod -n odf-lvm
NAME READY STATUS RESTARTS AGE
controller-manager-765f44745b-hgcpn 3/3 Running 0 31m
topolvm-controller-5ffdc8cd9f-sktg9 4/4 Running 8 (7m41s ago) 31m
topolvm-node-8ffm7 0/4 Init:0/1 0 31m
topolvm-node-w2rvd 0/4 Pending 0 31m
topolvm-node-w5s7w 0/4 Init:0/1 0 31m
vg-manager-8mvn8 0/1 CrashLoopBackOff 7 (4m4s ago) 31m
vg-manager-mg9xj 0/1 CrashLoopBackOff 7 (4m19s ago) 31m
vg-manager-wvhbv 0/1 CrashLoopBackOff 7 (3m53s ago) 31m
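For context, the LVMCluster that was applied is along these lines (a minimal sketch, not the exact manifest; the device class name vg1 is an assumption, check the operator's CRD for the authoritative schema):
apiVersion: lvm.topolvm.io/v1alpha1
kind: LVMCluster
metadata:
  name: lvmcluster
  namespace: odf-lvm
spec:
  storage:
    deviceClasses:
      - name: vg1   # assumed name; with no deviceSelector the operator typically claims unused disks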
[root@cnf10-worker-0 ~]# oc describe pod -n odf-lvm topolvm-node-8ffm7
Name: topolvm-node-8ffm7
Namespace: odf-lvm
Priority: 0
Node: ci-ovirt-master-0.karmalabs.com/10.19.135.249
Start Time: Wed, 20 Apr 2022 13:53:03 -0400
Labels: app=topolvm-node
controller-revision-hash=5685697cf9
pod-template-generation=1
Annotations: k8s.v1.cni.cncf.io/network-status:
[{
"name": "openshift-sdn",
"interface": "eth0",
"ips": [
"10.133.0.246"
],
"default": true,
"dns": {}
}]
k8s.v1.cni.cncf.io/networks-status:
[{
"name": "openshift-sdn",
"interface": "eth0",
"ips": [
"10.133.0.246"
],
"default": true,
"dns": {}
}]
openshift.io/scc: odf-lvm-topolvm-node
Status: Pending
IP: 10.133.0.246
IPs:
IP: 10.133.0.246
Controlled By: DaemonSet/topolvm-node
Init Containers:
file-checker:
Container ID: cri-o://a8a17b40bc03851f13063e7bb245e4a0214b39411a54ab1ebfabec0b634ef14b
Image: registry.redhat.io/odf4/odf-lvm-rhel8-operator@sha256:2bad9a3ab52faf43f8f5258c64ea6734ab40114addfdde116c0bd27d9088bf49
Image ID: registry.redhat.io/odf4/odf-lvm-rhel8-operator@sha256:2bad9a3ab52faf43f8f5258c64ea6734ab40114addfdde116c0bd27d9088bf49
Port: <none>
Host Port: <none>
Command:
/usr/bin/bash
-c
until [ -f /etc/topolvm/lvmd.yaml ]; do echo waiting for lvmd config file; sleep 5; done
State: Running
Started: Wed, 20 Apr 2022 13:53:14 -0400
Ready: False
Restart Count: 0
Environment: <none>
Mounts:
/etc/topolvm from lvmd-config-dir (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-pck6f (ro)
Containers:
lvmd:
Container ID:
Image: registry.redhat.io/odf4/odf-topolvm-rhel8@sha256:4fb7b673d4a14021df0ad89cd99eed68dd837163bfc32aa8dc8b3eb10d60acee
Image ID:
Port: <none>
Host Port: <none>
Command:
/lvmd
--config=/etc/topolvm/lvmd.yaml
--container=true
State: Waiting
Reason: PodInitializing
Ready: False
Restart Count: 0
Limits:
cpu: 250m
memory: 250Mi
Requests:
cpu: 250m
memory: 250Mi
Environment: <none>
Mounts:
/etc/topolvm from lvmd-config-dir (rw)
/run/lvmd from lvmd-socket-dir (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-pck6f (ro)
topolvm-node:
Container ID:
Image: registry.redhat.io/odf4/odf-topolvm-rhel8@sha256:4fb7b673d4a14021df0ad89cd99eed68dd837163bfc32aa8dc8b3eb10d60acee
Image ID:
Port: 9808/TCP
Host Port: 0/TCP
Command:
/topolvm-node
--lvmd-socket=/run/lvmd/lvmd.sock
State: Waiting
Reason: PodInitializing
Ready: False
Restart Count: 0
Limits:
cpu: 250m
memory: 250Mi
Requests:
cpu: 250m
memory: 250Mi
Liveness: http-get http://:healthz/healthz delay=10s timeout=3s period=60s #success=1 #failure=3
Environment:
NODE_NAME: (v1:spec.nodeName)
Mounts:
/run/lvmd from lvmd-socket-dir (rw)
/run/topolvm from node-plugin-dir (rw)
/var/lib/kubelet/plugins/kubernetes.io/csi from csi-plugin-dir (rw)
/var/lib/kubelet/pods from pod-volumes-dir (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-pck6f (ro)
csi-registrar:
Container ID:
Image: registry.redhat.io/openshift4/ose-csi-node-driver-registrar@sha256:3308ef98afab494b80aa1a702924407cf114bce6e0ad92436e508d7dc951521c
Image ID:
Port: <none>
Host Port: <none>
Args:
--csi-address=/run/topolvm/csi-topolvm.sock
--kubelet-registration-path=/var/lib/kubelet/plugins/topolvm.cybozu.com/node/csi-topolvm.sock
State: Waiting
Reason: PodInitializing
Ready: False
Restart Count: 0
Environment: <none>
Mounts:
/registration from registration-dir (rw)
/run/topolvm from node-plugin-dir (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-pck6f (ro)
liveness-probe:
Container ID:
Image: registry.redhat.io/openshift4/ose-csi-livenessprobe@sha256:6b40bb1cb5bffc8e8689b8d01e43096a2d57981aa20ae7859618054ed3800bd7
Image ID:
Port: <none>
Host Port: <none>
Args:
--csi-address=/run/topolvm/csi-topolvm.sock
State: Waiting
Reason: PodInitializing
Ready: False
Restart Count: 0
Environment: <none>
Mounts:
/run/topolvm from node-plugin-dir (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-pck6f (ro)
Conditions:
Type Status
Initialized False
Ready False
ContainersReady False
PodScheduled True
Volumes:
registration-dir:
Type: HostPath (bare host directory volume)
Path: /var/lib/kubelet/plugins_registry/
HostPathType: Directory
node-plugin-dir:
Type: HostPath (bare host directory volume)
Path: /var/lib/kubelet/plugins/topolvm.cybozu.com/node
HostPathType: DirectoryOrCreate
csi-plugin-dir:
Type: HostPath (bare host directory volume)
Path: /var/lib/kubelet/plugins/kubernetes.io/csi
HostPathType: DirectoryOrCreate
pod-volumes-dir:
Type: HostPath (bare host directory volume)
Path: /var/lib/kubelet/pods/
HostPathType: DirectoryOrCreate
lvmd-config-dir:
Type: HostPath (bare host directory volume)
Path: /etc/topolvm
HostPathType: Directory
lvmd-socket-dir:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium: Memory
SizeLimit: <unset>
kube-api-access-pck6f:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
ConfigMapName: openshift-service-ca.crt
ConfigMapOptional: <nil>
QoS Class: Burstable
Node-Selectors: <none>
Tolerations: node.kubernetes.io/disk-pressure:NoSchedule op=Exists
node.kubernetes.io/memory-pressure:NoSchedule op=Exists
node.kubernetes.io/not-ready:NoExecute op=Exists
node.kubernetes.io/pid-pressure:NoSchedule op=Exists
node.kubernetes.io/unreachable:NoExecute op=Exists
node.kubernetes.io/unschedulable:NoSchedule op=Exists
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 31m default-scheduler Successfully assigned odf-lvm/topolvm-node-8ffm7 to ci-ovirt-master-0.karmalabs.com
Normal AddedInterface 31m multus Add eth0 [10.133.0.246/23] from openshift-sdn
Normal Pulling 31m kubelet Pulling image "registry.redhat.io/odf4/odf-lvm-rhel8-operator@sha256:2bad9a3ab52faf43f8f5258c64ea6734ab40114addfdde116c0bd27d9088bf49"
Normal Pulled 30m kubelet Successfully pulled image "registry.redhat.io/odf4/odf-lvm-rhel8-operator@sha256:2bad9a3ab52faf43f8f5258c64ea6734ab40114addfdde116c0bd27d9088bf49" in 7.936196491s
Normal Created 30m kubelet Created container file-checker
Normal Started 30m kubelet Started container file-checker
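The file-checker init container above is simply waiting for /etc/topolvm/lvmd.yaml to appear on the host, so Init:0/1 here looks like a downstream symptom of the vg-manager CrashLoopBackOff seen in the pod listing. One way to confirm the config file was never written (node name taken from the describe output above; the check itself is illustrative):
oc debug node/ci-ovirt-master-0.karmalabs.com -- chroot /host ls -l /etc/topolvm/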
karmab commented
The issue with the vg-manager pod is:
I0420 19:13:21.737554 2360300 request.go:665] Waited for 1.026197652s due to client-side throttling, not priority and fairness, request: GET:https://172.30.0.1:443/apis/snapshot.kubevirt.io/v1alpha1?timeout=32s
{"level":"info","ts":1650482004.4948282,"logger":"controller-runtime.metrics","msg":"metrics server is starting to listen","addr":":8080"}
{"level":"info","ts":1650482004.4951057,"logger":"setup","msg":"starting manager"}
{"level":"info","ts":1650482004.4952693,"msg":"starting metrics server","path":"/metrics"}
{"level":"info","ts":1650482004.4954143,"logger":"controller.lvmvolumegroup","msg":"Starting EventSource","reconciler group":"lvm.topolvm.io","reconciler kind":"LVMVolumeGroup","source":"kind source: /, Kind="}
{"level":"info","ts":1650482004.4955132,"logger":"controller.lvmvolumegroup","msg":"Starting Controller","reconciler group":"lvm.topolvm.io","reconciler kind":"LVMVolumeGroup"}
E0420 19:13:24.498715 2360300 reflector.go:138] sigs.k8s.io/controller-runtime/pkg/cache/internal/informers_map.go:250: Failed to watch *v1alpha1.LVMVolumeGroup: failed to list *v1alpha1.LVMVolumeGroup: lvmvolumegroups.lvm.topolvm.io is forbidden: User "system:serviceaccount:odf-lvm:vg-manager" cannot list resource "lvmvolumegroups" in API group "lvm.topolvm.io" in the namespace "odf-lvm"
E0420 19:13:25.647058 2360300 reflector.go:138] sigs.k8s.io/controller-runtime/pkg/cache/internal/informers_map.go:250: Failed to watch *v1alpha1.LVMVolumeGroup: failed to list *v1alpha1.LVMVolumeGroup: lvmvolumegroups.lvm.topolvm.io is forbidden: User "system:serviceaccount:odf-lvm:vg-manager" cannot list resource "lvmvolumegroups" in API group "lvm.topolvm.io" in the namespace "odf-lvm"
E0420 19:13:28.740610 2360300 reflector.go:138] sigs.k8s.io/controller-runtime/pkg/cache/internal/informers_map.go:250: Failed to watch *v1alpha1.LVMVolumeGroup: failed to list *v1alpha1.LVMVolumeGroup: lvmvolumegroups.lvm.topolvm.io is forbidden: User "system:serviceaccount:odf-lvm:vg-manager" cannot list resource "lvmvolumegroups" in API group "lvm.topolvm.io" in the namespace "odf-lvm"
E0420 19:13:33.447567 2360300 reflector.go:138] sigs.k8s.io/controller-runtime/pkg/cache/internal/informers_map.go:250: Failed to watch *v1alpha1.LVMVolumeGroup: failed to list *v1alpha1.LVMVolumeGroup: lvmvolumegroups.lvm.topolvm.io is forbidden: User "system:serviceaccount:odf-lvm:vg-manager" cannot list resource "lvmvolumegroups" in API group "lvm.topolvm.io" in the namespace "odf-lvm"
E0420 19:13:43.227955 2360300 reflector.go:138] sigs.k8s.io/controller-runtime/pkg/cache/internal/informers_map.go:250: Failed to watch *v1alpha1.LVMVolumeGroup: failed to list *v1alpha1.LVMVolumeGroup: lvmvolumegroups.lvm.topolvm.io is forbidden: User "system:serviceaccount:odf-lvm:vg-manager" cannot list resource "lvmvolumegroups" in API group "lvm.topolvm.io" in the namespace "odf-lvm"
E0420 19:14:03.151838 2360300 reflector.go:138] sigs.k8s.io/controller-runtime/pkg/cache/internal/informers_map.go:250: Failed to watch *v1alpha1.LVMVolumeGroup: failed to list *v1alpha1.LVMVolumeGroup: lvmvolumegroups.lvm.topolvm.io is forbidden: User "system:serviceaccount:odf-lvm:vg-manager" cannot list resource "lvmvolumegroups" in API group "lvm.topolvm.io" in the namespace "odf-lvm"
E0420 19:14:42.636428 2360300 reflector.go:138] sigs.k8s.io/controller-runtime/pkg/cache/internal/informers_map.go:250: Failed to watch *v1alpha1.LVMVolumeGroup: failed to list *v1alpha1.LVMVolumeGroup: lvmvolumegroups.lvm.topolvm.io is forbidden: User "system:serviceaccount:odf-lvm:vg-manager" cannot list resource "lvmvolumegroups" in API group "lvm.topolvm.io" in the namespace "odf-lvm"
E0420 19:15:21.048957 2360300 reflector.go:138] sigs.k8s.io/controller-runtime/pkg/cache/internal/informers_map.go:250: Failed to watch *v1alpha1.LVMVolumeGroup: failed to list *v1alpha1.LVMVolumeGroup: lvmvolumegroups.lvm.topolvm.io is forbidden: User "system:serviceaccount:odf-lvm:vg-manager" cannot list resource "lvmvolumegroups" in API group "lvm.topolvm.io" in the namespace "odf-lvm"
{"level":"error","ts":1650482124.502881,"logger":"controller.lvmvolumegroup","msg":"Could not wait for Cache to sync","reconciler group":"lvm.topolvm.io","reconciler kind":"LVMVolumeGroup","error":"failed to wait for lvmvolumegroup caches to sync: timed out waiting for cache to be synced","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:234\nsigs.k8s.io/controller-runtime/pkg/manager.(*controllerManager).startRunnable.func1\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/manager/internal.go:696"}
{"level":"error","ts":1650482124.5031688,"logger":"setup","msg":"problem running manager","error":"failed to wait for lvmvolumegroup caches to sync: timed out waiting for cache to be synced"}
karmab commented
The following did the trick:
oc adm policy add-cluster-role-to-user cluster-admin -z vg-manager -n odf-lvm
oc adm policy add-cluster-role-to-user cluster-admin -z topolvm-controller -n odf-lvm
oc adm policy add-cluster-role-to-user cluster-admin -z topolvm-node -n odf-lvm
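Granting cluster-admin is a blunt workaround; a narrower grant scoped to just what the error complains about would look roughly like this (a sketch only; the role name and verb list are assumptions):
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: vg-manager-lvmvolumegroups   # hypothetical name
  namespace: odf-lvm
rules:
  - apiGroups: ["lvm.topolvm.io"]
    resources: ["lvmvolumegroups", "lvmvolumegroups/status"]
    verbs: ["get", "list", "watch", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: vg-manager-lvmvolumegroups   # hypothetical name
  namespace: odf-lvm
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: vg-manager-lvmvolumegroups
subjects:
  - kind: ServiceAccount
    name: vg-manager
    namespace: odf-lvm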
sp98 commented
@karmab our RBACs are restricted to the openshift-storage namespace only as of now. Please try the openshift-storage namespace instead of odf-lvm for now and let us know if that fixes the issue for you.
We will work on making the operator deployable in other namespaces.
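For example (assuming the namespace does not already exist on the cluster):
oc create namespace openshift-storage
# then reinstall the operator and re-create the LVMCluster CR in openshift-storage instead of odf-lvm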
karmab commented
OK, could you please populate operatorframework.io/suggested-namespace in the CSV of your OLM metadata to reflect that?
That's what I use to target a given namespace.
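For reference, that is a metadata annotation on the ClusterServiceVersion; the snippet below is illustrative (the CSV name is a placeholder):
apiVersion: operators.coreos.com/v1alpha1
kind: ClusterServiceVersion
metadata:
  name: odf-lvm-operator.vX.Y.Z        # placeholder version
  annotations:
    operatorframework.io/suggested-namespace: openshift-storage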