Question: etcd backup & restore
Backup:
export ETCDCTL_API=3
etcdctl --endpoints=https://172.18.0.2:2379 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/server.crt --key=/etc/kubernetes/pki/etcd/server.key \
  snapshot save snapshot.db
Restore (into a new data directory, so the old one stays untouched):
etcdctl --data-dir /var/lib/etcd-backup snapshot restore snapshot.db
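On a kubeadm cluster, restoring the snapshot is only half the job: etcd must then be pointed at the new data directory. A minimal sketch, assuming the default static pod setup:
# edit the etcd static pod manifest; kubelet recreates the pod automatically
vim /etc/kubernetes/manifests/etcd.yaml
#   --data-dir=/var/lib/etcd-backup            <- update the flag
#   etcd-data hostPath: /var/lib/etcd-backup   <- update the volume path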
Scenario: Create a user "pulak" and grant him access to the cluster. "pulak" should only be able to create, get, list, and delete pods.
- Create a key/csr for the user:
openssl genrsa -out pulak.key 2048
openssl req -new -key pulak.key -out pulak.csr -subj "/CN=pulak"
- Create a CSR object:
Example:
apiVersion: certificates.k8s.io/v1
kind: CertificateSigningRequest
metadata:
  name: pulak
spec:
  request: <base 64 encoded pulak.csr>
  signerName: kubernetes.io/kube-apiserver-client
  expirationSeconds: 86400 # one day
  usages:
  - client auth
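The request field holds the base64-encoded CSR on a single line; one way to produce it:
cat pulak.csr | base64 | tr -d "\n"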
- Approve the csr
kubectl certificate approve pulak
- Extract the signed certificate from the CSR object's .status.certificate field:
kubectl get csr pulak -o jsonpath='{.status.certificate}'| base64 -d > pulak.crt
- Create a Role/ClusterRole & RoleBinding/ClusterRoleBinding accordingly
Example using kubectl (only the verbs the task asks for):
kubectl create role developer --verb=create --verb=get --verb=list --verb=delete --resource=pods
kubectl create rolebinding developer-binding-pulak --role=developer --user=pulak
- Add the user to the kubeconfig
kubectl config set-credentials pulak --client-key=pulak.key --client-certificate=pulak.crt --embed-certs=true
kubectl config set-context pulak --cluster=kubernetes --user=pulak
kubectl config use-context pulak
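- Verify the permissions; kubectl auth can-i with user impersonation is a quick sanity check (run from the admin context):
kubectl auth can-i create pods --as pulak   # yes
kubectl auth can-i delete nodes --as pulak  # no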
Scenario: A pod is not in Running state because the available nodes have taints. Fix it.
To simulate the process:
- Add taint to nodes:
kubectl taint nodes node1 key1=value1:NoSchedule
- Create a deployment; its pod should stay Pending because of the taint:
kubectl create deploy nginx --image=nginx:alpine
- Edit the deployment so the pod template tolerates the taint (tolerations go under spec.template.spec):
tolerations:
- key: "key1"
  operator: "Equal"
  value: "value1"
  effect: "NoSchedule"
## or
tolerations:
- key: "key1"
  operator: "Exists"
  effect: "NoSchedule"
Doc Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
Question: Create a pod "nginx-cka" using image "nginx" and initContainer "git-cka" with image "alpine/git". Volume mount path of the main container "/usr/share/nginx/html". Nginx index.html need to be override with shared volume. index.html file cloned from path "https://github.com/jhawithu/k8s-nginx.git".
Pod yaml:
apiVersion: v1
kind: Pod
metadata:
  name: nginx-cka
spec:
  containers:
  - name: nginx
    image: nginx
    ports:
    - containerPort: 80
    volumeMounts:
    - name: data
      mountPath: /usr/share/nginx/html
  initContainers:
  - name: git-cka
    image: alpine/git
    args:
    - clone
    - --single-branch
    - --
    - https://github.com/jhawithu/k8s-nginx.git
    - /data
    volumeMounts:
    - mountPath: /data
      name: data
  volumes:
  - name: data
    emptyDir: {}
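To verify the cloned index.html overrides the default page (assuming the pod is Running):
kubectl exec nginx-cka -c nginx -- cat /usr/share/nginx/html/index.html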
Question: Upgrade k8s cluster version from 1.27.x to 1.28.x
- Install kubeadm at the target version
apt update
apt install kubeadm=1.28.2-00
- Drain controlplane
kubectl drain controlplane --ignore-daemonsets
- Upgrade controlplane node
kubeadm upgrade plan
kubeadm upgrade apply v1.28.2
- Upgrade and restart kubelet
apt install kubelet=1.28.2-00
systemctl restart kubelet
- Uncordon controlplane
kubectl uncordon controlplane
For each worker node:
- Drain the node (run from the controlplane)
kubectl drain <node-name> --ignore-daemonsets
- Upgrade kubeadm on the node, then upgrade the node config
apt install kubeadm=1.28.2-00
kubeadm upgrade node
- Upgrade & restart kubelet
apt install kubelet=1.28.2-00
systemctl restart kubelet
- Uncordon the node (run from the controlplane)
kubectl uncordon <node-name>
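- Verify the upgrade, and optionally pin the packages so apt doesn't move them unexpectedly:
kubectl get nodes
apt-mark hold kubeadm kubelet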
Question: Create a new pod called "admin-pod" with image busybox. Allow the pod to set the system time.
The container should sleep for 3200 seconds.
- Create the pod yaml imperatively (the --command flag is a boolean; the command itself goes after --):
kubectl run admin-pod --image=busybox --dry-run=client -o yaml --command -- sleep 3200 > admin-pod.yaml
- Add capabilities to the container:
apiVersion: v1
kind: Pod
metadata:
  labels:
    run: admin-pod
  name: admin-pod
spec:
  containers:
  - command:
    - sleep
    - "3200"
    image: busybox
    name: admin-pod
    securityContext:
      capabilities:
        add: ["SYS_TIME"]
Doc Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
Question: Create a namespace "devops" and create a NetworkPolicy that blocks all trafic to pods in devops namespace, except for traffic from pods in the same namespace on port 8080.
- Create the namespace with a label:
kubectl create ns devops
kubectl label ns devops app=devops
kubectl get ns devops --show-labels
- Create NetworkPolicy yaml:
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: devops-np
  namespace: devops
spec:
  podSelector: {}
  policyTypes:
  - Ingress
  ingress:
  - from:
    - namespaceSelector:
        matchLabels:
          app: devops
    ports:
    - protocol: TCP
      port: 8080
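A quick negative test: run a client pod in another namespace and expect the connection to time out (<pod-ip> here is a placeholder for a devops pod listening on 8080):
kubectl run tmp --rm -it --image=busybox:1.31.1 -- wget -T 2 -qO- <pod-ip>:8080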
Create a NetworkPolicy that denies all access to the payroll Pod in the accounting namespace.
- Create the network policy, assuming the payroll pod has the app=payroll label:
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: payroll-network-policy
  namespace: accounting
spec:
  podSelector:
    matchLabels:
      app: payroll
  policyTypes:
  - Ingress
  - Egress
Note: listing Ingress & Egress under policyTypes with no corresponding rules means all ingress and egress traffic is denied.
Doc Ref: https://kubernetes.io/docs/concepts/services-networking/network-policies/
Set up Liveness, Readiness & Startup Probes
Liveness probe example:
apiVersion: v1
kind: Pod
metadata:
  labels:
    test: liveness
  name: liveness-exec
spec:
  containers:
  - name: liveness
    image: registry.k8s.io/busybox
    args:
    - /bin/sh
    - -c
    - touch /tmp/healthy; sleep 30; rm -f /tmp/healthy; sleep 600
    livenessProbe:
      exec:
        command:
        - cat
        - /tmp/healthy
      initialDelaySeconds: 5
      periodSeconds: 5
The kubelet restarts the container (per the pod's restartPolicy) if the liveness probe fails.
Readiness probe example:
apiVersion: v1
kind: Pod
metadata:
  labels:
    test: readiness
  name: readiness-exec
spec:
  containers:
  - name: readiness
    image: registry.k8s.io/busybox
    args:
    - /bin/sh
    - -c
    - touch /tmp/healthy; sleep 30; rm -f /tmp/healthy; sleep 600
    readinessProbe:
      exec:
        command:
        - cat
        - /tmp/healthy
      initialDelaySeconds: 5
      periodSeconds: 5
Readiness probes are configured exactly the same way as liveness probes.
On failure the kubelet does not restart the container; it marks the pod Not Ready, so Services stop sending traffic to the unhealthy pod.
Startup probe example:
startupProbe:
  httpGet:
    path: /healthz
    port: liveness-port
  failureThreshold: 30
  periodSeconds: 10
A startup probe protects slow-starting applications from premature liveness failures: liveness and readiness checks are held off until the startup probe succeeds, giving the app breathing room at startup (here up to failureThreshold * periodSeconds = 300s).
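liveness-port above is a named container port; the snippet assumes a matching declaration in the container, e.g.:
ports:
- name: liveness-port
  containerPort: 8080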
Question: Create a pod named project-tiger with image httpd:2.4.41-alpine. Find out on which node the pod is scheduled. Using the crictl command, find out:
- the info.runtimeType of the pod
- the container logs
Solution:
- Run the pod and find its node:
k run project-tiger --image=httpd:2.4.41-alpine
k get pod project-tiger -o wide
- ssh into that node and find the container id
$ crictl ps | grep project-tiger
030b066c10cb3 54b0995a63052 50 seconds ago Running project-tiger 0 ed43bf9847f06 project-tiger
- copy the container id & inspect the runtimeType
$ crictl inspect 030b066c10cb3 | grep runtimeType
"runtimeType": "io.containerd.runc.v2",
- check the logs using crictl logs:
$ crictl logs 030b066c10cb3
AH00558: httpd: Could not reliably determine the server's fully qualified domain name, using 192.168.1.3. Set the 'ServerName' directive globally to suppress this message
AH00558: httpd: Could not reliably determine the server's fully qualified domain name, using 192.168.1.3. Set the 'ServerName' directive globally to suppress this message
[Wed Dec 06 08:22:44.832652 2023] [mpm_event:notice] [pid 1:tid 139768630623560] AH00489: Apache/2.4.41 (Unix) configured -- resuming normal operations
[Wed Dec 06 08:22:44.832899 2023] [core:notice] [pid 1:tid 139768630623560] AH00094: Command line: 'httpd -D FOREGROUND'
Question: One of the nodes is in NotReady state because of the kubelet. Fix the issue and make the node as Ready state.
- Check the nodes
controlplane $ k get nodes
NAME STATUS ROLES AGE VERSION
controlplane Ready control-plane 22d v1.28.1
node01 NotReady <none> 22d v1.28.1
- ssh into node01 & check the kubelet status. In the failure scenario the kubelet service won't be active:
node01 $ systemctl status kubelet
● kubelet.service - kubelet: The Kubernetes Node Agent
Loaded: loaded (/lib/systemd/system/kubelet.service; enabled; vendor preset: enabled)
Drop-In: /etc/systemd/system/kubelet.service.d
└─10-kubeadm.conf
Active: active (running) since Tue 2023-11-14 10:59:39 UTC; 3 weeks 1 days ago
Docs: https://kubernetes.io/docs/home/
Main PID: 26490 (kubelet)
Tasks: 11 (limit: 2339)
Memory: 37.2M
CGroup: /system.slice/kubelet.service
└─26490 /usr/bin/kubelet --bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kub>
Dec 07 06:31:27 node01 kubelet[26490]: I1207 06:31:27.604142 26490 reconciler_common.go:300] "Volume detached for volum>
- Make sure the kubelet unit uses the correct binary path:
node01 $ whereis kubelet
kubelet: /usr/bin/kubelet
node01 $ cat /etc/systemd/system/kubelet.service.d/10-kubeadm.conf
# Note: This dropin only works with kubeadm and kubelet v1.11+
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
# This is a file that "kubeadm init" and "kubeadm join" generates at runtime, populating the KUBELET_KUBEADM_ARGS variable dynamically
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
# This is a file that the user can use for overrides of the kubelet args as a last resort. Preferably, the user should use
# the .NodeRegistration.KubeletExtraArgs object in the configuration files instead. KUBELET_EXTRA_ARGS should be sourced from this file.
EnvironmentFile=-/etc/default/kubelet
ExecStart=
ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
- If any change is needed in the config file, reload systemd and restart kubelet (use &&; a single & would background the first command and leave the stale-unit warning in place):
node01 $ systemctl daemon-reload && systemctl restart kubelet
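- Back on the controlplane, confirm the node returns to Ready:
controlplane $ k get nodes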
Question: Create a pod named secret-pod with image busybox:1.31.1 which should keep running for some time.
The yaml for secret1 is provided below. Create it and mount it into the pod at /tmp/secret:
apiVersion: v1
kind: Secret
metadata:
  name: secret1
data:
  somedata: UG91cmluZzYlRW1vdGljb24lU2N1YmE=
Create a secret secret2 which should contain user=pulak and pass=1234. These entries should be available inside the pod as APP_USER and APP_PASS environment variables.
Solution:
- Create the pod:
k run secret-pod --image=busybox:1.31.1 -- sleep 1d
- Create the secret from the above yaml
controlplane $ cat 12.yaml
apiVersion: v1
kind: Secret
metadata:
  name: secret1
data:
  somedata: UG91cmluZzYlRW1vdGljb24lU2N1YmE=
controlplane $ k apply -f 12.yaml
secret/secret1 created
- Create the other secret, secret2:
k create secret generic secret2 --from-literal="user=pulak" --from-literal="pass=1234"
- Update the pod with the secret refs. Key parts that will be added:
apiVersion: v1
kind: Pod
...
...
spec:
  volumes:
  - name: secret-volume
    secret:
      secretName: secret1
  containers:
  - name: container1
    ...
    ...
    volumeMounts:
    - name: secret-volume
      readOnly: true
      mountPath: "/tmp/secret"
    env:
    - name: APP_USER
      valueFrom:
        secretKeyRef:
          name: secret2
          key: user
    - name: APP_PASS
      valueFrom:
        secretKeyRef:
          name: secret2
          key: pass
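To verify both the mount and the env vars (assuming the pod was recreated from the updated yaml):
k exec secret-pod -- cat /tmp/secret/somedata
k exec secret-pod -- sh -c 'env | grep APP_'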
Doc Ref: https://kubernetes.io/docs/concepts/configuration/secret/
Question: Create a static pod with image nginx:alpine and resource requests of 10m CPU and 20Mi memory.
Create a NodePort service to expose that static pod on port 80, and check that it has endpoints and is reachable through the node's internal IP address.
Solution:
- Get a pod yaml using dry-run:
controlplane $ k run static-pod --image=nginx:alpine --dry-run=client -o yaml > 13.yaml
controlplane $ cat 13.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    run: static-pod
  name: static-pod
spec:
  containers:
  - image: nginx:alpine
    name: static-pod
    resources: {}
  dnsPolicy: ClusterFirst
  restartPolicy: Always
status: {}
- Set the resource requests accordingly:
controlplane $ cat 13.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    run: static-pod
  name: static-pod
spec:
  containers:
  - image: nginx:alpine
    name: static-pod
    resources:
      requests:
        cpu: 10m
        memory: 20Mi
  dnsPolicy: ClusterFirst
  restartPolicy: Always
status: {}
- Create the pod. As this is a static pod, we just place the yaml in the kubelet's manifests folder on the target node; the pod is created automatically, with the node name appended to its name.
controlplane $ pwd
/etc/kubernetes/manifests
controlplane $ cat static-pod.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    run: static-pod
  name: static-pod
spec:
  containers:
  - image: nginx:alpine
    name: static-pod
    resources:
      requests:
        cpu: 10m
        memory: 20Mi
  dnsPolicy: ClusterFirst
  restartPolicy: Always
controlplane $ k get pods
NAME READY STATUS RESTARTS AGE
static-pod-controlplane 1/1 Running 0 91s
- Create the NodePort service and check the Endpoint
controlplane $ k expose pod static-pod-controlplane --port=80 --type=NodePort
service/static-pod-controlplane exposed
controlplane $ k get svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 6d23h
static-pod-controlplane NodePort 10.109.9.187 <none> 80:32032/TCP 5s
controlplane $ k get ep
NAME ENDPOINTS AGE
kubernetes 172.30.1.2:6443 6d23h
static-pod-controlplane 192.168.0.7:80 15s
controlplane $ k get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
static-pod-controlplane 1/1 Running 0 2m55s 192.168.0.7 controlplane <none> <none>
- Check the service is accessible via NodePort:
controlplane $ k get nodes -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
controlplane Ready control-plane 6d23h v1.28.4 172.30.1.2 <none> Ubuntu 20.04.5 LTS 5.4.0-131-generic containerd://1.6.12
node01 Ready <none> 6d23h v1.28.4 172.30.2.2 <none> Ubuntu 20.04.5 LTS 5.4.0-131-generic containerd://1.6.12
controlplane $ curl 172.30.1.2:32032
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
<style>
html { color-scheme: light dark; }
body { width: 35em; margin: 0 auto;
font-family: Tahoma, Verdana, Arial, sans-serif; }
</style>
</head>
<body>
<h1>Welcome to nginx!</h1>
<p>If you see this page, the nginx web server is successfully installed and
working. Further configuration is required.</p>
<p>For online documentation and support please refer to
<a href="http://nginx.org/">nginx.org</a>.<br/>
Commercial support is available at
<a href="http://nginx.com/">nginx.com</a>.</p>
<p><em>Thank you for using nginx.</em></p>
</body>
</html>
Doc Ref:
- https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/
Question: Please join the node01 worker node to the cluster, and deploy a pod named web with image nginx on node01.
Solution:
- Get a join command from the controlplane node:
controlplane $ kubeadm token create --print-join-command
kubeadm join 172.30.1.2:6443 --token e2ohcp.trxxo6qxxzriqmwe --discovery-token-ca-cert-hash sha256:533673b654759980b932c982ffe4fb647dab69687385004889743982ff9f8eee
- ssh to node01 and run the join command
node01 $ kubeadm join 172.30.1.2:6443 --token e2ohcp.trxxo6qxxzriqmwe --discovery-token-ca-cert-hash sha256:533673b654759980b932c982ffe4fb647dab69687385004889743982ff9f8eee
[preflight] Running pre-flight checks
error execution phase preflight: [preflight] Some fatal errors occurred:
[ERROR FileAvailable--etc-kubernetes-kubelet.conf]: /etc/kubernetes/kubelet.conf already exists
[ERROR Port-10250]: Port 10250 is in use
[ERROR FileAvailable--etc-kubernetes-pki-ca.crt]: /etc/kubernetes/pki/ca.crt already exists
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
To see the stack trace of this error execute with --v=5 or higher
Note: these preflight errors mean node01 already has kubelet configuration from a previous join. Don't blindly ignore them; first check whether the kubelet is simply stopped.
- Check the kubelet status and start it if it is not running:
node01 $ systemctl status kubelet
node01 $ systemctl start kubelet
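- The task also asks for a pod web pinned to node01; a minimal sketch using spec.nodeName (which bypasses the scheduler):
k run web --image=nginx --dry-run=client -o yaml > web.yaml
# add "nodeName: node01" under spec, then:
k apply -f web.yaml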
Docs: https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-token/
Question: Create a PV and PVC and use them in a Deployment. Given that:
- PV: name web-pv, capacity 2Gi, hostPath /vol/data, accessMode ReadWriteOnce, no storage class defined
- PVC: name web-pvc, ns production, capacity 2Gi, accessMode ReadWriteOnce, no storage class defined
- Deployment: name web-deploy, ns production, image nginx:1.14.2, volume mount path /tmp/web-data
Solution:
- Create the PV:
apiVersion: v1
kind: PersistentVolume
metadata:
  name: web-pv
  labels:
    type: local
spec:
  capacity:
    storage: 2Gi
  accessModes:
  - ReadWriteOnce
  hostPath:
    path: "/vol/data"
controlplane $ k apply -f 15-pv.yaml
persistentvolume/web-pv created
controlplane $ k get pv
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS VOLUMEATTRIBUTESCLASS REASON AGE
web-pv 2Gi RWO Retain Available <unset> 13s
- Create the PVC:
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: web-pvc
  namespace: production
spec:
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 2Gi
controlplane $ k create ns production
namespace/production created
controlplane $ k apply -f 15-pvc.yaml
persistentvolumeclaim/web-pvc created
controlplane $ k get pvc -n production
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS VOLUMEATTRIBUTESCLASS AGE
web-pvc Pending local-path <unset> 8s
- Create the deployment (the walkthrough below uses the name nginx-deployment; rename it to web-deploy to match the task):
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
  namespace: production
  labels:
    app: nginx
spec:
  replicas: 3
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      volumes:
      - name: data
        persistentVolumeClaim:
          claimName: web-pvc
      containers:
      - name: nginx
        image: nginx:1.14.2
        ports:
        - containerPort: 80
        volumeMounts:
        - mountPath: "/tmp/web-data"
          name: data
controlplane $ k apply -f 15-dpl.yaml
deployment.apps/nginx-deployment created
controlplane $ k get deploy -n production
NAME READY UP-TO-DATE AVAILABLE AGE
nginx-deployment 0/3 3 0 9s
controlplane $ k get pvc -n production
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS VOLUMEATTRIBUTESCLASS AGE
web-pvc Bound pvc-badf3c7e-413b-4907-93ad-2aeff2e626d3 2Gi RWO local-path <unset> 5m38s
controlplane $ k get pods -n production
NAME READY STATUS RESTARTS AGE
nginx-deployment-5f599f4f8b-7jfpz 1/1 Running 0 35s
nginx-deployment-5f599f4f8b-dn9lw 1/1 Running 0 35s
nginx-deployment-5f599f4f8b-ftff5 1/1 Running 0 35s
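Note: in the transcript above the PVC bound to a dynamically provisioned local-path volume rather than to web-pv, because this cluster has a default StorageClass. To make the claim bind to the pre-created PV instead, set an empty storage class in the PVC spec:
spec:
  storageClassName: ""
  ...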
Question: Create a DaemonSet that runs on all nodes, including the control plane. Requirements: name daemon-imp, namespace project-1, image httpd:2.4-alpine, labels id=daemon-imp, resource requests 20 millicore CPU and 20Mi memory.
Solution:
- Prepare the yaml and create the daemonset:
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: daemon-imp
  namespace: project-1
  labels:
    id: daemon-imp
spec:
  selector:
    matchLabels:
      id: daemon-imp
  template:
    metadata:
      labels:
        id: daemon-imp
    spec:
      tolerations:
      # these tolerations are to have the daemonset runnable on control plane nodes
      # remove them if your control plane nodes should not run pods
      - key: node-role.kubernetes.io/control-plane
        operator: Exists
        effect: NoSchedule
      - key: node-role.kubernetes.io/master
        operator: Exists
        effect: NoSchedule
      containers:
      - name: httpd
        image: httpd:2.4-alpine
        resources:
          requests:
            cpu: 20m
            memory: 20Mi
controlplane $ k create ns project-1
namespace/project-1 created
controlplane $ k apply -f 16.yaml
daemonset.apps/daemon-imp created
controlplane $ k get nodes
NAME STATUS ROLES AGE VERSION
controlplane Ready control-plane 10d v1.30.0
node01 Ready <none> 10d v1.30.0
controlplane $ k get pods -n project-1
NAME READY STATUS RESTARTS AGE
daemon-imp-7nbt2 1/1 Running 0 13s
daemon-imp-82mz6 1/1 Running 0 13s
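To confirm one pod landed on each node, including the control plane:
controlplane $ k get pods -n project-1 -o wide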