clone controller shallow copy bug
1978629634 opened this issue · 4 comments
What happened:
If the cloner controller directly updates the PVC reference without using a deep copy and then calls the Update() method, it can lead to a situation where the cached PVC is updated (removing the finalizer), while the actual PVC remains unmodified in case the Update() operation fails.
// syncClaimHandler gets the claim from informer's cache then calls syncClaim
func (p *CloningProtectionController) syncClaimHandler(ctx context.Context, key string) error {
namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
utilruntime.HandleError(fmt.Errorf("invalid resource key: %s", key))
return nil
}
// shallow copy
claim, err := p.claimLister.PersistentVolumeClaims(namespace).Get(name)
if err != nil {
if apierrs.IsNotFound(err) {
utilruntime.HandleError(fmt.Errorf("Item '%s' in work queue no longer exists", key))
return nil
}
return err
}
return p.syncClaim(ctx, claim)
}
// syncClaim removes finalizers from a PVC, when cloning is finished
func (p *CloningProtectionController) syncClaim(ctx context.Context, claim *v1.PersistentVolumeClaim) error {
if !checkFinalizer(claim, pvcCloneFinalizer) {
return nil
}
// Checking for PVCs in the same namespace to have other states aside from Pending, which means that cloning is still in progress
pvcList, err := p.claimLister.PersistentVolumeClaims(claim.Namespace).List(labels.Everything())
if err != nil {
return err
}
// Check for pvc state with DataSource pointing to claim
for _, pvc := range pvcList {
if pvc.Spec.DataSource == nil {
continue
}
// Requeue when at least one PVC is still works on cloning
if pvc.Spec.DataSource.Kind == pvcKind &&
pvc.Spec.DataSource.Name == claim.Name &&
pvc.Status.Phase == v1.ClaimPending {
return fmt.Errorf("PVC '%s' is in 'Pending' state, cloning in progress", pvc.Name)
}
}
// Remove clone finalizer
finalizers := make([]string, 0)
for _, finalizer := range claim.ObjectMeta.Finalizers {
if finalizer != pvcCloneFinalizer {
finalizers = append(finalizers, finalizer)
}
}
claim.ObjectMeta.Finalizers = finalizers
if _, err = p.client.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(ctx, claim, metav1.UpdateOptions{}); err != nil {
if !apierrs.IsNotFound(err) {
// Couldn't remove finalizer and the object still exists, the controller may
// try to remove the finalizer again on the next update
klog.Infof("failed to remove clone finalizer from PVC %v", claim.Name)
return err
}
}
return nil
}
What you expected to happen:
deep copy before update()
How to reproduce it:
Anything else we need to know?:
Environment:
- Driver version:
- Kubernetes version (use
kubectl version
): - OS (e.g. from /etc/os-release):
- Kernel (e.g.
uname -a
): - Install tools:
- Others:
@pohly I'm facing an issue and would appreciate if you could take a look when you have a chance.
Here's a link to the relevant source code:
external-provisioner/pkg/controller/clone_controller.go
Lines 202 to 204 in 33c5c1f
@1978629634: your analysis seems correct to me, a DeepCopy
seems appropriate here. Care to submit a PR?
Here's a link to the relevant source code:
external-provisioner/pkg/controller/clone_controller.go
Lines 202 to 204 in 33c5c1f
@1978629634: your analysis seems correct to me, a
DeepCopy
seems appropriate here. Care to submit a PR?
Sure, I'd be happy to submit a PR for that fix
/assign