Worker upgrading seems to cordon all the nodes at once
kke opened this issue · 1 comments
kke commented
There should be batching and I'm almost sure there used to be, I think it may have been lost with the dry-run mechanism.
kke commented
Actually it is still there:
func (p *UpgradeWorkers) Run() error {
if err := p.hosts.Each(p.cordonWorker); err != nil {
return err
}
// Upgrade worker hosts parallelly in 10% chunks
concurrentUpgrades := int(math.Floor(float64(len(p.hosts)) * 0.10))
if concurrentUpgrades == 0 {
concurrentUpgrades = 1
}
log.Infof("Upgrading max %d workers in parallel", concurrentUpgrades)
return p.hosts.BatchedParallelEach(concurrentUpgrades,
p.start,
p.cordonWorker,
p.drainWorker,
p.upgradeWorker,
p.uncordonWorker,
p.finish,
)
}