failed to recover split

This is caused by the region worker being blocked by a recover split task after role changed. That is why some new leaders can not apply any change sets to the kv engine, but the followers can apply. We can't schedule the recover split task until the region stage is SplitStage_PRE_SPLIT_FLUSH_DONE or SplitStage_SPLIT_FILE_DONE

unistore/tikv/raftstore/peer.go

Lines 739 to 749 in b4ca36b

    
           if shardMeta.SplitStage != enginepb.SplitStage_INITIAL { 
        
           	p.Store().regionSched <- task{ 
        
           		tp: taskTypeRecoverSplit, 
        
           		data: &regionTask{ 
        
           			region:    p.Region(), 
        
           			peer:      p.Meta, 
        
           			stage:     shardMeta.SplitStage, 
        
           			splitKeys: shardMeta.PreSplitKeys(), 
        
           		}, 
        
           	} 
        
           }

unistore/tikv/raftstore/worker.go

Lines 476 to 480 in b4ca36b

    
           case enginepb.SplitStage_PRE_SPLIT, enginepb.SplitStage_PRE_SPLIT_FLUSH_DONE: 
        
           	err := splitShardFiles(r.router, r.kv, task.peer, task.region) 
        
           	if err != nil { 
        
           		return err 
        
           	}

unistore/engine/split.go

Lines 91 to 99 in b4ca36b

    
           func (en *Engine) waitForPreSplitFlushState(shard *Shard) { 
        
           	for { 
        
           		switch shard.GetSplitStage() { 
        
           		case enginepb.SplitStage_PRE_SPLIT_FLUSH_DONE, enginepb.SplitStage_SPLIT_FILE_DONE: 
        
           			return 
        
           		} 
        
           		time.Sleep(time.Millisecond * 100) 
        
           	} 
        
           }

Store-1:

09:04.381 [worker.go:512] ["shard 150:48 apply change set flush stage INITIAL seq 54695"]
09:13.914 [peer.go:887] ["150:48 handle change set set engine meta, apply change shardID:150 shardVer:48 stage:PRE_SPLIT sequence:56571 "]
09:13.921 [shard.go:348] ["shard 150:48 set split stage PRE_SPLIT"]
09:15.085 [raft.go:765] ["150 became follower at term 7"]
09:15.085 [peer.go:721] ["shard 150:48 set passive true"]
09:15.545 [peer.go:887] ["150:48 handle change set set engine meta, apply change shardID:150 shardVer:48 flush commitTS:61988 stage:PRE_SPLIT_FLUSH_DONE sequence:56856"]
09:15.546 [worker.go:512] ["shard 150:48 apply change set flush stage PRE_SPLIT_FLUSH_DONE seq 56856"]
09:15.609 [shard.go:348] ["shard 150:48 set split stage PRE_SPLIT_FLUSH_DONE"]
09:15.625 [fsm_peer.go:1410] ["150:48 peer store split stage is changed from INITIAL to PRE_SPLIT_FLUSH_DONE"]

Store-2:

09:04.338 [worker.go:512] ["shard 150:48 apply change set flush stage INITIAL seq 54695"]
09:13.863 [worker.go:214] ["split region 150:48 by checker size:16000000"]
09:13.902 [peer.go:887] ["150:48 handle change set set engine meta, apply change shardID:150 shardVer:48 stage:PRE_SPLIT sequence:56571 "]
09:13.906 [shard.go:348] ["shard 150:48 set split stage PRE_SPLIT"]
09:15.089 [raft.go:765] ["150 became follower at term 7"]
09:15.089 [peer.go:721] ["shard 150:48 set passive true"]
09:15.271 [flush.go:99] ["150:48 flush memtable id:426720350718918670, size:89395560, l0 size: 40111762, commitTS: 61988"]
09:15.552 [peer.go:887] ["150:48 handle change set set engine meta, apply change shardID:150 shardVer:48 flush commitTS:61988 stage:PRE_SPLIT_FLUSH_DONE sequence:56856"]
09:15.553 [worker.go:512] ["shard 150:48 apply change set flush stage PRE_SPLIT_FLUSH_DONE seq 56856"]
09:15.602 [shard.go:348] ["shard 150:48 set split stage PRE_SPLIT_FLUSH_DONE"]
09:15.605 [fsm_peer.go:1410] ["150:48 peer store split stage is changed from INITIAL to PRE_SPLIT_FLUSH_DONE"]
09:15.772 [worker.go:235] ["shard 150:48 failed to split files err: region 150 is not leader"]
09:15.772 [worker.go:217] ["region 150:48 failed to send check result err: failed to split files: region 150 is not leader"]

Store-3:

09:04.379 [worker.go:512] ["shard 150:48 apply change set flush stage INITIAL seq 54695"]
09:13.914 [peer.go:887] ["150:48 handle change set set engine meta, apply change shardID:150 shardVer:48 stage:PRE_SPLIT sequence:56571 "]
09:13.921 [shard.go:348] ["shard 150:48 set split stage PRE_SPLIT"]
09:15.086 [raft.go:830] ["150 became leader at term 7"]
09:15.086 [peer.go:721] ["shard 150:48 set passive false"]
09:15.086 [engine.go:801] ["150:48 trigger flush mem table ver:61988"]
09:15.086 [peer.go:741] ["shard 150:48 recover split"]
09:15.353 [flush.go:99] ["150:48 flush memtable id:426720350718918657, size:89395560, l0 size: 40111762, commitTS: 61988"]
09:15.544 [peer.go:887] ["150:48 handle change set set engine meta, apply change shardID:150 shardVer:48 flush commitTS:61988 stage:PRE_SPLIT_FLUSH_DONE sequence:56856"]
09:24.880 [worker.go:214] ["split region 150:48 by checker size:16000000"]
09:24.881 [worker.go:260] ["shard 150:48 wrong split stage PRE_SPLIT"]
09:24.881 [worker.go:230] ["shard 150:48 failed to pre-split region err: wrong split stage PRE_SPLIT"]
09:24.881 [worker.go:217] ["region 150:48 failed to send check result err: failed to pre-split region: wrong split stage PRE_SPLIT"]

Fixed by #615

	if shardMeta.SplitStage != enginepb.SplitStage_INITIAL {
	p.Store().regionSched <- task{
	tp: taskTypeRecoverSplit,
	data: &regionTask{
	region: p.Region(),
	peer: p.Meta,
	stage: shardMeta.SplitStage,
	splitKeys: shardMeta.PreSplitKeys(),
	},
	}
	}

	case enginepb.SplitStage_PRE_SPLIT, enginepb.SplitStage_PRE_SPLIT_FLUSH_DONE:
	err := splitShardFiles(r.router, r.kv, task.peer, task.region)
	if err != nil {
	return err
	}

	func (en Engine) waitForPreSplitFlushState(shard Shard) {
	for {
	switch shard.GetSplitStage() {
	case enginepb.SplitStage_PRE_SPLIT_FLUSH_DONE, enginepb.SplitStage_SPLIT_FILE_DONE:
	return
	}
	time.Sleep(time.Millisecond * 100)
	}
	}