erigontech/erigon

merge: lookupFileByItsRange: file not found

Closed this issue · 6 comments

main eth-mainnet:
file 1536-1540 exists but merge trying to find file 1539-1540

 ls *acc*kv
v1-accounts.0-1024.kv 
 v1-accounts.1024-1536.kv 
 v1-accounts.1536-1537.kv  
v1-accounts.1536-1540.kv 
 v1-accounts.1537-1538.kv
[INFO] [05-15|05:59:13.708] [snapshots] state merge done accounts(val:1536-1540, hist:1536-1540, idx:1536-1540), storage(val:1536-1540, hist:1536-1540, idx:1536-1540), code(val:1536-1540, hist:1536-1540, idx:1536-1540), commitment(val:1536-1540), logAddr=1536-1540, logTopic=1536-1540, traceFrom=1536-1540, traceTo=1536-1540
[INFO] [05-15|05:59:22.584] [4/12 Execution] Transaction replay      blk=19855785 tx/s=727.3 buffer=57.6MB/256.0MB stepsInDB=1.14 step=1540.3 alloc=14.8GB sys=22.1GB
[INFO] [05-15|05:59:42.594] [4/12 Execution] Transaction replay      blk=19855872 tx/s=666.6 buffer=60.3MB/256.0MB stepsInDB=1.14 step=1540.3 alloc=15.7GB sys=22.1GB
[INFO] [05-15|06:00:02.623] [4/12 Execution] Transaction replay      blk=19855961 tx/s=731.5 buffer=62.7MB/256.0MB stepsInDB=1.14 step=1540.3 alloc=16.8GB sys=22.1GB
[INFO] [05-15|06:00:04.260] P2P                                      app=caplin peers=67
[WARN] [05-15|06:00:12.542] lookupFileByItsRange: file not found     stepFrom=1539 stepTo=1540 domain=AccountKeys files=0-1024;1024-1536;1536-1540; _visibleFiles=0-1024;1024-1536;1536-1540; visibleFilesCount=3 filesCount=3
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x121b977]

goroutine 22477306 [running]:
github.com/ledgerwatch/erigon-lib/state.(*SharedDomains).replaceShortenedKeysInBranch(0xc34555ec00, {0xc20e52d24c?, 0x3?, 0x3?}, {0x6c29f75116dc, 0x224, 0x3ce74722}, 0x8f549e8c, 0x8f6c7610)
	github.com/ledgerwatch/erigon-lib@v1.0.0/state/domain_shared.go:352 +0x317
github.com/ledgerwatch/erigon-lib/state.(*SharedDomains).LatestCommitment(0xc34555ec00, {0xc20e52d24c, 0x3, 0x3})
	github.com/ledgerwatch/erigon-lib@v1.0.0/state/domain_shared.go:326 +0x196
github.com/ledgerwatch/erigon-lib/state.(*SharedDomainsCommitmentContext).GetBranch(0xc13d2f8230, {0xc20e52d24c, 0x3, 0x3})
	github.com/ledgerwatch/erigon-lib@v1.0.0/state/domain_shared.go:966 +0x90
github.com/ledgerwatch/erigon-lib/commitment.(*HexPatriciaHashed).unfoldBranchNode(0xc348764000, 0x5, 0x0, 0x6)
	github.com/ledgerwatch/erigon-lib@v1.0.0/commitment/hex_patricia_hashed.go:826 +0xc3
github.com/ledgerwatch/erigon-lib/commitment.(*HexPatriciaHashed).unfold(0xc348764000, {0xc30db92dc0?, 0x40?, 0x40?}, 0x1)
	github.com/ledgerwatch/erigon-lib@v1.0.0/commitment/hex_patricia_hashed.go:934 +0x605
github.com/ledgerwatch/erigon-lib/commitment.(*HexPatriciaHashed).ProcessTree.func1({0xc30db92dc0, 0x40, 0x40}, {0xc30da49fe0, 0x14, 0x14})
	github.com/ledgerwatch/erigon-lib@v1.0.0/commitment/hex_patricia_hashed.go:1311 +0x5e5
github.com/ledgerwatch/erigon-lib/commitment.(*UpdateTree).HashSort.func1({0xc30db92dc0?, 0x0?, 0xc35e4c5c50?}, {0xc30da49fe0?, 0xc35e4c5c50?, 0xaa2313?}, {0xc35e4c5d90?, 0x0?}, 0x1?)
	github.com/ledgerwatch/erigon-lib@v1.0.0/commitment/commitment.go:946 +0x23
github.com/ledgerwatch/erigon-lib/etl.(*Collector).Load.func2({0xc30db92dc0?, 0x0?, 0x0?}, {0xc30da49fe0?, 0x23985a0?, 0x0?})
	github.com/ledgerwatch/erigon-lib@v1.0.0/etl/collector.go:255 +0x31
github.com/ledgerwatch/erigon-lib/etl.mergeSortFiles({0x296ae8c, 0xa}, {0xc1a3cc19a0, 0x2, 0xc35e4c5f18?}, 0xc35e4c5f48, {0xc001736180, 0x0, 0x0, {0x0, ...}, ...}, ...)
	github.com/ledgerwatch/erigon-lib@v1.0.0/etl/collector.go:341 +0x86e
github.com/ledgerwatch/erigon-lib/etl.(*Collector).Load(0xc2f8ea1280, {0x0, 0x0}, {0x0, 0x0}, 0xc2ca169230, {0xc001736180, 0x0, 0x0, {0x0, ...}, ...})
	github.com/ledgerwatch/erigon-lib@v1.0.0/etl/collector.go:257 +0x666
github.com/ledgerwatch/erigon-lib/commitment.(*UpdateTree).HashSort(0xc250875380, {0x31bba48, 0xc00172e2d0}, 0xc23db3d130)
	github.com/ledgerwatch/erigon-lib@v1.0.0/commitment/commitment.go:945 +0x59c
github.com/ledgerwatch/erigon-lib/commitment.(*HexPatriciaHashed).ProcessTree(0xc348764000, {0x31bba48, 0xc00172e2d0}, 0xc250875380, {0xc1ca2dff80, 0xe})
	github.com/ledgerwatch/erigon-lib@v1.0.0/commitment/hex_patricia_hashed.go:1291 +0x25c
github.com/ledgerwatch/erigon-lib/state.(*SharedDomainsCommitmentContext).ComputeCommitment(0xc13d2f8230, {0x31bba48, 0xc00172e2d0}, 0x1, 0x12efa7f, {0xc1ca2dff80, 0xe})
	github.com/ledgerwatch/erigon-lib@v1.0.0/state/domain_shared.go:1098 +0x5c2
github.com/ledgerwatch/erigon-lib/state.(*SharedDomains).ComputeCommitment(...)
	github.com/ledgerwatch/erigon-lib@v1.0.0/state/domain_shared.go:568
github.com/ledgerwatch/erigon/eth/stagedsync.flushAndCheckCommitmentV3({_, _}, _, {_, _}, _, {{0x31d1dd0, 0xc141cf5890}, 0x10000000, {0x1, ...}, ...}, ...)
	github.com/ledgerwatch/erigon/eth/stagedsync/exec3.go:1049 +0x125
github.com/ledgerwatch/erigon/eth/stagedsync.ExecV3({_, _}, _, {_, _}, _, {{0x31d1dd0, 0xc141cf5890}, 0x10000000, {0x1, ...}, ...}, ...)
	github.com/ledgerwatch/erigon/eth/stagedsync/exec3.go:955 +0x1f45
github.com/ledgerwatch/erigon/eth/stagedsync.ExecBlockV3(_, {_, _}, {{_, _}, {_, _}, _}, _, {0x31bba48, ...}, ...)
	github.com/ledgerwatch/erigon/eth/stagedsync/stage_execute.go:300 +0x3c5
github.com/ledgerwatch/erigon/eth/stagedsync.SpawnExecuteBlocksStage(_, {_, _}, {{_, _}, {_, _}, _}, _, {0x31bba48, ...}, ...)
	github.com/ledgerwatch/erigon/eth/stagedsync/stage_execute.go:408 +0x1cf
github.com/ledgerwatch/erigon/eth/stagedsync.PipelineStages.func10(0x8?, 0xc0?, 0xc346b47f60, {0x31b5910?, 0xc1c781f080?}, {{0x31f14b0, 0xc33a764960}, {0x0, 0x0}, 0x0}, ...)
	github.com/ledgerwatch/erigon/eth/stagedsync/default_stages.go:320 +0xfa
github.com/ledgerwatch/erigon/eth/stagedsync.(*Sync).runStage(0xc1c781f080, 0xc1c9be5400, {0x31d1dd0, 0xc141cf5890}, {{0x31f14b0, 0xc33a764960}, {0x0, 0x0}, 0x0}, 0x1, ...)
	github.com/ledgerwatch/erigon/eth/stagedsync/sync.go:529 +0x179
github.com/ledgerwatch/erigon/eth/stagedsync.(*Sync).Run(0xc1c781f080, {0x31d1dd0, 0xc141cf5890}, {{0x31f14b0, 0xc33a764960}, {0x0, 0x0}, 0x0}, 0x60?)
	github.com/ledgerwatch/erigon/eth/stagedsync/sync.go:399 +0x2b9
github.com/ledgerwatch/erigon/turbo/execution/eth1.(*EthereumExecutionModule).updateForkChoice(0xc1c76cbe00, {0x31bba48, 0xc00172e2d0}, {0xdd, 0xa2, 0xf, 0xaf, 0xaa, 0x5f, 0x6a, ...}, ...)
	github.com/ledgerwatch/erigon/turbo/execution/eth1/forkchoice.go:381 +0x1fa5
created by github.com/ledgerwatch/erigon/turbo/execution/eth1.(*EthereumExecutionModule).UpdateForkChoice in goroutine 22464423
	github.com/ledgerwatch/erigon/turbo/execution/eth1/forkchoice.go:84 +0x30a
exit status 2

faced same on Holesky:

[INFO] [05-15|15:02:05.817] [4/12 Execution] Transaction replay      blk=759559 tx/s=285.8 buffer=7.2MB/256.0MB stepsInDB=0.08 step=16.1 alloc=8.6GB sys=20.4GB
[INFO] [05-15|15:02:22.222] [snapshots] state merge done accounts(val:0-16, hist:0-16, idx:0-16), storage(val:0-16, hist:0-16, idx:0-16), code(val:0-16, hist:0-16, idx:0-16), commitment(val:0-16), logAddr=0-16, logTopic=0-16, traceFrom=0-16, traceTo=0-16
[INFO] [05-15|15:02:22.421] P2P                                      app=caplin peers=66
[INFO] [05-15|15:02:26.027] [4/12 Execution] Transaction replay      blk=759732 tx/s=314.3 buffer=8.0MB/256.0MB stepsInDB=0.08 step=16.1 alloc=10.1GB sys=20.4GB
[INFO] [05-15|15:02:45.972] [4/12 Execution] Transaction replay      blk=759925 tx/s=354.1 buffer=10.4MB/256.0MB stepsInDB=0.08 step=16.1 alloc=12.4GB sys=20.4GB
[INFO] [05-15|15:03:05.924] [4/12 Execution] Transaction replay      blk=760132 tx/s=374.7 buffer=15.5MB/256.0MB stepsInDB=0.08 step=16.1 alloc=9.0GB sys=20.4GB
[INFO] [05-15|15:03:22.421] P2P                                      app=caplin peers=64
[INFO] [05-15|15:03:25.823] [4/12 Execution] Transaction replay      blk=760334 tx/s=350.7 buffer=19.6MB/256.0MB stepsInDB=0.08 step=16.1 alloc=10.5GB sys=20.4GB
[INFO] [05-15|15:03:45.970] [4/12 Execution] Transaction replay      blk=760654 tx/s=564.1 buffer=27.2MB/256.0MB stepsInDB=0.08 step=16.1 alloc=6.7GB sys=20.4GB
[INFO] [05-15|15:04:05.903] [4/12 Execution] Transaction replay      blk=760975 tx/s=610.2 buffer=33.4MB/256.0MB stepsInDB=0.08 step=16.1 alloc=8.6GB sys=20.4GB
[INFO] [05-15|15:04:22.421] P2P                                      app=caplin peers=67
[INFO] [05-15|15:04:25.946] [4/12 Execution] Transaction replay      blk=761300 tx/s=518.6 buffer=41.2MB/256.0MB stepsInDB=0.08 step=16.2 alloc=11.3GB sys=20.4GB
[INFO] [05-15|15:04:36.771] [p2p] GoodPeers                          eth66=2 eth67=12 eth68=39
[INFO] [05-15|15:04:37.908] [mem] memory stats                       Rss=24.2GB Size=0B Pss=22.8GB SharedClean=2.9GB SharedDirty=0B PrivateClean=7.8GB PrivateDirty=13.6GB Referenced=24.2GB Anonymous=13.6GB Swap=0B alloc=7.8GB sys=20.4GB
[INFO] [05-15|15:04:45.819] [4/12 Execution] Transaction replay      blk=761626 tx/s=515.9 buffer=49.3MB/256.0MB stepsInDB=0.08 step=16.2 alloc=8.2GB sys=20.4GB
[INFO] [05-15|15:05:05.847] [4/12 Execution] Transaction replay      blk=761965 tx/s=575.0 buffer=58.6MB/256.0MB stepsInDB=0.08 step=16.2 alloc=11.0GB sys=20.4GB
[WARN] [05-15|15:05:09.168] lookupFileByItsRange: file not found     stepFrom=12 stepTo=14 domain=StorageKeys files=0-16; _visibleFiles=0-16; visibleFilesCount=1 filesCount=1
[WARN] [05-15|15:05:09.168] lookupFileByItsRange: file not found     stepFrom=12 stepTo=14 domain=AccountKeys files=0-16; _visibleFiles=0-16; visibleFilesCount=1 filesCount=1
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x121b82c]

goroutine 37762238 [running]:
github.com/ledgerwatch/erigon-lib/state.(*SharedDomains).replaceShortenedKeysInBranch(0xc068ad75c0, {0xc1523c4908?, 0x3?, 0x3?}, {0x72e7fdfb5b95, 0xb2, 0xabb95ca}, 0x11e1a30, 0x14dc938)
	github.com/ledgerwatch/erigon-lib@v1.0.0/state/domain_shared.go:351 +0x1cc
github.com/ledgerwatch/erigon-lib/state.(*SharedDomains).LatestCommitment(0xc068ad75c0, {0xc1523c4908, 0x3, 0x3})
	github.com/ledgerwatch/erigon-lib@v1.0.0/state/domain_shared.go:326 +0x196
github.com/ledgerwatch/erigon-lib/state.(*SharedDomainsCommitmentContext).GetBranch(0xc074acf400, {0xc1523c4908, 0x3, 0x3})
	github.com/ledgerwatch/erigon-lib@v1.0.0/state/domain_shared.go:966 +0x90
github.com/ledgerwatch/erigon-lib/commitment.(*HexPatriciaHashed).unfoldBranchNode(0xc07bb7e000, 0x5, 0x0, 0x6)
	github.com/ledgerwatch/erigon-lib@v1.0.0/commitment/hex_patricia_hashed.go:826 +0xc3
github.com/ledgerwatch/erigon-lib/commitment.(*HexPatriciaHashed).unfold(0xc07bb7e000, {0xc0a796cf00?, 0x40?, 0x40?}, 0x1)
	github.com/ledgerwatch/erigon-lib@v1.0.0/commitment/hex_patricia_hashed.go:934 +0x605

happens after recent ctrlc+ restart? seems like need to update file protection during merge

I didn't ctrlc or restart. node just worked at night.

Can reproduce today on yesterday's main version.

moved file lock into sd.ComputeCommitment where it originally should be. Testing.