tkestack/kvass

0.1.0版本coordinator运行过程中程序崩溃异常重启

like-inspur opened this issue · 3 comments

coordinator 管理 prometheus 时,一开始运行正常,后来进程自行崩溃并重启,崩溃时的日志如下:

time="2021-03-31T10:25:47Z" level=info msg="prometheus-1 need update targets" component="shard manager" shard=prometheus-1 sts=prometheus
fatal error: concurrent map iteration and map write

goroutine 509 [running]:
runtime.throw(0x27f20af, 0x26)
        /home/wushuai/go/src/runtime/panic.go:1116 +0x72 fp=0xc0028c9ba8 sp=0xc0028c9b78 pc=0x4376b2
runtime.mapiternext(0xc0028c9c80)
        /home/wushuai/go/src/runtime/map.go:853 +0x554 fp=0xc0028c9c28 sp=0xc0028c9ba8 pc=0x410cf4
tkestack.io/kvass/pkg/discovery.(*TargetsDiscovery).ActiveTargetsByHash(0xc0000bbf80, 0x0)
        /home/wushuai/gospace/src/github.com/tkestack/kvass/pkg/discovery/discovery.go:113 +0x17c fp=0xc0028c9d00 sp=0xc0028c9c28 pc=0x15642dc
tkestack.io/kvass/pkg/discovery.(*TargetsDiscovery).ActiveTargetsByHash-fm(0xc001b27c40)
        /home/wushuai/gospace/src/github.com/tkestack/kvass/pkg/discovery/discovery.go:109 +0x2a fp=0xc0028c9d20 sp=0xc0028c9d00 pc=0x202c54a
tkestack.io/kvass/pkg/coordinator.(*Coordinator).runOnce(0xc00014db00, 0xc0009f4900, 0x0)
        /home/wushuai/gospace/src/github.com/tkestack/kvass/pkg/coordinator/coordinator.go:100 +0x25d fp=0xc0028c9e50 sp=0xc0028c9d20 pc=0x1596ebd
tkestack.io/kvass/pkg/coordinator.(*Coordinator).runOnce-fm(0x0, 0x0)
        /home/wushuai/gospace/src/github.com/tkestack/kvass/pkg/coordinator/coordinator.go:85 +0x2a fp=0xc0028c9e78 sp=0xc0028c9e50 pc=0x159c1ca
tkestack.io/kvass/pkg/utils/wait.RunUntil(0x2b588e0, 0xc000128000, 0x2b8d9a0, 0xc000688930, 0x2540be400, 0xc0028c9ef8, 0x0, 0x0)
        /home/wushuai/gospace/src/github.com/tkestack/kvass/pkg/utils/wait/wait.go:35 +0x63 fp=0xc0028c9eb8 sp=0xc0028c9e78 pc=0x1568283
tkestack.io/kvass/pkg/coordinator.(*Coordinator).Run(0xc00014db00, 0x2b588e0, 0xc000128000, 0x11, 0x0)
        /home/wushuai/gospace/src/github.com/tkestack/kvass/pkg/coordinator/coordinator.go:75 +0x7b fp=0xc0028c9f18 sp=0xc0028c9eb8 pc=0x1596bfb
main.glob..func1.8(0x0, 0x441256)
        /home/wushuai/gospace/src/github.com/tkestack/kvass/cmd/kvass/coordinator.go:207 +0x9a fp=0xc0028c9f78 sp=0xc0028c9f18 pc=0x202be7a
golang.org/x/sync/errgroup.(*Group).Go.func1(0xc000aadec0, 0xc001878060)
        /home/wushuai/gospace/pkg/mod/golang.org/x/sync@v0.0.0-20200930132711-30421366ff76/errgroup/errgroup.go:57 +0x59 fp=0xc0028c9fd0 sp=0xc0028c9f78 pc=0x15001d9
runtime.goexit()
        /home/wushuai/go/src/runtime/asm_amd64.s:1374 +0x1 fp=0xc0028c9fd8 sp=0xc0028c9fd0 pc=0x46f8e1
created by golang.org/x/sync/errgroup.(*Group).Go
        /home/wushuai/gospace/pkg/mod/golang.org/x/sync@v0.0.0-20200930132711-30421366ff76/errgroup/errgroup.go:54 +0x66

goroutine 1 [semacquire, 7908 minutes]:
sync.runtime_Semacquire(0xc000aaded0)
        /home/wushuai/go/src/runtime/sema.go:56 +0x45
sync.(*WaitGroup).Wait(0xc000aadec8)
        /home/wushuai/go/src/sync/waitgroup.go:130 +0x65
golang.org/x/sync/errgroup.(*Group).Wait(0xc000aadec0, 0xc004ab1040, 0xc000abf860)
        /home/wushuai/gospace/pkg/mod/golang.org/x/sync@v0.0.0-20200930132711-30421366ff76/errgroup/errgroup.go:40 +0x31
main.glob..func1(0x3b35960, 0xc000598d20, 0x0, 0x5, 0x0, 0x0)
        /home/wushuai/gospace/src/github.com/tkestack/kvass/cmd/kvass/coordinator.go:215 +0x1085
github.com/spf13/cobra.(*Command).execute(0x3b35960, 0xc000598cd0, 0x5, 0x5, 0x3b35960, 0xc000598cd0)
        /home/wushuai/gospace/pkg/mod/github.com/spf13/cobra@v1.0.0/command.go:842 +0x47c
github.com/spf13/cobra.(*Command).ExecuteC(0x3b35c00, 0x166fc4bbac19f505, 0x3b4d020, 0x0)
        /home/wushuai/gospace/pkg/mod/github.com/spf13/cobra@v1.0.0/command.go:950 +0x375
github.com/spf13/cobra.(*Command).Execute(...)
        /home/wushuai/gospace/pkg/mod/github.com/spf13/cobra@v1.0.0/command.go:887
main.main()
        /home/wushuai/gospace/src/github.com/tkestack/kvass/cmd/kvass/main.go:42 +0x11c

日志很多,这里只截取了一部分。同时发现 goroutine 的数量特别大,是不是存在协程泄漏的问题?

非常感谢提供的日志,我定位一下

已发现问题,非常感谢您! :-)

该问题已在 v0.1.1 中修复。 :-)