Mongey/terraform-provider-kafka

Panic: concurrent map write

Closed this issue · 2 comments

Running an apply through TF Cloud, this panic was hit.

Stack trace from the terraform-provider-kafka_v0.4.1 plugin:

fatal error: concurrent map writes

goroutine 167 [running]:
runtime.throw(0x1145a3c, 0x15)
/opt/hostedtoolcache/go/1.16.8/x64/src/runtime/panic.go:1117 +0x72 fp=0xc000877ab0 sp=0xc000877a80 pc=0x437e92
runtime.mapassign_faststr(0xfe1e80, 0xc0004ffdd0, 0xc0005ca7f8, 0x17, 0xc0002cc7d8)
/opt/hostedtoolcache/go/1.16.8/x64/src/runtime/map_faststr.go:211 +0x3f1 fp=0xc000877b18 sp=0xc000877ab0 pc=0x414671
github.com/Mongey/terraform-provider-kafka/kafka.(*Client).extractTopics(0xc0004e56e0, 0x0, 0x0)
/home/runner/work/terraform-provider-kafka/terraform-provider-kafka/kafka/client.go:189 +0x13c fp=0xc000877b90 sp=0xc000877b18 pc=0xee89bc
github.com/Mongey/terraform-provider-kafka/kafka.(*Client).ReadTopic(0xc0004e56e0, 0xc0005068a0, 0x30, 0xc000080001, 0x2, 0xc000705200, 0x54, 0x0, 0x0, 0x54)
/home/runner/work/terraform-provider-kafka/terraform-provider-kafka/kafka/client.go:481 +0x26b fp=0xc000877cb0 sp=0xc000877b90 pc=0xeead2b
github.com/Mongey/terraform-provider-kafka/kafka.(*LazyClient).ReadTopic(0xc0001772c0, 0xc0005068a0, 0x30, 0x1, 0x1, 0xffffffffffffffff, 0xffffffffffffffff, 0x0, 0x0, 0x1)
/home/runner/work/terraform-provider-kafka/terraform-provider-kafka/kafka/lazy_client.go:74 +0xb7 fp=0xc000877d10 sp=0xc000877cb0 pc=0xef0517
github.com/Mongey/terraform-provider-kafka/kafka.topicRefreshFunc.func1(0xc000877ee0, 0xc000877ea0, 0x0, 0x0, 0x2, 0x1)
/home/runner/work/terraform-provider-kafka/terraform-provider-kafka/kafka/resource_kafka_topic.go:197 +0x119 fp=0xc000877e20 sp=0xc000877d10 pc=0xefa0f9
github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource.(*StateChangeConf).WaitForStateContext.func1(0xc000420480, 0xc000203b90, 0xc000041860, 0xc0005f70c0, 0xc0007b73b8, 0xc0007b73b0)
/home/runner/go/pkg/mod/github.com/hashicorp/terraform-plugin-sdk/v2@v2.7.1/helper/resource/state.go:110 +0x2e9 fp=0xc000877fb0 sp=0xc000877e20 pc=0xe94469
runtime.goexit()
/opt/hostedtoolcache/go/1.16.8/x64/src/runtime/asm_amd64.s:1371 +0x1 fp=0xc000877fb8 sp=0xc000877fb0 pc=0x46f0a1
created by github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource.(*StateChangeConf).WaitForStateContext
/home/runner/go/pkg/mod/github.com/hashicorp/terraform-plugin-sdk/v2@v2.7.1/helper/resource/state.go:83 +0x1c6

goroutine 1 [select]:
github.com/hashicorp/go-plugin.Serve(0xc00050de50)
/home/runner/go/pkg/mod/github.com/hashicorp/go-plugin@v1.4.1/server.go:469 +0x954
github.com/hashicorp/terraform-plugin-sdk/v2/plugin.Serve(0xc0004803c0)
/home/runner/go/pkg/mod/github.com/hashicorp/terraform-plugin-sdk/v2@v2.7.1/plugin/serve.go:122 +0xf4
main.main()
/home/runner/work/terraform-provider-kafka/terraform-provider-kafka/main.go:37 +0x1d0

goroutine 18 [chan receive]:
github.com/klauspost/compress/zstd.(*blockDec).startDecoder(0xc0000fe0d0)
/home/runner/go/pkg/mod/github.com/klauspost/compress@v1.12.2/zstd/blockdec.go:215 +0x149
created by github.com/klauspost/compress/zstd.newBlockDec
/home/runner/go/pkg/mod/github.com/klauspost/compress@v1.12.2/zstd/blockdec.go:118 +0x173

goroutine 19 [chan receive]:
github.com/klauspost/compress/zstd.(*blockDec).startDecoder(0xc0000fe1a0)
/home/runner/go/pkg/mod/github.com/klauspost/compress@v1.12.2/zstd/blockdec.go:215 +0x149
created by github.com/klauspost/compress/zstd.newBlockDec
/home/runner/go/pkg/mod/github.com/klauspost/compress@v1.12.2/zstd/blockdec.go:118 +0x173

goroutine 20 [select]:
go.opencensus.io/stats/view.(*worker).start(0xc00012a900)
/home/runner/go/pkg/mod/go.opencensus.io@v0.22.5/stats/view/worker.go:276 +0xcd
created by go.opencensus.io/stats/view.init.0
/home/runner/go/pkg/mod/go.opencensus.io@v0.22.5/stats/view/worker.go:34 +0x68

goroutine 23 [select]:
github.com/hashicorp/go-plugin.(*gRPCBrokerServer).Recv(0xc000091200, 0x0, 0x0, 0x0)
/home/runner/go/pkg/mod/github.com/hashicorp/go-plugin@v1.4.1/grpc_broker.go:121 +0x86
github.com/hashicorp/go-plugin.(*GRPCBroker).Run(0xc0003f2000)
/home/runner/go/pkg/mod/github.com/hashicorp/go-plugin@v1.4.1/grpc_broker.go:411 +0x7a
created by github.com/hashicorp/go-plugin.(*GRPCServer).Init
/home/runner/go/pkg/mod/github.com/hashicorp/go-plugin@v1.4.1/grpc_server.go:85 +0x3a5

goroutine 24 [IO wait]:
internal/poll.runtime_pollWait(0x7f6c461b4af0, 0x72, 0xffffffffffffffff)
/opt/hostedtoolcache/go/1.16.8/x64/src/runtime/netpoll.go:222 +0x55
internal/poll.(*pollDesc).wait(0xc00040a078, 0x72, 0x1001, 0x1000, 0xffffffffffffffff)
/opt/hostedtoolcache/go/1.16.8/x64/src/internal/poll/fd_poll_runtime.go:87 +0x45
internal/poll.(*pollDesc).waitRead(...)
/opt/hostedtoolcache/go/1.16.8/x64/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Read(0xc00040a060, 0xc0004b7000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
/opt/hostedtoolcache/go/1.16.8/x64/src/internal/poll/fd_unix.go:166 +0x1d5
os.(*File).read(...)
/opt/hostedtoolcache/go/1.16.8/x64/src/os/file_posix.go:31
os.(*File).Read(0xc0000ac018, 0xc0004b7000, 0x1000, 0x1000, 0x400, 0xf99ea0, 0x1)
/opt/hostedtoolcache/go/1.16.8/x64/src/os/file.go:117 +0x77
bufio.(*Reader).Read(0xc000050f50, 0xc0000dc800, 0x400, 0x400, 0x0, 0x0, 0x0)
/opt/hostedtoolcache/go/1.16.8/x64/src/bufio/bufio.go:227 +0x222
github.com/hashicorp/go-plugin.copyChan(0x13c9f88, 0xc0004a80e0, 0xc0000409c0, 0x13a6d20, 0xc0000ac018)
/home/runner/go/pkg/mod/github.com/hashicorp/go-plugin@v1.4.1/grpc_stdio.go:181 +0xc5
created by github.com/hashicorp/go-plugin.newGRPCStdioServer
/home/runner/go/pkg/mod/github.com/hashicorp/go-plugin@v1.4.1/grpc_stdio.go:37 +0xb1

goroutine 25 [IO wait]:
internal/poll.runtime_pollWait(0x7f6c461b4920, 0x72, 0xffffffffffffffff)
/opt/hostedtoolcache/go/1.16.8/x64/src/runtime/netpoll.go:222 +0x55
internal/poll.(*pollDesc).wait(0xc00040a198, 0x72, 0x1001, 0x1000, 0xffffffffffffffff)
/opt/hostedtoolcache/go/1.16.8/x64/src/internal/poll/fd_poll_runtime.go:87 +0x45
internal/poll.(*pollDesc).waitRead(...)
/opt/hostedtoolcache/go/1.16.8/x64/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Read(0xc00040a180, 0xc000614000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
/opt/hostedtoolcache/go/1.16.8/x64/src/internal/poll/fd_unix.go:166 +0x1d5
os.(*File).read(...)
/opt/hostedtoolcache/go/1.16.8/x64/src/os/file_posix.go:31
os.(*File).Read(0xc0000ac0c8, 0xc000614000, 0x1000, 0x1000, 0x400, 0xf99ea0, 0x1)
/opt/hostedtoolcache/go/1.16.8/x64/src/os/file.go:117 +0x77
bufio.(*Reader).Read(0xc000051750, 0xc0000dd400, 0x400, 0x400, 0x0, 0x0, 0x0)
/opt/hostedtoolcache/go/1.16.8/x64/src/bufio/bufio.go:227 +0x222
github.com/hashicorp/go-plugin.copyChan(0x13c9f88, 0xc0004a80e0, 0xc000040a20, 0x13a6d20, 0xc0000ac0c8)
/home/runner/go/pkg/mod/github.com/hashicorp/go-plugin@v1.4.1/grpc_stdio.go:181 +0xc5
created by github.com/hashicorp/go-plugin.newGRPCStdioServer
/home/runner/go/pkg/mod/github.com/hashicorp/go-plugin@v1.4.1/grpc_stdio.go:38 +0xfe

goroutine 27 [syscall]:
os/signal.signal_recv(0x0)
/opt/hostedtoolcache/go/1.16.8/x64/src/runtime/sigqueue.go:168 +0xa5
os/signal.loop()
/opt/hostedtoolcache/go/1.16.8/x64/src/os/signal/signal_unix.go:23 +0x25

Error: The terraform-provider-kafka_v0.4.1 plugin crashed!

This is always indicative of a bug within the plugin. It would be immensely
helpful if you could report the crash with the plugin's maintainers so that it
can be fixed. The output above should help diagnose the issue.

It appears that the Client is shared across every resource, and there is no locking mechanism on the map. My Go is not the best, but I would imagine this needs a lock on it or, more ideally, a guarantee that the map is only populated once.

I think this patch should fix the issue, even if it is not the most ideal approach — note that the early `return topic, err` paths inside the locked region skip the `Unlock`, so using `defer client.mu.Unlock()` immediately after acquiring the lock would be safer.

Patch
diff --git a/kafka/client.go b/kafka/client.go
index f13502f..50ec741 100644
--- a/kafka/client.go
+++ b/kafka/client.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"log"
 	"math/rand"
+	"sync"
 	"time"
 
 	"github.com/Shopify/sarama"
@@ -26,6 +27,7 @@ type Client struct {
 	config        *Config
 	supportedAPIs map[int]int
 	topics        map[string]void
+	mu            sync.Mutex
 }
 
 func NewClient(config *Config) (*Client, error) {
@@ -472,6 +474,8 @@ func (client *Client) ReadTopic(name string, refreshMetadata bool) (Topic, error
 	}
 
 	if refreshMetadata {
+		log.Printf("[DEBUG] Acquiring refresh lock")
+		client.mu.Lock()
 		log.Printf("[DEBUG] Refreshing metadata")
 		err := c.RefreshMetadata()
 		if err != nil {
@@ -482,6 +486,8 @@ func (client *Client) ReadTopic(name string, refreshMetadata bool) (Topic, error
 		if err != nil {
 			return topic, err
 		}
+		log.Printf("[DEBUG] Releasing refresh lock")
+		client.mu.Unlock()
 	} else {
 		log.Printf("[DEBUG] skipping metadata refresh")
 	}