AutoMQ/automq

[BUG] checkFailoverSuccess throws IllegalReferenceCountException

Closed this issue · 2 comments

Related PR: #2051

2024-10-18 14:52:22.440 +0800 ERROR [kafka.server.streamaspect.ElasticKafkaApis]  [KafkaApi-1018] Unexpected error handling request RequestHeader(apiKey=METADATA, apiVersion=12, clientId=consumer-default-group-5, correlationId=87495, headerVersion=2) -- MetadataRequestData(topics=[MetadataRequestTopic(topicId=AAAAAAAAAAAAAAAAAAAAAA, name='default_5'), MetadataRequestTopic(topicId=AAAAAAAAAAAAAAAAAAAAAA, name='default_9'), MetadataRequestTopic(topicId=AAAAAAAAAAAAAAAAAAAAAA, name='default_8'), MetadataRequestTopic(topicId=AAAAAAAAAAAAAAAAAAAAAA, name='default_7'), MetadataRequestTopic(topicId=AAAAAAAAAAAAAAAAAAAAAA, name='default_6')], allowAutoTopicCreation=true, includeClusterAuthorizedOperations=false, includeTopicAuthorizedOperations=false) with context RequestContext(header=RequestHeader(apiKey=METADATA, apiVersion=12, clientId=consumer-default-group-5, correlationId=87495, headerVersion=2), connectionId='172.16.2.155:9092-172.16.0.203:47750-44', clientAddress=/172.16.0.203, principal=User:ANONYMOUS, listenerName=ListenerName(PLAINTEXT), securityProtocol=PLAINTEXT, clientInformation=ClientInformation(softwareName=apache-kafka-java, softwareVersion=3.7.0), fromPrivilegedListener=false, principalSerde=Optional[org.apache.kafka.common.security.authenticator.DefaultKafkaPrincipalBuilder@afa82db])
io.netty.util.IllegalReferenceCountException: refCnt: 0, increment: 1
	at io.netty.util.internal.ReferenceCountUpdater.retain0(ReferenceCountUpdater.java:133)
	at io.netty.util.internal.ReferenceCountUpdater.retain(ReferenceCountUpdater.java:120)
	at io.netty.util.AbstractReferenceCounted.retain(AbstractReferenceCounted.java:61)
	at org.apache.kafka.image.MetadataImage.retain(MetadataImage.java:249)
	at kafka.server.metadata.KRaftMetadataCache.retainedImage(KRaftMetadataCache.scala:632)
	at kafka.server.metadata.KRaftMetadataCache.safeRun(KRaftMetadataCache.scala:620)
	at kafka.server.metadata.KRaftMetadataCache.checkFailoverSuccess(KRaftMetadataCache.scala:97)
	at kafka.server.metadata.KRaftMetadataCache.$anonfun$getPartitionMetadata$1(KRaftMetadataCache.scala:146)
	at scala.collection.StrictOptimizedIterableOps.map(StrictOptimizedIterableOps.scala:100)
	at scala.collection.StrictOptimizedIterableOps.map$(StrictOptimizedIterableOps.scala:87)
	at scala.collection.convert.JavaCollectionWrappers$JSetWrapper.map(JavaCollectionWrappers.scala:215)
	at kafka.server.metadata.KRaftMetadataCache.getPartitionMetadata(KRaftMetadataCache.scala:137)
	at kafka.server.metadata.KRaftMetadataCache.$anonfun$getTopicMetadata$1(KRaftMetadataCache.scala:302)
	at scala.collection.immutable.List.flatMap(List.scala:294)
	at scala.collection.immutable.List.flatMap(List.scala:79)
	at kafka.server.metadata.KRaftMetadataCache.getTopicMetadata(KRaftMetadataCache.scala:301)
	at kafka.server.KafkaApis.getTopicMetadata(KafkaApis.scala:1281)
	at kafka.server.KafkaApis.handleTopicMetadataRequest(KafkaApis.scala:1392)
	at kafka.server.KafkaApis.handle(KafkaApis.scala:193)
	at kafka.server.streamaspect.ElasticKafkaApis.handle(ElasticKafkaApis.scala:197)
	at kafka.server.KafkaRequestHandler.run(KafkaRequestHandler.scala:163)
	at java.base/java.lang.Thread.run(Thread.java:840)


Sorry for the late reply.

I think this may be caused by the reference count not being incremented when the MetadataImage is published to the AutoMQ-related users of the image.

When the MetadataImage is created, its refCount is 1,
but this MetadataImage is used by both StreamMetadataManager and KRaftMetadataCache, so the refCount should be incremented to 2.

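If StreamMetadataManager releases all of the references while KRaftMetadataCache is still using the image, the refCount drops to 0, and the next retain() call from KRaftMetadataCache would throw this exception (refCnt: 0, increment: 1), I guess.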

I'll submit a PR that tries to fix it this way later today.