deadlock in HBaseClient.removeClientFromCache logging
jasonculverhouse opened this issue · 1 comment
jasonculverhouse commented
FYI related to
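When removeClientFromCache removes a region and finds that a different client than expected was serving it, it logs the warning below. Building that message calls RegionClient.toString(), which must acquire the monitor of another RegionClient while the calling thread still holds the monitor of its own RegionClient (taken in handleDisconnect). When several I/O threads do this at the same time, the locks are acquired in a cycle, which leads to the following deadlock.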
} else if (oldclient != null) { // Didn't remove what we expected?!
LOG.warn("When handling disconnection of " + client
+ " and removing " + region + " from region2client"
+ ", it was found that " + oldclient + " was in fact"
+ " serving this region");
}
Found one Java-level deadlock:
=============================
"cl7d-7":
waiting to lock monitor 0x00007f88f4eee848 (object 0x00000005108d7940, a org.hbase.async.RegionClient),
which is held by "cl7d-5"
"cl7d-5":
waiting to lock monitor 0x00007f879e228098 (object 0x000000050c1afa18, a org.hbase.async.RegionClient),
which is held by "cl7d-2"
"cl7d-2":
waiting to lock monitor 0x00007f8872f1ef08 (object 0x00000005108d2b10, a org.hbase.async.RegionClient),
which is held by "cl7d-3"
"cl7d-3":
waiting to lock monitor 0x00007f88f64ca468 (object 0x00000005108d7a40, a org.hbase.async.RegionClient),
which is held by "cl7d-7"
Java stack information for the threads listed above:
===================================================
"cl7d-7":
at org.hbase.async.RegionClient.toString(RegionClient.java:2028)
- waiting to lock <0x00000005108d7940> (a org.hbase.async.RegionClient)
at java.lang.String.valueOf(String.java:2994)
at java.lang.StringBuilder.append(StringBuilder.java:131)
at org.hbase.async.HBaseClient.removeClientFromCache(HBaseClient.java:3296)
at org.hbase.async.HBaseClient.access$2200(HBaseClient.java:190)
at org.hbase.async.HBaseClient$RegionClientPipeline.handleDisconnect(HBaseClient.java:3164)
- locked <0x00000005108d7a40> (a org.hbase.async.RegionClient)
at org.hbase.async.HBaseClient$RegionClientPipeline.sendDownstream(HBaseClient.java:3109)
at org.jboss.netty.channel.Channels.close(Channels.java:812)
at org.jboss.netty.channel.AbstractChannel.close(AbstractChannel.java:206)
at org.hbase.async.HBaseClient$RegionClientIdleStateHandler.channelIdle(HBaseClient.java:3195)
at org.jboss.netty.handler.timeout.IdleStateAwareChannelHandler.handleUpstream(IdleStateAwareChannelHandler.java:34)
at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791)
at org.jboss.netty.handler.timeout.IdleStateHandler.channelIdle(IdleStateHandler.java:392)
at org.jboss.netty.handler.timeout.IdleStateHandler$1.run(IdleStateHandler.java:382)
at org.jboss.netty.channel.socket.ChannelRunnableWrapper.run(ChannelRunnableWrapper.java:40)
at org.jboss.netty.channel.socket.nio.AbstractNioSelector.processTaskQueue(AbstractNioSelector.java:391)
at org.jboss.netty.channel.socket.nio.AbstractNioSelector.run(AbstractNioSelector.java:315)
at org.jboss.netty.channel.socket.nio.AbstractNioWorker.run(AbstractNioWorker.java:89)
at org.jboss.netty.channel.socket.nio.NioWorker.run(NioWorker.java:178)
at org.jboss.netty.util.ThreadRenamingRunnable.run(ThreadRenamingRunnable.java:108)
at org.jboss.netty.util.internal.DeadLockProofWorker$1.run(DeadLockProofWorker.java:42)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"cl7d-5":
at org.hbase.async.RegionClient.toString(RegionClient.java:2028)
- waiting to lock <0x000000050c1afa18> (a org.hbase.async.RegionClient)
at java.lang.String.valueOf(String.java:2994)
at java.lang.StringBuilder.append(StringBuilder.java:131)
at org.hbase.async.HBaseClient.removeClientFromCache(HBaseClient.java:3296)
at org.hbase.async.HBaseClient.access$2200(HBaseClient.java:190)
at org.hbase.async.HBaseClient$RegionClientPipeline.handleDisconnect(HBaseClient.java:3164)
- locked <0x00000005108d7940> (a org.hbase.async.RegionClient)
at org.hbase.async.HBaseClient$RegionClientPipeline.sendDownstream(HBaseClient.java:3109)
at org.jboss.netty.channel.Channels.close(Channels.java:812)
at org.jboss.netty.channel.AbstractChannel.close(AbstractChannel.java:206)
at org.hbase.async.HBaseClient$RegionClientIdleStateHandler.channelIdle(HBaseClient.java:3195)
at org.jboss.netty.handler.timeout.IdleStateAwareChannelHandler.handleUpstream(IdleStateAwareChannelHandler.java:34)
at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791)
at org.jboss.netty.handler.timeout.IdleStateHandler.channelIdle(IdleStateHandler.java:392)
at org.jboss.netty.handler.timeout.IdleStateHandler$1.run(IdleStateHandler.java:382)
at org.jboss.netty.channel.socket.ChannelRunnableWrapper.run(ChannelRunnableWrapper.java:40)
at org.jboss.netty.channel.socket.nio.AbstractNioSelector.processTaskQueue(AbstractNioSelector.java:391)
at org.jboss.netty.channel.socket.nio.AbstractNioSelector.run(AbstractNioSelector.java:315)
at org.jboss.netty.channel.socket.nio.AbstractNioWorker.run(AbstractNioWorker.java:89)
at org.jboss.netty.channel.socket.nio.NioWorker.run(NioWorker.java:178)
at org.jboss.netty.util.ThreadRenamingRunnable.run(ThreadRenamingRunnable.java:108)
at org.jboss.netty.util.internal.DeadLockProofWorker$1.run(DeadLockProofWorker.java:42)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"cl7d-2":
at org.hbase.async.RegionClient.toString(RegionClient.java:2028)
- waiting to lock <0x00000005108d2b10> (a org.hbase.async.RegionClient)
at java.lang.String.valueOf(String.java:2994)
at java.lang.StringBuilder.append(StringBuilder.java:131)
at org.hbase.async.HBaseClient.removeClientFromCache(HBaseClient.java:3296)
at org.hbase.async.HBaseClient.access$2200(HBaseClient.java:190)
at org.hbase.async.HBaseClient$RegionClientPipeline.handleDisconnect(HBaseClient.java:3164)
- locked <0x000000050c1afa18> (a org.hbase.async.RegionClient)
at org.hbase.async.HBaseClient$RegionClientPipeline.sendDownstream(HBaseClient.java:3109)
at org.jboss.netty.channel.Channels.close(Channels.java:812)
at org.jboss.netty.channel.AbstractChannel.close(AbstractChannel.java:206)
at org.hbase.async.HBaseClient$RegionClientIdleStateHandler.channelIdle(HBaseClient.java:3195)
at org.jboss.netty.handler.timeout.IdleStateAwareChannelHandler.handleUpstream(IdleStateAwareChannelHandler.java:34)
at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791)
at org.jboss.netty.handler.timeout.IdleStateHandler.channelIdle(IdleStateHandler.java:392)
at org.jboss.netty.handler.timeout.IdleStateHandler$1.run(IdleStateHandler.java:382)
at org.jboss.netty.channel.socket.ChannelRunnableWrapper.run(ChannelRunnableWrapper.java:40)
at org.jboss.netty.channel.socket.nio.AbstractNioSelector.processTaskQueue(AbstractNioSelector.java:391)
at org.jboss.netty.channel.socket.nio.AbstractNioSelector.run(AbstractNioSelector.java:315)
at org.jboss.netty.channel.socket.nio.AbstractNioWorker.run(AbstractNioWorker.java:89)
at org.jboss.netty.channel.socket.nio.NioWorker.run(NioWorker.java:178)
at org.jboss.netty.util.ThreadRenamingRunnable.run(ThreadRenamingRunnable.java:108)
at org.jboss.netty.util.internal.DeadLockProofWorker$1.run(DeadLockProofWorker.java:42)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"cl7d-3":
at org.hbase.async.RegionClient.toString(RegionClient.java:2028)
- waiting to lock <0x00000005108d7a40> (a org.hbase.async.RegionClient)
at java.lang.String.valueOf(String.java:2994)
at java.lang.StringBuilder.append(StringBuilder.java:131)
at org.hbase.async.HBaseClient.removeClientFromCache(HBaseClient.java:3296)
at org.hbase.async.HBaseClient.access$2200(HBaseClient.java:190)
at org.hbase.async.HBaseClient$RegionClientPipeline.handleDisconnect(HBaseClient.java:3164)
- locked <0x00000005108d2b10> (a org.hbase.async.RegionClient)
at org.hbase.async.HBaseClient$RegionClientPipeline.sendDownstream(HBaseClient.java:3109)
at org.jboss.netty.channel.Channels.close(Channels.java:812)
at org.jboss.netty.channel.AbstractChannel.close(AbstractChannel.java:206)
at org.hbase.async.HBaseClient$RegionClientIdleStateHandler.channelIdle(HBaseClient.java:3195)
at org.jboss.netty.handler.timeout.IdleStateAwareChannelHandler.handleUpstream(IdleStateAwareChannelHandler.java:34)
at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791)
at org.jboss.netty.handler.timeout.IdleStateHandler.channelIdle(IdleStateHandler.java:392)
at org.jboss.netty.handler.timeout.IdleStateHandler$1.run(IdleStateHandler.java:382)
at org.jboss.netty.channel.socket.ChannelRunnableWrapper.run(ChannelRunnableWrapper.java:40)
at org.jboss.netty.channel.socket.nio.AbstractNioSelector.processTaskQueue(AbstractNioSelector.java:391)
at org.jboss.netty.channel.socket.nio.AbstractNioSelector.run(AbstractNioSelector.java:315)
at org.jboss.netty.channel.socket.nio.AbstractNioWorker.run(AbstractNioWorker.java:89)
at org.jboss.netty.channel.socket.nio.NioWorker.run(NioWorker.java:178)
at org.jboss.netty.util.ThreadRenamingRunnable.run(ThreadRenamingRunnable.java:108)
at org.jboss.netty.util.internal.DeadLockProofWorker$1.run(DeadLockProofWorker.java:42)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Found 1 deadlock.
manolama commented
Merged the patch so this should be good. Thanks!