[ https://issues.apache.org/jira/browse/HBASE-27926?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17896005#comment-17896005 ]
Rushabh Shah commented on HBASE-27926: -------------------------------------- [~pankajkumar] Did you try the patch from HBASE-28850? > DBB release too early for replication > ------------------------------------- > > Key: HBASE-27926 > URL: https://issues.apache.org/jira/browse/HBASE-27926 > Project: HBase > Issue Type: Bug > Components: Replication > Affects Versions: 3.0.0-alpha-4, 2.4.17, 2.5.5 > Reporter: Xiaolin Ha > Assignee: Xiaolin Ha > Priority: Major > > When the RS of the destination cluster acts as a client to forward the replicated > entries and encounters an exception, the DBB will be released too early by > calling RpcResponse#done() in NettyRpcServerResponseEncoder. > The coredump and log details are as follows: > {code:java} > Stack: [0x00007f92d9e6d000,0x00007f92d9f6e000], sp=0x00007f92d9f6be18, free > space=1019kNative frames: (J=compiled Java code, j=interpreted, Vv=VM code, > C=native code)C [libc.so.6+0x89db4] _wordcopy_fwd_dest_aligned+0xd4 > Java frames: (J=compiled Java code, j=interpreted, Vv=VM code)J 3297 > sun.misc.Unsafe.copyMemory(Ljava/lang/Object;JLjava/lang/Object;JJ)V (0 > bytes) @ 0x00007fad7d9aa267 [0x00007fad7d9aa200+0x67]j > org.apache.hadoop.hbase.util.UnsafeAccess.unsafeCopy(Ljava/lang/Object;JLjava/lang/Object;JJ)V+36j > > org.apache.hadoop.hbase.util.UnsafeAccess.copy(Ljava/nio/ByteBuffer;I[BII)V+69j > > org.apache.hadoop.hbase.util.ByteBufferUtils.copyFromBufferToArray([BLjava/nio/ByteBuffer;III)V+39j > > org.apache.hadoop.hbase.CellUtil.copyQualifierTo(Lorg/apache/hadoop/hbase/Cell;[BI)I+31J > 15658 C1 > org.apache.hadoop.hbase.CellUtil.cloneQualifier(Lorg/apache/hadoop/hbase/Cell;)[B > (18 bytes) @ 0x00007fad7e9a6c2c [0x00007fad7e9a6aa0+0x18c]j > org.apache.hadoop.hbase.ByteBufferKeyValue.getQualifierArray()[B+1j > org.apache.hadoop.hbase.client.Mutation.cellToStringMap(Lorg/apache/hadoop/hbase/Cell;)Ljava/util/Map;+12j > org.apache.hadoop.hbase.client.Mutation.toMap(I)Ljava/util/Map;+189j > 
org.apache.hadoop.hbase.client.Operation.toJSON(I)Ljava/lang/String;+2j > org.apache.hadoop.hbase.client.Operation.toString(I)Ljava/lang/String;+2j > org.apache.hadoop.hbase.client.Operation.toString()Ljava/lang/String;+2J 8353 > C2 > java.lang.StringBuilder.append(Ljava/lang/Object;)Ljava/lang/StringBuilder; > (9 bytes) @ 0x00007fad7ea0a1bc [0x00007fad7ea0a180+0x3c]j > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.manageLocationError(Lorg/apache/hadoop/hbase/client/Action;Ljava/lang/Exception;)V+28j > > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.groupAndSendMultiAction(Ljava/util/List;I)V+163J > 23463 C2 > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.resubmit(Lorg/apache/hadoop/hbase/ServerName;Ljava/util/List;IILjava/lang/Throwable;)V > (214 bytes) @ 0x00007fad80effb54 [0x00007fad80eff7a0+0x3b4]J 19097 C2 > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.receiveGlobalFailure(Lorg/apache/hadoop/hbase/client/MultiAction;Lorg/apache/hadoop/hbase/ServerName;ILjava/lang/Throwable;Z)V > (312 bytes) @ 0x00007fad7ff53370 [0x00007fad7ff52fa0+0x3d0]J 20201 C1 > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.access$1600(Lorg/apache/hadoop/hbase/client/AsyncRequestFutureImpl;Lorg/apache/hadoop/hbase/client/MultiAction;Lorg/apache/hadoop/hbase/ServerName;ILjava/lang/Throwable;Z)V > (12 bytes) @ 0x00007fad803f31dc [0x00007fad803f3180+0x5c]J 18619 C2 > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl$SingleServerRequestRunnable.run()V > (677 bytes) @ 0x00007fad7f40a8b4 [0x00007fad7f409160+0x1754]J 13220 C2 > java.util.concurrent.ThreadPoolExecutor.runWorker(Ljava/util/concurrent/ThreadPoolExecutor$Worker;)V > (225 bytes) @ 0x00007fad7f3b3a28 [0x00007fad7f3b38a0+0x188]J 10884 C1 > java.util.concurrent.ThreadPoolExecutor$Worker.run()V (9 bytes) @ > 0x00007fad7db53c44 [0x00007fad7db53b40+0x104]J 7961 C1 > java.lang.Thread.run()V (17 bytes) @ 0x00007fad7d61bbfc > [0x00007fad7d61bac0+0x13c]v ~StubRoutines::call_stubStack: > 
[0x00007f92d9e6d000,0x00007f92d9f6e000], sp=0x00007f92d9f6be18, free > space=1019kNative frames: (J=compiled Java code, j=interpreted, Vv=VM code, > C=native code)C [libc.so.6+0x89db4] _wordcopy_fwd_dest_aligned+0xd4 > Java frames: (J=compiled Java code, j=interpreted, Vv=VM code)J 3297 > sun.misc.Unsafe.copyMemory(Ljava/lang/Object;JLjava/lang/Object;JJ)V (0 > bytes) @ 0x00007fad7d9aa267 [0x00007fad7d9aa200+0x67]j > org.apache.hadoop.hbase.util.UnsafeAccess.unsafeCopy(Ljava/lang/Object;JLjava/lang/Object;JJ)V+36j > > org.apache.hadoop.hbase.util.UnsafeAccess.copy(Ljava/nio/ByteBuffer;I[BII)V+69j > > org.apache.hadoop.hbase.util.ByteBufferUtils.copyFromBufferToArray([BLjava/nio/ByteBuffer;III)V+39j > > org.apache.hadoop.hbase.CellUtil.copyQualifierTo(Lorg/apache/hadoop/hbase/Cell;[BI)I+31J > 15658 C1 > org.apache.hadoop.hbase.CellUtil.cloneQualifier(Lorg/apache/hadoop/hbase/Cell;)[B > (18 bytes) @ 0x00007fad7e9a6c2c [0x00007fad7e9a6aa0+0x18c]j > org.apache.hadoop.hbase.ByteBufferKeyValue.getQualifierArray()[B+1j > org.apache.hadoop.hbase.client.Mutation.cellToStringMap(Lorg/apache/hadoop/hbase/Cell;)Ljava/util/Map;+12j > org.apache.hadoop.hbase.client.Mutation.toMap(I)Ljava/util/Map;+189j > org.apache.hadoop.hbase.client.Operation.toJSON(I)Ljava/lang/String;+2j > org.apache.hadoop.hbase.client.Operation.toString(I)Ljava/lang/String;+2j > org.apache.hadoop.hbase.client.Operation.toString()Ljava/lang/String;+2J 8353 > C2 > java.lang.StringBuilder.append(Ljava/lang/Object;)Ljava/lang/StringBuilder; > (9 bytes) @ 0x00007fad7ea0a1bc [0x00007fad7ea0a180+0x3c]j > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.manageLocationError(Lorg/apache/hadoop/hbase/client/Action;Ljava/lang/Exception;)V+28j > > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.groupAndSendMultiAction(Ljava/util/List;I)V+163J > 23463 C2 > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.resubmit(Lorg/apache/hadoop/hbase/ServerName;Ljava/util/List;IILjava/lang/Throwable;)V > (214 bytes) 
@ 0x00007fad80effb54 [0x00007fad80eff7a0+0x3b4]J 19097 C2 > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.receiveGlobalFailure(Lorg/apache/hadoop/hbase/client/MultiAction;Lorg/apache/hadoop/hbase/ServerName;ILjava/lang/Throwable;Z)V > (312 bytes) @ 0x00007fad7ff53370 [0x00007fad7ff52fa0+0x3d0]J 20201 C1 > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.access$1600(Lorg/apache/hadoop/hbase/client/AsyncRequestFutureImpl;Lorg/apache/hadoop/hbase/client/MultiAction;Lorg/apache/hadoop/hbase/ServerName;ILjava/lang/Throwable;Z)V > (12 bytes) @ 0x00007fad803f31dc [0x00007fad803f3180+0x5c]J 18619 C2 > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl$SingleServerRequestRunnable.run()V > (677 bytes) @ 0x00007fad7f40a8b4 [0x00007fad7f409160+0x1754]J 13220 C2 > java.util.concurrent.ThreadPoolExecutor.runWorker(Ljava/util/concurrent/ThreadPoolExecutor$Worker;)V > (225 bytes) @ 0x00007fad7f3b3a28 [0x00007fad7f3b38a0+0x188]J 10884 C1 > java.util.concurrent.ThreadPoolExecutor$Worker.run()V (9 bytes) @ > 0x00007fad7db53c44 [0x00007fad7db53b40+0x104]J 7961 C1 > java.lang.Thread.run()V (17 bytes) @ 0x00007fad7d61bbfc > [0x00007fad7d61bac0+0x13c]v ~StubRoutines::call_stub {code} > {code:java} > 2023-06-13 09:04:08,106 ERROR > [RpcServer.replication.FPBQ.Fifo.handler=9,queue=0,port=16020] > regionserver.ReplicationSink: Unable to accept edit because: > org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException: Failed > 816 actions: NotServingRegionException: 816 times, servers with issues: > XXXXXX,XXXX,1686558427722, > at > org.apache.hadoop.hbase.client.BatchErrors.makeException(BatchErrors.java:53) > at > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.getErrors(AsyncRequestFutureImpl.java:1309) > at org.apache.hadoop.hbase.client.HTable.batch(HTable.java:936) > at org.apache.hadoop.hbase.client.HTable.batch(HTable.java:927) > at > org.apache.hadoop.hbase.replication.regionserver.ReplicationSink.batch(ReplicationSink.java:434) > at > 
org.apache.hadoop.hbase.replication.regionserver.ReplicationSink.replicateEntries(ReplicationSink.java:261) > at > org.apache.hadoop.hbase.replication.regionserver.Replication.replicateLogEntries(Replication.java:187) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.replicateWALEntry(RSRpcServices.java:2094) > at > org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos$AdminService$2.callBlockingMethod(AdminProtos.java:32335) > at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:396) > at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:135) > at org.apache.hadoop.hbase.ipc.RpcHandler.run(RpcHandler.java:103) > at org.apache.hadoop.hbase.ipc.RpcHandler.run(RpcHandler.java:83) > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)