Copilot commented on code in PR #7172:
URL: https://github.com/apache/hbase/pull/7172#discussion_r2232849421

##########
hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestBidirectionSerialReplicationStuck.java:
##########
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.ReplicationTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ ReplicationTests.class, LargeTests.class })
+public class TestBidirectionSerialReplicationStuck extends TestReplicationBase {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestBidirectionSerialReplicationStuck.class);
+
+  @Override
+  protected boolean isSerialPeer() {
+    return true;
+  }
+
+  @Override
+  public void setUpBase() throws Exception {
+    UTIL1.ensureSomeRegionServersAvailable(2);
+    hbaseAdmin.balancerSwitch(false, true);
+    addPeer(PEER_ID2, tableName, UTIL1, UTIL2);
+    addPeer(PEER_ID2, tableName, UTIL2, UTIL1);
+  }
+
+  @Override
+  public void tearDownBase() throws Exception {
+    removePeer(PEER_ID2, UTIL1);
+    removePeer(PEER_ID2, UTIL2);
+  }
+
+  @Test
+  public void testStuck() throws Exception {
+    // disable the peer cluster1 -> cluster2
+    hbaseAdmin.disableReplicationPeer(PEER_ID2);
+    byte[] qualifier = Bytes.toBytes("q");
+    htable1.put(new Put(Bytes.toBytes("aaa-1")).addColumn(famName, qualifier, Bytes.toBytes(1)));
+
+    // add a row to cluster2 and wait it replicate back to cluster1
+    htable2.put(new Put(Bytes.toBytes("aaa-2")).addColumn(famName, qualifier, Bytes.toBytes(2)));
+    UTIL1.waitFor(30000, () -> htable1.exists(new Get(Bytes.toBytes("aaa-2"))));
+
+    // kill the region server which holds the region which contains our rows
+    UTIL1.getRSForFirstRegionInTable(tableName).abort("for testing");
+    // wait until the region is online
+    UTIL1.waitFor(30000, () -> htable1.exists(new Get(Bytes.toBytes("aaa-2"))));
+
+    // put a new row in cluster1
+    htable1.put(new Put(Bytes.toBytes("aaa-3")).addColumn(famName, qualifier, Bytes.toBytes(3)));
+
+    // enable peer cluster1 -> cluster2, the new row should be replicated to cluster2
+    hbaseAdmin.enableReplicationPeer(PEER_ID2);
+    UTIL1.waitFor(30000000, () -> htable2.exists(new Get(Bytes.toBytes("aaa-3")))));
Review Comment:
   The timeout value of 30000000 milliseconds (about 8.3 hours) looks like a typo; the intended value is most likely 30000 milliseconds (30 seconds), matching the other timeouts in this test.

```suggestion
    UTIL1.waitFor(30000, () -> htable2.exists(new Get(Bytes.toBytes("aaa-3"))));
```



##########
hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/SerialReplicationSourceWALReader.java:
##########
@@ -44,6 +48,10 @@ public class SerialReplicationSourceWALReader extends ReplicationSourceWALReader
   // skip filtering.
   private Cell firstCellInEntryBeforeFiltering;
 
+  private String encodedRegionName;
+
+  private long sequenceId = HConstants.NO_SEQNUM;
Review Comment:
   The sequenceId field is not reset to HConstants.NO_SEQNUM after being used. This could lead to stale sequence IDs being recorded if subsequent entries don't have global scope. Consider resetting it in the removeEntryFromStream method, as in the sketch below.
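A minimal sketch of the proposed reset, assuming the clean-up belongs in removeEntryFromStream; the method signature and the rest of its body shown here are assumptions for illustration, not code from the PR:

```java
  // Hypothetical sketch -- the signature and existing body are assumed; only
  // the two resets at the end illustrate the proposal.
  private void removeEntryFromStream(WALEntryStream entryStream, WALEntryBatch batch)
    throws IOException {
    // ... existing logic that advances the stream and clears
    // firstCellInEntryBeforeFiltering stays as-is ...

    // proposed addition: clear the per-entry state so a later entry without
    // global scope cannot be recorded against a stale region/sequence id
    encodedRegionName = null;
    sequenceId = HConstants.NO_SEQNUM;
  }
```

Clearing both fields together keeps encodedRegionName and sequenceId in sync from one entry to the next.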


##########
hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ScopeWALEntryFilter.java:
##########
@@ -39,13 +39,14 @@ public class ScopeWALEntryFilter implements WALEntryFilter, WALCellFilter {
   @Override
   public Entry filter(Entry entry) {
-    // Do not filter out an entire entry by replication scopes. As now we support serial
-    // replication, the sequence id of a marker is also needed by upper layer. We will filter out
-    // all the cells in the filterCell method below if the replication scopes is null or empty.
+    NavigableMap<byte[], Integer> scopes = entry.getKey().getReplicationScopes();
+    if (scopes == null || scopes.isEmpty()) {
+      return null;
Review Comment:
   This change reverts the filtering behavior for entries with null or empty scopes, contradicting the comment it removes, which explains that entire entries must not be filtered out here because serial replication still needs the sequence id of a marker. Dropping such entries could break serial replication marker handling.

```suggestion
    // Do not filter out the entire entry if scopes are null or empty to preserve serial replication markers
    if (scopes == null || scopes.isEmpty()) {
      return entry;
```

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org