Copilot commented on code in PR #7270: URL: https://github.com/apache/hbase/pull/7270#discussion_r2318539027
########## hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java: ########## @@ -0,0 +1,865 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_SIZE_KEY; +import static org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.DEFAULT_ERROR_TOLERATION_DURATION; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Random; +import java.util.Set; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Waiter; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.fs.HFileSystem; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.hfile.BlockCache; +import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory; +import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; +import org.apache.hadoop.hbase.io.hfile.BlockType; +import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.CacheTestUtils; +import org.apache.hadoop.hbase.io.hfile.HFileBlock; +import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; +import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is used to test the functionality of the DataTieringManager. + * + * The mock online regions are stored in {@link TestDataTieringManager#testOnlineRegions}. + * For all tests, the setup of {@link TestDataTieringManager#testOnlineRegions} occurs only once. + * Please refer to {@link TestDataTieringManager#setupOnlineRegions()} for the structure. + * Additionally, a list of all store files is maintained in {@link TestDataTieringManager#hStoreFiles}. + * The characteristics of these store files are listed below: + * @formatter:off ## HStoreFile Information + * + * | HStoreFile | Region | Store | DataTiering | isHot | + * |------------------|--------------------|---------------------|-----------------------|-------| + * | hStoreFile0 | region1 | hStore11 | TIME_RANGE | true | + * | hStoreFile1 | region1 | hStore12 | NONE | true | + * | hStoreFile2 | region2 | hStore21 | TIME_RANGE | true | + * | hStoreFile3 | region2 | hStore22 | TIME_RANGE | false | + * @formatter:on + */ + +@Category({ RegionServerTests.class, SmallTests.class }) +public class TestDataTieringManager { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestDataTieringManager.class); + + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final Logger LOG = LoggerFactory.getLogger(TestDataTieringManager.class); + private static final long DAY = 24 * 60 * 60 * 1000; + private static Configuration defaultConf; + private static FileSystem fs; + private static BlockCache blockCache; + private static CacheConfig cacheConf; + private static Path testDir; + private static final Map<String, HRegion> testOnlineRegions = new HashMap<>(); + + private static DataTieringManager dataTieringManager; + private static final List<HStoreFile> hStoreFiles = new ArrayList<>(); + + /** + * Represents the current lexicographically increasing string used as a row key when writing + * HFiles. It is incremented each time {@link #nextString()} is called to generate unique row + * keys. + */ + private static String rowKeyString; + + @BeforeClass + public static void setupBeforeClass() throws Exception { + testDir = TEST_UTIL.getDataTestDir(TestDataTieringManager.class.getSimpleName()); + defaultConf = TEST_UTIL.getConfiguration(); + updateCommonConfigurations(); + assertTrue(DataTieringManager.instantiate(defaultConf, testOnlineRegions)); + dataTieringManager = DataTieringManager.getInstance(); + rowKeyString = ""; + } + + private static void updateCommonConfigurations() { + defaultConf.setBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, true); + defaultConf.setStrings(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap"); + defaultConf.setLong(BUCKET_CACHE_SIZE_KEY, 32); + } + + @FunctionalInterface + interface DataTieringMethodCallerWithPath { + boolean call(DataTieringManager manager, Path path) throws DataTieringException; + } + + @FunctionalInterface + interface DataTieringMethodCallerWithKey { + boolean call(DataTieringManager manager, BlockCacheKey key) throws DataTieringException; + } + + @Test + public void testDataTieringEnabledWithKey() throws IOException { + initializeTestEnvironment(); + DataTieringMethodCallerWithKey methodCallerWithKey = DataTieringManager::isDataTieringEnabled; + + // Test with valid key + BlockCacheKey key = new BlockCacheKey(hStoreFiles.get(0).getPath(), 0, true, BlockType.DATA); + testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, true); + + // Test with another valid key + key = new BlockCacheKey(hStoreFiles.get(1).getPath(), 0, true, BlockType.DATA); + testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, false); + + // Test with valid key with no HFile Path + key = new BlockCacheKey(hStoreFiles.get(0).getPath().getName(), 0); + testDataTieringMethodWithKeyExpectingException(methodCallerWithKey, key, + new DataTieringException("BlockCacheKey Doesn't Contain HFile Path")); + } + + @Test + public void testDataTieringEnabledWithPath() throws IOException { + initializeTestEnvironment(); + DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isDataTieringEnabled; + + // Test with valid path + Path hFilePath = hStoreFiles.get(1).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, false); + + // Test with another valid path + hFilePath = hStoreFiles.get(3).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, true); + + // Test with an incorrect path + hFilePath = new Path("incorrectPath"); + testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, + new DataTieringException("Incorrect HFile Path: " + hFilePath)); + + // Test with a non-existing HRegion path + Path basePath = hStoreFiles.get(0).getPath().getParent().getParent().getParent(); + hFilePath = new Path(basePath, "incorrectRegion/cf1/filename"); + testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, + new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist")); + + // Test with a non-existing HStore path + basePath = hStoreFiles.get(0).getPath().getParent().getParent(); + hFilePath = new Path(basePath, "incorrectCf/filename"); + testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, + new DataTieringException("HStore corresponding to " + hFilePath + " doesn't exist")); + } + + @Test + public void testHotDataWithKey() throws IOException { + initializeTestEnvironment(); + DataTieringMethodCallerWithKey methodCallerWithKey = DataTieringManager::isHotData; + + // Test with valid key + BlockCacheKey key = new BlockCacheKey(hStoreFiles.get(0).getPath(), 0, true, BlockType.DATA); + testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, true); + + // Test with another valid key + key = new BlockCacheKey(hStoreFiles.get(3).getPath(), 0, true, BlockType.DATA); + testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, false); + } + + @Test + public void testHotDataWithPath() throws IOException { + initializeTestEnvironment(); + DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isHotData; + + // Test with valid path + Path hFilePath = hStoreFiles.get(2).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, true); + + // Test with another valid path + hFilePath = hStoreFiles.get(3).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, false); + + // Test with a filename where corresponding HStoreFile in not present + hFilePath = new Path(hStoreFiles.get(0).getPath().getParent(), "incorrectFileName"); + testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, + new DataTieringException("Store file corresponding to " + hFilePath + " doesn't exist")); + } + + @Test + public void testPrefetchWhenDataTieringEnabled() throws IOException { + setPrefetchBlocksOnOpen(); + initializeTestEnvironment(); + // Evict blocks from cache by closing the files and passing evict on close. + // Then initialize the reader again. Since Prefetch on open is set to true, it should prefetch + // those blocks. + for (HStoreFile file : hStoreFiles) { + file.closeStoreFile(true); + file.initReader(); + } + + // Since we have one cold file among four files, only three should get prefetched. + Optional<Map<String, Pair<String, Long>>> fullyCachedFiles = blockCache.getFullyCachedFiles(); + assertTrue("We should get the fully cached files from the cache", fullyCachedFiles.isPresent()); + Waiter.waitFor(defaultConf, 10000, () -> fullyCachedFiles.get().size() == 3); + assertEquals("Number of fully cached files are incorrect", 3, fullyCachedFiles.get().size()); + } + + private void setPrefetchBlocksOnOpen() { + defaultConf.setBoolean(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, true); + } + + @Test + public void testColdDataFiles() throws IOException { Review Comment: [nitpick] Missing class-level javadoc. Given the complexity described in the comment block (lines 77-94), the class would benefit from proper javadoc explaining the test setup, particularly the HStoreFile information table. ########## hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCustomCellDataTieringManager.java: ########## @@ -0,0 +1,860 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_SIZE_KEY; +import static org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.DEFAULT_ERROR_TOLERATION_DURATION; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Random; +import java.util.Set; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Waiter; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.fs.HFileSystem; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.hfile.BlockCache; +import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory; +import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; +import org.apache.hadoop.hbase.io.hfile.BlockType; +import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.CacheTestUtils; +import org.apache.hadoop.hbase.io.hfile.HFileBlock; +import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; +import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is used to test the functionality of the DataTieringManager. + * + * The mock online regions are stored in {@link TestCustomCellDataTieringManager#testOnlineRegions}. + * For all tests, the setup of + * {@link TestCustomCellDataTieringManager#testOnlineRegions} occurs only once. + * Please refer to {@link TestCustomCellDataTieringManager#setupOnlineRegions()} for the structure. + * Additionally, a list of all store files is + * maintained in {@link TestCustomCellDataTieringManager#hStoreFiles}. + * The characteristics of these store files are listed below: + * @formatter:off + * ## HStoreFile Information + * | HStoreFile | Region | Store | DataTiering | isHot | + * |------------------|--------------------|---------------------|-----------------------|-------| + * | hStoreFile0 | region1 | hStore11 | CUSTOM_CELL_VALUE | true | + * | hStoreFile1 | region1 | hStore12 | NONE | true | + * | hStoreFile2 | region2 | hStore21 | CUSTOM_CELL_VALUE | true | + * | hStoreFile3 | region2 | hStore22 | CUSTOM_CELL_VALUE | false | + * @formatter:on + */ + +@Category({ RegionServerTests.class, SmallTests.class }) +public class TestCustomCellDataTieringManager { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestCustomCellDataTieringManager.class); + + private static final Logger LOG = LoggerFactory.getLogger(TestCustomCellDataTieringManager.class); + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final long DAY = 24 * 60 * 60 * 1000; + private static Configuration defaultConf; + private static FileSystem fs; + private BlockCache blockCache; + private static CacheConfig cacheConf; + private static Path testDir; + private static final Map<String, HRegion> testOnlineRegions = new HashMap<>(); + + private static DataTieringManager dataTieringManager; + private static final List<HStoreFile> hStoreFiles = new ArrayList<>(); + + /** + * Represents the current lexicographically increasing string used as a row key when writing + * HFiles. It is incremented each time {@link #nextString()} is called to generate unique row + * keys. + */ + private static String rowKeyString; + + @BeforeClass + public static void setupBeforeClass() throws Exception { + testDir = TEST_UTIL.getDataTestDir(TestCustomCellDataTieringManager.class.getSimpleName()); + defaultConf = TEST_UTIL.getConfiguration(); + updateCommonConfigurations(); + DataTieringManager.instantiate(defaultConf, testOnlineRegions); + dataTieringManager = DataTieringManager.getInstance(); + rowKeyString = ""; + } + + private static void updateCommonConfigurations() { + defaultConf.setBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, true); + defaultConf.setStrings(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap"); + defaultConf.setLong(BUCKET_CACHE_SIZE_KEY, 32); + } + + @FunctionalInterface + interface DataTieringMethodCallerWithPath { + boolean call(DataTieringManager manager, Path path) throws DataTieringException; + } + + @FunctionalInterface + interface DataTieringMethodCallerWithKey { + boolean call(DataTieringManager manager, BlockCacheKey key) throws DataTieringException; + } + + @Test + public void testDataTieringEnabledWithKey() throws IOException { + initializeTestEnvironment(); + DataTieringMethodCallerWithKey methodCallerWithKey = DataTieringManager::isDataTieringEnabled; + + // Test with valid key + BlockCacheKey key = new BlockCacheKey(hStoreFiles.get(0).getPath(), 0, true, BlockType.DATA); + testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, true); + + // Test with another valid key + key = new BlockCacheKey(hStoreFiles.get(1).getPath(), 0, true, BlockType.DATA); + testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, false); + + // Test with valid key with no HFile Path + key = new BlockCacheKey(hStoreFiles.get(0).getPath().getName(), 0); + testDataTieringMethodWithKeyExpectingException(methodCallerWithKey, key, + new DataTieringException("BlockCacheKey Doesn't Contain HFile Path")); + } + + @Test + public void testDataTieringEnabledWithPath() throws IOException { + initializeTestEnvironment(); + DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isDataTieringEnabled; + + // Test with valid path + Path hFilePath = hStoreFiles.get(1).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, false); + + // Test with another valid path + hFilePath = hStoreFiles.get(3).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, true); + + // Test with an incorrect path + hFilePath = new Path("incorrectPath"); + testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, + new DataTieringException("Incorrect HFile Path: " + hFilePath)); + + // Test with a non-existing HRegion path + Path basePath = hStoreFiles.get(0).getPath().getParent().getParent().getParent(); + hFilePath = new Path(basePath, "incorrectRegion/cf1/filename"); + testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, + new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist")); + + // Test with a non-existing HStore path + basePath = hStoreFiles.get(0).getPath().getParent().getParent(); + hFilePath = new Path(basePath, "incorrectCf/filename"); + testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, + new DataTieringException("HStore corresponding to " + hFilePath + " doesn't exist")); + } + + @Test + public void testHotDataWithKey() throws IOException { + initializeTestEnvironment(); + DataTieringMethodCallerWithKey methodCallerWithKey = DataTieringManager::isHotData; + + // Test with valid key + BlockCacheKey key = new BlockCacheKey(hStoreFiles.get(0).getPath(), 0, true, BlockType.DATA); + testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, true); + + // Test with another valid key + key = new BlockCacheKey(hStoreFiles.get(3).getPath(), 0, true, BlockType.DATA); + testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, false); + } + + @Test + public void testHotDataWithPath() throws IOException { + initializeTestEnvironment(); + DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isHotData; + + // Test with valid path + Path hFilePath = hStoreFiles.get(2).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, true); + + // Test with another valid path + hFilePath = hStoreFiles.get(3).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, false); + + // Test with a filename where corresponding HStoreFile in not present + hFilePath = new Path(hStoreFiles.get(0).getPath().getParent(), "incorrectFileName"); + testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, + new DataTieringException("Store file corresponding to " + hFilePath + " doesn't exist")); + } + + @Test + public void testPrefetchWhenDataTieringEnabled() throws IOException { + setPrefetchBlocksOnOpen(); + this.blockCache = initializeTestEnvironment(); + // Evict blocks from cache by closing the files and passing evict on close. + // Then initialize the reader again. Since Prefetch on open is set to true, it should prefetch + // those blocks. + for (HStoreFile file : hStoreFiles) { + file.closeStoreFile(true); + file.initReader(); + } + + // Since we have one cold file among four files, only three should get prefetched. + Optional<Map<String, Pair<String, Long>>> fullyCachedFiles = blockCache.getFullyCachedFiles(); + assertTrue("We should get the fully cached files from the cache", fullyCachedFiles.isPresent()); + Waiter.waitFor(defaultConf, 10000, () -> fullyCachedFiles.get().size() == 3); + assertEquals("Number of fully cached files are incorrect", 3, fullyCachedFiles.get().size()); + } + + private void setPrefetchBlocksOnOpen() { + defaultConf.setBoolean(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, true); + } + + @Test + public void testColdDataFiles() throws IOException { + initializeTestEnvironment(); + Set<BlockCacheKey> allCachedBlocks = new HashSet<>(); + for (HStoreFile file : hStoreFiles) { + allCachedBlocks.add(new BlockCacheKey(file.getPath(), 0, true, BlockType.DATA)); + } + + // Verify hStoreFile3 is identified as cold data + DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isHotData; + Path hFilePath = hStoreFiles.get(3).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, false); + + // Verify all the other files in hStoreFiles are hot data + for (int i = 0; i < hStoreFiles.size() - 1; i++) { + hFilePath = hStoreFiles.get(i).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, true); + } + + try { + Set<String> coldFilePaths = dataTieringManager.getColdDataFiles(allCachedBlocks); + assertEquals(1, coldFilePaths.size()); + } catch (DataTieringException e) { + fail("Unexpected DataTieringException: " + e.getMessage()); + } + } + + @Test + public void testCacheCompactedBlocksOnWriteDataTieringDisabled() throws IOException { + setCacheCompactBlocksOnWrite(); + this.blockCache = initializeTestEnvironment(); Review Comment: [nitpick] Instance variable assignment inside test methods can lead to confusion and potential side effects between tests. Consider using local variables or proper test setup/teardown methods. ########## hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java: ########## @@ -1105,6 +1114,18 @@ void freeSpace(final String why) { } } } + + if ( + bytesFreed < bytesToFreeWithExtra && coldFiles != null + && coldFiles.containsKey(bucketEntryWithKey.getKey().getHfileName()) + ) { Review Comment: [nitpick] The condition check for cold files happens inside the main loop over all cache entries. Consider pre-filtering or using a more efficient lookup mechanism to avoid checking cold files for every entry. ########## hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CustomDateTieredCompactionPolicy.java: ########## @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver.compactions; + +import static org.apache.hadoop.hbase.regionserver.CustomTieringMultiFileWriter.CUSTOM_TIERING_TIME_RANGE; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import org.apache.commons.lang3.mutable.MutableLong; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HDFSBlocksDistribution; +import org.apache.hadoop.hbase.regionserver.HStoreFile; +import org.apache.hadoop.hbase.regionserver.StoreConfigInformation; +import org.apache.hadoop.hbase.regionserver.StoreUtils; +import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Custom implementation of DateTieredCompactionPolicy that calculates compaction boundaries based + * on the <b>hbase.hstore.compaction.date.tiered.custom.age.limit.millis</b> configuration property + * and the TIERING_CELL_MIN/TIERING_CELL_MAX stored on metadata of each store file. This policy + * would produce either one or two tiers: - One tier if either all files data age are older than the + * configured age limit or all files data age are younger than the configured age limit. - Two tiers + * if files have both younger and older data than the configured age limit. + */ [email protected] +public class CustomDateTieredCompactionPolicy extends DateTieredCompactionPolicy { + + public static final String AGE_LIMIT_MILLIS = + "hbase.hstore.compaction.date.tiered.custom.age.limit.millis"; + + // Defaults to 10 years + public static final long DEFAULT_AGE_LIMIT_MILLIS = + (long) (10L * 365.25 * 24L * 60L * 60L * 1000L); + + private static final Logger LOG = LoggerFactory.getLogger(CustomDateTieredCompactionPolicy.class); + + private long cutOffTimestamp; + + public CustomDateTieredCompactionPolicy(Configuration conf, + StoreConfigInformation storeConfigInfo) throws IOException { + super(conf, storeConfigInfo); + cutOffTimestamp = EnvironmentEdgeManager.currentTime() + - conf.getLong(AGE_LIMIT_MILLIS, DEFAULT_AGE_LIMIT_MILLIS); + + } + + @Override + protected List<Long> getCompactBoundariesForMajor(Collection<HStoreFile> filesToCompact, + long now) { + MutableLong min = new MutableLong(Long.MAX_VALUE); + MutableLong max = new MutableLong(0); + filesToCompact.forEach(f -> { + byte[] timeRangeBytes = f.getMetadataValue(CUSTOM_TIERING_TIME_RANGE); + long minCurrent = Long.MAX_VALUE; + long maxCurrent = 0; + if (timeRangeBytes != null) { + try { + TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(timeRangeBytes); + timeRangeTracker.getMin(); + minCurrent = timeRangeTracker.getMin(); + maxCurrent = timeRangeTracker.getMax(); + } catch (IOException e) { + LOG.warn("Got TIERING_CELL_TIME_RANGE info from file, but failed to parse it:", e); + } + } + if (minCurrent < min.getValue()) { + min.setValue(minCurrent); + } + if (maxCurrent > max.getValue()) { + max.setValue(maxCurrent); + } + }); + + List<Long> boundaries = new ArrayList<>(); + boundaries.add(Long.MIN_VALUE); + if (min.getValue() < cutOffTimestamp) { + boundaries.add(min.getValue()); + if (max.getValue() > cutOffTimestamp) { + boundaries.add(cutOffTimestamp); + } + } + return boundaries; + } + + @Override + public CompactionRequestImpl selectMinorCompaction(ArrayList<HStoreFile> candidateSelection, + boolean mayUseOffPeak, boolean mayBeStuck) throws IOException { + ArrayList<HStoreFile> filteredByPolicy = this.compactionPolicyPerWindow + .applyCompactionPolicy(candidateSelection, mayUseOffPeak, mayBeStuck); + return selectMajorCompaction(filteredByPolicy); + } + + @Override + public boolean shouldPerformMajorCompaction(Collection<HStoreFile> filesToCompact) + throws IOException { + long lowTimestamp = StoreUtils.getLowestTimestamp(filesToCompact); + long now = EnvironmentEdgeManager.currentTime(); + if (isMajorCompactionTime(filesToCompact, now, lowTimestamp)) { + long cfTTL = this.storeConfigInfo.getStoreFileTtl(); + int countLower = 0; + int countHigher = 0; + HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution(); + for (HStoreFile f : filesToCompact) { + if (checkForTtl(cfTTL, f)) { + return true; + } + if (isMajorOrBulkloadResult(f, now - lowTimestamp)) { + return true; + } + byte[] timeRangeBytes = f.getMetadataValue(CUSTOM_TIERING_TIME_RANGE); Review Comment: Potential null pointer exception if timeRangeBytes is null. The parseFrom method call should be protected with a null check, similar to the pattern used elsewhere in the codebase (lines 77-86). ```suggestion byte[] timeRangeBytes = f.getMetadataValue(CUSTOM_TIERING_TIME_RANGE); if (timeRangeBytes == null) { // Skip this file if no time range metadata is present continue; } ``` ########## hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java: ########## @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; +import org.apache.hadoop.hbase.io.hfile.HFileInfo; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The DataTieringManager class categorizes data into hot data and cold data based on the specified + * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with + * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType} + * determines the logic for distinguishing data into hot or cold. By default, all data is considered + * as hot. + */ [email protected] +public class DataTieringManager { + private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class); + public static final String GLOBAL_DATA_TIERING_ENABLED_KEY = + "hbase.regionserver.datatiering.enable"; + public static final boolean DEFAULT_GLOBAL_DATA_TIERING_ENABLED = false; // disabled by default + public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type"; + public static final String DATATIERING_HOT_DATA_AGE_KEY = + "hbase.hstore.datatiering.hot.age.millis"; + public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE; + public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000; // 7 Days + private static DataTieringManager instance; + private final Map<String, HRegion> onlineRegions; + + private DataTieringManager(Map<String, HRegion> onlineRegions) { + this.onlineRegions = onlineRegions; + } + + /** + * Initializes the DataTieringManager instance with the provided map of online regions, only if + * the configuration "hbase.regionserver.datatiering.enable" is enabled. + * @param conf Configuration object. + * @param onlineRegions A map containing online regions. + * @return True if the instance is instantiated successfully, false otherwise. + */ + public static synchronized boolean instantiate(Configuration conf, + Map<String, HRegion> onlineRegions) { + if (isDataTieringFeatureEnabled(conf) && instance == null) { + instance = new DataTieringManager(onlineRegions); + LOG.info("DataTieringManager instantiated successfully."); + return true; + } else { + LOG.warn("DataTieringManager is already instantiated."); Review Comment: The warning message is misleading when the feature is disabled. When isDataTieringFeatureEnabled returns false, the warning suggests the manager is already instantiated, but it could be that the feature is simply disabled. ```suggestion if (!isDataTieringFeatureEnabled(conf)) { LOG.warn("DataTieringManager feature is disabled. Not instantiating."); } else if (instance != null) { LOG.warn("DataTieringManager is already instantiated."); } else { instance = new DataTieringManager(onlineRegions); LOG.info("DataTieringManager instantiated successfully."); return true; ``` ########## hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java: ########## @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; +import org.apache.hadoop.hbase.io.hfile.HFileInfo; +import org.apache.hadoop.hbase.util.Bytes; Review Comment: Several unused imports detected. The Bytes import appears unused as the class doesn't use any Bytes utility methods. ```suggestion ``` ########## hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CustomCellTieringValueProvider.java: ########## @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver.compactions; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ArrayBackedTag; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.hbase.Tag; +import org.apache.hadoop.hbase.TagType; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * An extension of DateTieredCompactor, overriding the decorateCells method to allow for custom + * values to be used for the different file tiers during compaction. + */ [email protected] +public class CustomCellTieringValueProvider implements CustomTieredCompactor.TieringValueProvider { + public static final String TIERING_CELL_QUALIFIER = "TIERING_CELL_QUALIFIER"; + private byte[] tieringQualifier; + + @Override + public void init(Configuration conf) throws Exception { + tieringQualifier = Bytes.toBytes(conf.get(TIERING_CELL_QUALIFIER)); + } + + @Override + public List<Cell> decorateCells(List<Cell> cells) { + // if no tiering qualifier properly set, skips the whole flow Review Comment: [nitpick] Grammatical error in comment: should be 'if no tiering qualifier is properly set, skip the whole flow' or 'skips' should be 'skip'. ```suggestion // if no tiering qualifier is properly set, skip the whole flow ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
