wchevreuil commented on code in PR #7477:
URL: https://github.com/apache/hbase/pull/7477#discussion_r2568084631
##########
hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java:
##########
@@ -19,83 +19,164 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.io.hfile.bucket.FilePathStringPool;
import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.yetus.audience.InterfaceAudience;
/**
* Cache Key for use with implementations of {@link BlockCache}
*/
@InterfaceAudience.Private
public class BlockCacheKey implements HeapSize, java.io.Serializable {
- private static final long serialVersionUID = -5199992013113130534L;
- private final String hfileName;
+ private static final long serialVersionUID = -5199992013113130535L; //
Changed due to format
+ // change
+
+ // New compressed format using integer file ID (when codec is available)
+ private final int hfileNameId;
+
+ private transient final FilePathStringPool stringPool;
+
+ private final int regionId;
+
+ private final int cfId;
+
private final long offset;
+
private BlockType blockType;
+
private final boolean isPrimaryReplicaBlock;
- private Path filePath;
+ private final boolean archived;
/**
- * Construct a new BlockCacheKey
+ * Constructs a new BlockCacheKey with the file name and offset only. To be
used for cache lookups
+ * only, DO NOT use this for creating keys when inserting into the cache.
Use either the
+ * overriding constructors with the path parameter or the region and cf
parameters, otherwise,
+ * region cache metrics won't be recorded properly.
* @param hfileName The name of the HFile this block belongs to.
* @param offset Offset of the block into the file
*/
public BlockCacheKey(String hfileName, long offset) {
- this(hfileName, offset, true, BlockType.DATA);
+ this(hfileName, null, null, offset, true, BlockType.DATA, false);
}
+ /**
+ * Constructs a new BlockCacheKey with the file name, offset, replica and
type only. To be used
+ * for cache lookups only, DO NOT use this for creating keys when inserting
into the cache. Use
+ * either the overriding constructors with the path parameter or the region
and cf parameters,
+ * otherwise, region cache metrics won't be recorded properly.
+ * @param hfileName The name of the HFile this block belongs to.
+ * @param offset Offset of the block into the file
+ * @param isPrimaryReplica Whether this is from primary replica
+ * @param blockType Type of block
+ */
public BlockCacheKey(String hfileName, long offset, boolean isPrimaryReplica,
BlockType blockType) {
+ this(hfileName, null, null, offset, isPrimaryReplica, blockType, false);
+ }
+
+ /**
+ * Construct a new BlockCacheKey, with file, column family and region
information. This should be
+ * used when inserting keys into the cache, so that region cache metrics are
recorded properly.
+ * @param hfileName The name of the HFile this block belongs to.
+ * @param cfName The column family name
+ * @param regionName The region name
+ * @param offset Offset of the block into the file
+ * @param isPrimaryReplica Whether this is from primary replica
+ * @param blockType Type of block
+ */
+ public BlockCacheKey(String hfileName, String cfName, String regionName,
long offset,
+ boolean isPrimaryReplica, BlockType blockType, boolean archived) {
this.isPrimaryReplicaBlock = isPrimaryReplica;
- this.hfileName = hfileName;
this.offset = offset;
this.blockType = blockType;
+ this.stringPool = FilePathStringPool.getInstance();
+ // Use string pool for file, region and cf values
+ this.hfileNameId = stringPool.encode(hfileName);
+ this.regionId = (regionName != null) ? stringPool.encode(regionName) : -1;
+ this.cfId = (cfName != null) ? stringPool.encode(cfName) : -1;
+ this.archived = archived;
}
+ /**
+ * Construct a new BlockCacheKey using a file path. File, column family and
region information
+ * will be extracted from the passed path. This should be used when
inserting keys into the cache,
+ * so that region cache metrics are recorded properly.
+ * @param hfilePath The path to the HFile
+ * @param offset Offset of the block into the file
+ * @param isPrimaryReplica Whether this is from primary replica
+ * @param blockType Type of block
+ */
public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica,
BlockType blockType) {
- this.filePath = hfilePath;
- this.isPrimaryReplicaBlock = isPrimaryReplica;
- this.hfileName = hfilePath.getName();
- this.offset = offset;
- this.blockType = blockType;
+ this(hfilePath.getName(), hfilePath.getParent().getName(),
+ hfilePath.getParent().getParent().getName(), offset, isPrimaryReplica,
blockType,
+ HFileArchiveUtil.isHFileArchived(hfilePath));
}
@Override
public int hashCode() {
- return hfileName.hashCode() * 127 + (int) (offset ^ (offset >>> 32));
+ return hfileNameId * 127 + (int) (offset ^ (offset >>> 32));
Review Comment:
IDs are provided by the FilePathStringPool.encode() method, which keeps a
ConcurrentHashMap of all IDs currently in use, and atomically calculates new IDs for
strings not yet pooled. It also enforces a limit of Integer.MAX_VALUE
possible IDs; if we reach that value, FilePathStringPool.encode() throws an error.
When a string is removed from the pool (FilePathStringPool.remove()), its
associated ID is released and can be reused. Removal of a string should only
be done when we are sure no BlockCacheKey relies on that string. It's up to
BucketCache to perform string removal, as it's the only component with enough
information about when a given string is no longer used (i.e. when we remove
all blocks for a given file and all files from a given region).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]