jpountz commented on PR #13224: URL: https://github.com/apache/lucene/pull/13224#issuecomment-2022824497
The idea makes sense to me, but the fact that we're checking for a specific `blockShift` of `16` looks fragile to me. If codecs change the value of `blockShift` tomorrow, this will break this optimization and we won't know about it. I haven't tested it, but I believe that we don't actually need to test for a specific `blockShift` value and could do something like below instead (should be seen as a quick hack): ```diff diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java b/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java index 1f5954ec05f..af43da8b948 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java @@ -39,6 +39,10 @@ public final class DirectMonotonicReader extends LongValues implements Accountab * from disk. */ public static class Meta implements Accountable { + + // Use a shift of 63 so that there would be a single block regardless of the number of values. + private static final Meta SINGLE_ZERO_BLOCK = new Meta(1, 63); + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Meta.class); @@ -49,19 +53,32 @@ public final class DirectMonotonicReader extends LongValues implements Accountab final byte[] bpvs; final long[] offsets; - Meta(long numValues, int blockShift) { - this.blockShift = blockShift; + public static Meta of(long numValues, int blockShift) { long numBlocks = numValues >>> blockShift; if ((numBlocks << blockShift) < numValues) { numBlocks += 1; } - this.numBlocks = (int) numBlocks; + return new Meta(Math.toIntExact(numBlocks), blockShift); + } + + private Meta(int numBlocks, int blockShift) { + this.blockShift = blockShift; + this.numBlocks = numBlocks; this.mins = new long[this.numBlocks]; this.avgs = new float[this.numBlocks]; this.bpvs = new byte[this.numBlocks]; this.offsets = new long[this.numBlocks]; } + public boolean isFullOfZeroes() { + for (int i = 0; i < numBlocks; ++i) { + if (mins[i] != 0 || bpvs[i] != 0 || avgs[i] != 0) { + return false; + } + } + return true; + } + @Override public long ramBytesUsed() { return BASE_RAM_BYTES_USED @@ -79,13 +96,16 @@ public final class DirectMonotonicReader extends LongValues implements Accountab */ public static Meta loadMeta(IndexInput metaIn, long numValues, int blockShift) throws IOException { - Meta meta = new Meta(numValues, blockShift); + Meta meta = Meta.of(numValues, blockShift); for (int i = 0; i < meta.numBlocks; ++i) { meta.mins[i] = metaIn.readLong(); meta.avgs[i] = Float.intBitsToFloat(metaIn.readInt()); meta.offsets[i] = metaIn.readLong(); meta.bpvs[i] = metaIn.readByte(); } + if (meta.isFullOfZeroes()) { + return Meta.SINGLE_ZERO_BLOCK; + } return meta; } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org