zhtaoxiang commented on code in PR #11729: URL: https://github.com/apache/pinot/pull/11729#discussion_r1349349584
########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/ForwardIndexReader.java: ########## @@ -916,4 +916,83 @@ default byte[][] getBytesMV(int docId, T context) { default int getNumValuesMV(int docId, T context) { throw new UnsupportedOperationException(); } + + class ValueRange { Review Comment: Constructor, getters and tostring can be simplified using lombok after https://github.com/apache/pinot/pull/11742 ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/FixedBytePower2ChunkSVForwardIndexReader.java: ########## @@ -111,4 +115,43 @@ protected ByteBuffer getChunkBuffer(int docId, ChunkReaderContext context) { } return decompressChunk(chunkId, context); } + + protected void recordDocIdRanges(int docId, ChunkReaderContext context, List<ValueRange> ranges) { + int chunkId = docId >>> _shift; + if (context.getChunkId() == chunkId) { + ranges.addAll(context.getRanges()); + return; + } + recordChunkRanges(chunkId, context, ranges); + } + + @Override + public List<ValueRange> getDocIdRange(int docId, ChunkReaderContext context, @Nullable List<ValueRange> ranges) { + if (ranges == null) { + ranges = new ArrayList<>(); + } + if (_isCompressed) { + recordDocIdRanges(docId, context, ranges); + } else { + // If uncompressed, should use fixed offset + throw new IllegalStateException("Operation not supported since the forward index is of fixed length type"); + } + + return ranges; + } + + @Override + public boolean isFixedLengthType() { + return !_isCompressed; + } + + @Override + public long getBaseOffset() { + return _rawDataStart; + } + + @Override + public int getDocLength() { + return _storedType.size(); + } Review Comment: same here: should we throw exception when `isFixedLengthType()` returns false? ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/FixedByteChunkSVForwardIndexReader.java: ########## @@ -91,4 +95,34 @@ public double getDouble(int docId, ChunkReaderContext context) { return _rawData.getDouble(docId * Double.BYTES); } } + + @Override + public List<ValueRange> getDocIdRange(int docId, ChunkReaderContext context, @Nullable List<ValueRange> ranges) { + if (!_isCompressed) { + // If uncompressed, should use fixed offset + throw new IllegalStateException("Operation not supported since the forward index is fixed length type"); + } + + if (ranges == null) { + ranges = new ArrayList<>(); + } + recordDocIdRanges(docId, context, ranges); + + return ranges; + } + + @Override + public boolean isFixedLengthType() { + return !_isCompressed; + } + + @Override + public long getBaseOffset() { + return _rawDataStart; + } + + @Override + public int getDocLength() { + return _storedType.size(); Review Comment: should we throw exception when `isFixedLengthType()` returns false? ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/ChunkReaderContext.java: ########## @@ -56,6 +62,14 @@ public void setChunkId(int chunkId) { _chunkId = chunkId; } + public void setRanges(List<ForwardIndexReader.ValueRange> ranges) { Review Comment: similarly, use lombok to simplify code ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/ForwardIndexReader.java: ########## @@ -916,4 +916,83 @@ default byte[][] getBytesMV(int docId, T context) { default int getNumValuesMV(int docId, T context) { throw new UnsupportedOperationException(); } + + class ValueRange { + private final long _offset; + private final int _size; + // To tell if size uses bit or byte as unit, for fwd index reader reading values of fixed bits. + private final boolean _isSizeOfBit; + + public static ValueRange newByteRange(long offset, int sizeInBytes) { + return new ValueRange(offset, sizeInBytes, false); + } + + public static ValueRange newBitRange(long offset, int sizeInBits) { + return new ValueRange(offset, sizeInBits, true); + } + + private ValueRange(long offset, int size, boolean isSizeOfBit) { + _offset = offset; + _size = size; + _isSizeOfBit = isSizeOfBit; + } + + public long getOffset() { + return _offset; + } + + public int getSize() { + return _size; + } + + public boolean isSizeOfBit() { + return _isSizeOfBit; + } + + @Override + public String toString() { + return "Range{" + "_offset=" + _offset + ", _size=" + _size + ", _isSizeOfBit=" + _isSizeOfBit + '}'; + } + } + + /** + * An interface that enables the caller to get byte ranges accessed while reading a given docId. + * This can be used to eventually allow prefetching of exact byte ranges during query processing, + * if the docIds to be fetched are known beforehand. + * + * This interface will be implemented by all the forward index readers. + */ + interface ValueRangeProvider<T extends ForwardIndexReaderContext> { + /** + * Returns a list of {@link ValueRange} that represents all the distinct + * buffer byte ranges (absolute offset, sizeInBytes) that are accessed when reading the given (@param docId} + * @param docId to find the range for Review Comment: can you please also add java doc for other parameters? currently, we only have java doc for the first parameter ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/ForwardIndexReader.java: ########## @@ -916,4 +916,83 @@ default byte[][] getBytesMV(int docId, T context) { default int getNumValuesMV(int docId, T context) { throw new UnsupportedOperationException(); } + + class ValueRange { + private final long _offset; + private final int _size; + // To tell if size uses bit or byte as unit, for fwd index reader reading values of fixed bits. + private final boolean _isSizeOfBit; + + public static ValueRange newByteRange(long offset, int sizeInBytes) { + return new ValueRange(offset, sizeInBytes, false); + } + + public static ValueRange newBitRange(long offset, int sizeInBits) { + return new ValueRange(offset, sizeInBits, true); + } + + private ValueRange(long offset, int size, boolean isSizeOfBit) { + _offset = offset; + _size = size; + _isSizeOfBit = isSizeOfBit; + } + + public long getOffset() { + return _offset; + } + + public int getSize() { + return _size; + } + + public boolean isSizeOfBit() { + return _isSizeOfBit; + } + + @Override + public String toString() { + return "Range{" + "_offset=" + _offset + ", _size=" + _size + ", _isSizeOfBit=" + _isSizeOfBit + '}'; + } + } + + /** + * An interface that enables the caller to get byte ranges accessed while reading a given docId. + * This can be used to eventually allow prefetching of exact byte ranges during query processing, + * if the docIds to be fetched are known beforehand. + * + * This interface will be implemented by all the forward index readers. + */ + interface ValueRangeProvider<T extends ForwardIndexReaderContext> { + /** + * Returns a list of {@link ValueRange} that represents all the distinct + * buffer byte ranges (absolute offset, sizeInBytes) that are accessed when reading the given (@param docId} + * @param docId to find the range for + * @return List of {@link ValueRange} that are accessed while reading the given docId + */ + List<ValueRange> getDocIdRange(int docId, T context, @Nullable List<ValueRange> ranges); Review Comment: could you please help me understand why we need to return the same type (do we want to change the return type to void?)? IIUC, we want to modify the parameter `List<ValueRange> ranges` directly? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org