klsince commented on code in PR #11729: URL: https://github.com/apache/pinot/pull/11729#discussion_r1349154892
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkSVForwardIndexReader.java: ########## @@ -195,4 +212,61 @@ private long getValueEndOffset(int chunkId, int chunkRowId, long chunkStartOffse } } } + + private long getValueEndOffsetAndRecordRanges(int chunkId, int chunkRowId, long chunkStartOffset, + List<ValueRange> ranges) { + if (chunkId == _numChunks - 1) { + // Last chunk + if (chunkRowId == _numDocsPerChunk - 1) { + // Last row in the last chunk + return _dataBuffer.size(); + } else { + ranges.add(ValueRange.newByteRange(chunkStartOffset + (chunkRowId + 1) * ROW_OFFSET_SIZE, Integer.BYTES)); + int valueEndOffsetInChunk = _dataBuffer.getInt(chunkStartOffset + (chunkRowId + 1) * ROW_OFFSET_SIZE); + if (valueEndOffsetInChunk == 0) { + // Last row in the last chunk (chunk is incomplete, which stores 0 as the offset for the absent rows) + return _dataBuffer.size(); + } else { + return chunkStartOffset + valueEndOffsetInChunk; + } + } + } else { + if (chunkRowId == _numDocsPerChunk - 1) { + // Last row in the chunk + return getChunkPositionAndRecordRanges(chunkId + 1, ranges); + } else { + ranges.add(ValueRange.newByteRange(chunkStartOffset + (chunkRowId + 1) * ROW_OFFSET_SIZE, Integer.BYTES)); + return chunkStartOffset + _dataBuffer.getInt(chunkStartOffset + (chunkRowId + 1) * ROW_OFFSET_SIZE); + } + } + } + + @Override + public List<ValueRange> getDocIdRange(int docId, ChunkReaderContext context, @Nullable List<ValueRange> ranges) { + if (ranges == null) { + ranges = new ArrayList<>(); + } + if (_isCompressed) { + recordDocIdRanges(docId, context, ranges); + } else { + recordDocIdRanges(docId, ranges); + } + + return ranges; + } + + @Override + public boolean isFixedLengthType() { + return false; + } + + @Override + public long getBaseOffset() { + throw new IllegalStateException("Operation not supported since the forward index is not of fixed length type"); Review Comment: use `UnsupportedOperationException("Forward index is not of fixed length type")` instead, and many other places as well? ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkMVForwardIndexReader.java: ########## @@ -179,6 +183,20 @@ private byte[] getBytesCompressed(int docId, ChunkReaderContext context) { return bytes; } + public void recordDocIdRanges(int docId, List<ValueRange> ranges) { Review Comment: this is a private method? btw, changes in this class look quite similar to those in VarByteChunkSVForwardIndexReader.java below, so is there chance to extract common parts to reuse? ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java: ########## @@ -271,10 +302,21 @@ public static abstract class ReaderContext implements ForwardIndexReaderContext protected int _nextDocIdOffset; protected boolean _regularChunk; protected int _numDocsInCurrentChunk; + protected long _chunkStartOffset; + private List<ValueRange> _ranges; - protected ReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks) { + protected ReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks, long chunkStartOffset) { _chunks = chunks; _metadata = metadata; + _chunkStartOffset = chunkStartOffset; + } + + public void recordRangesForDocId(int docId, List<ValueRange> ranges) { Review Comment: private or protected method? ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java: ########## @@ -330,14 +371,37 @@ private byte[] decompressAndRead(int docId) } return processChunkAndReadFirstValue(docId, offset, limit); } + + private void initAndRecordRangesForDocId(int docId, List<ValueRange> ranges) { + // We'd practically end up reading entire metadata buffer Review Comment: expand the comment a bit, to explain why we'd read the entire metadata buffer ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/FixedByteChunkMVForwardIndexReader.java: ########## @@ -195,6 +220,38 @@ private int getValueEndOffset(int rowId, ByteBuffer chunkBuffer) { } } + private long getValueEndOffsetAndRecordRanges(int chunkId, int chunkRowId, long chunkStartOffset, Review Comment: this method looks similar with other classes, might have a chance to extract common code to reuse ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java: ########## @@ -263,6 +270,30 @@ public void close() throws IOException { } + @Override + public List<ValueRange> getDocIdRange(int docId, ReaderContext context, @Nullable List<ValueRange> ranges) { + if (ranges == null) { + ranges = new ArrayList<>(); + } + context.recordRangesForDocId(docId, ranges); + return ranges; + } + + @Override + public boolean isFixedLengthType() { + return false; + } + + @Override + public long getBaseOffset() { + throw new UnsupportedOperationException(); Review Comment: add error msg to be consistent with other places. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org