Jackie-Jiang commented on code in PR #11674: URL: https://github.com/apache/pinot/pull/11674#discussion_r1339083971
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java: ########## @@ -113,6 +116,156 @@ public byte[] getBytes(int docId, ReaderContext context) { return context.getValue(docId); } + @Override + public int getIntMV(int docId, int[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getInt(); + } + return numValues; + } + + @Override + public int[] getIntMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + int[] valueBuffer = new int[numValues]; + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getInt(); + } + return valueBuffer; + } + + @Override + public int getLongMV(int docId, long[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getLong(); + } + return numValues; + } + + @Override + public long[] getLongMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + long[] valueBuffer = new long[numValues]; + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getLong(); + } + return valueBuffer; + } + + @Override + public int getFloatMV(int docId, float[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getFloat(); + } + return numValues; + } + + @Override + public float[] getFloatMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + float[] valueBuffer = new float[numValues]; + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getFloat(); + } + return valueBuffer; + } + + @Override + public int getDoubleMV(int docId, double[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getDouble(); + } + return numValues; + } + + @Override + public double[] getDoubleMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + double[] valueBuffer = new double[numValues]; + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getFloat(); + } + return valueBuffer; + } + + @Override + public int getStringMV(int docId, String[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + int contentOffset = (numValues + 1) * Integer.BYTES; + for (int i = 0; i < numValues; i++) { + int length = byteBuffer.getInt((i + 1) * Integer.BYTES); + byte[] bytes = new byte[length]; + byteBuffer.position(contentOffset); + byteBuffer.get(bytes, 0, length); + valueBuffer[i] = new String(bytes, StandardCharsets.UTF_8); + contentOffset += length; Review Comment: Do we need to maintain contentOffset? `position` it once before the for loop should be good ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkForwardIndexWriterV4.java: ########## @@ -142,6 +144,51 @@ public void putBytes(byte[] bytes) { _nextDocId++; } + @Override + public void putStringMV(String[] values) { + // num values + length of each value + int headerSize = Integer.BYTES + Integer.BYTES * values.length; + int size = headerSize; + for (String value : values) { + size += value.getBytes(UTF_8).length; + } + + // Format : [numValues][length1][length2]...[lengthN][value1][value2]...[valueN] + byte[] serializedBytes = new byte[size]; + ByteBuffer byteBuffer = ByteBuffer.wrap(serializedBytes); + byteBuffer.putInt(values.length); + byteBuffer.position(headerSize); + for (int i = 0; i < values.length; i++) { + byte[] utf8 = values[i].getBytes(UTF_8); + byteBuffer.putInt((i + 1) * Integer.BYTES, utf8.length); + byteBuffer.put(utf8); + } + + putBytes(byteBuffer.array()); Review Comment: ```suggestion putBytes(serializedBytes); ``` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/forward/ForwardIndexReaderFactory.java: ########## @@ -81,13 +81,17 @@ public static ForwardIndexReader createRawIndexReader(PinotDataBuffer dataBuffer ? new FixedBytePower2ChunkSVForwardIndexReader(dataBuffer, storedType) : new FixedByteChunkSVForwardIndexReader(dataBuffer, storedType); } else { - return version == VarByteChunkForwardIndexWriterV4.VERSION ? new VarByteChunkSVForwardIndexReaderV4(dataBuffer, - storedType) : new VarByteChunkSVForwardIndexReader(dataBuffer, storedType); + return version == VarByteChunkForwardIndexWriterV4.VERSION ? new VarByteChunkForwardIndexReaderV4( Review Comment: Consider reversing the check to first check the version ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkForwardIndexWriterV4.java: ########## @@ -142,6 +144,51 @@ public void putBytes(byte[] bytes) { _nextDocId++; } + @Override + public void putStringMV(String[] values) { + // num values + length of each value + int headerSize = Integer.BYTES + Integer.BYTES * values.length; + int size = headerSize; + for (String value : values) { + size += value.getBytes(UTF_8).length; Review Comment: We can cache the serialized bytes to avoid serializing twice ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java: ########## @@ -113,6 +116,156 @@ public byte[] getBytes(int docId, ReaderContext context) { return context.getValue(docId); } + @Override + public int getIntMV(int docId, int[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getInt(); + } + return numValues; + } + + @Override + public int[] getIntMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + int[] valueBuffer = new int[numValues]; + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getInt(); + } + return valueBuffer; + } + + @Override + public int getLongMV(int docId, long[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getLong(); + } + return numValues; + } + + @Override + public long[] getLongMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + long[] valueBuffer = new long[numValues]; + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getLong(); + } + return valueBuffer; + } + + @Override + public int getFloatMV(int docId, float[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getFloat(); + } + return numValues; + } + + @Override + public float[] getFloatMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + float[] valueBuffer = new float[numValues]; + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getFloat(); + } + return valueBuffer; + } + + @Override + public int getDoubleMV(int docId, double[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getDouble(); + } + return numValues; + } + + @Override + public double[] getDoubleMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + double[] valueBuffer = new double[numValues]; + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getFloat(); + } + return valueBuffer; + } + + @Override + public int getStringMV(int docId, String[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + int contentOffset = (numValues + 1) * Integer.BYTES; + for (int i = 0; i < numValues; i++) { + int length = byteBuffer.getInt((i + 1) * Integer.BYTES); + byte[] bytes = new byte[length]; + byteBuffer.position(contentOffset); + byteBuffer.get(bytes, 0, length); + valueBuffer[i] = new String(bytes, StandardCharsets.UTF_8); + contentOffset += length; + } + return numValues; + } + + @Override + public String[] getStringMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { Review Comment: Same for these methods ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java: ########## @@ -113,6 +116,156 @@ public byte[] getBytes(int docId, ReaderContext context) { return context.getValue(docId); } + @Override + public int getIntMV(int docId, int[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getInt(); + } + return numValues; + } + + @Override + public int[] getIntMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + int[] valueBuffer = new int[numValues]; + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getInt(); + } + return valueBuffer; + } + + @Override + public int getLongMV(int docId, long[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getLong(); + } + return numValues; + } + + @Override + public long[] getLongMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + long[] valueBuffer = new long[numValues]; + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getLong(); + } + return valueBuffer; + } + + @Override + public int getFloatMV(int docId, float[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getFloat(); + } + return numValues; + } + + @Override + public float[] getFloatMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + float[] valueBuffer = new float[numValues]; + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getFloat(); + } + return valueBuffer; + } + + @Override + public int getDoubleMV(int docId, double[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getDouble(); + } + return numValues; + } + + @Override + public double[] getDoubleMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + double[] valueBuffer = new double[numValues]; + for (int i = 0; i < numValues; i++) { + valueBuffer[i] = byteBuffer.getFloat(); + } + return valueBuffer; + } + + @Override + public int getStringMV(int docId, String[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { + ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + int numValues = byteBuffer.getInt(); + int contentOffset = (numValues + 1) * Integer.BYTES; + for (int i = 0; i < numValues; i++) { + int length = byteBuffer.getInt((i + 1) * Integer.BYTES); + byte[] bytes = new byte[length]; + byteBuffer.position(contentOffset); + byteBuffer.get(bytes, 0, length); Review Comment: ```suggestion byteBuffer.get(bytes); ``` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java: ########## @@ -39,13 +39,13 @@ /** - * Chunk-based single-value raw (non-dictionary-encoded) forward index reader for values of variable length data type - * (BIG_DECIMAL, STRING, BYTES). + * Chunk-based single-value raw (non-dictionary-encoded) forward index reader for values of SV variable length data type + * (BIG_DECIMAL, STRING, BYTES), MV fixed length and MV variable length data types. Review Comment: ```suggestion * Chunk-based raw (non-dictionary-encoded) forward index reader for values of SV variable length data type * (BIG_DECIMAL, STRING, BYTES), MV fixed length and MV variable length data types. ``` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkForwardIndexWriterV4.java: ########## @@ -142,6 +144,51 @@ public void putBytes(byte[] bytes) { _nextDocId++; } + @Override + public void putStringMV(String[] values) { + // num values + length of each value + int headerSize = Integer.BYTES + Integer.BYTES * values.length; + int size = headerSize; + for (String value : values) { + size += value.getBytes(UTF_8).length; + } + + // Format : [numValues][length1][length2]...[lengthN][value1][value2]...[valueN] + byte[] serializedBytes = new byte[size]; + ByteBuffer byteBuffer = ByteBuffer.wrap(serializedBytes); + byteBuffer.putInt(values.length); + byteBuffer.position(headerSize); + for (int i = 0; i < values.length; i++) { + byte[] utf8 = values[i].getBytes(UTF_8); + byteBuffer.putInt((i + 1) * Integer.BYTES, utf8.length); + byteBuffer.put(utf8); + } + + putBytes(byteBuffer.array()); + } + + @Override + public void putBytesMV(byte[][] values) { + // num values + length of each value + int headerSize = Integer.BYTES + Integer.BYTES * values.length; + int size = headerSize; + for (byte[] value : values) { + size += value.length; + } + + // Format : [numValues][length1][length2]...[lengthN][bytes1][bytes2]...[bytesN] + byte[] serializedBytes = new byte[size]; + ByteBuffer byteBuffer = ByteBuffer.wrap(serializedBytes); + byteBuffer.putInt(values.length); + byteBuffer.position(headerSize); + for (int i = 0; i < values.length; i++) { + byteBuffer.putInt((i + 1) * Integer.BYTES, values[i].length); + byteBuffer.put(values[i]); + } + + putBytes(byteBuffer.array()); Review Comment: ```suggestion putBytes(serializedBytes); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org