easyice commented on code in PR #12841: URL: https://github.com/apache/lucene/pull/12841#discussion_r1404387618
########## lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java: ########## @@ -303,6 +304,30 @@ public byte readByte(long pos) throws IOException { } } + @Override + public void readGroupVInt(long[] docs, int pos) throws IOException { + if (curSegment.byteSize() - curPosition < 17) { + super.readGroupVInt(docs, pos); + return; + } + + final int flag = readByte() & 0xFF; + + final int n1Minus1 = flag >> 6; + final int n2Minus1 = (flag >> 4) & 0x03; + final int n3Minus1 = (flag >> 2) & 0x03; + final int n4Minus1 = flag & 0x03; + + docs[pos] = curSegment.get(LAYOUT_LE_INT, curPosition) & MASKS[n1Minus1]; + curPosition += 1 + n1Minus1; + docs[pos + 1] = curSegment.get(LAYOUT_LE_INT, curPosition) & MASKS[n2Minus1]; + curPosition += 1 + n2Minus1; + docs[pos + 2] = curSegment.get(LAYOUT_LE_INT, curPosition) & MASKS[n3Minus1]; + curPosition += 1 + n3Minus1; + docs[pos + 3] = curSegment.get(LAYOUT_LE_INT, curPosition) & MASKS[n4Minus1]; + curPosition += 1 + n4Minus1; + } Review Comment: +1, Thanks! ########## lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java: ########## @@ -49,6 +49,7 @@ abstract class MemorySegmentIndexInput extends IndexInput implements RandomAcces final int chunkSizePower; final Arena arena; final MemorySegment[] segments; + private static final int[] MASKS = new int[] {0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF}; Review Comment: +1, great suggestion! ########## lucene/core/src/java/org/apache/lucene/store/DataOutput.java: ########## @@ -29,6 +29,7 @@ * internal state like file position). */ public abstract class DataOutput { + BytesRef groupVIntBytes; Review Comment: +1, Thanks for the suggestion! ########## lucene/core/src/java/org/apache/lucene/store/DataOutput.java: ########## @@ -324,4 +325,45 @@ public void writeSetOfStrings(Set<String> set) throws IOException { writeString(value); } } + + /** + * Encode integers using group-varint. 
It uses VInt to encode tail values that are not enough for + * a group + * + * @param values the values to write + * @param limit the number of values to write. + */ + public void writeGroupVInts(long[] values, int limit) throws IOException { + if (groupVIntBytes == null) { + // the maximum size of one group is 4 integers + 1 byte flag. + groupVIntBytes = new BytesRef(17); + } + int off = 0; + + // encode each group + while ((limit - off) >= 4) { + byte flag = 0; + groupVIntBytes.offset = 1; + flag |= (encodeGroupValue((int) values[off++]) - 1) << 6; + flag |= (encodeGroupValue((int) values[off++]) - 1) << 4; + flag |= (encodeGroupValue((int) values[off++]) - 1) << 2; + flag |= (encodeGroupValue((int) values[off++]) - 1); + groupVIntBytes.bytes[0] = flag; + writeBytes(groupVIntBytes.bytes, groupVIntBytes.offset); + } + + // tail vints + for (; off < limit; off++) { + writeVInt((int) values[off]); Review Comment: Good idea, I like that! ########## lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java: ########## @@ -303,6 +304,30 @@ public byte readByte(long pos) throws IOException { } } + @Override + public void readGroupVInt(long[] docs, int pos) throws IOException { + if (curSegment.byteSize() - curPosition < 17) { + super.readGroupVInt(docs, pos); + return; + } Review Comment: In `TestGroupVInt#testEncodeDecode` we use a range of [1-31] for `bpv` and a range of [1-128] for `numValues`. For instance, if `bpv==2` and `numValues==4`, it will cover this case?
########## lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestGroupVInt.java: ########## @@ -31,9 +34,7 @@ public void testEncodeDecode() throws IOException { long[] values = new long[ForUtil.BLOCK_SIZE]; long[] restored = new long[ForUtil.BLOCK_SIZE]; final int iterations = atLeast(100); - - final GroupVIntWriter w = new GroupVIntWriter(); - byte[] encoded = new byte[(int) (Integer.BYTES * ForUtil.BLOCK_SIZE * 1.25)]; + Directory dir = FSDirectory.open(createTempDir()); Review Comment: +1 ########## lucene/core/src/java/org/apache/lucene/store/DataInput.java: ########## @@ -98,6 +98,55 @@ public int readInt() throws IOException { return ((b4 & 0xFF) << 24) | ((b3 & 0xFF) << 16) | ((b2 & 0xFF) << 8) | (b1 & 0xFF); } + /** + * Read all the group varints, including the tail vints. + * + * @param docs the array to read ints into. + * @param limit the number of int values to read. + */ + public void readGroupVInts(long[] docs, int limit) throws IOException { + int i; + for (i = 0; i <= limit - 4; i += 4) { + readGroupVInt(docs, i); + } + for (; i < limit; ++i) { + docs[i] = readVInt(); + } + } + + /** + * Read single group varint. we need a long[] because this is what postings are using. + * + * @param docs the array to read ints into. + * @param offset the offset in the array to start storing ints. + */ + public void readGroupVInt(long[] docs, int offset) throws IOException { Review Comment: +1, This also reduces virtual function calls. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org