jpountz commented on code in PR #12841: URL: https://github.com/apache/lucene/pull/12841#discussion_r1404341416
########## lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java: ########## @@ -303,6 +304,30 @@ public byte readByte(long pos) throws IOException { } } + @Override + public void readGroupVInt(long[] docs, int pos) throws IOException { + if (curSegment.byteSize() - curPosition < 17) { + super.readGroupVInt(docs, pos); + return; + } + + final int flag = readByte() & 0xFF; + + final int n1Minus1 = flag >> 6; + final int n2Minus1 = (flag >> 4) & 0x03; + final int n3Minus1 = (flag >> 2) & 0x03; + final int n4Minus1 = flag & 0x03; + + docs[pos] = curSegment.get(LAYOUT_LE_INT, curPosition) & MASKS[n1Minus1]; + curPosition += 1 + n1Minus1; + docs[pos + 1] = curSegment.get(LAYOUT_LE_INT, curPosition) & MASKS[n2Minus1]; + curPosition += 1 + n2Minus1; + docs[pos + 2] = curSegment.get(LAYOUT_LE_INT, curPosition) & MASKS[n3Minus1]; + curPosition += 1 + n3Minus1; + docs[pos + 3] = curSegment.get(LAYOUT_LE_INT, curPosition) & MASKS[n4Minus1]; + curPosition += 1 + n4Minus1; + } Review Comment: Can you add the same `catch (NullPointerException | IllegalStateException e)` that `readInt()` and other read methods have, for the case when the index input is closed? ########## lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java: ########## @@ -303,6 +304,30 @@ public byte readByte(long pos) throws IOException { } } + @Override + public void readGroupVInt(long[] docs, int pos) throws IOException { + if (curSegment.byteSize() - curPosition < 17) { + super.readGroupVInt(docs, pos); + return; + } Review Comment: I don't think we have a test that covers this case well at the moment. 
########## lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java: ########## @@ -49,6 +49,7 @@ abstract class MemorySegmentIndexInput extends IndexInput implements RandomAcces final int chunkSizePower; final Arena arena; final MemorySegment[] segments; + private static final int[] MASKS = new int[] {0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF}; Review Comment: Maybe rename to `GROUP_VINT_MASKS` or something along these lines, now that this logic has moved to a class that is not only about group vint? Also in general I prefer having constants before instance members in the class definition. ########## lucene/core/src/java/org/apache/lucene/store/DataOutput.java: ########## @@ -29,6 +29,7 @@ * internal state like file position). */ public abstract class DataOutput { + BytesRef groupVIntBytes; Review Comment: BytesRefBuilder feels like a better fit for how you're using it (using `length` rather than `offset` to track the number of written bytes). Also let's make it `private`? ########## lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestGroupVInt.java: ########## @@ -31,9 +34,7 @@ public void testEncodeDecode() throws IOException { long[] values = new long[ForUtil.BLOCK_SIZE]; long[] restored = new long[ForUtil.BLOCK_SIZE]; final int iterations = atLeast(100); - - final GroupVIntWriter w = new GroupVIntWriter(); - byte[] encoded = new byte[(int) (Integer.BYTES * ForUtil.BLOCK_SIZE * 1.25)]; + Directory dir = FSDirectory.open(createTempDir()); Review Comment: Let's use `newFSDirectory` to add coverage for all Directory implementations? ```suggestion Directory dir = newFSDirectory(createTempDir()); ``` ########## lucene/core/src/java/org/apache/lucene/store/DataOutput.java: ########## @@ -324,4 +325,45 @@ public void writeSetOfStrings(Set<String> set) throws IOException { writeString(value); } } + + /** + * Encode integers using group-varint. 
It uses VInt to encode tail values that are not enough for + * a group + * + * @param values the values to write + * @param limit the number of values to write. + */ + public void writeGroupVInts(long[] values, int limit) throws IOException { + if (groupVIntBytes == null) { + // the maximum size of one group is 4 integers + 1 byte flag. + groupVIntBytes = new BytesRef(17); + } + int off = 0; + + // encode each group + while ((limit - off) >= 4) { + byte flag = 0; + groupVIntBytes.offset = 1; + flag |= (encodeGroupValue((int) values[off++]) - 1) << 6; + flag |= (encodeGroupValue((int) values[off++]) - 1) << 4; + flag |= (encodeGroupValue((int) values[off++]) - 1) << 2; + flag |= (encodeGroupValue((int) values[off++]) - 1); + groupVIntBytes.bytes[0] = flag; + writeBytes(groupVIntBytes.bytes, groupVIntBytes.offset); + } + + // tail vints + for (; off < limit; off++) { + writeVInt((int) values[off]); Review Comment: Now that we're moving this to `DataOutput`, we probably need to check these casts, e.g. with `Math.toIntExact`. ########## lucene/core/src/java/org/apache/lucene/store/DataInput.java: ########## @@ -98,6 +98,55 @@ public int readInt() throws IOException { return ((b4 & 0xFF) << 24) | ((b3 & 0xFF) << 16) | ((b2 & 0xFF) << 8) | (b1 & 0xFF); } + /** + * Read all the group varints, including the tail vints. + * + * @param docs the array to read ints into. + * @param limit the number of int values to read. + */ + public void readGroupVInts(long[] docs, int limit) throws IOException { + int i; + for (i = 0; i <= limit - 4; i += 4) { + readGroupVInt(docs, i); + } + for (; i < limit; ++i) { + docs[i] = readVInt(); + } + } + + /** + * Read single group varint. we need a long[] because this is what postings are using. + * + * @param docs the array to read ints into. + * @param offset the offset in the array to start storing ints. + */ + public void readGroupVInt(long[] docs, int offset) throws IOException { Review Comment: Let's make this method private? 
This would force `MemorySegmentIndexInput` to copy the logic of `readGroupVInts`, but this would also be better encapsulated? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org