jpountz commented on code in PR #12841: URL: https://github.com/apache/lucene/pull/12841#discussion_r1412795418
########## lucene/core/src/java/org/apache/lucene/store/DataOutput.java: ########## @@ -324,4 +326,41 @@ public void writeSetOfStrings(Set<String> set) throws IOException { writeString(value); } } + + /** + * Encode integers using group-varint. It uses VInt to encode tail values that are not enough for Review Comment: Add a javadoc link to #writeVInt? Also mention that all longs are actually required to be integers? ########## lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java: ########## @@ -212,6 +224,51 @@ public long readLong() throws IOException { } } + @Override + public void readGroupVInts(long[] docs, int limit) throws IOException { + if (asReadOnlyBuffer == true) { + super.readGroupVInts(docs, limit); + return; + } + + int i; + for (i = 0; i <= limit - 4; i += 4) { + readGroupVInt(docs, i); + } + for (; i < limit; ++i) { + docs[i] = readVInt(); + } + } + + private void readGroupVInt(long[] docs, int offset) throws IOException { + int blockOffset = blockOffset(pos); + if (blockOffset + MAX_LENGTH_PER_GROUP <= blockMask) { + super.fallbackReadGroupVInt(docs, offset); + return; + } + + final int flag = readByte() & 0xFF; + + final int n1Minus1 = flag >> 6; + final int n2Minus1 = (flag >> 4) & 0x03; + final int n3Minus1 = (flag >> 2) & 0x03; + final int n4Minus1 = flag & 0x03; + + blockOffset = blockOffset(pos); + int curPosition = blockOffset; + + byte[] bytes = blocks[blockIndex(pos)].array(); + docs[offset] = (int) BitUtil.VH_LE_INT.get(bytes, curPosition) & GROUP_VINT_MASKS[n1Minus1]; Review Comment: I'd rather work on the ByteBuffer (ie. do ByteBuffer#getInt rather than using var handles on the underlying byte[]), even if performance is a bit better by working directly on the byte[]. ########## lucene/core/src/java/org/apache/lucene/store/DataInput.java: ########## @@ -38,6 +38,9 @@ * positioned independently. */ public abstract class DataInput implements Cloneable { + // the maximum length of a single group-varint is 4 integers + 1 byte flag. + static final int MAX_LENGTH_PER_GROUP = 17; + static final int[] GROUP_VINT_MASKS = new int[] {0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF}; Review Comment: Nit: I would like it better if these constants and `fallbackReadGroupVInt` were moved to a helper class, e.g. `org.apache.lucene.util.GroupVIntUtil`. ########## lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestGroupVInt.java: ########## @@ -18,38 +18,144 @@ import com.carrotsearch.randomizedtesting.generators.RandomNumbers; import java.io.IOException; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.store.ByteArrayDataOutput; +import java.nio.file.Files; +import java.util.Locale; +import java.util.function.BiFunction; +import org.apache.lucene.store.ByteBuffersDataInput; +import org.apache.lucene.store.ByteBuffersDataOutput; +import org.apache.lucene.store.ByteBuffersDirectory; +import org.apache.lucene.store.ByteBuffersIndexInput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; +import org.apache.lucene.store.FilterIndexInput; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.MMapDirectory; +import org.apache.lucene.store.NIOFSDirectory; +import org.apache.lucene.store.SingleInstanceLockFactory; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.packed.PackedInts; public class TestGroupVInt extends LuceneTestCase { Review Comment: Should we move these tests to BaseDirectoryTestCase now, to automatically get coverage across all directories? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org