easyice commented on code in PR #12841: URL: https://github.com/apache/lucene/pull/12841#discussion_r1413151301
########## lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java: ########## @@ -212,6 +224,51 @@ public long readLong() throws IOException { } } + @Override + public void readGroupVInts(long[] docs, int limit) throws IOException { + if (asReadOnlyBuffer == true) { + super.readGroupVInts(docs, limit); + return; + } + + int i; + for (i = 0; i <= limit - 4; i += 4) { + readGroupVInt(docs, i); + } + for (; i < limit; ++i) { + docs[i] = readVInt(); + } + } + + private void readGroupVInt(long[] docs, int offset) throws IOException { + int blockOffset = blockOffset(pos); + if (blockOffset + MAX_LENGTH_PER_GROUP <= blockMask) { + super.fallbackReadGroupVInt(docs, offset); + return; + } + + final int flag = readByte() & 0xFF; + + final int n1Minus1 = flag >> 6; + final int n2Minus1 = (flag >> 4) & 0x03; + final int n3Minus1 = (flag >> 2) & 0x03; + final int n4Minus1 = flag & 0x03; + + blockOffset = blockOffset(pos); + int curPosition = blockOffset; + + byte[] bytes = blocks[blockIndex(pos)].array(); + docs[offset] = (int) BitUtil.VH_LE_INT.get(bytes, curPosition) & GROUP_VINT_MASKS[n1Minus1]; Review Comment: @jpountz Thanks for this suggestion, the performance is similar ########## lucene/core/src/java/org/apache/lucene/store/DataOutput.java: ########## @@ -324,4 +326,41 @@ public void writeSetOfStrings(Set<String> set) throws IOException { writeString(value); } } + + /** + * Encode integers using group-varint. It uses VInt to encode tail values that are not enough for Review Comment: +1, Thanks, i like that ########## lucene/core/src/java/org/apache/lucene/store/DataInput.java: ########## @@ -38,6 +38,9 @@ * positioned independently. */ public abstract class DataInput implements Cloneable { + // the maximum length of a single group-varint is 4 integers + 1 byte flag. + static final int MAX_LENGTH_PER_GROUP = 17; + static final int[] GROUP_VINT_MASKS = new int[] {0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF}; Review Comment: +1 ########## lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestGroupVInt.java: ########## @@ -18,38 +18,144 @@ import com.carrotsearch.randomizedtesting.generators.RandomNumbers; import java.io.IOException; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.store.ByteArrayDataOutput; +import java.nio.file.Files; +import java.util.Locale; +import java.util.function.BiFunction; +import org.apache.lucene.store.ByteBuffersDataInput; +import org.apache.lucene.store.ByteBuffersDataOutput; +import org.apache.lucene.store.ByteBuffersDirectory; +import org.apache.lucene.store.ByteBuffersIndexInput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; +import org.apache.lucene.store.FilterIndexInput; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.MMapDirectory; +import org.apache.lucene.store.NIOFSDirectory; +import org.apache.lucene.store.SingleInstanceLockFactory; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.packed.PackedInts; public class TestGroupVInt extends LuceneTestCase { Review Comment: +1, cool! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org