easyice commented on code in PR #12841: URL: https://github.com/apache/lucene/pull/12841#discussion_r1420565417
########## lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java: ########## @@ -1438,4 +1440,68 @@ public void testListAllIsSorted() throws IOException { assertArrayEquals(expected, actual); } } + + public void testDataTypes() throws IOException { + final long[] values = new long[] {43, 12345, 123456, 1234567890}; + try (Directory dir = getDirectory(createTempDir("testDataTypes"))) { + IndexOutput out = dir.createOutput("test", IOContext.DEFAULT); + out.writeByte((byte) 43); + out.writeShort((short) 12345); + out.writeInt(1234567890); + out.writeGroupVInts(values, 4); + out.writeLong(1234567890123456789L); + out.close(); + + long[] restored = new long[4]; + IndexInput in = dir.openInput("test", IOContext.DEFAULT); + assertEquals(43, in.readByte()); + assertEquals(12345, in.readShort()); + assertEquals(1234567890, in.readInt()); + in.readGroupVInts(restored, 4); + assertArrayEquals(values, restored); + assertEquals(1234567890123456789L, in.readLong()); + in.close(); + } + } + + public void testGroupVInt() throws IOException { + try (Directory dir = getDirectory(createTempDir("testGroupVInt"))) { + // test fallback to default implementation of readGroupVInt + doTestGroupVInt(dir, 5, 1, 6, 8); + + // use more iterations to covers all bpv + doTestGroupVInt(dir, atLeast(100), 1, 31, 128); + + // we use BaseChunkedDirectoryTestCase#testGroupVIntMultiBlocks cover multiple blocks for + // ByteBuffersDataInput and MMapDirectory + } + } + + protected void doTestGroupVInt( + Directory dir, int iterations, int minBpv, int maxBpv, int maxNumValues) throws IOException { + long[] values = new long[maxNumValues]; + long[] restored = new long[maxNumValues]; + + for (int i = 0; i < iterations; i++) { Review Comment: I changed it so that all iterations write to a single file, and then do `writeVInt` to another file, as expected. 
########## lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java: ########## @@ -62,4 +62,42 @@ private static long readLongInGroup(DataInput in, int numBytesMinus1) throws IOE return in.readInt() & 0xFFFFFFFFL; } } + + /** + * Provides an abstraction for read int values, so that decoding logic can be reused in different DataInput. + * + */ + public static interface IntReader { + int read(long v); + } + + /** + * Faster implementation of read single group, It read values from the buffer that would not cross + * boundaries. + * + * @param flag the flag of group varint. + * @param reader the supplier of read int. + * @param dst the array to read ints into. Review Comment: Sorry, I missed that! ########## lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java: ########## @@ -324,24 +324,9 @@ private void readGroupVInt(long[] dst, int offset) throws IOException { try { final int flag = curSegment.get(LAYOUT_BYTE, curPosition++) & 0xFF; - - final int n1Minus1 = flag >> 6; - final int n2Minus1 = (flag >> 4) & 0x03; - final int n3Minus1 = (flag >> 2) & 0x03; - final int n4Minus1 = flag & 0x03; - - dst[offset] = - curSegment.get(LAYOUT_LE_INT, curPosition) & GroupVIntUtil.GROUP_VINT_MASKS[n1Minus1]; - curPosition += 1 + n1Minus1; - dst[offset + 1] = - curSegment.get(LAYOUT_LE_INT, curPosition) & GroupVIntUtil.GROUP_VINT_MASKS[n2Minus1]; - curPosition += 1 + n2Minus1; - dst[offset + 2] = - curSegment.get(LAYOUT_LE_INT, curPosition) & GroupVIntUtil.GROUP_VINT_MASKS[n3Minus1]; - curPosition += 1 + n3Minus1; - dst[offset + 3] = - curSegment.get(LAYOUT_LE_INT, curPosition) & GroupVIntUtil.GROUP_VINT_MASKS[n4Minus1]; - curPosition += 1 + n4Minus1; + curPosition += + GroupVIntUtil.readGroupVInt( + flag, p -> curSegment.get(LAYOUT_LE_INT, p), curPosition, dst, offset); Review Comment: `curPosition` was already incremented by 1 when the flag was read, so it doesn't need another +1 here. > This is why I'd like to have a test that reads a vint group and other data from the same input placed 
behind each other! In `BaseDirectoryTestCase#testDataTypes` we already do a similar test; is that enough? ########## lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java: ########## @@ -303,6 +304,34 @@ public byte readByte(long pos) throws IOException { } } + @Override + public void readGroupVInts(long[] dst, int limit) throws IOException { + int i; + for (i = 0; i <= limit - 4; i += 4) { + readGroupVInt(dst, i); + } + for (; i < limit; ++i) { + dst[i] = readVInt(); + } + } + + private void readGroupVInt(long[] dst, int offset) throws IOException { + final MemorySegment curSegment = this.curSegment; Review Comment: okay! ########## lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java: ########## @@ -62,4 +62,42 @@ private static long readLongInGroup(DataInput in, int numBytesMinus1) throws IOE return in.readInt() & 0xFFFFFFFFL; } } + + /** + * Provides an abstraction for read int values, so that decoding logic can be reused in different DataInput. + * + */ + public static interface IntReader { Review Comment: +1 ########## lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java: ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.util; + +import java.io.IOException; +import org.apache.lucene.store.DataInput; + +/** + * This class contains utility methods and constants for group varint + * + * @lucene.internal + */ +public final class GroupVIntUtil { + // the maximum length of a single group-varint is 4 integers + 1 byte flag. + public static final int MAX_LENGTH_PER_GROUP = 17; + private static final int[] MASKS = new int[] {0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF}; + + /** + * Default implementation of read single group, for optimal performance, you should use {@link + * DataInput#readGroupVInts(long[], int)} instead. + * + * @param dst the array to read ints into. + * @param offset the offset in the array to start storing ints. + */ + public static void readGroupVInt(DataInput in, long[] dst, int offset) throws IOException { + final int flag = in.readByte() & 0xFF; + + final int n1Minus1 = flag >> 6; + final int n2Minus1 = (flag >> 4) & 0x03; + final int n3Minus1 = (flag >> 2) & 0x03; + final int n4Minus1 = flag & 0x03; + + dst[offset] = readLongInGroup(in, n1Minus1); + dst[offset + 1] = readLongInGroup(in, n2Minus1); + dst[offset + 2] = readLongInGroup(in, n3Minus1); + dst[offset + 3] = readLongInGroup(in, n4Minus1); + } + + private static long readLongInGroup(DataInput in, int numBytesMinus1) throws IOException { + switch (numBytesMinus1) { + case 0: + return in.readByte() & 0xFFL; + case 1: + return in.readShort() & 0xFFFFL; + case 2: + return (in.readShort() & 0xFFFFL) | ((in.readByte() & 0xFFL) << 16); + default: + return in.readInt() & 0xFFFFFFFFL; + } + } + + /** + * Provides an abstraction for read int values, so that decoding logic can be reused in different + * DataInput. + */ + public static interface IntReader { + int read(long v); + } + + /** + * Faster implementation of read single group, It read values from the buffer that would not cross + * boundaries. + * + * @param flag the flag of group varint. + * @param reader the supplier of read int. 
+ * @param dst the array to read ints into. + * @param offset the offset in the array to start storing ints. + * @return the number of bytes read, it is a positive number and less than {@link + * #MAX_LENGTH_PER_GROUP} + */ + public static int readGroupVInt(int flag, IntReader reader, long pos, long[] dst, int offset) + throws IOException { Review Comment: Thanks for explaining :) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org