jpountz commented on code in PR #14203:
URL: https://github.com/apache/lucene/pull/14203#discussion_r1997535617


##########
lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java:
##########
@@ -105,15 +107,22 @@ public Lucene90PointsWriter(
     }
   }
 
+  public Lucene90PointsWriter(
+      SegmentWriteState writeState, int maxPointsInLeafNode, double 
maxMBSortInHeap)
+      throws IOException {
+    this(writeState, maxPointsInLeafNode, maxMBSortInHeap, 
Lucene90PointsFormat.VERSION_CURRENT);
+  }

Review Comment:
   let's make all constructors that take a version pkg-private?



##########
lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java:
##########
@@ -139,6 +141,28 @@ public BKDWriter(
       BKDConfig config,
       double maxMBSortInHeap,
       long totalPointCount) {
+    this(
+        maxDoc,
+        tempDir,
+        tempFileNamePrefix,
+        config,
+        maxMBSortInHeap,
+        totalPointCount,
+        BKDWriter.VERSION_CURRENT);
+  }
+
+  public BKDWriter(

Review Comment:
   Can you add javadocs that this ctor should be only used for testing with 
older versions?



##########
lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsFormat.java:
##########
@@ -59,18 +61,39 @@ public final class Lucene90PointsFormat extends 
PointsFormat {
   public static final String META_EXTENSION = "kdm";
 
   static final int VERSION_START = 0;
-  static final int VERSION_CURRENT = VERSION_START;
+  static final int VERSION_BKD_VECTORIZED_BPV24 = 1;
+  static final int VERSION_CURRENT = VERSION_BKD_VECTORIZED_BPV24;
+
+  private static final Map<Integer, Integer> VERSION_TO_BKD_VERSION =
+      Map.of(
+          VERSION_START, BKDWriter.VERSION_META_FILE,
+          VERSION_BKD_VECTORIZED_BPV24, BKDWriter.VERSION_VECTORIZED_DOCID);
+
+  private final int version;
 
   /** Sole constructor */
-  public Lucene90PointsFormat() {}
+  public Lucene90PointsFormat() {
+    this(VERSION_CURRENT);
+  }
+
+  public Lucene90PointsFormat(int version) {

Review Comment:
   Could it be pkg-private? I think we only need it for testing?



##########
lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java:
##########
@@ -114,31 +117,28 @@ void writeDocIds(int[] docIds, int start, int count, 
DataOutput out) throws IOEx
     } else {
       if (max <= 0xFFFFFF) {
         out.writeByte(BPV_24);
-        // write them the same way we are reading them.
-        int i;
-        for (i = 0; i < count - 7; i += 8) {
-          int doc1 = docIds[start + i];
-          int doc2 = docIds[start + i + 1];
-          int doc3 = docIds[start + i + 2];
-          int doc4 = docIds[start + i + 3];
-          int doc5 = docIds[start + i + 4];
-          int doc6 = docIds[start + i + 5];
-          int doc7 = docIds[start + i + 6];
-          int doc8 = docIds[start + i + 7];
-          long l1 = (doc1 & 0xffffffL) << 40 | (doc2 & 0xffffffL) << 16 | 
((doc3 >>> 8) & 0xffffL);
-          long l2 =
-              (doc3 & 0xffL) << 56
-                  | (doc4 & 0xffffffL) << 32
-                  | (doc5 & 0xffffffL) << 8
-                  | ((doc6 >> 16) & 0xffL);
-          long l3 = (doc6 & 0xffffL) << 48 | (doc7 & 0xffffffL) << 24 | (doc8 
& 0xffffffL);
-          out.writeLong(l1);
-          out.writeLong(l2);
-          out.writeLong(l3);
-        }
-        for (; i < count; ++i) {
-          out.writeShort((short) (docIds[start + i] >>> 8));
-          out.writeByte((byte) docIds[start + i]);
+        if (version < BKDWriter.VERSION_VECTORIZED_DOCID) {
+          writeScalarInts24(docIds, start, count, out);
+        } else {
+          // encode the docs in the format that can be vectorized decoded.
+          final int quarter = count >> 2;
+          final int numInts = quarter * 3;
+          for (int i = 0; i < numInts; i++) {
+            scratch[i] = docIds[i + start] << 8;
+          }
+          for (int i = 0; i < quarter; i++) {
+            final int longIdx = i + numInts + start;
+            scratch[i] |= docIds[longIdx] >>> 16;
+            scratch[i + quarter] |= (docIds[longIdx] >>> 8) & 0xFF;
+            scratch[i + quarter * 2] |= docIds[longIdx] & 0xFF;

Review Comment:
   nit: maybe write bytes in little-endian order for consistency, so 
`scratch[i] |= docIds[longIdx] & 0xFF; scratch[i + quarter] |= (docIds[longIdx] 
>>> 8) & 0xFF; scratch[i + quarter * 2] |= docIds[longIdx] >>> 16;`, etc. ?



##########
lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java:
##########
@@ -248,21 +281,68 @@ private void readBitSet(IndexInput in, int count, int[] 
docIDs) throws IOExcepti
     assert pos == count : "pos: " + pos + ", count: " + count;
   }
 
-  private static void readDelta16(IndexInput in, int count, int[] docIDs) 
throws IOException {
+  private static void readDelta16(IndexInput in, int count, int[] docIds) 
throws IOException {
     final int min = in.readVInt();
-    final int halfLen = count >>> 1;
-    in.readInts(docIDs, 0, halfLen);
-    for (int i = 0; i < halfLen; ++i) {
-      int l = docIDs[i];
+    final int half = count >> 1;
+    in.readInts(docIds, 0, half);
+    if (count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE) {
+      // Same format, but enabling the JVM to specialize the decoding logic 
for the default number
+      // of points per node proved to help on benchmarks
+      decode16(docIds, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE / 2, min);
+      assert half * 2 == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE
+          : "we are assuming DEFAULT_MAX_POINTS_IN_LEAF_NODE is a multiple of 
2 here.";
+    } else {
+      decode16(docIds, half, min);
+      for (int i = half << 1; i < count; i++) {
+        docIds[i] = Short.toUnsignedInt(in.readShort()) + min;
+      }
+    }
+  }
+
+  private static void decode16(int[] docIDs, int half, int min) {
+    for (int i = 0; i < half; ++i) {
+      final int l = docIDs[i];
       docIDs[i] = (l >>> 16) + min;
-      docIDs[halfLen + i] = (l & 0xFFFF) + min;
+      docIDs[i + half] = (l & 0xFFFF) + min;
     }
-    if ((count & 1) == 1) {
-      docIDs[count - 1] = Short.toUnsignedInt(in.readShort()) + min;
+  }
+
+  private void readInts24(IndexInput in, int count, int[] docIDs) throws 
IOException {
+    int quarter = count >> 2;
+    int numInts = quarter * 3;
+    in.readInts(scratch, 0, numInts);
+    if (count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE) {
+      // Same format, but enabling the JVM to specialize the decoding logic 
for the default number
+      // of points per node proved to help on benchmarks
+      decode24(
+          docIDs,
+          scratch,
+          BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE / 4,
+          BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE / 4 * 3);
+      assert quarter * 4 == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE
+          : " we are assuming DEFAULT_MAX_POINTS_IN_LEAF_NODE is a multiple of 
4 here.";
+    } else {
+      decode24(docIDs, scratch, quarter, numInts);
+      // Now read the remaining 0, 1, 2 or 3 values
+      for (int i = quarter << 2; i < count; ++i) {
+        docIDs[i] = (in.readShort() & 0xFFFF) | (in.readByte() & 0xFF) << 16;
+      }

Review Comment:
   out of curiosity, does it hurt performance if we add this as part of 
decode24? This would help save the above assertion?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to