gf2121 commented on code in PR #14203: URL: https://github.com/apache/lucene/pull/14203#discussion_r1997571999
########## lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java: ########## @@ -248,21 +281,68 @@ private void readBitSet(IndexInput in, int count, int[] docIDs) throws IOExcepti assert pos == count : "pos: " + pos + ", count: " + count; } - private static void readDelta16(IndexInput in, int count, int[] docIDs) throws IOException { + private static void readDelta16(IndexInput in, int count, int[] docIds) throws IOException { final int min = in.readVInt(); - final int halfLen = count >>> 1; - in.readInts(docIDs, 0, halfLen); - for (int i = 0; i < halfLen; ++i) { - int l = docIDs[i]; + final int half = count >> 1; + in.readInts(docIds, 0, half); + if (count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE) { + // Same format, but enabling the JVM to specialize the decoding logic for the default number + // of points per node proved to help on benchmarks + decode16(docIds, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE / 2, min); + assert half * 2 == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE + : "we are assuming DEFAULT_MAX_POINTS_IN_LEAF_NODE is a multiple of 2 here."; + } else { + decode16(docIds, half, min); + for (int i = half << 1; i < count; i++) { + docIds[i] = Short.toUnsignedInt(in.readShort()) + min; + } + } + } + + private static void decode16(int[] docIDs, int half, int min) { + for (int i = 0; i < half; ++i) { + final int l = docIDs[i]; docIDs[i] = (l >>> 16) + min; - docIDs[halfLen + i] = (l & 0xFFFF) + min; + docIDs[i + half] = (l & 0xFFFF) + min; } - if ((count & 1) == 1) { - docIDs[count - 1] = Short.toUnsignedInt(in.readShort()) + min; + } + + private void readInts24(IndexInput in, int count, int[] docIDs) throws IOException { + int quarter = count >> 2; + int numInts = quarter * 3; + in.readInts(scratch, 0, numInts); + if (count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE) { + // Same format, but enabling the JVM to specialize the decoding logic for the default number + // of points per node proved to help on benchmarks + decode24( + docIDs, + 
scratch, + BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE / 4, + BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE / 4 * 3); + assert quarter * 4 == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE + : " we are assuming DEFAULT_MAX_POINTS_IN_LEAF_NODE is a multiple of 4 here."; + } else { + decode24(docIDs, scratch, quarter, numInts); + // Now read the remaining 0, 1, 2 or 3 values + for (int i = quarter << 2; i < count; ++i) { + docIDs[i] = (in.readShort() & 0xFFFF) | (in.readByte() & 0xFF) << 16; + } Review Comment: I want to keep decode24 small, so I put it under the `if`/`else` block to save the assertion. luceneutil and JMH showed that it has similar performance. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org