jpountz commented on code in PR #13692: URL: https://github.com/apache/lucene/pull/13692#discussion_r1734653387
########## lucene/core/src/java/org/apache/lucene/codecs/lucene912/Lucene912PostingsReader.java: ########## @@ -212,13 +213,74 @@ static void prefixSum(long[] buffer, int count, long base) { } } - static int findFirstGreater(long[] buffer, int target, int from) { - for (int i = from; i < BLOCK_SIZE; ++i) { - if (buffer[i] >= target) { - return i; + private static boolean assertDocBuffer(long[] values, int start) { + assert values.length == BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE; + assert values[BLOCK_SIZE] == DocIdSetIterator.NO_MORE_DOCS; + assert start < BLOCK_SIZE; + + int endOffset; + if (values[0] == DocIdSetIterator.NO_MORE_DOCS) { + endOffset = 0; + } else { + endOffset = -1; + for (int i = 1; i < values.length; ++i) { + assert values[i] > values[i - 1] : Arrays.toString(values); + if (values[i] == DocIdSetIterator.NO_MORE_DOCS) { + endOffset = i; + break; + } } } - return BLOCK_SIZE; + for (int i = 0; i < BINARY_SEARCH_WINDOW_SIZE; ++i) { + assert values[endOffset + i] == DocIdSetIterator.NO_MORE_DOCS; + } + return true; + } + + /** + * Return the first index in sorted array {@code buffer} whose value is greater than or equal to + * {@code target}. For correctness, it requires the last 4 values to be set to {@code + * NO_MORE_DOCS}. + */ + static int findNextGEQ(long[] values, long target, int start) { + assert assertDocBuffer(values, start); + + if (values[start] >= target) { + // Surprisingly this is a likely condition in practice, so optimizing for it helps. + return start; + } + + // We just ruled out that our target index is at `start`. + start += 1; + + // Now find the first interval of 4 values that contains our target. 
+ for (int i = start; + i + BINARY_SEARCH_WINDOW_SIZE <= values.length; + i += BINARY_SEARCH_WINDOW_SIZE) { + if (values[i + BINARY_SEARCH_WINDOW_SIZE - 1] >= target) { + start = i; + break; + } + } Review Comment: I suspect it's a matter of taste, but it's easier for me to reason about the start index of blocks than about the last valid index in a block (your version). In case efficiency is a concern, the compiler should constant-fold `BINARY_SEARCH_WINDOW_SIZE - 1` into `3`, since both operands are constants. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org