gsmiller commented on code in PR #13886: URL: https://github.com/apache/lucene/pull/13886#discussion_r1812687585
########## lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java: ########## @@ -397,106 +413,80 @@ private boolean matchAll(PointValues points, byte[] queryLowerPoint, byte[] quer } private IteratorAndCount getDocIdSetIteratorOrNullFromBkd( - LeafReaderContext context, DocIdSetIterator delegate) throws IOException { - Sort indexSort = context.reader().getMetaData().sort(); + LeafReader reader, NumericDocValues numericDocValues, DocValuesSkipper skipper) + throws IOException { + if (skipper.docCount() != reader.maxDoc()) { + return null; + } + final Sort indexSort = reader.getMetaData().sort(); if (indexSort == null || indexSort.getSort().length == 0 || indexSort.getSort()[0].getField().equals(field) == false) { return null; } + final int minDocID; + final int maxDocID; final boolean reverse = indexSort.getSort()[0].getReverse(); - - PointValues points = context.reader().getPointValues(field); Review Comment: Do we still want to leverage the points index when possible instead of the doc values skipper? I would think it would still be preferable when it fits, but maybe not? ########## lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java: ########## @@ -20,9 +20,11 @@ import java.util.ArrayDeque; import java.util.Deque; import java.util.Objects; +import java.util.function.LongPredicate; import org.apache.lucene.document.IntPoint; Review Comment: Try running `./gradlew spotlessApply` to tidy up this file (you've got a couple imports here that are no longer used and spotless will take care of fixing this for you) ########## lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java: ########## @@ -302,6 +304,20 @@ private static int nextDoc( } } + private static int nextDocSkipper(int startDoc, DocIdSetIterator docValues, LongPredicate predicate) + throws IOException { + int doc = docValues.docID(); + if (startDoc > doc) { + doc = docValues.advance(startDoc); + } + for (; doc < DocIdSetIterator.NO_MORE_DOCS; doc = docValues.nextDoc()) { + if (predicate.test(docValues.cost())) { Review Comment: I assume you want `docValues.longValue()` here not `cost()` right? (And we need the method signature to take `NumericDocValues` not the more general `DISI`). ########## lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java: ########## @@ -397,106 +413,80 @@ private boolean matchAll(PointValues points, byte[] queryLowerPoint, byte[] quer } private IteratorAndCount getDocIdSetIteratorOrNullFromBkd( - LeafReaderContext context, DocIdSetIterator delegate) throws IOException { - Sort indexSort = context.reader().getMetaData().sort(); + LeafReader reader, NumericDocValues numericDocValues, DocValuesSkipper skipper) + throws IOException { + if (skipper.docCount() != reader.maxDoc()) { + return null; + } + final Sort indexSort = reader.getMetaData().sort(); if (indexSort == null || indexSort.getSort().length == 0 || indexSort.getSort()[0].getField().equals(field) == false) { return null; } + final int minDocID; + final int maxDocID; final boolean reverse = indexSort.getSort()[0].getReverse(); - - PointValues points = context.reader().getPointValues(field); - if (points == null) { - return null; - } - - if (points.getNumDimensions() != 1) { - return null; - } - - if (points.getBytesPerDimension() != Long.BYTES - && points.getBytesPerDimension() != Integer.BYTES) { - return null; - } - - if (points.size() != points.getDocCount()) { - return null; - } - - assert lowerValue <= upperValue; - byte[] queryLowerPoint; - byte[] queryUpperPoint; - if (points.getBytesPerDimension() == Integer.BYTES) { - queryLowerPoint = IntPoint.pack((int) lowerValue).bytes; - queryUpperPoint = IntPoint.pack((int) upperValue).bytes; - } else { - queryLowerPoint = LongPoint.pack(lowerValue).bytes; - queryUpperPoint = LongPoint.pack(upperValue).bytes; - } - if (matchNone(points, queryLowerPoint, queryUpperPoint)) { - return IteratorAndCount.empty(); - } - if (matchAll(points, queryLowerPoint, queryUpperPoint)) { - int maxDoc = context.reader().maxDoc(); - if (points.getDocCount() == maxDoc) { - return IteratorAndCount.all(maxDoc); - } else { - return IteratorAndCount.sparseRange(0, maxDoc, delegate); - } - } - - int minDocId, maxDocId; - final ByteArrayComparator comparator = - ArrayUtil.getUnsignedComparator(points.getBytesPerDimension()); - if (reverse) { - minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, true) + 1; - } else { - minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, false); - if (minDocId == -1) { - // No matches - return IteratorAndCount.empty(); + if (skipper.maxValue() <= upperValue) { + minDocID = 0; + } else { + skipper.advance(Long.MIN_VALUE, upperValue); + minDocID = nextDocSkipper(skipper.minDocID(0), numericDocValues, l -> l <= upperValue); } - } - - if (reverse) { - maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, true) + 1; - if (maxDocId == 0) { - // No matches - return IteratorAndCount.empty(); + if (skipper.minValue() >= lowerValue) { + maxDocID = skipper.docCount(); + } else { + skipper.advance(Long.MIN_VALUE, lowerValue); + maxDocID = nextDocSkipper(skipper.minDocID(0), numericDocValues, l -> l < lowerValue); } } else { - maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, false); - if (maxDocId == -1) { - maxDocId = context.reader().maxDoc(); + if (skipper.minValue() >= lowerValue) { + minDocID = 0; + } else { + skipper.advance(lowerValue, Long.MAX_VALUE); + minDocID = nextDocSkipper(skipper.minDocID(0), numericDocValues, l -> l >= lowerValue); + } + if (skipper.maxValue() <= upperValue) { + maxDocID = skipper.docCount(); + } else { + skipper.advance(upperValue, Long.MAX_VALUE); + maxDocID = nextDocSkipper(skipper.minDocID(0), numericDocValues, l -> l > upperValue); } } - if (minDocId == maxDocId) { - return IteratorAndCount.empty(); - } - - if ((points.getDocCount() == context.reader().maxDoc())) { - return IteratorAndCount.denseRange(minDocId, maxDocId); - } else { - return IteratorAndCount.sparseRange(minDocId, maxDocId, delegate); - } + return minDocID == maxDocID + ? IteratorAndCount.empty() + : IteratorAndCount.denseRange(minDocID, maxDocID); } private IteratorAndCount getDocIdSetIteratorOrNull(LeafReaderContext context) throws IOException { if (lowerValue > upperValue) { return IteratorAndCount.empty(); } - SortedNumericDocValues sortedNumericValues = - DocValues.getSortedNumeric(context.reader(), field); - NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues); - if (numericValues != null) { - IteratorAndCount itAndCount = getDocIdSetIteratorOrNullFromBkd(context, numericValues); - if (itAndCount != null) { - return itAndCount; + DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field); + if (skipper != null) { + if (skipper.minValue() > upperValue || skipper.maxValue() < lowerValue) { + return null; Review Comment: I don't think you want to return `null` here. The best thing to do is to make sure the `scorerSupplier` call actually returns `null` (indicating it cannot provide any hits). The way you have this implemented, a `null` return here will force `scorerSupplier` to not use the optimization at all. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org