Re: [PR] DocValuesSkipper implementation in IndexSortSorted [lucene]

via GitHub Fri, 01 Nov 2024 12:51:29 -0700


gsmiller commented on code in PR #13886:
URL: https://github.com/apache/lucene/pull/13886#discussion_r1826238079



##########
lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java:
##########
@@ -397,106 +413,80 @@ private boolean matchAll(PointValues points, byte[] 
queryLowerPoint, byte[] quer
   }
 
   private IteratorAndCount getDocIdSetIteratorOrNullFromBkd(
-      LeafReaderContext context, DocIdSetIterator delegate) throws IOException 
{
-    Sort indexSort = context.reader().getMetaData().sort();
+      LeafReader reader, NumericDocValues numericDocValues, DocValuesSkipper 
skipper)
+      throws IOException {
+    if (skipper.docCount() != reader.maxDoc()) {
+      return null;
+    }
+    final Sort indexSort = reader.getMetaData().sort();
     if (indexSort == null
         || indexSort.getSort().length == 0
         || indexSort.getSort()[0].getField().equals(field) == false) {
       return null;
     }
 
+    final int minDocID;
+    final int maxDocID;
     final boolean reverse = indexSort.getSort()[0].getReverse();
-
-    PointValues points = context.reader().getPointValues(field);
-    if (points == null) {
-      return null;
-    }
-
-    if (points.getNumDimensions() != 1) {
-      return null;
-    }
-
-    if (points.getBytesPerDimension() != Long.BYTES
-        && points.getBytesPerDimension() != Integer.BYTES) {
-      return null;
-    }
-
-    if (points.size() != points.getDocCount()) {
-      return null;
-    }
-
-    assert lowerValue <= upperValue;
-    byte[] queryLowerPoint;
-    byte[] queryUpperPoint;
-    if (points.getBytesPerDimension() == Integer.BYTES) {
-      queryLowerPoint = IntPoint.pack((int) lowerValue).bytes;
-      queryUpperPoint = IntPoint.pack((int) upperValue).bytes;
-    } else {
-      queryLowerPoint = LongPoint.pack(lowerValue).bytes;
-      queryUpperPoint = LongPoint.pack(upperValue).bytes;
-    }
-    if (matchNone(points, queryLowerPoint, queryUpperPoint)) {
-      return IteratorAndCount.empty();
-    }
-    if (matchAll(points, queryLowerPoint, queryUpperPoint)) {
-      int maxDoc = context.reader().maxDoc();
-      if (points.getDocCount() == maxDoc) {
-        return IteratorAndCount.all(maxDoc);
-      } else {
-        return IteratorAndCount.sparseRange(0, maxDoc, delegate);
-      }
-    }
-
-    int minDocId, maxDocId;
-    final ByteArrayComparator comparator =
-        ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
-
     if (reverse) {
-      minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, 
comparator, true) + 1;
-    } else {
-      minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, 
comparator, false);
-      if (minDocId == -1) {
-        // No matches
-        return IteratorAndCount.empty();
+      if (skipper.maxValue() <= upperValue) {
+        minDocID = 0;
+      } else {
+        skipper.advance(Long.MIN_VALUE, upperValue);
+        minDocID = nextDocSkipper(skipper.minDocID(0), numericDocValues, l -> 
l <= upperValue);
       }
-    }
-
-    if (reverse) {
-      maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, 
comparator, true) + 1;
-      if (maxDocId == 0) {
-        // No matches
-        return IteratorAndCount.empty();
+      if (skipper.minValue() >= lowerValue) {
+        maxDocID = skipper.docCount();
+      } else {
+        skipper.advance(Long.MIN_VALUE, lowerValue);
+        maxDocID = nextDocSkipper(skipper.minDocID(0), numericDocValues, l -> 
l < lowerValue);
       }
     } else {
-      maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, 
comparator, false);
-      if (maxDocId == -1) {
-        maxDocId = context.reader().maxDoc();
+      if (skipper.minValue() >= lowerValue) {
+        minDocID = 0;
+      } else {
+        skipper.advance(lowerValue, Long.MAX_VALUE);
+        minDocID = nextDocSkipper(skipper.minDocID(0), numericDocValues, l -> 
l >= lowerValue);
+      }
+      if (skipper.maxValue() <= upperValue) {
+        maxDocID = skipper.docCount();
+      } else {
+        skipper.advance(upperValue, Long.MAX_VALUE);
+        maxDocID = nextDocSkipper(skipper.minDocID(0), numericDocValues, l -> 
l > upperValue);
       }
     }
 
-    if (minDocId == maxDocId) {
-      return IteratorAndCount.empty();
-    }
-
-    if ((points.getDocCount() == context.reader().maxDoc())) {
-      return IteratorAndCount.denseRange(minDocId, maxDocId);
-    } else {
-      return IteratorAndCount.sparseRange(minDocId, maxDocId, delegate);
-    }
+    return minDocID == maxDocID
+        ? IteratorAndCount.empty()
+        : IteratorAndCount.denseRange(minDocID, maxDocID);
   }
 
   private IteratorAndCount getDocIdSetIteratorOrNull(LeafReaderContext 
context) throws IOException {
     if (lowerValue > upperValue) {
       return IteratorAndCount.empty();
     }
 
-    SortedNumericDocValues sortedNumericValues =
-        DocValues.getSortedNumeric(context.reader(), field);
-    NumericDocValues numericValues = 
DocValues.unwrapSingleton(sortedNumericValues);
-    if (numericValues != null) {
-      IteratorAndCount itAndCount = getDocIdSetIteratorOrNullFromBkd(context, 
numericValues);
-      if (itAndCount != null) {
-        return itAndCount;
+    DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field);
+    if (skipper != null) {
+      if (skipper.minValue() > upperValue || skipper.maxValue() < lowerValue) {
+        return null;

Review Comment:
   This isn't quite what I mean. The optimization you're looking for is to have 
the `#scoreSupplier` method itself return a `null` instance when you know there 
can be no matching hits in the given leaf. In this specific situation, if you 
_do_ have a skipper and you learn that the `skipper.minValue() > upperValue || 
skipper.maxValue() < lowerValue` condition holds `true`, then you want to find 
a way to return a `null` `ScoreSupplier` instance (not act as if there is no 
skipper). So I think this means trying to load your `skipper` earlier in the 
call stack—within the `#scorerSupplier` method itself—and doing the check 
there. Does this make sense?
   
   Or, actually, on second thought, maybe the easiest thing to do is to keep 
the skipper loading where you have it but just explicitly return `return 
IteratorAndCount.empty();`  if `skipper.minValue() > upperValue || 
skipper.maxValue() < lowerValue` is `true`. That's probably just as good and 
keeps the code a little simpler. Maybe that's the best thing...



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] DocValuesSkipper implementation in IndexSortSorted [lucene]

Reply via email to