Re: [PR] DocValuesSkipper implementation in IndexSortSorted [lucene]

via GitHub Wed, 23 Oct 2024 06:05:52 -0700


gsmiller commented on code in PR #13886:
URL: https://github.com/apache/lucene/pull/13886#discussion_r1812687585



##########
lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java:
##########
@@ -397,106 +413,80 @@ private boolean matchAll(PointValues points, byte[] 
queryLowerPoint, byte[] quer
   }
 
   private IteratorAndCount getDocIdSetIteratorOrNullFromBkd(
-      LeafReaderContext context, DocIdSetIterator delegate) throws IOException 
{
-    Sort indexSort = context.reader().getMetaData().sort();
+      LeafReader reader, NumericDocValues numericDocValues, DocValuesSkipper 
skipper)
+      throws IOException {
+    if (skipper.docCount() != reader.maxDoc()) {
+      return null;
+    }
+    final Sort indexSort = reader.getMetaData().sort();
     if (indexSort == null
         || indexSort.getSort().length == 0
         || indexSort.getSort()[0].getField().equals(field) == false) {
       return null;
     }
 
+    final int minDocID;
+    final int maxDocID;
     final boolean reverse = indexSort.getSort()[0].getReverse();
-
-    PointValues points = context.reader().getPointValues(field);

Review Comment:
   Do we still want to leverage the points index when possible instead of the 
doc values skipper? I would think it would still be preferable when it fits, 
but maybe not?



##########
lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java:
##########
@@ -20,9 +20,11 @@
 import java.util.ArrayDeque;
 import java.util.Deque;
 import java.util.Objects;
+import java.util.function.LongPredicate;
 import org.apache.lucene.document.IntPoint;

Review Comment:
   Try running `./gradlew spotlessApply` to tidy up this file (you've got a 
couple imports here that are no longer used and spotless will take care of 
fixing this for you)



##########
lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java:
##########
@@ -302,6 +304,20 @@ private static int nextDoc(
     }
   }
 
+  private static int nextDocSkipper(int startDoc, DocIdSetIterator docValues, 
LongPredicate predicate)
+      throws IOException {
+    int doc = docValues.docID();
+    if (startDoc > doc) {
+      doc = docValues.advance(startDoc);
+    }
+    for (; doc < DocIdSetIterator.NO_MORE_DOCS; doc = docValues.nextDoc()) {
+      if (predicate.test(docValues.cost())) {

Review Comment:
   I assume you want `docValues.longValue()` here not `cost()` right? (And we 
need the method signature to take `NumericDocValues` not the more general 
`DISI`).



##########
lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java:
##########
@@ -397,106 +413,80 @@ private boolean matchAll(PointValues points, byte[] 
queryLowerPoint, byte[] quer
   }
 
   private IteratorAndCount getDocIdSetIteratorOrNullFromBkd(
-      LeafReaderContext context, DocIdSetIterator delegate) throws IOException 
{
-    Sort indexSort = context.reader().getMetaData().sort();
+      LeafReader reader, NumericDocValues numericDocValues, DocValuesSkipper 
skipper)
+      throws IOException {
+    if (skipper.docCount() != reader.maxDoc()) {
+      return null;
+    }
+    final Sort indexSort = reader.getMetaData().sort();
     if (indexSort == null
         || indexSort.getSort().length == 0
         || indexSort.getSort()[0].getField().equals(field) == false) {
       return null;
     }
 
+    final int minDocID;
+    final int maxDocID;
     final boolean reverse = indexSort.getSort()[0].getReverse();
-
-    PointValues points = context.reader().getPointValues(field);
-    if (points == null) {
-      return null;
-    }
-
-    if (points.getNumDimensions() != 1) {
-      return null;
-    }
-
-    if (points.getBytesPerDimension() != Long.BYTES
-        && points.getBytesPerDimension() != Integer.BYTES) {
-      return null;
-    }
-
-    if (points.size() != points.getDocCount()) {
-      return null;
-    }
-
-    assert lowerValue <= upperValue;
-    byte[] queryLowerPoint;
-    byte[] queryUpperPoint;
-    if (points.getBytesPerDimension() == Integer.BYTES) {
-      queryLowerPoint = IntPoint.pack((int) lowerValue).bytes;
-      queryUpperPoint = IntPoint.pack((int) upperValue).bytes;
-    } else {
-      queryLowerPoint = LongPoint.pack(lowerValue).bytes;
-      queryUpperPoint = LongPoint.pack(upperValue).bytes;
-    }
-    if (matchNone(points, queryLowerPoint, queryUpperPoint)) {
-      return IteratorAndCount.empty();
-    }
-    if (matchAll(points, queryLowerPoint, queryUpperPoint)) {
-      int maxDoc = context.reader().maxDoc();
-      if (points.getDocCount() == maxDoc) {
-        return IteratorAndCount.all(maxDoc);
-      } else {
-        return IteratorAndCount.sparseRange(0, maxDoc, delegate);
-      }
-    }
-
-    int minDocId, maxDocId;
-    final ByteArrayComparator comparator =
-        ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
-
     if (reverse) {
-      minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, 
comparator, true) + 1;
-    } else {
-      minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, 
comparator, false);
-      if (minDocId == -1) {
-        // No matches
-        return IteratorAndCount.empty();
+      if (skipper.maxValue() <= upperValue) {
+        minDocID = 0;
+      } else {
+        skipper.advance(Long.MIN_VALUE, upperValue);
+        minDocID = nextDocSkipper(skipper.minDocID(0), numericDocValues, l -> 
l <= upperValue);
       }
-    }
-
-    if (reverse) {
-      maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, 
comparator, true) + 1;
-      if (maxDocId == 0) {
-        // No matches
-        return IteratorAndCount.empty();
+      if (skipper.minValue() >= lowerValue) {
+        maxDocID = skipper.docCount();
+      } else {
+        skipper.advance(Long.MIN_VALUE, lowerValue);
+        maxDocID = nextDocSkipper(skipper.minDocID(0), numericDocValues, l -> 
l < lowerValue);
       }
     } else {
-      maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, 
comparator, false);
-      if (maxDocId == -1) {
-        maxDocId = context.reader().maxDoc();
+      if (skipper.minValue() >= lowerValue) {
+        minDocID = 0;
+      } else {
+        skipper.advance(lowerValue, Long.MAX_VALUE);
+        minDocID = nextDocSkipper(skipper.minDocID(0), numericDocValues, l -> 
l >= lowerValue);
+      }
+      if (skipper.maxValue() <= upperValue) {
+        maxDocID = skipper.docCount();
+      } else {
+        skipper.advance(upperValue, Long.MAX_VALUE);
+        maxDocID = nextDocSkipper(skipper.minDocID(0), numericDocValues, l -> 
l > upperValue);
       }
     }
 
-    if (minDocId == maxDocId) {
-      return IteratorAndCount.empty();
-    }
-
-    if ((points.getDocCount() == context.reader().maxDoc())) {
-      return IteratorAndCount.denseRange(minDocId, maxDocId);
-    } else {
-      return IteratorAndCount.sparseRange(minDocId, maxDocId, delegate);
-    }
+    return minDocID == maxDocID
+        ? IteratorAndCount.empty()
+        : IteratorAndCount.denseRange(minDocID, maxDocID);
   }
 
   private IteratorAndCount getDocIdSetIteratorOrNull(LeafReaderContext 
context) throws IOException {
     if (lowerValue > upperValue) {
       return IteratorAndCount.empty();
     }
 
-    SortedNumericDocValues sortedNumericValues =
-        DocValues.getSortedNumeric(context.reader(), field);
-    NumericDocValues numericValues = 
DocValues.unwrapSingleton(sortedNumericValues);
-    if (numericValues != null) {
-      IteratorAndCount itAndCount = getDocIdSetIteratorOrNullFromBkd(context, 
numericValues);
-      if (itAndCount != null) {
-        return itAndCount;
+    DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field);
+    if (skipper != null) {
+      if (skipper.minValue() > upperValue || skipper.maxValue() < lowerValue) {
+        return null;

Review Comment:
   I don't think you want to return `null` here. The best thing to do is to 
make sure the `scorerSupplier` call actually returns `null` (indicating it 
cannot provide any hits). The way you have this implemented, a `null` return 
here will force `scorerSupplier` to not use the optimization at all.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Re: [PR] DocValuesSkipper implementation in IndexSortSorted [lucene]

Reply via email to