[GitHub] [lucene] LuXugang commented on a diff in pull request #12405: Skip docs with Docvalues in NumericLeafComparator

via GitHub Thu, 27 Jul 2023 00:49:38 -0700


LuXugang commented on code in PR #12405:
URL: https://github.com/apache/lucene/pull/12405#discussion_r1275868914



##########
lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java:
##########
@@ -329,87 +324,65 @@ private void updateSkipInterval(boolean success) {
      * value is 5, we will use a range on [MIN_VALUE, 4].
      */
     private void encodeBottom() {
-      if (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO && isBottomMinOrMax() == 
false) {
-        byte[] bottom = new byte[bytesCount];
-        encodeBottom(bottom);
-        if (reverse == false) {
-          NumericUtils.subtract(bytesCount, 0, bottom, deltaOne, 
maxValueAsBytes);
-        } else {
-          NumericUtils.add(bytesCount, 0, bottom, deltaOne, minValueAsBytes);
+      if (reverse == false) {
+        encodeBottom(maxValueAsBytes);
+        if (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO) {
+          NumericUtils.nextDown(maxValueAsBytes);
         }
       } else {
-        encodeBottom(reverse == false ? maxValueAsBytes : minValueAsBytes);
+        encodeBottom(minValueAsBytes);
+        if (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO) {
+          NumericUtils.nextUp(minValueAsBytes);
+        }
       }
     }
 
-    private class CompetitiveIterator extends DocIdSetIterator {
-
-      private final LeafReaderContext context;
-      private final int maxDoc;
-      private final String field;
-      private int doc = -1;
-      private DocIdSetIterator docsWithDocValue;
-      private DocIdSetIterator docsWithPoint;
-      private final boolean skipWithDocValues;
-
-      CompetitiveIterator(LeafReaderContext context, String field, boolean 
skipWithDocValues) {
-        this.context = context;
-        this.maxDoc = context.reader().maxDoc();
-        this.field = field;
-        this.skipWithDocValues = skipWithDocValues;
-      }
-
-      @Override
-      public int docID() {
-        return doc;
-      }
-
-      @Override
-      public int nextDoc() throws IOException {
-        return advance(docID() + 1);
-      }
-
-      @Override
-      public int advance(int target) throws IOException {
-        if (target >= maxDoc) {
-          return doc = NO_MORE_DOCS;
-        } else if (docsWithPoint != null) {
-          assert hitsThresholdReached == true;
-          return doc = docsWithPoint.advance(target);
-        } else if (docsWithDocValue != null) {
-          assert hitsThresholdReached == true;
-          return doc = docsWithDocValue.advance(target);
-        } else {
-          return doc = target;
+    /**
+     * If {@link NumericComparator#pruning} equals {@link 
Pruning#GREATER_THAN_OR_EQUAL_TO}, we
+     * could better tune the {@link 
NumericLeafComparator#maxValueAsBytes}/{@link
+     * NumericLeafComparator#minValueAsBytes}. For instance, if the sort is 
ascending and top value
+     * is 3, we will use a range on [4, MAX_VALUE].
+     */
+    private void encodeTop() {
+      if (reverse == false) {
+        encodeTop(minValueAsBytes);
+        if (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO) {

Review Comment:
   Top values could still be competitive when the queue is not full. Instead: 
   `if (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO && queueFull)`
   
   See this test case:
   ```java
   public void testSortOptimizationEqualValues1() throws IOException{
       final Directory dir = newDirectory();
       IndexWriterConfig config = new 
IndexWriterConfig().setCodec(TestUtil.getDefaultCodec());
       final IndexWriter writer = new IndexWriter(dir, config);
       final int numDocs = atLeast(TEST_NIGHTLY ? 50_000 : 10_000);
       for (int i = 1; i <= numDocs; ++i) {
         final Document doc = new Document();
         doc.add(new NumericDocValuesField("my_field1", 100)); // all docs have 
the same value of my_field1
         doc.add(new IntPoint("my_field1", 100));
         writer.addDocument(doc);
       }
       writer.flush();
       final IndexReader reader = DirectoryReader.open(writer);
       writer.close();
       IndexSearcher searcher = newSearcher(reader, random().nextBoolean(), 
random().nextBoolean(), false);
       final int numHits = 3;
       final int totalHitsThreshold = 3;
       final int afterValue = 100;
       final int afterDocID = 10 + random().nextInt(1000);
       final SortField sortField = new SortField("my_field1", 
SortField.Type.INT);
       final Sort sort = new Sort(sortField);
       FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Integer[] 
{afterValue});
       CollectorManager<TopFieldCollector, TopFieldDocs> manager = 
TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold);
       TopDocs topDocs1 = searcher.search(new MatchAllDocsQuery(), manager);
       assertEquals(topDocs1.scoreDocs.length, numHits);
   
       // without optimization
       sortField.setOptimizeSortWithPoints(false);
       manager = TopFieldCollector.createSharedManager(sort, numHits, after, 
totalHitsThreshold);
       TopDocs topDocs2 = searcher.search(new MatchAllDocsQuery(), manager);
       assertEquals(topDocs1.scoreDocs.length, topDocs2.scoreDocs.length);
       reader.close();
       dir.close();
     }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

[GitHub] [lucene] LuXugang commented on a diff in pull request #12405: Skip docs with Docvalues in NumericLeafComparator

Reply via email to