jpountz commented on code in PR #12405: URL: https://github.com/apache/lucene/pull/12405#discussion_r1261034033
########## lucene/core/src/java/org/apache/lucene/search/TopDocs.java: ########## @@ -159,7 +159,12 @@ public MergeSortQueue(Sort sort, TopDocs[] shardHits, Comparator<ScoreDoc> tieBr reverseMul = new int[sortFields.length]; for (int compIDX = 0; compIDX < sortFields.length; compIDX++) { final SortField sortField = sortFields[compIDX]; - comparators[compIDX] = sortField.getComparator(1, compIDX == 0); + comparators[compIDX] = + sortField.getComparator( + 1, + compIDX == 0 + ? (sortFields.length > 1 ? Pruning.SKIP : Pruning.SKIP_MORE) + : Pruning.NONE); Review Comment: actually TopDocs#merge can never skip so we could use Pruning.NONE all the time here for simplicity ########## lucene/core/src/java/org/apache/lucene/search/Pruning.java: ########## @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +/** Controls {@link LeafFieldComparator} how to skip documents */ +public enum Pruning { + /** Not allowed to skip documents. */ + NONE, + /** Allowed to skip documents. */ Review Comment: Add a bit more docs about what/how docs may be skipped, e.g. ```suggestion /** Allowed to skip documents that compare strictly better than the top value, or strictly worse than the bottom value. */ ``` ########## lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java: ########## @@ -309,34 +323,100 @@ private void updateSkipInterval(boolean success) { } } - @Override - public DocIdSetIterator competitiveIterator() { - if (enableSkipping == false) return null; - return new DocIdSetIterator() { - private int docID = competitiveIterator.docID(); + private class CompetitiveIterator extends DocIdSetIterator { + + private final LeafReaderContext context; + private final int maxDoc; + private final String field; + private int doc = -1; + private DocIdSetIterator docsWithDocValue; + private DocIdSetIterator docsWithPoint; + private final byte[] minPackedValue; + private final byte[] maxPackedValue; + private final boolean skipWithDocValues; + + CompetitiveIterator( + LeafReaderContext context, + String field, + boolean skipWithDocValues, + byte[] minPackedValue, + byte[] maxPackedValue) { + this.context = context; + this.maxDoc = context.reader().maxDoc(); + this.field = field; + this.skipWithDocValues = skipWithDocValues; + this.minPackedValue = minPackedValue; + this.maxPackedValue = maxPackedValue; + } - @Override - public int nextDoc() throws IOException { - return advance(docID + 1); - } + @Override + public int docID() { + return doc; + } - @Override - public int docID() { - return docID; - } + @Override + public int nextDoc() throws IOException { + return advance(docID() + 1); + } - @Override - public long cost() { - return competitiveIterator.cost(); + @Override + public int advance(int target) throws IOException { + if (target >= maxDoc) { + return doc = NO_MORE_DOCS; + } else if (docsWithPoint != null) { + assert hitsThresholdReached == true; + return doc = docsWithPoint.advance(target); + } else if (docsWithDocValue != null) { + assert hitsThresholdReached == true; + return doc = docsWithDocValue.advance(target); + } else { + return doc = target; } + } - @Override - public int advance(int target) throws IOException { - return docID = competitiveIterator.advance(target); + @Override + public long cost() { + return context.reader().maxDoc(); + } + + private void setDocsWithDocValue() throws IOException { + // if dense == true, all documents have docValues, no sense to skip by docValues + // docsWithDocValue need only init once + // if missing values are always competitive, we can never skip via doc values + if (skipWithDocValues + && docsWithDocValue == null + && isMissingValueNotCompetitive(minPackedValue, maxPackedValue)) { + this.docsWithDocValue = getNumericDocValues(context, field); } - }; + } + + private void updateDocsWithPoint(DocIdSetIterator iterator) { + this.docsWithPoint = iterator; + } + + private long docsWithPointCost() { + return docsWithPoint.cost(); + } } + @Override + public DocIdSetIterator competitiveIterator() { + return competitiveIterator; + } + + /** + * if reverse == true, missing value is non-competitive when it less than minPackedValue, if + * reverse == false, missing value is non-competitive when it great than maxPackedValue + */ + protected abstract boolean isMissingValueNotCompetitive( + byte[] minPackedValue, byte[] maxPackedValue); Review Comment: I don't think we need this anymore? It should alreay be covered by the changes that you made to `isMissingValueCompetitive()`? ########## lucene/core/src/java/org/apache/lucene/search/comparators/DoubleComparator.java: ########## @@ -61,8 +63,12 @@ public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws I /** Leaf comparator for {@link DoubleComparator} that provides skipping functionality */ public class DoubleLeafComparator extends NumericLeafComparator { + private final byte[] deltaOne; + public DoubleLeafComparator(LeafReaderContext context) throws IOException { super(context); + deltaOne = new byte[Double.BYTES]; + DoublePoint.encodeDimension(1d, deltaOne, 0); Review Comment: This doesn't look correct to me. E.g. if the bottom value is 4 and the sort is ascending, this would mistakenly ignore 3.5. We need to go to the next double, which can be done by subtracting 1 to the encoded representation: ```suggestion deltaOne[deltaOne.length-1] = 1; ``` ########## lucene/core/src/java/org/apache/lucene/search/comparators/IntComparator.java: ########## @@ -98,19 +99,30 @@ public void copy(int slot, int doc) throws IOException { @Override protected boolean isMissingValueCompetitive() { int result = Integer.compare(missingValue, bottom); - // in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom, - // in asc sort missingValue is competitive when it's smaller or equal to bottom - return reverse ? (result >= 0) : (result <= 0); + return reverse + ? (pruning == Pruning.SKIP_MORE ? result > 0 : result >= 0) + : (pruning == Pruning.SKIP_MORE ? result < 0 : result <= 0); } @Override protected void encodeBottom(byte[] packedValue) { - IntPoint.encodeDimension(bottom, packedValue, 0); + if (pruning == Pruning.SKIP_MORE) { + IntPoint.encodeDimension(reverse ? bottom + 1 : bottom - 1, packedValue, 0); + } else { + IntPoint.encodeDimension(bottom, packedValue, 0); + } Review Comment: Nit: I would prefer this +1/-1 logic to be in `NumericComparator` and make `encoreBottom` just encode the bottom value. Also note that we need to handle overflows, e.g. `bottom` could be Integer.MIN_VALUE already, so subtracting 1 would underflow. When this happens, it means that there wouldn't be any competitive hits anymore. ########## lucene/core/src/java/org/apache/lucene/search/Pruning.java: ########## @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +/** Controls {@link LeafFieldComparator} how to skip documents */ +public enum Pruning { + /** Not allowed to skip documents. */ + NONE, + /** Allowed to skip documents. */ + SKIP, + /** + * Allowed to skip documents. {@link LeafFieldComparator} is the only comparator so that could do Review Comment: Likewise here, something like: `Allowed to skip documents that compare strictly better than the top value, or worse than or equal to the bottom value.` I would like to have a better name than `SKIP_MORE`, which doesn't tell much about what more may be skipped. I'm not completely satisfied with my initial proposal but I think it was still a bit better? Pruning.NONE, Pruning.GREATER_THAN (replaces SKIP) and Pruning.GREATER_THAN_OR_EQUAL_TO (replaces SKIP_MORE)? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org