jfreden commented on code in PR #13036: URL: https://github.com/apache/lucene/pull/13036#discussion_r1468906825
########## lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java: ########## @@ -249,10 +249,74 @@ BulkScorer optionalBulkScorer(LeafReaderContext context) throws IOException { return optional.get(0); } + // Calculate count(clause1 OR clause2) as count(clause1) + count(clause2) - count(clause1 AND + // clause2) + if (scoreMode == ScoreMode.COMPLETE_NO_SCORES + && context.reader().hasDeletions() == false + && query.isTwoClauseDisjunctionWithTerms()) { + return twoClauseTermDisjunctionOptimizedScorer(context); + } + return new BooleanScorer( this, optional, Math.max(1, query.getMinimumNumberShouldMatch()), scoreMode.needsScores()); } + private BulkScorer twoClauseTermDisjunctionOptimizedScorer(LeafReaderContext context) + throws IOException { + List<Scorer> optionalScorers = new ArrayList<>(); + final int[] clauseDocFreqSum = new int[1]; + for (WeightedBooleanClause wc : weightedClauses) { + clauseDocFreqSum[0] += wc.weight.count(context); + ScorerSupplier scorerSupplier = wc.weight.scorerSupplier(context); + if (scorerSupplier != null) { + optionalScorers.add(scorerSupplier.get(Long.MAX_VALUE)); + } + } + + final ConjunctionBulkScorer conjunctionBulkScorer = + optionalScorers.size() == 2 ? 
new ConjunctionBulkScorer(List.of(), optionalScorers) : null; + return new BulkScorer() { + @Override + public int score(LeafCollector collector, Bits acceptDocs, int min, int max) + throws IOException { + final int[] intersectionScore = new int[1]; + LeafCollector intersectionCollector = + new LeafCollector() { + @Override + public void setScorer(Scorable scorer) {} + + @Override + public void collect(int doc) { + intersectionScore[0]++; + } + + @Override + public void collect(DocIdStream stream) throws IOException { + intersectionScore[0] += stream.count(); + } + }; + + int leadDocId = 0; + if (conjunctionBulkScorer != null) { + leadDocId = conjunctionBulkScorer.score(intersectionCollector, acceptDocs, min, max); + } + + for (int i = 1; i <= clauseDocFreqSum[0] - intersectionScore[0]; i++) { + collector.collect(i); Review Comment: Thanks for looking at this! That helps a lot. I wasn't sure how to proceed, since I couldn't come up with a nice way to do this without either modifying `IndexSearcher#count` (I felt unsure about that, since I couldn't find any similar optimizations in `IndexSearcher`) or breaking the contract with `LeafCollector` (as I ended up doing, which only works for the count case, where the doc IDs are discarded). I've pushed a change to do this in `IndexSearcher#count` instead. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org