dweiss commented on code in PR #12881:
URL: https://github.com/apache/lucene/pull/12881#discussion_r1417504745


##########
lucene/highlighter/src/java/org/apache/lucene/search/matchhighlight/MatchRegionRetriever.java:
##########
@@ -199,34 +343,95 @@ public void highlightDocument(
       LeafReaderContext leafReaderContext,
       int contextDocId,
       FieldValueProvider doc,
-      Predicate<String> acceptField,
+      ToIntFunction<String> maxHitsPerField,
       Map<String, List<OffsetRange>> outputHighlights)
       throws IOException {
     Matches matches = weight.matches(leafReaderContext, contextDocId);
     if (matches == null) {
       return;
     }
 
-    for (String field : affectedFields) {
-      if (acceptField.test(field)) {
-        MatchesIterator matchesIterator = matches.getMatches(field);
-        if (matchesIterator == null) {
-          // No matches on this field, even though the field was part of the query. This may be
-          // possible
-          // with complex queries that source non-text fields (have no "hit regions" in any textual
-          // representation). Skip.
-        } else {
-          OffsetsRetrievalStrategy offsetStrategy = offsetStrategies.get(field);
-          if (offsetStrategy == null) {
-            throw new IOException(
-                "Non-empty matches but no offset retrieval strategy for field: " + field);
-          }
-          List<OffsetRange> ranges = offsetStrategy.get(matchesIterator, doc);
-          if (!ranges.isEmpty()) {
-            outputHighlights.put(field, ranges);
-          }
+    for (String field : queryAffectedHighlightedFields) {
+      MatchesIterator matchesIterator = matches.getMatches(field);
+      if (matchesIterator == null) {
+        // No matches on this field, even though the field was part of the query. This may be
+        // possible
+        // with complex queries that source non-text fields (have no "hit regions" in any textual
+        // representation). Skip.
+      } else {
+        OffsetsRetrievalStrategy offsetStrategy = offsetStrategies.get(field);
+        if (offsetStrategy == null) {
+          throw new IOException(
+              "Non-empty matches but no offset retrieval strategy for field: " + field);
         }
+        var delegate = offsetStrategy;
+
+        // Limit the number of hits so that we're not extracting dozens just to trim them to a few
+        // in the end.
+        final int maxHits = maxHitsPerField.applyAsInt(field);
+        if (maxHits != Integer.MAX_VALUE) {
+          offsetStrategy =
+              (matchesIterator1, doc1) ->
+                  delegate.get(new MatchesIteratorWithLimit(matchesIterator1, maxHits), doc1);
+        }
+
+        List<OffsetRange> ranges = offsetStrategy.get(matchesIterator, doc);
+        if (!ranges.isEmpty()) {
+          outputHighlights.put(field, ranges);
+        }
+      }
+    }
+  }
+
+  private static class MatchesIteratorWithLimit implements MatchesIterator {

Review Comment:
   Will correct it, thanks Alan.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to