dweiss commented on code in PR #12881: URL: https://github.com/apache/lucene/pull/12881#discussion_r1417504745
########## lucene/highlighter/src/java/org/apache/lucene/search/matchhighlight/MatchRegionRetriever.java: ########## @@ -199,34 +343,95 @@ public void highlightDocument( LeafReaderContext leafReaderContext, int contextDocId, FieldValueProvider doc, - Predicate<String> acceptField, + ToIntFunction<String> maxHitsPerField, Map<String, List<OffsetRange>> outputHighlights) throws IOException { Matches matches = weight.matches(leafReaderContext, contextDocId); if (matches == null) { return; } - for (String field : affectedFields) { - if (acceptField.test(field)) { - MatchesIterator matchesIterator = matches.getMatches(field); - if (matchesIterator == null) { - // No matches on this field, even though the field was part of the query. This may be - // possible - // with complex queries that source non-text fields (have no "hit regions" in any textual - // representation). Skip. - } else { - OffsetsRetrievalStrategy offsetStrategy = offsetStrategies.get(field); - if (offsetStrategy == null) { - throw new IOException( - "Non-empty matches but no offset retrieval strategy for field: " + field); - } - List<OffsetRange> ranges = offsetStrategy.get(matchesIterator, doc); - if (!ranges.isEmpty()) { - outputHighlights.put(field, ranges); - } + for (String field : queryAffectedHighlightedFields) { + MatchesIterator matchesIterator = matches.getMatches(field); + if (matchesIterator == null) { + // No matches on this field, even though the field was part of the query. This may be + // possible + // with complex queries that source non-text fields (have no "hit regions" in any textual + // representation). Skip. + } else { + OffsetsRetrievalStrategy offsetStrategy = offsetStrategies.get(field); + if (offsetStrategy == null) { + throw new IOException( + "Non-empty matches but no offset retrieval strategy for field: " + field); } + var delegate = offsetStrategy; + + // Limit the number of hits so that we're not extracting dozens just to trim them to a few + // in the end. + final int maxHits = maxHitsPerField.applyAsInt(field); + if (maxHits != Integer.MAX_VALUE) { + offsetStrategy = + (matchesIterator1, doc1) -> + delegate.get(new MatchesIteratorWithLimit(matchesIterator1, maxHits), doc1); + } + + List<OffsetRange> ranges = offsetStrategy.get(matchesIterator, doc); + if (!ranges.isEmpty()) { + outputHighlights.put(field, ranges); + } + } + } + } + + private static class MatchesIteratorWithLimit implements MatchesIterator { Review Comment: Will correct it, thanks Alan. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org