dweiss commented on code in PR #12881: URL: https://github.com/apache/lucene/pull/12881#discussion_r1417931314
########## lucene/highlighter/src/java/org/apache/lucene/search/matchhighlight/MatchRegionRetriever.java: ########## @@ -53,99 +58,173 @@ public class MatchRegionRetriever { private final List<LeafReaderContext> leaves; private final Weight weight; - private final TreeSet<String> affectedFields; private final Map<String, OffsetsRetrievalStrategy> offsetStrategies; - private final Set<String> preloadFields; + private final TreeSet<String> queryAffectedHighlightedFields; + private final Predicate<String> shouldLoadStoredField; + private final IndexSearcher searcher; /** - * A callback for accepting a single document (and its associated leaf reader, leaf document ID) - * and its match offset ranges, as indicated by the {@link Matches} interface retrieved for the - * query. + * A callback invoked for each document selected by the query. The callback receives a list of hit + * ranges, document field value accessor, the leaf reader and document ID of the document. */ @FunctionalInterface public interface MatchOffsetsConsumer { + /** + * @param docId Document id (global). + * @param leafReader Document's {@link LeafReader}. + * @param leafDocId Document id (within the {@link LeafReader}). + * @param fieldValueProvider Access to preloaded document fields. See the {@link + * MatchRegionRetriever#MatchRegionRetriever(IndexSearcher, Query, + * OffsetsRetrievalStrategySupplier, Predicate, Predicate)} constructor's documentation for + * guidelines on which fields are available through this interface. + * @param hits A map of field names and offset ranges with query hits. + */ void accept( - int docId, LeafReader leafReader, int leafDocId, Map<String, List<OffsetRange>> hits) + int docId, + LeafReader leafReader, + int leafDocId, + FieldValueProvider fieldValueProvider, + Map<String, List<OffsetRange>> hits) throws IOException; } /** - * An abstraction that provides document values for a given field. The default implementation just - * reaches to a preloaded {@link Document}. It is possible to write a more efficient - * implementation on top of a reusable character buffer (that reuses the buffer while retrieving - * hit regions for documents). + * Access to field values of the highlighted document. See the {@link + * MatchRegionRetriever#MatchRegionRetriever(IndexSearcher, Query, + * OffsetsRetrievalStrategySupplier, Predicate, Predicate)} constructor's documentation for + * guidelines on which fields are available through this interface. */ - @FunctionalInterface - public interface FieldValueProvider { - List<CharSequence> getValues(String field) throws IOException; + public interface FieldValueProvider extends Iterable<String> { + /** + * @return Return a list of values for the provided field name or {@code null} if the field is + * not loaded or does not exist for the field. + */ + List<String> getValues(String field); } /** - * A constructor with the default offset strategy supplier. + * This constructor uses the default offset strategy supplier from {@link + * #computeOffsetRetrievalStrategies(IndexReader, Analyzer)}. * + * @param searcher The {@link IndexSearcher} to use for executing the query. Review Comment: Added more documentation, hopefully the wording is clear. These are rather low-level utilities but they're very helpful in getting better highlighting performance. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org