msokolov commented on a change in pull request #656: URL: https://github.com/apache/lucene/pull/656#discussion_r806991813
##########
File path: lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
##########
@@ -70,18 +121,125 @@ public Query rewrite(IndexReader reader) throws IOException {
  return createRewrittenQuery(reader, topK);
  }

- private TopDocs searchLeaf(LeafReaderContext ctx, int kPerLeaf) throws IOException {
- Bits liveDocs = ctx.reader().getLiveDocs();
- TopDocs results = ctx.reader().searchNearestVectors(field, target, kPerLeaf, liveDocs);
- if (results == null) {
+ private TopDocs searchLeaf(LeafReaderContext ctx, BitSetCollector filterCollector)
+ throws IOException {
+
+ if (filterCollector == null) {
+ Bits acceptDocs = ctx.reader().getLiveDocs();
+ return approximateSearch(ctx, acceptDocs, Integer.MAX_VALUE);
+ } else {
+ BitSetIterator filterIterator = filterCollector.getIterator(ctx.ord);
+ if (filterIterator == null || filterIterator.cost() == 0) {
+ return NO_RESULTS;
+ }
+
+ if (filterIterator.cost() <= k) {
+ // If there <= k possible matches, short-circuit and perform exact search, since HNSW must

Review comment:
"If there are"

##########
File path: lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
##########
@@ -58,6 +60,12 @@ protected KnnVectorsReader() {}
  * true k closest neighbors. For large values of k (for example when k is close to the total
  * number of documents), the search may also retrieve fewer than k documents.
  *
+ * <p>The returned {@link TopDocs} will contain a {@link ScoreDoc} for each nearest neighbor, in
+ * order of their similarity to the query vector (decreasing scores). The {@link TotalHits}
+ * contains the number of documents visited during the search. If the search stopped early because
+ * it hit {@code visitedLimit}, it is indicated through the relation {@code

Review comment:
Would it be enough to know that `TopDocs.totalHits.value==visitedLimit`? Do we need to use the relation as a sentinel?

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@lucene.apache.org

For queries about this service, please contact Infrastructure at: users@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@lucene.apache.org
For additional commands, e-mail: issues-help@lucene.apache.org