cpoerschke commented on code in PR #13635: URL: https://github.com/apache/lucene/pull/13635#discussion_r1778419593
########## lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java: ########## @@ -608,6 +614,94 @@ public void testRandomWithFilter() throws IOException { } } + /** Tests with random vectors and a random seed. Uses RandomIndexWriter. */ + public void testRandomWithSeed() throws IOException { + int numDocs = 1000; + int dimension = atLeast(5); + int numIters = atLeast(10); + try (Directory d = newDirectory()) { + // Always use the default kNN format to have predictable behavior around when it hits + // visitedLimit. This is fine since the test targets AbstractKnnVectorQuery logic, not the kNN + // format + // implementation. + IndexWriterConfig iwc = new IndexWriterConfig().setCodec(TestUtil.getDefaultCodec()); + RandomIndexWriter w = new RandomIndexWriter(random(), d, iwc); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + if (random() + .nextBoolean()) { // Randomly skip some vectors to test the mapping from docid to + // ordinals + doc.add(getKnnVectorField("field", randomVector(dimension))); + } + doc.add(new NumericDocValuesField("tag", i)); + doc.add(new IntPoint("tag", i)); + w.addDocument(doc); + } + w.forceMerge(1); + w.close(); + + try (IndexReader reader = DirectoryReader.open(d)) { + IndexSearcher searcher = newSearcher(reader); + for (int i = 0; i < numIters; i++) { + int k = random().nextInt(80) + 1; + int n = random().nextInt(100) + 1; + + // All documents as seeds + Query seed1 = new MatchAllDocsQuery(); + AbstractKnnVectorQuery query = + getKnnVectorQuery("field", randomVector(dimension), k, null, seed1); + TopDocs results = searcher.search(query, n); + int expected = Math.min(Math.min(n, k), reader.numDocs()); + // we may get fewer results than requested if there are deletions, but this test doesn't + // test that + assert reader.hasDeletions() == false; + assertEquals(expected, results.scoreDocs.length); + assertTrue(results.totalHits.value() >= results.scoreDocs.length); + // verify the results are in descending score order + float last = Float.MAX_VALUE; + for (ScoreDoc scoreDoc : results.scoreDocs) { + assertTrue(scoreDoc.score <= last); + last = scoreDoc.score; + } + + // Restrictive seed query -- 6 documents + Query seed2 = IntPoint.newRangeQuery("tag", 1, 6); + query = getKnnVectorQuery("field", randomVector(dimension), k, null, seed2); + results = searcher.search(query, n); + expected = Math.min(Math.min(n, k), reader.numDocs()); + // we may get fewer results than requested if there are deletions, but this test doesn't + // test that + assert reader.hasDeletions() == false; + assertEquals(expected, results.scoreDocs.length); + assertTrue(results.totalHits.value() >= results.scoreDocs.length); + // verify the results are in descending score order + last = Float.MAX_VALUE; + for (ScoreDoc scoreDoc : results.scoreDocs) { + assertTrue(scoreDoc.score <= last); + last = scoreDoc.score; + } + + // No seed documents -- falls back on full approx search + Query seed3 = new BooleanQuery.Builder().build(); Review Comment: Wondering if `MatchNoDocsQuery()` might work here too? ```suggestion Query seed3 = new MatchNoDocsQuery(); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org