cpoerschke commented on code in PR #13635: URL: https://github.com/apache/lucene/pull/13635#discussion_r1778412763
##########
lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java:
##########
@@ -608,6 +614,94 @@ public void testRandomWithFilter() throws IOException {
     }
   }
 
+  /** Tests with random vectors and a random seed. Uses RandomIndexWriter. */
+  public void testRandomWithSeed() throws IOException {
+    int numDocs = 1000;
+    int dimension = atLeast(5);
+    int numIters = atLeast(10);
+    try (Directory d = newDirectory()) {
+      // Always use the default kNN format to have predictable behavior around when it hits
+      // visitedLimit. This is fine since the test targets AbstractKnnVectorQuery logic, not the kNN
+      // format
+      // implementation.
+      IndexWriterConfig iwc = new IndexWriterConfig().setCodec(TestUtil.getDefaultCodec());
+      RandomIndexWriter w = new RandomIndexWriter(random(), d, iwc);
+      for (int i = 0; i < numDocs; i++) {
+        Document doc = new Document();
+        if (random()
+            .nextBoolean()) { // Randomly skip some vectors to test the mapping from docid to
+          // ordinals
+          doc.add(getKnnVectorField("field", randomVector(dimension)));
+        }
+        doc.add(new NumericDocValuesField("tag", i));
+        doc.add(new IntPoint("tag", i));
+        w.addDocument(doc);
+      }
+      w.forceMerge(1);
+      w.close();
+
+      try (IndexReader reader = DirectoryReader.open(d)) {
+        IndexSearcher searcher = newSearcher(reader);
+        for (int i = 0; i < numIters; i++) {
+          int k = random().nextInt(80) + 1;
+          int n = random().nextInt(100) + 1;
+
+          // All documents as seeds
+          Query seed1 = new MatchAllDocsQuery();
+          AbstractKnnVectorQuery query =
+              getKnnVectorQuery("field", randomVector(dimension), k, null, seed1);
+          TopDocs results = searcher.search(query, n);
+          int expected = Math.min(Math.min(n, k), reader.numDocs());

Review Comment:
   Instead of `reader.numDocs()`, should this be the number of docs that have the vector field?

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org