benwtrent commented on code in PR #13910:
URL: https://github.com/apache/lucene/pull/13910#discussion_r1803130185


##########
lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java:
##########
@@ -1906,4 +1916,122 @@ public void testMismatchedFields() throws Exception {
 
     IOUtils.close(reader, w2, dir1, dir2);
   }
+
+  /**
+   * Test that the query is a viable approximation to exact search. This test 
is designed to uncover
+   * gross failures only, not to represent the true expected recall.
+   */
+  public void testRecall() throws IOException {
+    VectorSimilarityFunction vectorSimilarityFunction = 
VectorSimilarityFunction.EUCLIDEAN;
+    int dim = 16;
+    try (Directory indexStore = getKnownIndexStore("field", dim, 
vectorSimilarityFunction);
+        IndexReader reader = DirectoryReader.open(indexStore)) {
+      IndexSearcher searcher = newSearcher(reader);
+      float[] queryEmbedding = new float[dim];
+      String queryString = "Apache License";
+      computeLineEmbedding(queryString, queryEmbedding);
+      // computeLineEmbedding("   END OF TERMS AND CONDITIONS", 
queryEmbedding);
+      // pass match-all "filter" to force full traversal, bypassing graph
+      KnnFloatVectorQuery exactQuery =
+          new KnnFloatVectorQuery("field", queryEmbedding, 1000, new 
MatchAllDocsQuery());
+      // indexed 421 lines from LICENSE.txt
+      // indexed 157 lines from NOTICE.txt
+      assertEquals(578, searcher.count(exactQuery)); // Same for exact search
+      KnnFloatVectorQuery query = new KnnFloatVectorQuery("field", 
queryEmbedding, 10);
+      assertEquals(10, searcher.count(query)); // Expect some results without 
timeout
+      TopDocs results = searcher.search(query, 10);
+      Set<Integer> resultDocs = new HashSet<>();
+      for (ScoreDoc scoreDoc : results.scoreDocs) {
+        /*
+        System.out.println(
+            "result " + i++ + ": " + 
reader.storedFields().document(scoreDoc.doc) + " " + scoreDoc);
+        */
+        resultDocs.add(scoreDoc.doc);
+      }
+      TopDocs expected = searcher.search(exactQuery, 10);
+      // int i = 0;
+      int recalled = 0;
+      for (ScoreDoc scoreDoc : expected.scoreDocs) {
+        /*
+        System.out.println(
+            "expected "
+                + i++
+                + ": "
+                + reader.storedFields().document(scoreDoc.doc)
+                + " "
+                + scoreDoc);
+        */
+        if (resultDocs.contains(scoreDoc.doc)) {
+          ++recalled;
+        }
+      }
+      assertTrue("recall should be at least 5/10, got " + recalled, recalled 
>= 5);

Review Comment:
   I think it would be really nice to have an `assertAvgRecall` method in the base class that subclasses can override.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to