msokolov commented on code in PR #13984:
URL: https://github.com/apache/lucene/pull/13984#discussion_r1840202966


##########
lucene/core/src/java/org/apache/lucene/index/CheckIndex.java:
##########
@@ -2746,6 +2781,106 @@ public static Status.VectorValuesStatus testVectors(
     return status;
   }
 
+  /** Test the HNSW graph. */
+  public static Status.HnswGraphsStatus testHnswGraphs(
+      CodecReader reader, PrintStream infoStream, boolean failFast) throws 
IOException {
+    if (infoStream != null) {
+      infoStream.print("    test: hnsw graphs.........");
+    }
+    long startNS = System.nanoTime();
+    Status.HnswGraphsStatus status = new Status.HnswGraphsStatus();
+    KnnVectorsReader vectorsReader = reader.getVectorReader();
+    FieldInfos fieldInfos = reader.getFieldInfos();
+
+    try {
+      if (fieldInfos.hasVectorValues()) {
+        for (FieldInfo fieldInfo : fieldInfos) {
+          if (fieldInfo.hasVectorValues()) {
+            if (vectorsReader instanceof 
PerFieldKnnVectorsFormat.FieldsReader) {
+              KnnVectorsReader fieldReader =
+                  ((PerFieldKnnVectorsFormat.FieldsReader) vectorsReader)
+                      .getFieldReader(fieldInfo.name);
+              if (fieldReader instanceof HnswGraphProvider) {
+                HnswGraph hnswGraph = ((HnswGraphProvider) 
fieldReader).getGraph(fieldInfo.name);
+                testHnswGraph(hnswGraph, fieldInfo.name, status);
+              }
+            }
+          }
+        }
+      }
+      StringJoiner hnswGraphResultJoiner = new StringJoiner(", ");
+      for (Map.Entry<String, Status.HnswGraphStatus> hnswGraphStatus :
+          status.hnswGraphsStatusByField.entrySet()) {
+        hnswGraphResultJoiner.add(
+            String.format(
+                Locale.ROOT,
+                "(field name: %s, levels: %d, total nodes: %d)",
+                hnswGraphStatus.getKey(),
+                hnswGraphStatus.getValue().numLevels,
+                hnswGraphStatus.getValue().totalNumNodes));
+      }
+      msg(
+          infoStream,
+          String.format(
+              Locale.ROOT,
+              "OK [%d fields%s] [took %.3f sec]",
+              status.hnswGraphsStatusByField.size(),
+              hnswGraphResultJoiner.toString().isEmpty() ? "" : ": " + 
hnswGraphResultJoiner,
+              nsToSec(System.nanoTime() - startNS)));
+    } catch (Throwable e) {
+      if (failFast) {
+        throw IOUtils.rethrowAlways(e);
+      }
+      msg(infoStream, "ERROR: " + e);
+      status.error = e;
+      if (infoStream != null) {
+        e.printStackTrace(infoStream);
+      }
+    }
+
+    return status;
+  }
+
+  private static void testHnswGraph(
+      HnswGraph hnswGraph, String fieldName, Status.HnswGraphsStatus status)
+      throws IOException, CheckIndexException {
+    if (hnswGraph != null) {
+      status.hnswGraphsStatusByField.put(fieldName, new 
Status.HnswGraphStatus());
+      final int numLevels = hnswGraph.numLevels();
+      // Perform tests on each level of the HNSW graph
+      for (int level = numLevels - 1; level >= 0; level--) {
+        HnswGraph.NodesIterator nodesIterator = 
hnswGraph.getNodesOnLevel(level);
+        while (nodesIterator.hasNext()) {
+          int node = nodesIterator.nextInt();
+          hnswGraph.seek(level, node);
+          int nbr, lastNeighbor = -1, firstNeighbor = -1;
+          while ((nbr = hnswGraph.nextNeighbor()) != NO_MORE_DOCS) {
+            if (firstNeighbor == -1) {

Review Comment:
   Perhaps at some point we could also check the neighbors for uniqueness, 
check that every neighbor is in the range [0, numGraphNodes), and, on 
levels > 0, assert that each neighbor is itself present on that level. 
But this can all be done separately; maybe we could add a comment here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to