benchaplin commented on code in PR #13984: URL: https://github.com/apache/lucene/pull/13984#discussion_r1838568093
########## lucene/core/src/java/org/apache/lucene/index/CheckIndex.java: ########## @@ -2746,6 +2769,84 @@ public static Status.VectorValuesStatus testVectors( return status; } + private static HnswGraph getHnswGraph(CodecReader reader) throws IOException { + KnnVectorsReader vectorsReader = reader.getVectorReader(); + if (vectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader) { + vectorsReader = ((PerFieldKnnVectorsFormat.FieldsReader) vectorsReader).getFieldReader("knn"); + if (vectorsReader instanceof HnswGraphProvider) { + return ((HnswGraphProvider) vectorsReader).getGraph("knn"); + } + } + return null; + } + + /** Test the HNSW graph. */ + public static Status.HnswGraphStatus testHnswGraph( + CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException { + if (infoStream != null) { + infoStream.print(" test: hnsw graph.........."); + } + long startNS = System.nanoTime(); + Status.HnswGraphStatus status = new Status.HnswGraphStatus(); + + try { + HnswGraph hnswGraph = getHnswGraph(reader); + if (hnswGraph != null) { + final int numLevels = hnswGraph.numLevels(); + // Perform tests on each level of the HNSW graph + for (int level = numLevels - 1; level >= 0; level--) { + HnswGraph.NodesIterator nodesIterator = hnswGraph.getNodesOnLevel(level); + while (nodesIterator.hasNext()) { + int node = nodesIterator.nextInt(); + hnswGraph.seek(level, node); + int nbr, lastNeighbor = -1, firstNeighbor = -1; + while ((nbr = hnswGraph.nextNeighbor()) != NO_MORE_DOCS) { + if (firstNeighbor == -1) { + firstNeighbor = nbr; + } + if (nbr < lastNeighbor) { + throw new CheckIndexException( + "Neighbors out of order for node " + + node + + ": " + + nbr + + "<" + + lastNeighbor + + " 1st=" + + firstNeighbor); + } else if (nbr == lastNeighbor) { + throw new CheckIndexException( + "There are repeated neighbors of node " + node + " with value " + nbr); + } + lastNeighbor = nbr; + } + status.hnswGraphSize++; + } + status.hsnwGraphNumLevels++; Review Comment: I could, but wanted to respect the object as a "status" - so if a check fails, `numLevels` will indicate on which level it failed. However we don't print the status if an exception is thrown. What do you think? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org