msokolov commented on code in PR #11781:
URL: https://github.com/apache/lucene/pull/11781#discussion_r974411586


##########
lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java:
##########
@@ -316,49 +316,49 @@ private boolean isDiverse(BytesRef candidate, 
NeighborArray neighbors, float sco
    */
   private int findWorstNonDiverse(NeighborArray neighbors) throws IOException {
     for (int i = neighbors.size() - 1; i > 0; i--) {
-      if (isWorstNonDiverse(i, neighbors, neighbors.score[i])) {
+      if (isWorstNonDiverse(i, neighbors)) {
         return i;
       }
     }
     return neighbors.size() - 1;
   }
 
-  private boolean isWorstNonDiverse(
-      int candidate, NeighborArray neighbors, float minAcceptedSimilarity) 
throws IOException {
+  private boolean isWorstNonDiverse(int candidateIndex, NeighborArray 
neighbors)
+      throws IOException {
+    int candidateNode = neighbors.node[candidateIndex];
     return switch (vectorEncoding) {
-      case BYTE -> isWorstNonDiverse(
-          candidate, vectors.binaryValue(candidate), neighbors, 
minAcceptedSimilarity);
+      case BYTE -> isWorstNonDiverse(candidateIndex, 
vectors.binaryValue(candidateNode), neighbors);
       case FLOAT32 -> isWorstNonDiverse(
-          candidate, vectors.vectorValue(candidate), neighbors, 
minAcceptedSimilarity);
+          candidateIndex, vectors.vectorValue(candidateNode), neighbors);
     };
   }
 
   private boolean isWorstNonDiverse(
-      int candidateIndex, float[] candidate, NeighborArray neighbors, float 
minAcceptedSimilarity)
-      throws IOException {
-    for (int i = candidateIndex - 1; i > -0; i--) {
+      int candidateIndex, float[] candidateVector, NeighborArray neighbors) 
throws IOException {
+    float minAcceptedSimilarity = neighbors.score[candidateIndex];
+    for (int i = candidateIndex - 1; i >= 0; i--) {
       float neighborSimilarity =
-          similarityFunction.compare(candidate, 
vectorsCopy.vectorValue(neighbors.node[i]));
-      // node i is too similar to node j given its score relative to the base 
node
+          similarityFunction.compare(candidateVector, 
vectorsCopy.vectorValue(neighbors.node[i]));
+      // candidate node is too similar to node i given its score relative to 
the base node
       if (neighborSimilarity >= minAcceptedSimilarity) {
-        return false;
+        return true;
       }
     }
-    return true;
+    return false;
   }
 
   private boolean isWorstNonDiverse(
-      int candidateIndex, BytesRef candidate, NeighborArray neighbors, float 
minAcceptedSimilarity)
-      throws IOException {
-    for (int i = candidateIndex - 1; i > -0; i--) {
+      int candidateIndex, BytesRef candidateVector, NeighborArray neighbors) 
throws IOException {

Review Comment:
   I know - how did this garbage even work at all! :frowning_face: It's kind of 
astonishing how insensitive this whole process is to the diversity checking. 
Initially we didn't have it at all though (just always pick the closest 
neighbors), and things still kind of work. Then I had the wonky implementation 
that did not sort the neighbors while indexing, but did some best effort kind 
of thing, and still it mostly worked. So we need good tests here to ensure we 
are doing the right thing! Because bugs here can lead to small degradation.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to