mayya-sharipova commented on code in PR #11781:
URL: https://github.com/apache/lucene/pull/11781#discussion_r974364476


##########
lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java:
##########
@@ -316,49 +316,49 @@ private boolean isDiverse(BytesRef candidate, 
NeighborArray neighbors, float sco
    */
   private int findWorstNonDiverse(NeighborArray neighbors) throws IOException {
     for (int i = neighbors.size() - 1; i > 0; i--) {
-      if (isWorstNonDiverse(i, neighbors, neighbors.score[i])) {
+      if (isWorstNonDiverse(i, neighbors)) {
         return i;
       }
     }
     return neighbors.size() - 1;
   }
 
-  private boolean isWorstNonDiverse(
-      int candidate, NeighborArray neighbors, float minAcceptedSimilarity) 
throws IOException {
+  private boolean isWorstNonDiverse(int candidateIndex, NeighborArray 
neighbors)
+      throws IOException {
+    int candidateNode = neighbors.node[candidateIndex];
     return switch (vectorEncoding) {
-      case BYTE -> isWorstNonDiverse(
-          candidate, vectors.binaryValue(candidate), neighbors, 
minAcceptedSimilarity);
+      case BYTE -> isWorstNonDiverse(candidateIndex, 
vectors.binaryValue(candidateNode), neighbors);
       case FLOAT32 -> isWorstNonDiverse(
-          candidate, vectors.vectorValue(candidate), neighbors, 
minAcceptedSimilarity);
+          candidateIndex, vectors.vectorValue(candidateNode), neighbors);
     };
   }
 
   private boolean isWorstNonDiverse(
-      int candidateIndex, float[] candidate, NeighborArray neighbors, float 
minAcceptedSimilarity)
-      throws IOException {
-    for (int i = candidateIndex - 1; i > -0; i--) {
+      int candidateIndex, float[] candidateVector, NeighborArray neighbors) 
throws IOException {
+    float minAcceptedSimilarity = neighbors.score[candidateIndex];
+    for (int i = candidateIndex - 1; i >= 0; i--) {
       float neighborSimilarity =
-          similarityFunction.compare(candidate, 
vectorsCopy.vectorValue(neighbors.node[i]));
-      // node i is too similar to node j given its score relative to the base 
node
+          similarityFunction.compare(candidateVector, 
vectorsCopy.vectorValue(neighbors.node[i]));
+      // candidate node is too similar to node i given its score relative to 
the base node
       if (neighborSimilarity >= minAcceptedSimilarity) {
-        return false;
+        return true;
       }
     }
-    return true;
+    return false;
   }
 
   private boolean isWorstNonDiverse(
-      int candidateIndex, BytesRef candidate, NeighborArray neighbors, float 
minAcceptedSimilarity)
-      throws IOException {
-    for (int i = candidateIndex - 1; i > -0; i--) {
+      int candidateIndex, BytesRef candidateVector, NeighborArray neighbors) 
throws IOException {

Review Comment:
   I am surprised that with this big change, we had only a small reduction in 
recall. I guess the reason could be that in our tests diversity check was 
really relevant only for small number of nodes; in majority of cases the 
algorithm just eliminated the most distant node.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to