benwtrent commented on code in PR #12910: URL: https://github.com/apache/lucene/pull/12910#discussion_r1422799676
########## lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java: ########## @@ -201,9 +225,69 @@ private int descSortFindRightMostInsertionPoint(float newScore, int bound) { int end = bound - 1; while (start <= end) { int mid = (start + end) / 2; - if (score[mid] < newScore) end = mid - 1; + if (scores[mid] < newScore) end = mid - 1; else start = mid + 1; } return start; } + + /** + * Find first non-diverse neighbour among the list of neighbors starting from the most distant + * neighbours + */ + private int findWorstNonDiverse(int nodeOrd, RandomVectorScorerSupplier scorerSupplier) + throws IOException { + RandomVectorScorer scorer = scorerSupplier.scorer(nodeOrd); + int[] uncheckedIndexes = sort(scorer); + if (uncheckedIndexes == null) { + // all nodes are checked, we will directly return the most distant one + return size - 1; + } Review Comment: This still doesn't make intuitive sense to me. Though its just copying what was done already. "checked" here indicates if the nodes are sorted or not. But this implies sorting only ever occurs when we check for diversity and that the worst diverse neighbor is also the furtherest one. This doesn't seem correct to me. But, that is a discussion for a separate time. ########## lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java: ########## @@ -34,14 +34,14 @@ public class NeighborArray { private final boolean scoresDescOrder; private int size; - float[] score; - int[] node; + private float[] scores; + private int[] nodes; Review Comment: If we are disallowing array size growth, we need to make these `final`. ########## lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java: ########## @@ -51,45 +51,61 @@ public NeighborArray(int maxSize, boolean descOrder) { */ public void addInOrder(int newNode, float newScore) { assert size == sortedNodeSize : "cannot call addInOrder after addOutOfOrder"; - if (size == node.length) { - node = ArrayUtil.grow(node); - score = ArrayUtil.growExact(score, node.length); + if (size == nodes.length) { + throw new IllegalStateException("No growth is allowed"); } if (size > 0) { - float previousScore = score[size - 1]; + float previousScore = scores[size - 1]; assert ((scoresDescOrder && (previousScore >= newScore)) || (scoresDescOrder == false && (previousScore <= newScore))) : "Nodes are added in the incorrect order! Comparing " + newScore + " to " - + Arrays.toString(ArrayUtil.copyOfSubArray(score, 0, size)); + + Arrays.toString(ArrayUtil.copyOfSubArray(scores, 0, size)); } - node[size] = newNode; - score[size] = newScore; + nodes[size] = newNode; + scores[size] = newScore; ++size; ++sortedNodeSize; } /** Add node and newScore but do not insert as sorted */ public void addOutOfOrder(int newNode, float newScore) { - if (size == node.length) { - node = ArrayUtil.grow(node); - score = ArrayUtil.growExact(score, node.length); + if (size == nodes.length) { + throw new IllegalStateException("No growth is allowed"); } - score[size] = newScore; - node[size] = newNode; + scores[size] = newScore; + nodes[size] = newNode; size++; } + /** + * In addition to {@link #addOutOfOrder(int, float)}, this function will also remove the + * least-diverse node if the node array is full after insertion + * + * @param nodeId node Id of the owner of this NeighbourArray Review Comment: Maybe add something about how this isn't threadsafe? ########## lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java: ########## @@ -51,45 +51,61 @@ public NeighborArray(int maxSize, boolean descOrder) { */ public void addInOrder(int newNode, float newScore) { assert size == sortedNodeSize : "cannot call addInOrder after addOutOfOrder"; - if (size == node.length) { - node = ArrayUtil.grow(node); - score = ArrayUtil.growExact(score, node.length); + if (size == nodes.length) { + throw new IllegalStateException("No growth is allowed"); } if (size > 0) { - float previousScore = score[size - 1]; + float previousScore = scores[size - 1]; assert ((scoresDescOrder && (previousScore >= newScore)) || (scoresDescOrder == false && (previousScore <= newScore))) : "Nodes are added in the incorrect order! Comparing " + newScore + " to " - + Arrays.toString(ArrayUtil.copyOfSubArray(score, 0, size)); + + Arrays.toString(ArrayUtil.copyOfSubArray(scores, 0, size)); } - node[size] = newNode; - score[size] = newScore; + nodes[size] = newNode; + scores[size] = newScore; ++size; ++sortedNodeSize; } /** Add node and newScore but do not insert as sorted */ public void addOutOfOrder(int newNode, float newScore) { - if (size == node.length) { - node = ArrayUtil.grow(node); - score = ArrayUtil.growExact(score, node.length); + if (size == nodes.length) { + throw new IllegalStateException("No growth is allowed"); } - score[size] = newScore; - node[size] = newNode; + scores[size] = newScore; + nodes[size] = newNode; size++; } + /** + * In addition to {@link #addOutOfOrder(int, float)}, this function will also remove the + * least-diverse node if the node array is full after insertion + * + * @param nodeId node Id of the owner of this NeighbourArray + */ + public void addAndEnsureDiversity( Review Comment: package private? ########## lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java: ########## @@ -51,45 +51,61 @@ public NeighborArray(int maxSize, boolean descOrder) { */ public void addInOrder(int newNode, float newScore) { assert size == sortedNodeSize : "cannot call addInOrder after addOutOfOrder"; - if (size == node.length) { - node = ArrayUtil.grow(node); - score = ArrayUtil.growExact(score, node.length); + if (size == nodes.length) { + throw new IllegalStateException("No growth is allowed"); } if (size > 0) { - float previousScore = score[size - 1]; + float previousScore = scores[size - 1]; assert ((scoresDescOrder && (previousScore >= newScore)) || (scoresDescOrder == false && (previousScore <= newScore))) : "Nodes are added in the incorrect order! Comparing " + newScore + " to " - + Arrays.toString(ArrayUtil.copyOfSubArray(score, 0, size)); + + Arrays.toString(ArrayUtil.copyOfSubArray(scores, 0, size)); } - node[size] = newNode; - score[size] = newScore; + nodes[size] = newNode; + scores[size] = newScore; ++size; ++sortedNodeSize; } /** Add node and newScore but do not insert as sorted */ public void addOutOfOrder(int newNode, float newScore) { - if (size == node.length) { Review Comment: `addOutOfOrder` can be package private at least? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org