mayya-sharipova commented on a change in pull request #267: URL: https://github.com/apache/lucene/pull/267#discussion_r700574140
########## File path: lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java ########## @@ -107,32 +113,82 @@ public static NeighborQueue search( Random random) throws IOException { int size = graphValues.size(); + int boundedNumSeed = Math.min(numSeed, 2 * size); + NeighborQueue results; + + if (graphValues.maxLevel() == 0) { + // search in SNW; generate a number of entry points randomly + final int[] eps = new int[boundedNumSeed]; + for (int i = 0; i < boundedNumSeed; i++) { + eps[i] = random.nextInt(size); + } + return searchLevel(query, topK, 0, eps, vectors, similarityFunction, graphValues, acceptOrds); + } else { + // search in hierarchical SNW + int[] eps = new int[] {graphValues.entryNode()}; + for (int level = graphValues.maxLevel(); level >= 1; level--) { + results = + HnswGraph.searchLevel( + query, 1, level, eps, vectors, similarityFunction, graphValues, acceptOrds); + eps = new int[] {results.pop()}; + } + results = + HnswGraph.searchLevel( + query, boundedNumSeed, 0, eps, vectors, similarityFunction, graphValues, acceptOrds); + while (results.size() > topK) { + results.pop(); + } + return results; + } + } + /** + * Searches for the nearest neighbors of a query vector in a given level + * + * @param query search query vector + * @param topK the number of nearest to query results to return + * @param level level to search + * @param eps the entry points for search at this level + * @param vectors vector values + * @param similarityFunction similarity function + * @param graphValues the graph values + * @param acceptOrds {@link Bits} that represents the allowed document ordinals to match, or + * {@code null} if they are all allowed to match. + * @return a priority queue holding the closest neighbors found + */ + static NeighborQueue searchLevel( + float[] query, + int topK, + int level, + final int[] eps, + RandomAccessVectorValues vectors, + VectorSimilarityFunction similarityFunction, + KnnGraphValues graphValues, + Bits acceptOrds) + throws IOException { + + int size = graphValues.size(); + int queueSize = Math.max(eps.length, topK); // MIN heap, holding the top results - NeighborQueue results = new NeighborQueue(numSeed, similarityFunction.reversed); + NeighborQueue results = new NeighborQueue(queueSize, similarityFunction.reversed); // MAX heap, from which to pull the candidate nodes - NeighborQueue candidates = new NeighborQueue(numSeed, !similarityFunction.reversed); - + NeighborQueue candidates = new NeighborQueue(queueSize, !similarityFunction.reversed); // set of ordinals that have been visited by search on this layer, used to avoid backtracking SparseFixedBitSet visited = new SparseFixedBitSet(size); - // get initial candidates at random - int boundedNumSeed = Math.min(numSeed, 2 * size); - for (int i = 0; i < boundedNumSeed; i++) { - int entryPoint = random.nextInt(size); - if (visited.get(entryPoint) == false) { - visited.set(entryPoint); - // explore the topK starting points of some random numSeed probes - float score = similarityFunction.compare(query, vectors.vectorValue(entryPoint)); - candidates.add(entryPoint, score); - if (acceptOrds == null || acceptOrds.get(entryPoint)) { - results.add(entryPoint, score); + + for (int i = 0; i < eps.length; i++) { Review comment: Good comment, addressed in 6a05951772cc72a1530f3fd863d906dc0e3bef88 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org