msokolov commented on a change in pull request #416: URL: https://github.com/apache/lucene/pull/416#discussion_r752476787
########## File path: lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java ########## @@ -56,31 +59,50 @@ public final class HnswGraph extends KnnGraphValues { private final int maxConn; + private int numLevels; // the current number of levels in the graph + private int entryNode; // the current graph entry node on the top level - // Each entry lists the top maxConn neighbors of a node. The nodes correspond to vectors added to - // HnswBuilder, and the - // node values are the ordinals of those vectors. - private final List<NeighborArray> graph; + // Nodes by level expressed as the level 0's nodes' ordinals. + // As level 0 contains all nodes, nodesByLevel.get(0) is null. + private final List<int[]> nodesByLevel; + + // graph is a list of graph levels. + // Each level is represented as List<NeighborArray> – nodes' connections on this level. + // Each entry in the list has the top maxConn neighbors of a node. The nodes correspond to vectors + // added to HnswBuilder, and the node values are the ordinals of those vectors. + // Thus, on all levels, neighbors expressed as the level 0's nodes' ordinals. + private final List<List<NeighborArray>> graph; // KnnGraphValues iterator members private int upto; private NeighborArray cur; - HnswGraph(int maxConn) { - graph = new ArrayList<>(); - // Typically with diversity criteria we see nodes not fully occupied; average fanout seems to be - // about 1/2 maxConn. There is some indexing time penalty for under-allocating, but saves RAM - graph.add(new NeighborArray(Math.max(32, maxConn / 4))); + HnswGraph(int maxConn, int levelOfFirstNode) { this.maxConn = maxConn; + this.numLevels = levelOfFirstNode + 1; + this.graph = new ArrayList<>(numLevels); + this.entryNode = 0; + for (int i = 0; i < numLevels; i++) { + graph.add(new ArrayList<>()); + // Typically with diversity criteria we see nodes not fully occupied; + // average fanout seems to be about 1/2 maxConn. + // There is some indexing time penalty for under-allocating, but saves RAM + graph.get(i).add(new NeighborArray(Math.max(32, maxConn / 4))); Review comment: heh, this makes no sense at all! Git-spelunking, it looks like I added this fancy dancing when adding the diversity criteria, but overlooked `addNode` - which does the majority of the allocation! It could be worth taking a lok to see if we can cheaply save some RAM -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org