mayya-sharipova commented on a change in pull request #315: URL: https://github.com/apache/lucene/pull/315#discussion_r717053652
########## File path: lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java ########## @@ -77,13 +80,23 @@ Lucene90HnswVectorsFormat.VECTOR_DATA_EXTENSION, Lucene90HnswVectorsFormat.VECTOR_DATA_CODEC_NAME, checksumRef); - vectorIndex = + graphIndex = openDataInput( state, versionMeta, - Lucene90HnswVectorsFormat.VECTOR_INDEX_EXTENSION, - Lucene90HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME, + Lucene90HnswVectorsFormat.GRAPH_INDEX_EXTENSION, + Lucene90HnswVectorsFormat.GRAPH_INDEX_CODEC_NAME, checksumRef); + graphData = + openDataInput( + state, + versionMeta, + Lucene90HnswVectorsFormat.GRAPH_DATA_EXTENSION, + Lucene90HnswVectorsFormat.GRAPH_DATA_CODEC_NAME, + checksumRef); + // fill graph nodes and offsets by level. + // TODO: should we do this on the first field access? Review comment: Addressed in 7e60c4d009652fcc6278bdb7f4a02982eb667900. Defer loading of graph nodes and offsets to the first use ########## File path: lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java ########## @@ -77,13 +80,23 @@ Lucene90HnswVectorsFormat.VECTOR_DATA_EXTENSION, Lucene90HnswVectorsFormat.VECTOR_DATA_CODEC_NAME, checksumRef); - vectorIndex = + graphIndex = openDataInput( state, versionMeta, - Lucene90HnswVectorsFormat.VECTOR_INDEX_EXTENSION, - Lucene90HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME, + Lucene90HnswVectorsFormat.GRAPH_INDEX_EXTENSION, + Lucene90HnswVectorsFormat.GRAPH_INDEX_CODEC_NAME, checksumRef); + graphData = + openDataInput( + state, + versionMeta, + Lucene90HnswVectorsFormat.GRAPH_DATA_EXTENSION, + Lucene90HnswVectorsFormat.GRAPH_DATA_CODEC_NAME, + checksumRef); + // fill graph nodes and offsets by level. + // TODO: should we do this on the first field access? Review comment: Addressed in 7e60c4d009652fcc6278bdb7f4a02982eb667900. 
Deferred loading of graph nodes and offsets to the first use ########## File path: lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java ########## @@ -204,6 +217,41 @@ private FieldEntry readField(DataInput input) throws IOException { return new FieldEntry(input, similarityFunction); } + private void fillGraphNodesAndOffsetsByLevel() throws IOException { + for (FieldEntry entry : fields.values()) { + IndexInput input = + graphIndex.slice("graph-index", entry.graphIndexOffset, entry.graphIndexLength); + int numOfLevels = input.readInt(); + assert entry.numOfLevels == numOfLevels; + int[] numOfNodesByLevel = new int[numOfLevels]; + + // read nodes by level + for (int level = 0; level < numOfLevels; level++) { + numOfNodesByLevel[level] = input.readInt(); + if (level == 0) { + entry.nodesByLevel.add(null); + } else { + final int[] nodesOnLevel = new int[numOfNodesByLevel[level]]; + for (int i = 0; i < numOfNodesByLevel[level]; i++) { + nodesOnLevel[i] = input.readVInt(); + } + entry.nodesByLevel.add(nodesOnLevel); + } + } + + // read offsets by level + long offset = 0; + for (int level = 0; level < numOfLevels; level++) { + long[] ordOffsets = new long[numOfNodesByLevel[level]]; Review comment: There cannot be a level without nodes. The reason we add `nodesByLevel.add(null)` for the 0th level is that this level contains all nodes, so we don't need this info. I've added extra assertions and comments in 7e60c4d009652fcc6278bdb7f4a02982eb667900. Hopefully this clarifies things. ########## File path: lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java ########## @@ -478,12 +531,25 @@ private void readValue(int targetOrd) throws IOException { IndexedKnnGraphReader(FieldEntry entry, IndexInput dataIn) { this.entry = entry; this.dataIn = dataIn; + this.entryNode = + entry.numOfLevels == 1 ? 
0 : entry.nodesByLevel.get(entry.numOfLevels - 1)[0]; } @Override public void seek(int level, int targetOrd) throws IOException { + long graphDataOffset; + if (level == 0) { + graphDataOffset = entry.ordOffsetsByLevel.get(0)[targetOrd]; + } else { + int targetIndex = + Arrays.binarySearch( + entry.nodesByLevel.get(level), 0, entry.nodesByLevel.get(level).length, targetOrd); + assert targetIndex >= 0; Review comment: Good catch, I've removed the unnecessary assertion `assert targetIndex >= 0;`, as in this case we will get an AIOOBE (ArrayIndexOutOfBoundsException) anyway ########## File path: lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java ########## @@ -505,13 +571,47 @@ public int nextNeighbor() throws IOException { } @Override - public int maxLevel() throws IOException { - return 0; + public int numOfLevels() throws IOException { + return entry.numOfLevels; } @Override public int entryNode() throws IOException { - return 0; + return entryNode; + } + + @Override + public DocIdSetIterator getAllNodesOnLevel(int level) { + return new DocIdSetIterator() { + int[] nodes = level == 0 ? null : entry.nodesByLevel.get(level); + int numOfNodes = level == 0 ? size() : nodes.length; + int idx = -1; + + @Override + public int docID() { + return level == 0 ? idx : nodes[idx]; Review comment: Addressed in 7e60c4d -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org