Pulkitg64 commented on code in PR #15003:
URL: https://github.com/apache/lucene/pull/15003#discussion_r2251081176


##########
lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java:
##########
@@ -347,12 +350,195 @@ private void reconstructAndWriteNeighbours(
     }
   }
 
+  /**
+   * Processes an off-heap HNSW graph, removing deleted nodes and writing the 
graph structure to the
+   * vector index.
+   *
+   * @param graph The off-heap graph to process
+   * @param docMap Mapping from old document IDs to new document IDs
+   * @param graphSize The size of the graph (number of vectors)
+   * @param offsets Array to store the byte offsets for each node at each level
+   * @return A mock HnswGraph implementation that provides access to the graph 
structure
+   * @throws IOException If an error occurs while writing to the vector index
+   */
+  private HnswGraph deleteNodesWriteGraph(
+      Lucene99HnswVectorsReader.OffHeapHnswGraph graph,
+      MergeState.DocMap docMap,
+      int graphSize,
+      int[][] offsets)
+      throws IOException {
+    if (graph == null) return null;
+
+    int[] scratch = new int[graph.maxConn() * 2];
+    final int numLevels = graph.numLevels();
+    final int[][] validNodesPerLevel = new int[numLevels][];
+
+    // Process all levels
+    for (int level = 0; level < numLevels; level++) {
+      int[] sortedNodes = 
NodesIterator.getSortedNodes(graph.getNodesOnLevel(level));
+
+      // Count and collect valid nodes
+      int validNodeCount = 0;
+      for (int node : sortedNodes) {
+        if (docMap.get(node) != -1) {
+          validNodeCount++;
+        }
+      }
+
+      // Special case for top level with no valid nodes
+      if (level == numLevels - 1 && validNodeCount == 0 && level > 0) {
+        validNodeCount = 1; // We'll create one connection to lower level
+      }
+
+      validNodesPerLevel[level] = new int[validNodeCount];
+      offsets[level] = new int[validNodeCount];
+
+      int validNodeIndex = 0;
+      int nodeOffsetId = 0;
+
+      // Process nodes at this level
+      for (int node : sortedNodes) {
+        if (docMap.get(node) == -1) {
+          continue;
+        }
+
+        // Store mapped node ID
+        int mappedNode = docMap.get(node);
+        validNodesPerLevel[level][validNodeIndex++] = mappedNode;
+
+        // Process neighbors
+        graph.seek(level, node);
+        long offsetStart = vectorIndex.getFilePointer();
+
+        // Process and write neighbors with delta encoding
+        writeNeighbors(graph, docMap, scratch, level == 0 ? graphSize : 0);
+
+        offsets[level][nodeOffsetId++] =
+            Math.toIntExact(vectorIndex.getFilePointer() - offsetStart);
+      }
+
+      // Special case for empty top level
+      if (level == numLevels - 1 && validNodeIndex == 0 && level > 0) {
+        int entryNode = validNodesPerLevel[level - 1][0];
+        validNodesPerLevel[level][0] = entryNode;
+
+        long offsetStart = vectorIndex.getFilePointer();
+        vectorIndex.writeVInt(1);
+        vectorIndex.writeVInt(entryNode);
+        offsets[level][0] = Math.toIntExact(vectorIndex.getFilePointer() - 
offsetStart);
+      }
+    }
+
+    return new HnswGraph() {
+      @Override
+      public int nextNeighbor() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void seek(int level, int target) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public int size() {
+        return graphSize;
+      }
+
+      @Override
+      public int numLevels() throws IOException {
+        return numLevels;
+      }
+
+      @Override
+      public int maxConn() {
+        return graph.maxConn();
+      }
+
+      @Override
+      public int entryNode() {
+        return validNodesPerLevel[0][0];
+      }
+
+      @Override
+      public int neighborCount() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public NodesIterator getNodesOnLevel(int level) {
+        return new ArrayNodesIterator(validNodesPerLevel[level], 
validNodesPerLevel[level].length);
+      }
+    };
+  }
+
+  /** Writes neighbors with delta encoding to the vector index. */
+  private void writeNeighbors(
+      Lucene99HnswVectorsReader.OffHeapHnswGraph graph,
+      MergeState.DocMap docMap,
+      int[] scratch,
+      int maxNode)
+      throws IOException {
+    int lastNode = -1;
+    int actualSize = 0;
+
+    int neighborLength = graph.neighborCount();
+    for (int i = 0; i < neighborLength; i++) {
+      int curNode = docMap.get(graph.nextNeighbor());
+      if (curNode == -1 || curNode == lastNode || (maxNode > 0 && curNode >= 
maxNode)) {
+        continue;
+      }
+
+      scratch[actualSize++] = lastNode == -1 ? curNode : curNode - lastNode;
+      lastNode = curNode;
+    }
+
+    vectorIndex.writeVInt(actualSize);
+    for (int i = 0; i < actualSize; i++) {
+      vectorIndex.writeVInt(scratch[i]);
+    }
+  }
+
   @Override
   public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws 
IOException {
     CloseableRandomVectorScorerSupplier scorerSupplier =
         flatVectorWriter.mergeOneFieldToIndex(fieldInfo, mergeState);
     try {
       long vectorIndexOffset = vectorIndex.getFilePointer();
+
+      if (mergeState.liveDocs.length == 1

Review Comment:
   Just saw that class. I think this is a good idea. Will do it in the next 
revision.



##########
lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java:
##########
@@ -347,12 +350,195 @@ private void reconstructAndWriteNeighbours(
     }
   }
 
+  /**
+   * Processes an off-heap HNSW graph, removing deleted nodes and writing the 
graph structure to the
+   * vector index.
+   *
+   * @param graph The off-heap graph to process
+   * @param docMap Mapping from old document IDs to new document IDs
+   * @param graphSize The size of the graph (number of vectors)
+   * @param offsets Array to store the byte offsets for each node at each level
+   * @return A mock HnswGraph implementation that provides access to the graph 
structure
+   * @throws IOException If an error occurs while writing to the vector index
+   */
+  private HnswGraph deleteNodesWriteGraph(
+      Lucene99HnswVectorsReader.OffHeapHnswGraph graph,
+      MergeState.DocMap docMap,
+      int graphSize,
+      int[][] offsets)
+      throws IOException {
+    if (graph == null) return null;
+
+    int[] scratch = new int[graph.maxConn() * 2];
+    final int numLevels = graph.numLevels();
+    final int[][] validNodesPerLevel = new int[numLevels][];
+
+    // Process all levels
+    for (int level = 0; level < numLevels; level++) {
+      int[] sortedNodes = 
NodesIterator.getSortedNodes(graph.getNodesOnLevel(level));
+
+      // Count and collect valid nodes
+      int validNodeCount = 0;
+      for (int node : sortedNodes) {
+        if (docMap.get(node) != -1) {
+          validNodeCount++;
+        }
+      }
+
+      // Special case for top level with no valid nodes
+      if (level == numLevels - 1 && validNodeCount == 0 && level > 0) {
+        validNodeCount = 1; // We'll create one connection to lower level
+      }
+
+      validNodesPerLevel[level] = new int[validNodeCount];
+      offsets[level] = new int[validNodeCount];
+
+      int validNodeIndex = 0;
+      int nodeOffsetId = 0;
+
+      // Process nodes at this level
+      for (int node : sortedNodes) {
+        if (docMap.get(node) == -1) {
+          continue;
+        }
+
+        // Store mapped node ID
+        int mappedNode = docMap.get(node);
+        validNodesPerLevel[level][validNodeIndex++] = mappedNode;
+
+        // Process neighbors
+        graph.seek(level, node);
+        long offsetStart = vectorIndex.getFilePointer();
+
+        // Process and write neighbors with delta encoding
+        writeNeighbors(graph, docMap, scratch, level == 0 ? graphSize : 0);
+
+        offsets[level][nodeOffsetId++] =
+            Math.toIntExact(vectorIndex.getFilePointer() - offsetStart);
+      }
+
+      // Special case for empty top level
+      if (level == numLevels - 1 && validNodeIndex == 0 && level > 0) {
+        int entryNode = validNodesPerLevel[level - 1][0];
+        validNodesPerLevel[level][0] = entryNode;
+
+        long offsetStart = vectorIndex.getFilePointer();
+        vectorIndex.writeVInt(1);
+        vectorIndex.writeVInt(entryNode);
+        offsets[level][0] = Math.toIntExact(vectorIndex.getFilePointer() - 
offsetStart);
+      }
+    }
+
+    return new HnswGraph() {
+      @Override
+      public int nextNeighbor() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void seek(int level, int target) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public int size() {
+        return graphSize;
+      }
+
+      @Override
+      public int numLevels() throws IOException {
+        return numLevels;
+      }
+
+      @Override
+      public int maxConn() {
+        return graph.maxConn();
+      }
+
+      @Override
+      public int entryNode() {
+        return validNodesPerLevel[0][0];
+      }
+
+      @Override
+      public int neighborCount() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public NodesIterator getNodesOnLevel(int level) {
+        return new ArrayNodesIterator(validNodesPerLevel[level], 
validNodesPerLevel[level].length);
+      }
+    };
+  }
+
+  /** Writes neighbors with delta encoding to the vector index. */
+  private void writeNeighbors(
+      Lucene99HnswVectorsReader.OffHeapHnswGraph graph,

Review Comment:
   Sure.



##########
lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java:
##########
@@ -347,12 +350,195 @@ private void reconstructAndWriteNeighbours(
     }
   }
 
+  /**
+   * Processes an off-heap HNSW graph, removing deleted nodes and writing the 
graph structure to the
+   * vector index.
+   *
+   * @param graph The off-heap graph to process
+   * @param docMap Mapping from old document IDs to new document IDs
+   * @param graphSize The size of the graph (number of vectors)
+   * @param offsets Array to store the byte offsets for each node at each level
+   * @return A mock HnswGraph implementation that provides access to the graph 
structure
+   * @throws IOException If an error occurs while writing to the vector index
+   */
+  private HnswGraph deleteNodesWriteGraph(
+      Lucene99HnswVectorsReader.OffHeapHnswGraph graph,
+      MergeState.DocMap docMap,
+      int graphSize,
+      int[][] offsets)
+      throws IOException {
+    if (graph == null) return null;
+
+    int[] scratch = new int[graph.maxConn() * 2];
+    final int numLevels = graph.numLevels();
+    final int[][] validNodesPerLevel = new int[numLevels][];
+
+    // Process all levels
+    for (int level = 0; level < numLevels; level++) {
+      int[] sortedNodes = 
NodesIterator.getSortedNodes(graph.getNodesOnLevel(level));
+
+      // Count and collect valid nodes
+      int validNodeCount = 0;
+      for (int node : sortedNodes) {
+        if (docMap.get(node) != -1) {
+          validNodeCount++;
+        }
+      }
+
+      // Special case for top level with no valid nodes
+      if (level == numLevels - 1 && validNodeCount == 0 && level > 0) {
+        validNodeCount = 1; // We'll create one connection to lower level
+      }
+
+      validNodesPerLevel[level] = new int[validNodeCount];

Review Comment:
   Sure.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to