[GitHub] [lucene] jpountz commented on a change in pull request #7: LUCENE-9820: Separate logic for reading the BKD index from logic to intersecting it

GitBox Thu, 15 Apr 2021 09:36:11 -0700


jpountz commented on a change in pull request #7:
URL: https://github.com/apache/lucene/pull/7#discussion_r614222974




##########
File path: lucene/core/src/java/org/apache/lucene/util/bkd/BKDDefaultReader.java
##########
@@ -0,0 +1,899 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util.bkd;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Arrays;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.MathUtil;
+
+/**
+ * Handles reading a block KD-tree previously written with {@link BKDWriter}.
+ *
+ * @lucene.experimental
+ */
+public class BKDDefaultReader implements BKDReader {
+
+  final BKDConfig config;
+  final int numLeaves;
+  // Packed array of byte[] holding all docs and values:
+  final IndexInput in;
+  final byte[] minPackedValue;
+  final byte[] maxPackedValue;
+  final long pointCount;
+  final int docCount;
+  final int version;
+  final long minLeafBlockFP;
+  // Packed array of byte[] holding all split values in the full binary tree:
+  private final IndexInput packedIndex;
+
+  /**
+   * Caller must pre-seek the provided {@link IndexInput} to the index 
location that {@link
+   * BKDWriter#finish} returned. BKD tree is always stored off-heap.
+   */
+  public BKDDefaultReader(IndexInput metaIn, IndexInput indexIn, IndexInput 
dataIn)
+      throws IOException {
+    version =
+        CodecUtil.checkHeader(
+            metaIn, BKDWriter.CODEC_NAME, BKDWriter.VERSION_START, 
BKDWriter.VERSION_CURRENT);
+    final int numDims = metaIn.readVInt();
+    final int numIndexDims;
+    if (version >= BKDWriter.VERSION_SELECTIVE_INDEXING) {
+      numIndexDims = metaIn.readVInt();
+    } else {
+      numIndexDims = numDims;
+    }
+    final int maxPointsInLeafNode = metaIn.readVInt();
+    final int bytesPerDim = metaIn.readVInt();
+    config = new BKDConfig(numDims, numIndexDims, bytesPerDim, 
maxPointsInLeafNode);
+
+    // Read index:
+    numLeaves = metaIn.readVInt();
+    assert numLeaves > 0;
+
+    minPackedValue = new byte[config.packedIndexBytesLength];
+    maxPackedValue = new byte[config.packedIndexBytesLength];
+
+    metaIn.readBytes(minPackedValue, 0, config.packedIndexBytesLength);
+    metaIn.readBytes(maxPackedValue, 0, config.packedIndexBytesLength);
+
+    for (int dim = 0; dim < config.numIndexDims; dim++) {
+      if (Arrays.compareUnsigned(
+              minPackedValue,
+              dim * config.bytesPerDim,
+              dim * config.bytesPerDim + config.bytesPerDim,
+              maxPackedValue,
+              dim * config.bytesPerDim,
+              dim * config.bytesPerDim + config.bytesPerDim)
+          > 0) {
+        throw new CorruptIndexException(
+            "minPackedValue "
+                + new BytesRef(minPackedValue)
+                + " is > maxPackedValue "
+                + new BytesRef(maxPackedValue)
+                + " for dim="
+                + dim,
+            metaIn);
+      }
+    }
+
+    pointCount = metaIn.readVLong();
+    docCount = metaIn.readVInt();
+
+    int numIndexBytes = metaIn.readVInt();
+    long indexStartPointer;
+    if (version >= BKDWriter.VERSION_META_FILE) {
+      minLeafBlockFP = metaIn.readLong();
+      indexStartPointer = metaIn.readLong();
+    } else {
+      indexStartPointer = indexIn.getFilePointer();
+      minLeafBlockFP = indexIn.readVLong();
+      indexIn.seek(indexStartPointer);
+    }
+    this.packedIndex = indexIn.slice("packedIndex", indexStartPointer, 
numIndexBytes);
+    this.in = dataIn;
+  }
+
+  @Override
+  public BKDConfig getConfig() {
+    return config;
+  }
+
+  @Override
+  public byte[] getMinPackedValue() {
+    return minPackedValue.clone();
+  }
+
+  @Override
+  public byte[] getMaxPackedValue() {
+    return maxPackedValue.clone();
+  }
+
+  @Override
+  public long getPointCount() {
+    return pointCount;
+  }
+
+  @Override
+  public int getDocCount() {
+    return docCount;
+  }
+
+  @Override
+  public BKDReader.IndexTree getIndexTree() {
+    return new IndexTree(
+        packedIndex.clone(),
+        this.in.clone(),
+        config,
+        numLeaves,
+        version,
+        minPackedValue,
+        maxPackedValue);
+  }
+
+  private static class IndexTree implements BKDReader.IndexTree {
+    private int nodeID;
+    // during clone, the node root can be different to 1
+    private final int nodeRoot;
+    // level is 1-based so that we can do level-1 w/o checking each time:
+    private int level;
+    // used to read the packed tree off-heap
+    private final IndexInput innerNodes;
+    // used to read the packed leaves off-heap
+    private final IndexInput leafNodes;
+    // holds the minimum (left most) leaf block file pointer for each level 
we've recursed to:
+    private final long[] leafBlockFPStack;
+    // holds the address, in the off-heap index, of the right-node of each 
level:
+    private final int[] rightNodePositions;
+    // holds the splitDim for each level:
+    private final int[] splitDims;
+    // true if the per-dim delta we read for the node at this level is a 
negative offset vs. the
+    // last split on this dim; this is a packed
+    // 2D array, i.e. to access array[level][dim] you read from 
negativeDeltas[level*numDims+dim].
+    // this will be true if the last time we
+    // split on this dimension, we next pushed to the left sub-tree:
+    private final boolean[] negativeDeltas;
+    // holds the packed per-level split values
+    private final byte[][] splitValuesStack;
+    // holds the min / max value of the current node.
+    private final byte[] minPackedValue, maxPackedValue;
+    // holds the previous value of the split dimension
+    private final byte[][] splitDimValueStack;
+    // tree parameters
+    private final BKDConfig config;
+    // number of leaves
+    private final int leafNodeOffset;
+    // version of the index
+    private final int version;
+    // helper objects for reading doc values
+    private final byte[] scratchDataPackedValue,
+        scratchMinIndexPackedValue,
+        scratchMaxIndexPackedValue;
+    private final int[] commonPrefixLengths;
+    private final BKDReaderDocIDSetIterator scratchIterator;
+
+    private IndexTree(
+        IndexInput innerNodes,
+        IndexInput leafNodes,
+        BKDConfig config,
+        int numLeaves,
+        int version,
+        byte[] minPackedValue,
+        byte[] maxPackedValue) {
+      this(
+          innerNodes,
+          leafNodes,
+          config,
+          numLeaves,
+          version,
+          1,
+          1,
+          minPackedValue,
+          maxPackedValue,
+          new BKDReaderDocIDSetIterator(config.maxPointsInLeafNode),
+          new byte[config.packedBytesLength],
+          new byte[config.packedIndexBytesLength],
+          new byte[config.packedIndexBytesLength],
+          new int[config.numDims]);
+      // read root node
+      readNodeData(false);
+    }
+
+    private IndexTree(
+        IndexInput innerNodes,
+        IndexInput leafNodes,
+        BKDConfig config,
+        int numLeaves,
+        int version,
+        int nodeID,
+        int level,
+        byte[] minPackedValue,
+        byte[] maxPackedValue,
+        BKDReaderDocIDSetIterator scratchIterator,
+        byte[] scratchDataPackedValue,
+        byte[] scratchMinIndexPackedValue,
+        byte[] scratchMaxIndexPackedValue,
+        int[] commonPrefixLengths) {
+      this.config = config;
+      this.version = version;
+      this.nodeID = nodeID;
+      this.nodeRoot = nodeID;
+      this.level = level;
+      leafNodeOffset = numLeaves;
+      this.innerNodes = innerNodes;
+      this.leafNodes = leafNodes;
+      this.minPackedValue = minPackedValue.clone();
+      this.maxPackedValue = maxPackedValue.clone();
+      // stack arrays that keep information at different levels
+      int treeDepth = getTreeDepth(numLeaves);
+      splitDimValueStack = new byte[treeDepth + 1][];
+      splitValuesStack = new byte[treeDepth + 1][];
+      splitValuesStack[0] = new byte[config.packedIndexBytesLength];
+      leafBlockFPStack = new long[treeDepth + 1];
+      rightNodePositions = new int[treeDepth + 1];
+      splitDims = new int[treeDepth + 1];
+      negativeDeltas = new boolean[config.numIndexDims * (treeDepth + 1)];
+      // scratch objects, reused between clones
+      this.scratchIterator = scratchIterator;
+      this.commonPrefixLengths = commonPrefixLengths;
+      this.scratchDataPackedValue = scratchDataPackedValue;
+      this.scratchMinIndexPackedValue = scratchMinIndexPackedValue;
+      this.scratchMaxIndexPackedValue = scratchMaxIndexPackedValue;
+    }
+
+    @Override
+    public BKDReader.IndexTree clone() {
+      BKDDefaultReader.IndexTree index =
+          new BKDDefaultReader.IndexTree(
+              innerNodes.clone(),
+              leafNodes.clone(),
+              config,
+              leafNodeOffset,
+              version,
+              nodeID,
+              level,
+              minPackedValue,
+              maxPackedValue,
+              scratchIterator,
+              scratchDataPackedValue,
+              scratchMinIndexPackedValue,
+              scratchMaxIndexPackedValue,
+              commonPrefixLengths);
+      index.leafBlockFPStack[index.level] = leafBlockFPStack[level];
+      if (isLeafNode() == false) {
+        // copy node data
+        index.rightNodePositions[index.level] = rightNodePositions[level];
+        index.splitValuesStack[index.level] = splitValuesStack[level].clone();
+        System.arraycopy(
+            negativeDeltas,
+            level * config.numIndexDims,
+            index.negativeDeltas,
+            level * config.numIndexDims,
+            config.numIndexDims);
+        index.splitDims[level] = splitDims[level];
+      }
+      return index;
+    }
+
+    @Override
+    public byte[] getMinPackedValue() {
+      return minPackedValue;
+    }
+
+    @Override
+    public byte[] getMaxPackedValue() {
+      return maxPackedValue;
+    }
+
+    @Override
+    public boolean moveToChild() {
+      if (isLeafNode()) {
+        return false;
+      }
+      pushLeft();
+      return true;
+    }
+
+    private void pushLeft() {
+      final int splitDimPos = splitDims[level] * config.bytesPerDim;
+      if (splitDimValueStack[level] == null) {
+        splitDimValueStack[level] = new byte[config.bytesPerDim];
+      }
+      // save the dimension we are going to change
+      System.arraycopy(
+          maxPackedValue, splitDimPos, splitDimValueStack[level], 0, 
config.bytesPerDim);
+      assert Arrays.compareUnsigned(
+                  maxPackedValue,
+                  splitDimPos,
+                  splitDimPos + config.bytesPerDim,
+                  splitValuesStack[level],
+                  splitDimPos,
+                  splitDimPos + config.bytesPerDim)
+              >= 0
+          : "config.bytesPerDim="
+              + config.bytesPerDim
+              + " splitDim="
+              + splitDims[level]
+              + " config.numIndexDims="
+              + config.numIndexDims
+              + " config.numDims="
+              + config.numDims;
+      nodeID *= 2;
+      level++;
+      // add the split dim value:
+      System.arraycopy(
+          splitValuesStack[level - 1],
+          splitDimPos,
+          maxPackedValue,
+          splitDimPos,
+          config.bytesPerDim);
+      readNodeData(true);
+    }
+
+    private void pushRight() {
+      final int splitDimPos = splitDims[level] * config.bytesPerDim;
+      // we should have already visit the left node
+      assert splitDimValueStack[level] != null;
+      // same the dimension we are going to change
+      System.arraycopy(
+          minPackedValue, splitDimPos, splitDimValueStack[level], 0, 
config.bytesPerDim);
+      assert Arrays.compareUnsigned(
+                  minPackedValue,
+                  splitDimPos,
+                  splitDimPos + config.bytesPerDim,
+                  splitValuesStack[level],
+                  splitDimPos,
+                  splitDimPos + config.bytesPerDim)
+              <= 0
+          : "config.bytesPerDim="
+              + config.bytesPerDim
+              + " splitDim="
+              + splitDims[level]
+              + " config.numIndexDims="
+              + config.numIndexDims
+              + " config.numDims="
+              + config.numDims;
+      final int nodePosition = rightNodePositions[level];
+      assert nodePosition >= innerNodes.getFilePointer()
+          : "nodePosition = " + nodePosition + " < currentPosition=" + 
innerNodes.getFilePointer();
+      try {
+        innerNodes.seek(nodePosition);
+      } catch (IOException e) {
+        throw new UncheckedIOException(e);
+      }

Review comment:
       should we make all these methods throw IOException and remove catch 
blocks?

##########
File path: lucene/core/src/java/org/apache/lucene/util/bkd/BKDDefaultReader.java
##########
@@ -0,0 +1,899 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util.bkd;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Arrays;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.MathUtil;
+
+/**
+ * Handles reading a block KD-tree previously written with {@link BKDWriter}.
+ *
+ * @lucene.experimental
+ */
+public class BKDDefaultReader implements BKDReader {
+
+  final BKDConfig config;
+  final int numLeaves;
+  // Packed array of byte[] holding all docs and values:
+  final IndexInput in;
+  final byte[] minPackedValue;
+  final byte[] maxPackedValue;
+  final long pointCount;
+  final int docCount;
+  final int version;
+  final long minLeafBlockFP;
+  // Packed array of byte[] holding all split values in the full binary tree:
+  private final IndexInput packedIndex;
+
+  /**
+   * Caller must pre-seek the provided {@link IndexInput} to the index 
location that {@link
+   * BKDWriter#finish} returned. BKD tree is always stored off-heap.
+   */
+  public BKDDefaultReader(IndexInput metaIn, IndexInput indexIn, IndexInput 
dataIn)
+      throws IOException {
+    version =
+        CodecUtil.checkHeader(
+            metaIn, BKDWriter.CODEC_NAME, BKDWriter.VERSION_START, 
BKDWriter.VERSION_CURRENT);
+    final int numDims = metaIn.readVInt();
+    final int numIndexDims;
+    if (version >= BKDWriter.VERSION_SELECTIVE_INDEXING) {
+      numIndexDims = metaIn.readVInt();
+    } else {
+      numIndexDims = numDims;
+    }
+    final int maxPointsInLeafNode = metaIn.readVInt();
+    final int bytesPerDim = metaIn.readVInt();
+    config = new BKDConfig(numDims, numIndexDims, bytesPerDim, 
maxPointsInLeafNode);
+
+    // Read index:
+    numLeaves = metaIn.readVInt();
+    assert numLeaves > 0;
+
+    minPackedValue = new byte[config.packedIndexBytesLength];
+    maxPackedValue = new byte[config.packedIndexBytesLength];
+
+    metaIn.readBytes(minPackedValue, 0, config.packedIndexBytesLength);
+    metaIn.readBytes(maxPackedValue, 0, config.packedIndexBytesLength);
+
+    for (int dim = 0; dim < config.numIndexDims; dim++) {
+      if (Arrays.compareUnsigned(
+              minPackedValue,
+              dim * config.bytesPerDim,
+              dim * config.bytesPerDim + config.bytesPerDim,
+              maxPackedValue,
+              dim * config.bytesPerDim,
+              dim * config.bytesPerDim + config.bytesPerDim)
+          > 0) {
+        throw new CorruptIndexException(
+            "minPackedValue "
+                + new BytesRef(minPackedValue)
+                + " is > maxPackedValue "
+                + new BytesRef(maxPackedValue)
+                + " for dim="
+                + dim,
+            metaIn);
+      }
+    }
+
+    pointCount = metaIn.readVLong();
+    docCount = metaIn.readVInt();
+
+    int numIndexBytes = metaIn.readVInt();
+    long indexStartPointer;
+    if (version >= BKDWriter.VERSION_META_FILE) {
+      minLeafBlockFP = metaIn.readLong();
+      indexStartPointer = metaIn.readLong();
+    } else {
+      indexStartPointer = indexIn.getFilePointer();
+      minLeafBlockFP = indexIn.readVLong();
+      indexIn.seek(indexStartPointer);
+    }
+    this.packedIndex = indexIn.slice("packedIndex", indexStartPointer, 
numIndexBytes);
+    this.in = dataIn;
+  }
+
+  @Override
+  public BKDConfig getConfig() {
+    return config;
+  }
+
+  @Override
+  public byte[] getMinPackedValue() {
+    return minPackedValue.clone();
+  }
+
+  @Override
+  public byte[] getMaxPackedValue() {
+    return maxPackedValue.clone();
+  }
+
+  @Override
+  public long getPointCount() {
+    return pointCount;
+  }
+
+  @Override
+  public int getDocCount() {
+    return docCount;
+  }
+
+  @Override
+  public BKDReader.IndexTree getIndexTree() {
+    return new IndexTree(
+        packedIndex.clone(),
+        this.in.clone(),
+        config,
+        numLeaves,
+        version,
+        minPackedValue,
+        maxPackedValue);
+  }
+
+  private static class IndexTree implements BKDReader.IndexTree {
+    private int nodeID;
+    // during clone, the node root can be different to 1
+    private final int nodeRoot;
+    // level is 1-based so that we can do level-1 w/o checking each time:
+    private int level;
+    // used to read the packed tree off-heap
+    private final IndexInput innerNodes;
+    // used to read the packed leaves off-heap
+    private final IndexInput leafNodes;
+    // holds the minimum (left most) leaf block file pointer for each level 
we've recursed to:
+    private final long[] leafBlockFPStack;
+    // holds the address, in the off-heap index, of the right-node of each 
level:
+    private final int[] rightNodePositions;
+    // holds the splitDim for each level:
+    private final int[] splitDims;
+    // true if the per-dim delta we read for the node at this level is a 
negative offset vs. the
+    // last split on this dim; this is a packed
+    // 2D array, i.e. to access array[level][dim] you read from 
negativeDeltas[level*numDims+dim].
+    // this will be true if the last time we
+    // split on this dimension, we next pushed to the left sub-tree:
+    private final boolean[] negativeDeltas;
+    // holds the packed per-level split values
+    private final byte[][] splitValuesStack;
+    // holds the min / max value of the current node.
+    private final byte[] minPackedValue, maxPackedValue;
+    // holds the previous value of the split dimension
+    private final byte[][] splitDimValueStack;
+    // tree parameters
+    private final BKDConfig config;
+    // number of leaves
+    private final int leafNodeOffset;
+    // version of the index
+    private final int version;
+    // helper objects for reading doc values
+    private final byte[] scratchDataPackedValue,
+        scratchMinIndexPackedValue,
+        scratchMaxIndexPackedValue;
+    private final int[] commonPrefixLengths;
+    private final BKDReaderDocIDSetIterator scratchIterator;
+
+    private IndexTree(
+        IndexInput innerNodes,
+        IndexInput leafNodes,
+        BKDConfig config,
+        int numLeaves,
+        int version,
+        byte[] minPackedValue,
+        byte[] maxPackedValue) {
+      this(
+          innerNodes,
+          leafNodes,
+          config,
+          numLeaves,
+          version,
+          1,
+          1,
+          minPackedValue,
+          maxPackedValue,
+          new BKDReaderDocIDSetIterator(config.maxPointsInLeafNode),
+          new byte[config.packedBytesLength],
+          new byte[config.packedIndexBytesLength],
+          new byte[config.packedIndexBytesLength],
+          new int[config.numDims]);
+      // read root node
+      readNodeData(false);
+    }
+
+    private IndexTree(
+        IndexInput innerNodes,
+        IndexInput leafNodes,
+        BKDConfig config,
+        int numLeaves,
+        int version,
+        int nodeID,
+        int level,
+        byte[] minPackedValue,
+        byte[] maxPackedValue,
+        BKDReaderDocIDSetIterator scratchIterator,
+        byte[] scratchDataPackedValue,
+        byte[] scratchMinIndexPackedValue,
+        byte[] scratchMaxIndexPackedValue,
+        int[] commonPrefixLengths) {
+      this.config = config;
+      this.version = version;
+      this.nodeID = nodeID;
+      this.nodeRoot = nodeID;
+      this.level = level;
+      leafNodeOffset = numLeaves;
+      this.innerNodes = innerNodes;
+      this.leafNodes = leafNodes;
+      this.minPackedValue = minPackedValue.clone();
+      this.maxPackedValue = maxPackedValue.clone();
+      // stack arrays that keep information at different levels
+      int treeDepth = getTreeDepth(numLeaves);
+      splitDimValueStack = new byte[treeDepth + 1][];
+      splitValuesStack = new byte[treeDepth + 1][];
+      splitValuesStack[0] = new byte[config.packedIndexBytesLength];
+      leafBlockFPStack = new long[treeDepth + 1];
+      rightNodePositions = new int[treeDepth + 1];
+      splitDims = new int[treeDepth + 1];
+      negativeDeltas = new boolean[config.numIndexDims * (treeDepth + 1)];
+      // scratch objects, reused between clones
+      this.scratchIterator = scratchIterator;
+      this.commonPrefixLengths = commonPrefixLengths;
+      this.scratchDataPackedValue = scratchDataPackedValue;
+      this.scratchMinIndexPackedValue = scratchMinIndexPackedValue;
+      this.scratchMaxIndexPackedValue = scratchMaxIndexPackedValue;
+    }
+
+    @Override
+    public BKDReader.IndexTree clone() {
+      BKDDefaultReader.IndexTree index =
+          new BKDDefaultReader.IndexTree(
+              innerNodes.clone(),
+              leafNodes.clone(),
+              config,
+              leafNodeOffset,
+              version,
+              nodeID,
+              level,
+              minPackedValue,
+              maxPackedValue,
+              scratchIterator,
+              scratchDataPackedValue,
+              scratchMinIndexPackedValue,
+              scratchMaxIndexPackedValue,
+              commonPrefixLengths);
+      index.leafBlockFPStack[index.level] = leafBlockFPStack[level];
+      if (isLeafNode() == false) {
+        // copy node data
+        index.rightNodePositions[index.level] = rightNodePositions[level];
+        index.splitValuesStack[index.level] = splitValuesStack[level].clone();
+        System.arraycopy(
+            negativeDeltas,
+            level * config.numIndexDims,
+            index.negativeDeltas,
+            level * config.numIndexDims,
+            config.numIndexDims);
+        index.splitDims[level] = splitDims[level];
+      }
+      return index;
+    }
+
+    @Override
+    public byte[] getMinPackedValue() {
+      return minPackedValue;
+    }
+
+    @Override
+    public byte[] getMaxPackedValue() {
+      return maxPackedValue;
+    }
+
+    @Override
+    public boolean moveToChild() {
+      if (isLeafNode()) {
+        return false;
+      }
+      pushLeft();
+      return true;
+    }
+
+    private void pushLeft() {
+      final int splitDimPos = splitDims[level] * config.bytesPerDim;
+      if (splitDimValueStack[level] == null) {
+        splitDimValueStack[level] = new byte[config.bytesPerDim];
+      }
+      // save the dimension we are going to change
+      System.arraycopy(
+          maxPackedValue, splitDimPos, splitDimValueStack[level], 0, 
config.bytesPerDim);
+      assert Arrays.compareUnsigned(
+                  maxPackedValue,
+                  splitDimPos,
+                  splitDimPos + config.bytesPerDim,
+                  splitValuesStack[level],
+                  splitDimPos,
+                  splitDimPos + config.bytesPerDim)
+              >= 0
+          : "config.bytesPerDim="
+              + config.bytesPerDim
+              + " splitDim="
+              + splitDims[level]
+              + " config.numIndexDims="
+              + config.numIndexDims
+              + " config.numDims="
+              + config.numDims;
+      nodeID *= 2;
+      level++;
+      // add the split dim value:
+      System.arraycopy(
+          splitValuesStack[level - 1],
+          splitDimPos,
+          maxPackedValue,
+          splitDimPos,
+          config.bytesPerDim);
+      readNodeData(true);
+    }
+
+    private void pushRight() {
+      final int splitDimPos = splitDims[level] * config.bytesPerDim;
+      // we should have already visit the left node
+      assert splitDimValueStack[level] != null;
+      // same the dimension we are going to change
+      System.arraycopy(
+          minPackedValue, splitDimPos, splitDimValueStack[level], 0, 
config.bytesPerDim);
+      assert Arrays.compareUnsigned(
+                  minPackedValue,
+                  splitDimPos,
+                  splitDimPos + config.bytesPerDim,
+                  splitValuesStack[level],
+                  splitDimPos,
+                  splitDimPos + config.bytesPerDim)
+              <= 0
+          : "config.bytesPerDim="
+              + config.bytesPerDim
+              + " splitDim="
+              + splitDims[level]
+              + " config.numIndexDims="
+              + config.numIndexDims
+              + " config.numDims="
+              + config.numDims;
+      final int nodePosition = rightNodePositions[level];
+      assert nodePosition >= innerNodes.getFilePointer()
+          : "nodePosition = " + nodePosition + " < currentPosition=" + 
innerNodes.getFilePointer();
+      try {
+        innerNodes.seek(nodePosition);
+      } catch (IOException e) {
+        throw new UncheckedIOException(e);
+      }
+      nodeID = 2 * nodeID + 1;
+      level++;
+      // add the split dim value:
+      System.arraycopy(
+          splitValuesStack[level - 1],
+          splitDimPos,
+          minPackedValue,
+          splitDimPos,
+          config.bytesPerDim);
+      readNodeData(false);
+    }
+
+    @Override
+    public boolean moveToSibling() {
+      if (nodeID != nodeRoot && (nodeID & 1) == 0) {
+        pop(true);
+        pushRight();
+        assert nodeExists();
+        return true;
+      }
+      return false;
+    }
+
+    private void pop(boolean isLeft) {
+      nodeID /= 2;
+      level--;
+      // restore the split dimension
+      if (isLeft) {
+        System.arraycopy(
+            splitDimValueStack[level],
+            0,
+            maxPackedValue,
+            splitDims[level] * config.bytesPerDim,
+            config.bytesPerDim);
+      } else {
+
+        System.arraycopy(
+            splitDimValueStack[level],
+            0,
+            minPackedValue,
+            splitDims[level] * config.bytesPerDim,
+            config.bytesPerDim);
+      }
+    }
+
+    @Override
+    public boolean moveToParent() {
+      if (nodeID == nodeRoot) {
+        return false;
+      }
+      pop((nodeID & 1) == 0);
+      return true;
+    }
+
+    private boolean isLeafNode() {
+      return nodeID >= leafNodeOffset;
+    }
+
+    private boolean nodeExists() {
+      return nodeID - leafNodeOffset < leafNodeOffset;
+    }
+
+    /** Only valid after pushLeft or pushRight, not pop! */
+    private long getLeafBlockFP() {
+      assert isLeafNode() : "nodeID=" + nodeID + " is not a leaf";
+      return leafBlockFPStack[level];
+    }
+
+    @Override
+    public long size() {
+      int leftMostLeafNode = nodeID;
+      while (leftMostLeafNode < leafNodeOffset) {
+        leftMostLeafNode = leftMostLeafNode * 2;
+      }
+      int rightMostLeafNode = nodeID;
+      while (rightMostLeafNode < leafNodeOffset) {
+        rightMostLeafNode = rightMostLeafNode * 2 + 1;
+      }
+      final int numLeaves;
+      if (rightMostLeafNode >= leftMostLeafNode) {
+        // both are on the same level
+        numLeaves = rightMostLeafNode - leftMostLeafNode + 1;
+      } else {
+        // left is one level deeper than right
+        numLeaves = rightMostLeafNode - leftMostLeafNode + 1 + leafNodeOffset;
+      }
+      assert numLeaves == getNumLeavesSlow(nodeID) : numLeaves + " " + 
getNumLeavesSlow(nodeID);
+      return (long) numLeaves * config.maxPointsInLeafNode;
+    }
+
+    @Override
+    public void visitDocIDs(PointValues.IntersectVisitor visitor) throws 
IOException {
+      // Leaf node
+      leafNodes.seek(getLeafBlockFP());
+      // How many points are stored in this leaf cell:
+      int count = leafNodes.readVInt();
+      // No need to call grow(), it has been called up-front
+      DocIdsWriter.readInts(leafNodes, count, visitor);
+    }
+
+    @Override
+    public void visitDocValues(PointValues.IntersectVisitor visitor) throws 
IOException {
+      visitDocValues(visitor, getLeafBlockFP());
+    }
+
+    private void visitDocValues(PointValues.IntersectVisitor visitor, long fp) 
throws IOException {
+      // Leaf node; scan and filter all points in this block:
+      int count = readDocIDs(leafNodes, fp, scratchIterator);
+      if (version >= BKDWriter.VERSION_LOW_CARDINALITY_LEAVES) {
+        visitDocValuesWithCardinality(
+            commonPrefixLengths,
+            scratchDataPackedValue,
+            scratchMinIndexPackedValue,
+            scratchMaxIndexPackedValue,
+            leafNodes,
+            scratchIterator,
+            count,
+            visitor);
+      } else {
+        visitDocValuesNoCardinality(
+            commonPrefixLengths,
+            scratchDataPackedValue,
+            scratchMinIndexPackedValue,
+            scratchMaxIndexPackedValue,
+            leafNodes,
+            scratchIterator,
+            count,
+            visitor);
+      }
+    }
+
+    private int readDocIDs(IndexInput in, long blockFP, 
BKDReaderDocIDSetIterator iterator)
+        throws IOException {
+      in.seek(blockFP);
+      // How many points are stored in this leaf cell:
+      int count = in.readVInt();
+
+      DocIdsWriter.readInts(in, count, iterator.docIDs);
+
+      return count;
+    }
+
+    // for assertions
+    private int getNumLeavesSlow(int node) {
+      if (node >= 2 * leafNodeOffset) {
+        return 0;
+      } else if (node >= leafNodeOffset) {
+        return 1;
+      } else {
+        final int leftCount = getNumLeavesSlow(node * 2);
+        final int rightCount = getNumLeavesSlow(node * 2 + 1);
+        return leftCount + rightCount;
+      }
+    }
+
+    private void readNodeData(boolean isLeft) {
+      try {
+        leafBlockFPStack[level] = leafBlockFPStack[level - 1];
+        if (isLeft == false) {
+          // read leaf block FP delta
+          leafBlockFPStack[level] += innerNodes.readVLong();
+        }
+
+        if (isLeafNode() == false) {
+          System.arraycopy(
+              negativeDeltas,
+              (level - 1) * config.numIndexDims,
+              negativeDeltas,
+              level * config.numIndexDims,
+              config.numIndexDims);
+          negativeDeltas[level * config.numIndexDims + splitDims[level - 1]] = 
isLeft;
+
+          if (splitValuesStack[level] == null) {
+            splitValuesStack[level] = splitValuesStack[level - 1].clone();
+          } else {
+            System.arraycopy(
+                splitValuesStack[level - 1],
+                0,
+                splitValuesStack[level],
+                0,
+                config.packedIndexBytesLength);
+          }
+
+          // read split dim, prefix, firstDiffByteDelta encoded as int:
+          int code = innerNodes.readVInt();
+          splitDims[level] = code % config.numIndexDims;
+          code /= config.numIndexDims;
+          int prefix = code % (1 + config.bytesPerDim);
+          int suffix = config.bytesPerDim - prefix;
+
+          if (suffix > 0) {
+            int firstDiffByteDelta = code / (1 + config.bytesPerDim);
+            if (negativeDeltas[level * config.numIndexDims + 
splitDims[level]]) {
+              firstDiffByteDelta = -firstDiffByteDelta;
+            }
+            int oldByte =
+                splitValuesStack[level][splitDims[level] * config.bytesPerDim 
+ prefix] & 0xFF;
+            splitValuesStack[level][splitDims[level] * config.bytesPerDim + 
prefix] =
+                (byte) (oldByte + firstDiffByteDelta);
+            innerNodes.readBytes(
+                splitValuesStack[level],
+                splitDims[level] * config.bytesPerDim + prefix + 1,
+                suffix - 1);
+          } else {
+            // our split value is == last split value in this dim, which can 
happen when there are
+            // many duplicate values
+          }
+
+          int leftNumBytes;
+          if (nodeID * 2 < leafNodeOffset) {
+            leftNumBytes = innerNodes.readVInt();
+          } else {
+            leftNumBytes = 0;
+          }
+          rightNodePositions[level] = 
Math.toIntExact(innerNodes.getFilePointer()) + leftNumBytes;
+        }
+      } catch (IOException e) {
+        throw new UncheckedIOException(e);
+      }

Review comment:
       likewise here?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

[GitHub] [lucene] jpountz commented on a change in pull request #7: LUCENE-9820: Separate logic for reading the BKD index from logic to intersecting it

Reply via email to