kishoreg commented on a change in pull request #5240:
URL: https://github.com/apache/incubator-pinot/pull/5240#discussion_r422460835



##########
File path: 
pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/RangeIndexCreator.java
##########
@@ -0,0 +1,471 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.segment.creator.impl.inv;
+
+import com.google.common.base.Preconditions;
+import it.unimi.dsi.fastutil.Arrays;
+import it.unimi.dsi.fastutil.Swapper;
+import it.unimi.dsi.fastutil.ints.IntComparator;
+import java.io.BufferedOutputStream;
+import java.io.Closeable;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.core.common.Constants;
+import org.apache.pinot.core.query.utils.Pair;
+import org.apache.pinot.core.segment.creator.InvertedIndexCreator;
+import org.apache.pinot.core.segment.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.MetricFieldSpec;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static 
org.apache.pinot.core.segment.creator.impl.V1Constants.Indexes.BITMAP_RANGE_INDEX_FILE_EXTENSION;
+
+
+/**
+ * Implementation of {@link InvertedIndexCreator} that uses off-heap memory.
+ * <p>We use 2 passes to create the range index.
+ * <ul>
+ *
+ *   <li>
+ *     A
+ *   </li>
+ *   <li>
+ *     In the first pass (adding values phase), when add() method is called, 
store the raw values into the forward index
+ *     value buffer (for multi-valued column also store number of values for 
each docId into forward index length
+ *     buffer). We also compute the inverted index length for each dictId 
while adding values.
+ *   </li>
+ *   <li>
+ *     In the second pass (processing values phase), when seal() method is 
called, all the dictIds should already have been
+ *     added. We first reorder the values into the inverted index buffers by 
going over the dictIds in forward index
+ *     value buffer (for multi-valued column we also need forward index length 
buffer to get the docId for each dictId).
+ *     <p>Once we have the inverted index buffers, we simply go over them and 
create the bitmap for each dictId and
+ *     serialize them into a file.
+ *   </li>
+ * </ul>
+ * <p>Based on the number of values we need to store, we use direct memory or 
MMap file to allocate the buffer.
+ */
+public final class RangeIndexCreator implements InvertedIndexCreator {
+
+  private static final int RANGE_INDEX_VERSION = 1;
+
+  // Use MMapBuffer if the value buffer size is larger than 2G
+  private static final int NUM_VALUES_THRESHOLD_FOR_MMAP_BUFFER = 500_000_000;
+  private static final int DEFAULT_NUM_RANGES = 20;
+
+  private static final String FORWARD_INDEX_VALUE_BUFFER_SUFFIX = 
".fwd.idx.val.buf";
+  private static final String DOC_ID_VALUE_BUFFER_SUFFIX = ".doc.id.val.buf";
+
+  private final File _invertedIndexFile;
+  private final File _forwardIndexValueBufferFile;
+  private final File _docIdBufferFileForSorting;
+  private final int _numValues;
+  private final boolean _useMMapBuffer;
+
+  // Forward index buffers (from docId to dictId)

Review comment:
       It's a temp buffer; would renaming it to valueBuffer help?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to