This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new fe3e5deebd9 Reduce buffer allocation when collecting value size (#18012)
fe3e5deebd9 is described below

commit fe3e5deebd981dd40e873d3a0f934dd4e8fddde8
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Sat Mar 28 01:04:09 2026 -0700

    Reduce buffer allocation when collecting value size (#18012)
    
    Auto-merged by automated PR review bot.
---
 .../local/io/util/FixedByteValueReaderWriter.java  | 28 ++++++++++++
 .../pinot/segment/local/io/util/ValueReader.java   |  6 +++
 .../local/io/util/VarLengthValueReader.java        | 13 ++++++
 .../writer/impl/MutableOffHeapByteArrayStore.java  | 23 ++++++++++
 .../converter/stats/MutableColumnStatistics.java   | 43 +++++--------------
 .../BigDecimalOffHeapMutableDictionary.java        |  5 +++
 .../BigDecimalOnHeapMutableDictionary.java         |  8 +++-
 .../dictionary/BytesOffHeapMutableDictionary.java  |  5 +++
 .../dictionary/BytesOnHeapMutableDictionary.java   |  5 +++
 .../dictionary/SameValueMutableDictionary.java     |  5 +++
 .../dictionary/StringOffHeapMutableDictionary.java |  5 +++
 .../dictionary/StringOnHeapMutableDictionary.java  |  6 +++
 ...IndexAndDictionaryBasedForwardIndexCreator.java | 50 +++-------------------
 .../index/readers/BaseImmutableDictionary.java     |  8 ++++
 .../index/readers/BigDecimalDictionary.java        |  5 +++
 .../segment/index/readers/BytesDictionary.java     |  5 +++
 .../readers/ConstantValueBigDecimalDictionary.java | 13 ++++++
 .../readers/ConstantValueBytesDictionary.java      |  5 +++
 .../readers/ConstantValueStringDictionary.java     |  5 +++
 .../index/readers/OnHeapBigDecimalDictionary.java  |  5 +++
 .../index/readers/OnHeapBytesDictionary.java       | 10 +++++
 .../index/readers/OnHeapStringDictionary.java      |  5 +++
 .../segment/index/readers/StringDictionary.java    |  5 +++
 .../PartitionIdVirtualColumnProvider.java          | 15 ++++++-
 .../io/util/VarLengthValueReaderWriterTest.java    |  4 ++
 .../index/readers/ImmutableDictionaryTest.java     |  8 ++++
 .../FixedByteValueReaderWriterTest.java            | 22 ++++++++++
 .../pinot/segment/spi/index/reader/Dictionary.java | 13 ++++--
 .../converter/DictionaryToRawIndexConverter.java   |  8 +---
 29 files changed, 249 insertions(+), 89 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/FixedByteValueReaderWriter.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/FixedByteValueReaderWriter.java
index 2e85a81c1ae..7395efb4572 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/FixedByteValueReaderWriter.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/FixedByteValueReaderWriter.java
@@ -100,6 +100,34 @@ public final class FixedByteValueReaderWriter implements 
ValueReader {
     return value;
   }
 
+  @Override
+  public int getUnpaddedByteSize(int index, int numBytesPerValue) {
+    // Based on the ZeroInWord algorithm: http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+    long startOffset = (long) index * numBytesPerValue;
+    boolean littleEndian = _dataBuffer.order() == ByteOrder.LITTLE_ENDIAN;
+    int endIndex = numBytesPerValue & 0xFFFFFFF8;
+    int i = 0;
+    for (; i < endIndex; i += Long.BYTES) {
+      long word = _dataBuffer.getLong(startOffset + i);
+      long tmp = ~(((word & 0x7F7F7F7F7F7F7F7FL) + 0x7F7F7F7F7F7F7F7FL) | word | 0x7F7F7F7F7F7F7F7FL);
+      if (tmp != 0) {
+        return i + ((littleEndian ? Long.numberOfTrailingZeros(tmp) : Long.numberOfLeadingZeros(tmp)) >>> 3);
+      }
+    }
+    for (; i < numBytesPerValue; i++) {
+      byte b = _dataBuffer.getByte(startOffset + i);
+      if (b == 0) {
+        break;
+      }
+    }
+    return i;
+  }
+
+  @Override
+  public int getByteSize(int index, int numBytesPerValue) {
+    return numBytesPerValue;
+  }
+
   @Override
   public int compareUtf8Bytes(int index, int numBytesPerValue, byte[] bytes) {
     long startOffset = (long) index * numBytesPerValue;
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/ValueReader.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/ValueReader.java
index acf3d4fea2a..8478ba056ac 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/ValueReader.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/ValueReader.java
@@ -77,6 +77,12 @@ public interface ValueReader extends Closeable {
    */
   byte[] getBytes(int index, int numBytesPerValue);
 
+  /// Applicable to STRING only.
+  int getUnpaddedByteSize(int index, int numBytesPerValue);
+
+  /// Applicable to variable sized types other than STRING, i.e. BIG_DECIMAL, BYTES.
+  int getByteSize(int index, int numBytesPerValue);
+
   /**
    * NOTE: The passed in reusable buffer should have capacity of at least 
{@code numBytesPerValue}.
    */
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/VarLengthValueReader.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/VarLengthValueReader.java
index 502d38cfaaa..1139d20fc6d 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/VarLengthValueReader.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/VarLengthValueReader.java
@@ -128,6 +128,19 @@ public class VarLengthValueReader implements ValueReader {
     return value;
   }
 
+  @Override
+  public int getUnpaddedByteSize(int index, int numBytesPerValue) {
+    return getByteSize(index, numBytesPerValue);
+  }
+
+  @Override
+  public int getByteSize(int index, int numBytesPerValue) {
+    int offsetPosition = _dataSectionStartOffSet + Integer.BYTES * index;
+    int startOffset = _dataBuffer.getInt(offsetPosition);
+    int endOffset = _dataBuffer.getInt(offsetPosition + Integer.BYTES);
+    return endOffset - startOffset;
+  }
+
   @Override
   public int compareUtf8Bytes(int index, int numBytesPerValue, byte[] bytes) {
     int offsetPosition = _dataSectionStartOffSet + Integer.BYTES * index;
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/MutableOffHeapByteArrayStore.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/MutableOffHeapByteArrayStore.java
index 8ddd6d6a87e..276f167ec97 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/MutableOffHeapByteArrayStore.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/MutableOffHeapByteArrayStore.java
@@ -146,6 +146,17 @@ public class MutableOffHeapByteArrayStore implements 
Closeable {
       return value;
     }
 
+    private int getValueSize(int index) {
+      int startOffset = _pinotDataBuffer.getInt(index * Integer.BYTES);
+      int endOffset;
+      if (index != 0) {
+        endOffset = _pinotDataBuffer.getInt((index - 1) * Integer.BYTES);
+      } else {
+        endOffset = _size;
+      }
+      return endOffset - startOffset;
+    }
+
     private int getSize() {
       return _size;
     }
@@ -216,6 +227,18 @@ public class MutableOffHeapByteArrayStore implements 
Closeable {
     throw new RuntimeException("dictionary ID '" + index + "' too low");
   }
 
+  public int getValueSize(int index) {
+    List<Buffer> bufList = _buffers;
+    for (int x = bufList.size() - 1; x >= 0; x--) {
+      Buffer buffer = bufList.get(x);
+      if (index >= buffer.getStartIndex()) {
+        return buffer.getValueSize(index - buffer.getStartIndex());
+      }
+    }
+    // Assumed that we will never ask for an index that does not exist.
+    throw new RuntimeException("dictionary ID '" + index + "' too low");
+  }
+
   // Adds a byte array and returns the index. No verification is made as to 
whether the byte array already exists or not
   public int add(byte[] value) {
     int valueLength = value.length;
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/stats/MutableColumnStatistics.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/stats/MutableColumnStatistics.java
index 9bb751e3dcd..ca1dc1fba43 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/stats/MutableColumnStatistics.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/stats/MutableColumnStatistics.java
@@ -19,7 +19,6 @@
 package org.apache.pinot.segment.local.realtime.converter.stats;
 
 import com.google.common.base.Preconditions;
-import com.google.common.base.Utf8;
 import java.util.Map;
 import java.util.Set;
 import javax.annotation.Nullable;
@@ -30,7 +29,6 @@ import 
org.apache.pinot.segment.spi.index.mutable.MutableForwardIndex;
 import org.apache.pinot.segment.spi.index.reader.Dictionary;
 import org.apache.pinot.segment.spi.partition.PartitionFunction;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
-import org.apache.pinot.spi.utils.BigDecimalUtils;
 
 
 /**
@@ -99,37 +97,16 @@ public class MutableColumnStatistics implements 
ColumnStatistics {
     if (storedType.isFixedWidth()) {
       _minElementLength = storedType.size();
       _maxElementLength = storedType.size();
-      return;
-    }
-
-    // If the stored type is not fixed width, iterate over the dictionary to 
find the min/max element length
-    _minElementLength = Integer.MAX_VALUE;
-    _maxElementLength = 0;
-    int length = _dictionary.length();
-    switch (storedType) {
-      case BIG_DECIMAL:
-        for (int i = 0; i < length; i++) {
-          int elementLength = 
BigDecimalUtils.byteSize(_dictionary.getBigDecimalValue(i));
-          _minElementLength = Math.min(_minElementLength, elementLength);
-          _maxElementLength = Math.max(_maxElementLength, elementLength);
-        }
-        break;
-      case STRING:
-        for (int i = 0; i < length; i++) {
-          int elementLength = 
Utf8.encodedLength(_dictionary.getStringValue(i));
-          _minElementLength = Math.min(_minElementLength, elementLength);
-          _maxElementLength = Math.max(_maxElementLength, elementLength);
-        }
-        break;
-      case BYTES:
-        for (int i = 0; i < length; i++) {
-          int elementLength = _dictionary.getBytesValue(i).length;
-          _minElementLength = Math.min(_minElementLength, elementLength);
-          _maxElementLength = Math.max(_maxElementLength, elementLength);
-        }
-        break;
-      default:
-        throw new IllegalStateException("Unsupported stored type: " + 
storedType);
+    } else {
+      // If the stored type is not fixed width, iterate over the dictionary to find the min/max element length
+      _minElementLength = Integer.MAX_VALUE;
+      _maxElementLength = 0;
+      int length = _dictionary.length();
+      for (int i = 0; i < length; i++) {
+        int elementLength = _dictionary.getValueSize(i);
+        _minElementLength = Math.min(_minElementLength, elementLength);
+        _maxElementLength = Math.max(_maxElementLength, elementLength);
+      }
     }
   }
 
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOffHeapMutableDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOffHeapMutableDictionary.java
index 6e39aff860a..9eff5af67de 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOffHeapMutableDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOffHeapMutableDictionary.java
@@ -220,6 +220,11 @@ public class BigDecimalOffHeapMutableDictionary extends 
BaseOffHeapMutableDictio
     return getBigDecimalValue(dictId).toPlainString();
   }
 
+  @Override
+  public int getValueSize(int dictId) {
+    return _byteStore.getValueSize(dictId);
+  }
+
   @Override
   protected void setValue(int dictId, Object value, byte[] serializedValue) {
     _byteStore.add(serializedValue);
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOnHeapMutableDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOnHeapMutableDictionary.java
index ce1882903aa..6e45bc07d97 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOnHeapMutableDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOnHeapMutableDictionary.java
@@ -25,6 +25,7 @@ import java.math.BigDecimal;
 import java.util.Arrays;
 import org.apache.pinot.common.request.context.predicate.RangePredicate;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.utils.BigDecimalUtils;
 
 
 @SuppressWarnings("Duplicates")
@@ -204,7 +205,12 @@ public class BigDecimalOnHeapMutableDictionary extends 
BaseOnHeapMutableDictiona
 
   @Override
   public byte[] getBytesValue(int dictId) {
-    return getBytesValue(dictId);
+    return BigDecimalUtils.serialize(getBigDecimalValue(dictId));
+  }
+
+  @Override
+  public int getValueSize(int dictId) {
+    return BigDecimalUtils.byteSize(getBigDecimalValue(dictId));
   }
 
   private void updateMinMax(BigDecimal value) {
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOffHeapMutableDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOffHeapMutableDictionary.java
index b465860c474..2f9519c5b45 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOffHeapMutableDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOffHeapMutableDictionary.java
@@ -196,6 +196,11 @@ public class BytesOffHeapMutableDictionary extends 
BaseOffHeapMutableDictionary
     return _byteStore.get(dictId);
   }
 
+  @Override
+  public int getValueSize(int dictId) {
+    return _byteStore.getValueSize(dictId);
+  }
+
   @Override
   protected void setValue(int dictId, Object value, byte[] serializedValue) {
     _byteStore.add(serializedValue);
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOnHeapMutableDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOnHeapMutableDictionary.java
index a2bd8718276..ac25f855d55 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOnHeapMutableDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOnHeapMutableDictionary.java
@@ -175,6 +175,11 @@ public class BytesOnHeapMutableDictionary extends 
BaseOnHeapMutableDictionary {
     return getByteArrayValue(dictId).getBytes();
   }
 
+  @Override
+  public int getValueSize(int dictId) {
+    return getByteArrayValue(dictId).length();
+  }
+
   @Override
   public ByteArray getByteArrayValue(int dictId) {
     return (ByteArray) super.get(dictId);
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/SameValueMutableDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/SameValueMutableDictionary.java
index a13f4661559..b94121a329d 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/SameValueMutableDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/SameValueMutableDictionary.java
@@ -123,6 +123,11 @@ public class SameValueMutableDictionary implements 
MutableDictionary {
     return _actualValue.toString();
   }
 
+  @Override
+  public int getValueSize(int dictId) {
+    return _delegate.getValueSize(dictId);
+  }
+
   @Override
   public void close()
       throws IOException {
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOffHeapMutableDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOffHeapMutableDictionary.java
index 9fab9aa8398..838723c0830 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOffHeapMutableDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOffHeapMutableDictionary.java
@@ -177,6 +177,11 @@ public class StringOffHeapMutableDictionary extends 
BaseOffHeapMutableDictionary
     return _byteStore.get(dictId);
   }
 
+  @Override
+  public int getValueSize(int dictId) {
+    return _byteStore.getValueSize(dictId);
+  }
+
   @Override
   protected void setValue(int dictId, Object value, byte[] serializedValue) {
     _byteStore.add(serializedValue);
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOnHeapMutableDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOnHeapMutableDictionary.java
index f8586f011fe..09fde55a585 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOnHeapMutableDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOnHeapMutableDictionary.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.segment.local.realtime.impl.dictionary;
 
+import com.google.common.base.Utf8;
 import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
 import it.unimi.dsi.fastutil.ints.IntSet;
 import it.unimi.dsi.fastutil.ints.IntSets;
@@ -161,6 +162,11 @@ public class StringOnHeapMutableDictionary extends 
BaseOnHeapMutableDictionary {
     return getStringValue(dictId).getBytes(UTF_8);
   }
 
+  @Override
+  public int getValueSize(int dictId) {
+    return Utf8.encodedLength(getStringValue(dictId));
+  }
+
   private void updateMinMax(String value) {
     if (_min == null) {
       _min = value;
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java
index 066695a0cb2..4d45240dd62 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java
@@ -19,7 +19,6 @@
 package org.apache.pinot.segment.local.segment.index.loader;
 
 import com.google.common.base.Preconditions;
-import com.google.common.base.Utf8;
 import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
@@ -41,7 +40,6 @@ import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
 import org.apache.pinot.segment.spi.store.SegmentDirectory;
 import org.apache.pinot.segment.spi.utils.SegmentMetadataUtils;
 import org.apache.pinot.spi.data.FieldSpec;
-import org.apache.pinot.spi.utils.BigDecimalUtils;
 import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -263,7 +261,7 @@ public class 
InvertedIndexAndDictionaryBasedForwardIndexCreator implements AutoC
         int finalDictId = dictId;
         docIdsBitmap.stream().forEach(docId -> 
putInt(_forwardIndexValueBuffer, docId, finalDictId));
         if (!isFixedWidth) {
-          lengthOfLongestEntry = trackLengthOfLongestEntry(dictionary, 
lengthOfLongestEntry, dictId);
+          lengthOfLongestEntry = Math.max(lengthOfLongestEntry, dictionary.getValueSize(dictId));
         }
       }
 
@@ -315,7 +313,7 @@ public class 
InvertedIndexAndDictionaryBasedForwardIndexCreator implements AutoC
         });
 
         if (!isFixedWidth) {
-          lengthOfLongestEntry = trackLengthOfLongestEntry(dictionary, 
lengthOfLongestEntry, dictId);
+          lengthOfLongestEntry = Math.max(lengthOfLongestEntry, dictionary.getValueSize(dictId));
         }
       }
 
@@ -387,49 +385,11 @@ public class 
InvertedIndexAndDictionaryBasedForwardIndexCreator implements AutoC
     }
   }
 
-  private int trackLengthOfLongestEntry(Dictionary dictionary, int 
lengthOfLongestEntry, int dictId) {
-    int updatedLengthOfLongestEntry;
-    switch (_storedType) {
-      case STRING:
-        updatedLengthOfLongestEntry = 
Math.max(Utf8.encodedLength(dictionary.getStringValue(dictId)),
-            lengthOfLongestEntry);
-        break;
-      case BYTES:
-        updatedLengthOfLongestEntry = 
Math.max(dictionary.getBytesValue(dictId).length, lengthOfLongestEntry);
-        break;
-      case BIG_DECIMAL:
-        updatedLengthOfLongestEntry = Math.max(
-            BigDecimalUtils.byteSize(dictionary.getBigDecimalValue(dictId)), 
lengthOfLongestEntry);
-        break;
-      default:
-        throw new IllegalStateException("Trying to calculate 
lengthOfLongestEntry for invalid stored type: "
-            + _storedType);
-    }
-    return updatedLengthOfLongestEntry;
-  }
-
   private void trackMaxRowLengthInBytes(Dictionary dictionary, int[] 
maxRowLengthInBytes, int docId, int dictId) {
     int curSizeOfRow = getInt(_forwardIndexMaxSizeBuffer, docId);
-    switch (_storedType) {
-      case STRING:
-        int newSizeOfEntry = 
Utf8.encodedLength(dictionary.getStringValue(dictId)) + curSizeOfRow;
-        putInt(_forwardIndexMaxSizeBuffer, docId, newSizeOfEntry);
-        maxRowLengthInBytes[0] = Math.max(newSizeOfEntry, 
maxRowLengthInBytes[0]);
-        break;
-      case BYTES:
-        newSizeOfEntry = dictionary.getBytesValue(dictId).length + 
curSizeOfRow;
-        putInt(_forwardIndexMaxSizeBuffer, docId, newSizeOfEntry);
-        maxRowLengthInBytes[0] = Math.max(newSizeOfEntry, 
maxRowLengthInBytes[0]);
-        break;
-      case BIG_DECIMAL:
-        newSizeOfEntry = 
BigDecimalUtils.byteSize(dictionary.getBigDecimalValue(dictId)) + curSizeOfRow;
-        putInt(_forwardIndexMaxSizeBuffer, docId, newSizeOfEntry);
-        maxRowLengthInBytes[0] = Math.max(newSizeOfEntry, 
maxRowLengthInBytes[0]);
-        break;
-      default:
-        throw new IllegalStateException("Trying to calculate 
maxRowLengthInBytes for invalid stored type: "
-            + _storedType);
-    }
+    int newSizeOfEntry = dictionary.getValueSize(dictId) + curSizeOfRow;
+    putInt(_forwardIndexMaxSizeBuffer, docId, newSizeOfEntry);
+    maxRowLengthInBytes[0] = Math.max(maxRowLengthInBytes[0], newSizeOfEntry);
   }
 
   private void writeToForwardIndex(Dictionary dictionary, IndexCreationContext 
context)
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BaseImmutableDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BaseImmutableDictionary.java
index 4ba8895beaa..32a2f7aa172 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BaseImmutableDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BaseImmutableDictionary.java
@@ -278,6 +278,14 @@ public abstract class BaseImmutableDictionary implements 
Dictionary {
     return _valueReader.getBytes(dictId, _numBytesPerValue);
   }
 
+  protected int getUnpaddedByteSize(int dictId) {
+    return _valueReader.getUnpaddedByteSize(dictId, _numBytesPerValue);
+  }
+
+  protected int getByteSize(int dictId) {
+    return _valueReader.getByteSize(dictId, _numBytesPerValue);
+  }
+
   public int get32BitsMurmur3Hash(int dictId, byte[] buffer) {
     return _valueReader.get32BitsMurmur3Hash(dictId, _numBytesPerValue, 
buffer);
   }
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BigDecimalDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BigDecimalDictionary.java
index 00efcc3b61e..7f0aca47938 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BigDecimalDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BigDecimalDictionary.java
@@ -98,6 +98,11 @@ public class BigDecimalDictionary extends 
BaseImmutableDictionary {
     return getBytes(dictId);
   }
 
+  @Override
+  public int getValueSize(int dictId) {
+    return getByteSize(dictId);
+  }
+
   @Override
   public void read32BitsMurmur3HashValues(int[] dictIds, int length, int[] 
outValues) {
     byte[] buffer = getBuffer();
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BytesDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BytesDictionary.java
index db3401d3af5..30467e113b6 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BytesDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BytesDictionary.java
@@ -105,6 +105,11 @@ public class BytesDictionary extends 
BaseImmutableDictionary {
     return getBytes(dictId);
   }
 
+  @Override
+  public int getValueSize(int dictId) {
+    return getByteSize(dictId);
+  }
+
   @Override
   public void read32BitsMurmur3HashValues(int[] dictIds, int length, int[] 
outValues) {
     byte[] buffer = getBuffer();
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBigDecimalDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBigDecimalDictionary.java
index 2e74bb5422a..cfa0bf337be 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBigDecimalDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBigDecimalDictionary.java
@@ -20,6 +20,7 @@ package org.apache.pinot.segment.local.segment.index.readers;
 
 import java.math.BigDecimal;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.utils.BigDecimalUtils;
 
 
 /**
@@ -27,10 +28,12 @@ import org.apache.pinot.spi.data.FieldSpec.DataType;
  */
 public class ConstantValueBigDecimalDictionary extends BaseImmutableDictionary 
{
   private final BigDecimal _value;
+  private final byte[] _bytes;
 
   public ConstantValueBigDecimalDictionary(BigDecimal value) {
     super(1);
     _value = value;
+    _bytes = BigDecimalUtils.serialize(_value);
   }
 
   @Override
@@ -110,4 +113,14 @@ public class ConstantValueBigDecimalDictionary extends 
BaseImmutableDictionary {
   public String getStringValue(int dictId) {
     return _value.toPlainString();
   }
+
+  @Override
+  public byte[] getBytesValue(int dictId) {
+    return _bytes;
+  }
+
+  @Override
+  public int getValueSize(int dictId) {
+    return _bytes.length;
+  }
 }
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBytesDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBytesDictionary.java
index f6fa9e261d3..d08d835c041 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBytesDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBytesDictionary.java
@@ -123,4 +123,9 @@ public class ConstantValueBytesDictionary extends 
BaseImmutableDictionary {
   public byte[] getBytesValue(int dictId) {
     return _value;
   }
+
+  @Override
+  public int getValueSize(int dictId) {
+    return _value.length;
+  }
 }
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueStringDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueStringDictionary.java
index 270a8905d2f..4fe32efd536 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueStringDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueStringDictionary.java
@@ -117,6 +117,11 @@ public class ConstantValueStringDictionary extends 
BaseImmutableDictionary {
     return _bytes;
   }
 
+  @Override
+  public int getValueSize(int dictId) {
+    return _bytes.length;
+  }
+
   @Override
   public void getDictIds(List<String> values, IntSet dictIds) {
     if (values.contains(_value)) {
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBigDecimalDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBigDecimalDictionary.java
index 4642254ff3c..edc458a8327 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBigDecimalDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBigDecimalDictionary.java
@@ -101,4 +101,9 @@ public class OnHeapBigDecimalDictionary extends 
BaseImmutableDictionary {
   public byte[] getBytesValue(int dictId) {
     return BigDecimalUtils.serialize(_dictIdToVal[dictId]);
   }
+
+  @Override
+  public int getValueSize(int dictId) {
+    return BigDecimalUtils.byteSize(_dictIdToVal[dictId]);
+  }
 }
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBytesDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBytesDictionary.java
index fe47b8360d9..9ce621c8c1a 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBytesDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBytesDictionary.java
@@ -124,4 +124,14 @@ public class OnHeapBytesDictionary extends 
BaseImmutableDictionary {
   public byte[] getBytesValue(int dictId) {
     return _dictIdToVal[dictId].getBytes();
   }
+
+  @Override
+  public ByteArray getByteArrayValue(int dictId) {
+    return _dictIdToVal[dictId];
+  }
+
+  @Override
+  public int getValueSize(int dictId) {
+    return _dictIdToVal[dictId].length();
+  }
 }
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapStringDictionary.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapStringDictionary.java
index 3a647112c55..0f6e52585e4 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapStringDictionary.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapStringDictionary.java
@@ -128,4 +128,9 @@ public class OnHeapStringDictionary extends BaseImmutableDictionary {
   public byte[] getBytesValue(int dictId) {
     return _unpaddedBytes[dictId];
   }
+
+  @Override
+  public int getValueSize(int dictId) {
+    return _unpaddedBytes[dictId].length;
+  }
 }
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/StringDictionary.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/StringDictionary.java
index 60f604660fb..ff1921a98b3 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/StringDictionary.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/StringDictionary.java
@@ -90,6 +90,11 @@ public class StringDictionary extends BaseImmutableDictionary {
     return getUnpaddedBytes(dictId, getBuffer());
   }
 
+  @Override
+  public int getValueSize(int dictId) {
+    return getUnpaddedByteSize(dictId);
+  }
+
   @Override
   public void readIntValues(int[] dictIds, int length, int[] outValues) {
     byte[] buffer = getBuffer();
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/virtualcolumn/PartitionIdVirtualColumnProvider.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/virtualcolumn/PartitionIdVirtualColumnProvider.java
index f44b91a2704..f8beac3baa9 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/virtualcolumn/PartitionIdVirtualColumnProvider.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/virtualcolumn/PartitionIdVirtualColumnProvider.java
@@ -18,7 +18,10 @@
  */
 package org.apache.pinot.segment.local.segment.virtualcolumn;
 
+import com.google.common.base.Utf8;
 import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
+import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -209,6 +212,16 @@ public class PartitionIdVirtualColumnProvider implements VirtualColumnProvider {
       return _values.get(dictId);
     }
 
+    @Override
+    public byte[] getBytesValue(int dictId) {
+      return _values.get(dictId).getBytes(StandardCharsets.UTF_8);
+    }
+
+    @Override
+    public int getValueSize(int dictId) {
+      return Utf8.encodedLength(_values.get(dictId));
+    }
+
     @Override
     public int getIntValue(int dictId) {
       throw new UnsupportedOperationException();
@@ -230,7 +243,7 @@ public class PartitionIdVirtualColumnProvider implements VirtualColumnProvider {
     }
 
     @Override
-    public java.math.BigDecimal getBigDecimalValue(int dictId) {
+    public BigDecimal getBigDecimalValue(int dictId) {
       throw new UnsupportedOperationException();
     }
   }
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/io/util/VarLengthValueReaderWriterTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/io/util/VarLengthValueReaderWriterTest.java
index a7629569046..301db4b130b 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/io/util/VarLengthValueReaderWriterTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/io/util/VarLengthValueReaderWriterTest.java
@@ -83,6 +83,8 @@ public class VarLengthValueReaderWriterTest implements PinotBuffersAfterMethodCh
         byte[] buffer = new byte[MAX_STRING_LENGTH];
         assertEquals(reader.getUnpaddedString(0, MAX_STRING_LENGTH, buffer), value);
         assertEquals(reader.getBytes(0, MAX_STRING_LENGTH), valueBytes);
+        assertEquals(reader.getByteSize(0, MAX_STRING_LENGTH), valueBytes.length);
+        assertEquals(reader.getUnpaddedByteSize(0, MAX_STRING_LENGTH), valueBytes.length);
       }
     }
   }
@@ -111,6 +113,8 @@ public class VarLengthValueReaderWriterTest implements PinotBuffersAfterMethodCh
         for (int i = 0; i < NUM_VALUES; i++) {
           assertEquals(reader.getUnpaddedString(i, MAX_STRING_LENGTH, buffer), values[i]);
           assertEquals(reader.getBytes(i, MAX_STRING_LENGTH), valueBytesArray[i]);
+          assertEquals(reader.getByteSize(i, MAX_STRING_LENGTH), valueBytesArray[i].length);
+          assertEquals(reader.getUnpaddedByteSize(i, MAX_STRING_LENGTH), valueBytesArray[i].length);
         }
       }
     }
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readers/ImmutableDictionaryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readers/ImmutableDictionaryTest.java
index 220028c36bc..c78f62c3aa2 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readers/ImmutableDictionaryTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readers/ImmutableDictionaryTest.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.segment.local.segment.index.readers;
 
+import com.google.common.base.Utf8;
 import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet;
 import it.unimi.dsi.fastutil.floats.FloatOpenHashSet;
 import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
@@ -205,6 +206,7 @@ public class ImmutableDictionaryTest implements PinotBuffersAfterMethodCheckRule
       assertEquals(intDictionary.getFloatValue(i), (float) _intValues[i]);
       assertEquals(intDictionary.getDoubleValue(i), (double) _intValues[i]);
       Assert.assertEquals(Integer.parseInt(intDictionary.getStringValue(i)), _intValues[i]);
+      assertEquals(intDictionary.getValueSize(i), Integer.BYTES);
 
       assertEquals(intDictionary.indexOf(String.valueOf(_intValues[i])), i);
 
@@ -242,6 +244,7 @@ public class ImmutableDictionaryTest implements PinotBuffersAfterMethodCheckRule
       assertEquals(longDictionary.getFloatValue(i), (float) _longValues[i]);
       assertEquals(longDictionary.getDoubleValue(i), (double) _longValues[i]);
       Assert.assertEquals(Long.parseLong(longDictionary.getStringValue(i)), _longValues[i]);
+      assertEquals(longDictionary.getValueSize(i), Long.BYTES);
 
       assertEquals(longDictionary.indexOf(String.valueOf(_longValues[i])), i);
 
@@ -279,6 +282,7 @@ public class ImmutableDictionaryTest implements PinotBuffersAfterMethodCheckRule
       assertEquals(floatDictionary.getFloatValue(i), _floatValues[i]);
       assertEquals(floatDictionary.getDoubleValue(i), (double) _floatValues[i]);
       Assert.assertEquals(Float.parseFloat(floatDictionary.getStringValue(i)), _floatValues[i], 0.0f);
+      assertEquals(floatDictionary.getValueSize(i), Float.BYTES);
 
       assertEquals(floatDictionary.indexOf(String.valueOf(_floatValues[i])), i);
 
@@ -316,6 +320,7 @@ public class ImmutableDictionaryTest implements PinotBuffersAfterMethodCheckRule
       assertEquals(doubleDictionary.getFloatValue(i), (float) _doubleValues[i]);
       assertEquals(doubleDictionary.getDoubleValue(i), _doubleValues[i]);
       Assert.assertEquals(Double.parseDouble(doubleDictionary.getStringValue(i)), _doubleValues[i], 0.0);
+      assertEquals(doubleDictionary.getValueSize(i), Double.BYTES);
 
       assertEquals(doubleDictionary.indexOf(String.valueOf(_doubleValues[i])), i);
 
@@ -356,6 +361,7 @@ public class ImmutableDictionaryTest implements PinotBuffersAfterMethodCheckRule
       assertEquals(bigDecimalDictionary.getDoubleValue(i), _bigDecimalValues[i].doubleValue());
       assertEquals(bigDecimalDictionary.getBigDecimalValue(i), _bigDecimalValues[i]);
       Assert.assertEquals(new BigDecimal(bigDecimalDictionary.getStringValue(i)), _bigDecimalValues[i]);
+      assertEquals(bigDecimalDictionary.getValueSize(i), BigDecimalUtils.byteSize(_bigDecimalValues[i]));
 
       assertEquals(bigDecimalDictionary.indexOf(String.valueOf(_bigDecimalValues[i])), i);
 
@@ -401,6 +407,7 @@ public class ImmutableDictionaryTest implements PinotBuffersAfterMethodCheckRule
     for (int i = 0; i < NUM_VALUES; i++) {
       assertEquals(stringDictionary.get(i), _stringValues[i]);
       assertEquals(stringDictionary.getStringValue(i), _stringValues[i]);
+      assertEquals(stringDictionary.getValueSize(i), Utf8.encodedLength(_stringValues[i]));
 
       assertEquals(stringDictionary.indexOf(_stringValues[i]), i);
 
@@ -448,6 +455,7 @@ public class ImmutableDictionaryTest implements PinotBuffersAfterMethodCheckRule
       assertEquals(bytesDictionary.get(i), _bytesValues[i].getBytes());
       assertEquals(bytesDictionary.getStringValue(i), _bytesValues[i].toHexString());
       assertEquals(bytesDictionary.getBytesValue(i), _bytesValues[i].getBytes());
+      assertEquals(bytesDictionary.getValueSize(i), BYTES_LENGTH);
 
       assertEquals(bytesDictionary.indexOf(_bytesValues[i].toHexString()), i);
 
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readerwriter/FixedByteValueReaderWriterTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readerwriter/FixedByteValueReaderWriterTest.java
index 5e47acf8bca..66c0804ebbe 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readerwriter/FixedByteValueReaderWriterTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readerwriter/FixedByteValueReaderWriterTest.java
@@ -75,6 +75,28 @@ public class FixedByteValueReaderWriterTest implements PinotBuffersAfterMethodCh
     }
   }
 
+  @Test(dataProvider = "params")
+  public void testGetValueSize(int maxStringLength, int configuredMaxLength, ByteOrder byteOrder)
+      throws IOException {
+    byte[] bytes = new byte[configuredMaxLength];
+    try (PinotDataBuffer buffer = PinotDataBuffer.allocateDirect(configuredMaxLength * 1000L, byteOrder,
+        "testGetValueSize")) {
+      FixedByteValueReaderWriter readerWriter = new FixedByteValueReaderWriter(buffer);
+      List<Integer> lengths = new ArrayList<>(1000);
+      for (int i = 0; i < 1000; i++) {
+        int length = ThreadLocalRandom.current().nextInt(maxStringLength);
+        Arrays.fill(bytes, 0, length, (byte) 'a');
+        readerWriter.writeBytes(i, configuredMaxLength, bytes);
+        lengths.add(length);
+        Arrays.fill(bytes, 0, length, (byte) 0);
+      }
+      for (int i = 0; i < 1000; i++) {
+        assertEquals(readerWriter.getByteSize(i, configuredMaxLength), configuredMaxLength);
+        assertEquals(readerWriter.getUnpaddedByteSize(i, configuredMaxLength), (int) lengths.get(i));
+      }
+    }
+  }
+
   @Test(dataProvider = "params")
   public void testFixedByteValueReaderWriterNonAscii(int maxStringLength, int configuredMaxLength, ByteOrder byteOrder)
       throws IOException {
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/Dictionary.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/Dictionary.java
index 0f61d3947e1..12a93d2df48 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/Dictionary.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/Dictionary.java
@@ -192,9 +192,8 @@ public interface Dictionary extends IndexReader {
 
   String getStringValue(int dictId);
 
-  /**
-   * NOTE: Should be overridden for STRING, BIG_DECIMAL and BYTES dictionary.
-   */
+  /// Returns the bytes representation of the value.
+  /// Should be overridden for variable sized types, i.e. BIG_DECIMAL, STRING, BYTES.
   default byte[] getBytesValue(int dictId) {
     throw new UnsupportedOperationException();
   }
@@ -203,6 +202,14 @@ public interface Dictionary extends IndexReader {
     return new ByteArray(getBytesValue(dictId));
   }
 
+  /// Returns the size of the value in bytes.
+  /// Should be overridden for variable sized types, i.e. BIG_DECIMAL, STRING, BYTES.
+  /// - For BIG_DECIMAL, returns the length of the serialized bytes
+  /// - For STRING, returns the length of the UTF_8 encoded bytes
+  default int getValueSize(int dictId) {
+    return getValueType().size();
+  }
+
   default int get32BitsMurmur3HashValue(int dictId) {
     return MurmurHashFunctions.murmurHash3X64Bit32(getBytesValue(dictId), 0);
   }
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/DictionaryToRawIndexConverter.java b/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/DictionaryToRawIndexConverter.java
index 35b87faa75d..c93b141f8d2 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/DictionaryToRawIndexConverter.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/DictionaryToRawIndexConverter.java
@@ -19,7 +19,6 @@
 package org.apache.pinot.tools.segment.converter;
 
 import com.google.common.base.Preconditions;
-import com.google.common.base.Utf8;
 import java.io.File;
 import java.io.IOException;
 import java.lang.reflect.Field;
@@ -316,7 +315,7 @@ public class DictionaryToRawIndexConverter {
     ChunkCompressionType compressionType = ChunkCompressionType.valueOf(_compressionType);
     DataType storedType = dictionary.getValueType();
     int numDocs = segment.getSegmentMetadata().getTotalDocs();
-    int lengthOfLongestEntry = (storedType == DataType.STRING) ? getLengthOfLongestEntry(dictionary) : -1;
+    int lengthOfLongestEntry = !storedType.isFixedWidth() ? getLengthOfLongestEntry(dictionary) : -1;
 
     try (ForwardIndexCreator rawIndexCreator = ForwardIndexCreatorFactory.getRawIndexCreatorForSVColumn(newSegment,
         compressionType, column, storedType, numDocs, lengthOfLongestEntry, false,
@@ -386,13 +385,10 @@ public class DictionaryToRawIndexConverter {
    */
   private int getLengthOfLongestEntry(Dictionary dictionary) {
     int lengthOfLongestEntry = 0;
-
     int length = dictionary.length();
     for (int dictId = 0; dictId < length; dictId++) {
-      String value = (String) dictionary.get(dictId);
-      lengthOfLongestEntry = Math.max(lengthOfLongestEntry, Utf8.encodedLength(value));
+      lengthOfLongestEntry = Math.max(lengthOfLongestEntry, dictionary.getValueSize(dictId));
     }
-
     return lengthOfLongestEntry;
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to