Jackie-Jiang commented on code in PR #10891:
URL: https://github.com/apache/pinot/pull/10891#discussion_r1245946338


##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -83,7 +83,7 @@ public class SegmentColumnarIndexCreator implements 
SegmentCreator {
   // TODO Refactor class name to match interface name
   private static final Logger LOGGER = 
LoggerFactory.getLogger(SegmentColumnarIndexCreator.class);
   // Allow at most 512 characters for the metadata property
-  private static final int METADATA_PROPERTY_LENGTH_LIMIT = 512;
+  static final int METADATA_PROPERTY_LENGTH_LIMIT = 512;

Review Comment:
   Let's revert this change and we can discuss it under #10990 



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java:
##########
@@ -119,65 +123,153 @@ private List<String> getColumnsToAddMinMaxValue() {
 
   private boolean needAddColumnMinMaxValueForColumn(String columnName) {
     ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
-    return columnMetadata.hasDictionary() && columnMetadata.getMinValue() == 
null
+    return columnMetadata.getMinValue() == null

Review Comment:
   (nit) reformat



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java:
##########
@@ -119,65 +123,153 @@ private List<String> getColumnsToAddMinMaxValue() {
 
   private boolean needAddColumnMinMaxValueForColumn(String columnName) {
     ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
-    return columnMetadata.hasDictionary() && columnMetadata.getMinValue() == 
null
+    return columnMetadata.getMinValue() == null
         && columnMetadata.getMaxValue() == null && 
!columnMetadata.isMinMaxValueInvalid();
   }
 
   private void addColumnMinMaxValueForColumn(String columnName)
       throws Exception {
     // Skip column without dictionary or with min/max value already set
     ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
-    if (!columnMetadata.hasDictionary() || columnMetadata.getMinValue() != null
-        || columnMetadata.getMaxValue() != null) {
+    if (columnMetadata.getMinValue() != null || columnMetadata.getMaxValue() 
!= null) {
       return;
     }
 
-    PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, 
StandardIndexes.dictionary());
     DataType dataType = columnMetadata.getDataType().getStoredType();
-    int length = columnMetadata.getCardinality();
-    switch (dataType) {
-      case INT:
-        try (IntDictionary intDictionary = new IntDictionary(dictionaryBuffer, 
length)) {
-          
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-              intDictionary.getStringValue(0), 
intDictionary.getStringValue(length - 1));
-        }
-        break;
-      case LONG:
-        try (LongDictionary longDictionary = new 
LongDictionary(dictionaryBuffer, length)) {
-          
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-              longDictionary.getStringValue(0), 
longDictionary.getStringValue(length - 1));
-        }
-        break;
-      case FLOAT:
-        try (FloatDictionary floatDictionary = new 
FloatDictionary(dictionaryBuffer, length)) {
-          
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-              floatDictionary.getStringValue(0), 
floatDictionary.getStringValue(length - 1));
-        }
-        break;
-      case DOUBLE:
-        try (DoubleDictionary doubleDictionary = new 
DoubleDictionary(dictionaryBuffer, length)) {
-          
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-              doubleDictionary.getStringValue(0), 
doubleDictionary.getStringValue(length - 1));
-        }
-        break;
-      case STRING:
-        try (StringDictionary stringDictionary = new 
StringDictionary(dictionaryBuffer, length,
-            columnMetadata.getColumnMaxLength())) {
-          
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-              stringDictionary.getStringValue(0), 
stringDictionary.getStringValue(length - 1));
-        }
-        break;
-      case BYTES:
-        try (BytesDictionary bytesDictionary = new 
BytesDictionary(dictionaryBuffer, length,
-            columnMetadata.getColumnMaxLength())) {
-          
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-              bytesDictionary.getStringValue(0), 
bytesDictionary.getStringValue(length - 1));
-        }
-        break;
-      default:
-        throw new IllegalStateException("Unsupported data type: " + dataType + 
" for column: " + columnName);
+    if (columnMetadata.hasDictionary()) {
+      PinotDataBuffer dictionaryBuffer = 
_segmentWriter.getIndexFor(columnName, StandardIndexes.dictionary());
+      int length = columnMetadata.getCardinality();
+      switch (dataType) {
+        case INT:
+          try (IntDictionary intDictionary = new 
IntDictionary(dictionaryBuffer, length)) {
+            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
+                intDictionary.getStringValue(0), 
intDictionary.getStringValue(length - 1));
+          }
+          break;
+        case LONG:
+          try (LongDictionary longDictionary = new 
LongDictionary(dictionaryBuffer, length)) {
+            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
+                longDictionary.getStringValue(0), 
longDictionary.getStringValue(length - 1));
+          }
+          break;
+        case FLOAT:
+          try (FloatDictionary floatDictionary = new 
FloatDictionary(dictionaryBuffer, length)) {
+            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
+                floatDictionary.getStringValue(0), 
floatDictionary.getStringValue(length - 1));
+          }
+          break;
+        case DOUBLE:
+          try (DoubleDictionary doubleDictionary = new 
DoubleDictionary(dictionaryBuffer, length)) {
+            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
+                doubleDictionary.getStringValue(0), 
doubleDictionary.getStringValue(length - 1));
+          }
+          break;
+        case STRING:
+          try (StringDictionary stringDictionary = new 
StringDictionary(dictionaryBuffer, length,

Review Comment:
   (code format) Does this follow [Pinot 
Style](https://docs.pinot.apache.org/developers/developers-and-contributors/code-setup#setup-ide)?



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java:
##########
@@ -119,65 +123,153 @@ private List<String> getColumnsToAddMinMaxValue() {
 
   private boolean needAddColumnMinMaxValueForColumn(String columnName) {
     ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
-    return columnMetadata.hasDictionary() && columnMetadata.getMinValue() == 
null
+    return columnMetadata.getMinValue() == null
         && columnMetadata.getMaxValue() == null && 
!columnMetadata.isMinMaxValueInvalid();
   }
 
   private void addColumnMinMaxValueForColumn(String columnName)
       throws Exception {
     // Skip column without dictionary or with min/max value already set
     ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
-    if (!columnMetadata.hasDictionary() || columnMetadata.getMinValue() != null
-        || columnMetadata.getMaxValue() != null) {
+    if (columnMetadata.getMinValue() != null || columnMetadata.getMaxValue() 
!= null) {
       return;
     }
 
-    PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, 
StandardIndexes.dictionary());
     DataType dataType = columnMetadata.getDataType().getStoredType();
-    int length = columnMetadata.getCardinality();
-    switch (dataType) {
-      case INT:
-        try (IntDictionary intDictionary = new IntDictionary(dictionaryBuffer, 
length)) {
-          
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-              intDictionary.getStringValue(0), 
intDictionary.getStringValue(length - 1));
-        }
-        break;
-      case LONG:
-        try (LongDictionary longDictionary = new 
LongDictionary(dictionaryBuffer, length)) {
-          
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-              longDictionary.getStringValue(0), 
longDictionary.getStringValue(length - 1));
-        }
-        break;
-      case FLOAT:
-        try (FloatDictionary floatDictionary = new 
FloatDictionary(dictionaryBuffer, length)) {
-          
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-              floatDictionary.getStringValue(0), 
floatDictionary.getStringValue(length - 1));
-        }
-        break;
-      case DOUBLE:
-        try (DoubleDictionary doubleDictionary = new 
DoubleDictionary(dictionaryBuffer, length)) {
-          
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-              doubleDictionary.getStringValue(0), 
doubleDictionary.getStringValue(length - 1));
-        }
-        break;
-      case STRING:
-        try (StringDictionary stringDictionary = new 
StringDictionary(dictionaryBuffer, length,
-            columnMetadata.getColumnMaxLength())) {
-          
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-              stringDictionary.getStringValue(0), 
stringDictionary.getStringValue(length - 1));
-        }
-        break;
-      case BYTES:
-        try (BytesDictionary bytesDictionary = new 
BytesDictionary(dictionaryBuffer, length,
-            columnMetadata.getColumnMaxLength())) {
-          
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-              bytesDictionary.getStringValue(0), 
bytesDictionary.getStringValue(length - 1));
-        }
-        break;
-      default:
-        throw new IllegalStateException("Unsupported data type: " + dataType + 
" for column: " + columnName);
+    if (columnMetadata.hasDictionary()) {
+      PinotDataBuffer dictionaryBuffer = 
_segmentWriter.getIndexFor(columnName, StandardIndexes.dictionary());
+      int length = columnMetadata.getCardinality();
+      switch (dataType) {
+        case INT:
+          try (IntDictionary intDictionary = new 
IntDictionary(dictionaryBuffer, length)) {
+            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
+                intDictionary.getStringValue(0), 
intDictionary.getStringValue(length - 1));
+          }
+          break;
+        case LONG:
+          try (LongDictionary longDictionary = new 
LongDictionary(dictionaryBuffer, length)) {
+            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
+                longDictionary.getStringValue(0), 
longDictionary.getStringValue(length - 1));
+          }
+          break;
+        case FLOAT:
+          try (FloatDictionary floatDictionary = new 
FloatDictionary(dictionaryBuffer, length)) {
+            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
+                floatDictionary.getStringValue(0), 
floatDictionary.getStringValue(length - 1));
+          }
+          break;
+        case DOUBLE:
+          try (DoubleDictionary doubleDictionary = new 
DoubleDictionary(dictionaryBuffer, length)) {
+            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
+                doubleDictionary.getStringValue(0), 
doubleDictionary.getStringValue(length - 1));
+          }
+          break;
+        case STRING:
+          try (StringDictionary stringDictionary = new 
StringDictionary(dictionaryBuffer, length,
+              columnMetadata.getColumnMaxLength())) {
+            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
+                stringDictionary.getStringValue(0), 
stringDictionary.getStringValue(length - 1));
+          }
+          break;
+        case BYTES:
+          try (BytesDictionary bytesDictionary = new 
BytesDictionary(dictionaryBuffer, length,
+              columnMetadata.getColumnMaxLength())) {
+            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
+                bytesDictionary.getStringValue(0), 
bytesDictionary.getStringValue(length - 1));
+          }
+          break;
+        default:
+          throw new IllegalStateException("Unsupported data type: " + dataType 
+ " for column: " + columnName);
+      }
+    } else if 
(_segmentMetadata.getSchema().getFieldSpecFor(columnName).isSingleValueField()) 
{
+      // setting min/max for non-dictionary columns.
+      PinotDataBuffer forwardBuffer = _segmentWriter.getIndexFor(columnName, 
StandardIndexes.forward());
+      switch (dataType) {
+        case INT:
+          try (FixedByteChunkSVForwardIndexReader rawIndexReader = new 
FixedByteChunkSVForwardIndexReader(forwardBuffer,
+              DataType.INT); ChunkReaderContext readerContext = 
rawIndexReader.createContext()) {
+            Integer[] minMaxValue = {Integer.MAX_VALUE, Integer.MIN_VALUE};
+            for (int docs = 0; docs < columnMetadata.getTotalDocs(); docs++) {
+              minMaxValue = getMinMaxValue(minMaxValue, 
rawIndexReader.getInt(docs, readerContext));
+            }

Review Comment:
   Suggest directly comparing the primitive value to avoid the boxing/unboxing. 
Boxing/unboxing has quite expensive overhead. Same for other types
   ```suggestion
               int min = Integer.MAX_VALUE;
               int max = Integer.MIN_VALUE;
               int numDocs = columnMetadata.getTotalDocs();
               for (int i = 0; i < numDocs; i++) {
                 int value = rawIndexReader.getInt(docs, readerContext);
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to