This is an automated email from the ASF dual-hosted git repository.

gortiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 6323ee9683 Honor the column max length property while populating 
min/max values for column metadata (#13897)
6323ee9683 is described below

commit 6323ee9683563a17a070d26da37384898feff898
Author: RAGHVENDRA KUMAR YADAV <[email protected]>
AuthorDate: Wed Aug 28 00:16:09 2024 -0700

    Honor the column max length property while populating min/max values for 
column metadata (#13897)
    
    If minString for a column is longer than default max limit 512 then 
minValue was set to null. This causes the segment to be preProcessed every time 
segment is reloaded.
---
 .../creator/SegmentGenerationWithMinMaxTest.java   | 44 ++++++++++++++++
 .../index/loader/SegmentPreProcessorTest.java      | 35 +++++++++++++
 .../spi/index/metadata/ColumnMetadataImpl.java     | 61 ++++++++++------------
 3 files changed, 107 insertions(+), 33 deletions(-)

diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/SegmentGenerationWithMinMaxTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/SegmentGenerationWithMinMaxTest.java
index be2d64951b..1d6900a0c0 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/SegmentGenerationWithMinMaxTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/SegmentGenerationWithMinMaxTest.java
@@ -28,6 +28,7 @@ import 
org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
 import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
 import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.data.DimensionFieldSpec;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.readers.GenericRow;
@@ -42,6 +43,7 @@ import org.testng.annotations.Test;
  */
 public class SegmentGenerationWithMinMaxTest {
   private static final String STRING_COLUMN = "col1";
+
   private static final String[] STRING_VALUES_WITH_COMMA_CHARACTER = {"A,,", 
",B,", "C,Z,", "D,", "E,"};
   private static final String[] STRING_VALUES_WITH_WHITESPACE_CHARACTERS = {"A 
", " B ", "  Z ", "  \r D", "E"};
   private static final String[] STRING_VALUES_VALID = {"A", "B", "C", "D", 
"E"};
@@ -63,6 +65,39 @@ public class SegmentGenerationWithMinMaxTest {
         .addMetric(LONG_COLUMN, FieldSpec.DataType.LONG).build();
   }
 
+  @Test
+  public void testMinMaxlength() throws Exception {
+    //Test1: Min String length is greater than default 512
+    DimensionFieldSpec d = new DimensionFieldSpec(STRING_COLUMN, 
FieldSpec.DataType.STRING, true, 15000, "null");
+    _schema = new Schema.SchemaBuilder().addField(d)
+        .addMetric(LONG_COLUMN, FieldSpec.DataType.LONG).build();
+
+    FileUtils.deleteQuietly(new File(SEGMENT_DIR_NAME));
+    //minLong stRING
+    String longString = generateLongString(15000, true);
+    String[] longValues = {"{}", "dd", "ff", "ee", longString};
+    File segmentDir = buildSegment(_tableConfig, _schema, longValues);
+    SegmentMetadataImpl metadata = new SegmentMetadataImpl(segmentDir);
+    Assert.assertEquals(metadata.getTotalDocs(), 5);
+    
Assert.assertFalse(metadata.getColumnMetadataFor("col1").isMinMaxValueInvalid());
+    Assert.assertNull(metadata.getColumnMetadataFor("col1").getMinValue());
+    Assert.assertEquals(metadata.getColumnMetadataFor("col1").getMaxValue(), 
"{}");
+
+    FileUtils.deleteQuietly(new File(SEGMENT_DIR_NAME));
+
+    //maxLong String
+    longString = generateLongString(15000, false);
+    longValues = new String[]{"aa", "dd", "ff", "ee", longString};
+    segmentDir = buildSegment(_tableConfig, _schema, longValues);
+    metadata = new SegmentMetadataImpl(segmentDir);
+    Assert.assertEquals(metadata.getTotalDocs(), 5);
+    
Assert.assertFalse(metadata.getColumnMetadataFor("col1").isMinMaxValueInvalid());
+    Assert.assertEquals(metadata.getColumnMetadataFor("col1").getMinValue(), 
"aa");
+    Assert.assertNull(metadata.getColumnMetadataFor("col1").getMaxValue());
+
+    FileUtils.deleteQuietly(new File(SEGMENT_DIR_NAME));
+  }
+
   @Test
   public void testMinMaxInMetadata()
       throws Exception {
@@ -111,4 +146,13 @@ public class SegmentGenerationWithMinMaxTest {
     driver.getOutputDirectory().deleteOnExit();
     return driver.getOutputDirectory();
   }
+
+  private String generateLongString(int length, boolean isMin) {
+    char repeatingChar = isMin ? 'a' : 'z';
+    StringBuilder sb = new StringBuilder(length);
+    for (int i = 0; i < length; i++) {
+      sb.append(repeatingChar);
+    }
+    return sb.toString();
+  }
 }
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
index 6e46127ff3..a02efd77d4 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
@@ -3008,4 +3008,39 @@ public class SegmentPreProcessorTest {
     
_indexLoadingConfig.removeNoDictionaryColumns(EXISTING_FORWARD_INDEX_DISABLED_COL_SV,
         EXISTING_FORWARD_INDEX_DISABLED_COL_MV, EXISTING_STRING_COL_DICT);
   }
+
+  @Test
+  public void testNeedAddMinMaxLength()
+      throws Exception {
+
+    String longString = generateLongString(15000, true);
+    String[] stringValuesValid = {"B", "C", "D", "E", longString};
+    long[] longValues = {1588316400000L, 1588489200000L, 1588662000000L, 
1588834800000L, 1589007600000L};
+    TableConfig tableConfig = new 
TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").build();
+    Schema schema = new 
Schema.SchemaBuilder().addSingleValueDimension("stringCol", 
FieldSpec.DataType.STRING)
+        .addMetric("longCol", FieldSpec.DataType.LONG).build();
+
+    FileUtils.deleteQuietly(INDEX_DIR);
+
+    // build good segment, no needPreprocess
+    File segment = buildTestSegment(tableConfig, schema, "validSegment", 
stringValuesValid, longValues);
+    SegmentDirectory segmentDirectory = 
SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader()
+        .load(segment.toURI(),
+            new 
SegmentDirectoryLoaderContext.Builder().setSegmentDirectoryConfigs(_configuration).build());
+    IndexLoadingConfig indexLoadingConfig = getDefaultIndexLoadingConfig();
+    
indexLoadingConfig.setColumnMinMaxValueGeneratorMode(ColumnMinMaxValueGeneratorMode.ALL);
+    SegmentPreProcessor processor = new SegmentPreProcessor(segmentDirectory, 
indexLoadingConfig, schema);
+    assertFalse(processor.needProcess());
+
+    FileUtils.deleteQuietly(INDEX_DIR);
+  }
+
+  private String generateLongString(int length, boolean isMin) {
+    char repeatingChar = isMin ? 'a' : 'z';
+    StringBuilder sb = new StringBuilder(length);
+    for (int i = 0; i < length; i++) {
+      sb.append(repeatingChar);
+    }
+    return sb.toString();
+  }
 }
diff --git 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
index c599ae707e..fdd1f5c290 100644
--- 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
+++ 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
@@ -264,39 +264,13 @@ public class ColumnMetadataImpl implements ColumnMetadata 
{
     // TODO: Use getProperty() for other properties as well to avoid the 
overhead of variable substitution
     String minString = (String) config.getProperty(Column.getKeyFor(column, 
Column.MIN_VALUE));
     String maxString = (String) config.getProperty(Column.getKeyFor(column, 
Column.MAX_VALUE));
-    if (minString != null && maxString != null) {
-      switch (storedType) {
-        case INT:
-          builder.setMinValue(Integer.valueOf(minString));
-          builder.setMaxValue(Integer.valueOf(maxString));
-          break;
-        case LONG:
-          builder.setMinValue(Long.valueOf(minString));
-          builder.setMaxValue(Long.valueOf(maxString));
-          break;
-        case FLOAT:
-          builder.setMinValue(Float.valueOf(minString));
-          builder.setMaxValue(Float.valueOf(maxString));
-          break;
-        case DOUBLE:
-          builder.setMinValue(Double.valueOf(minString));
-          builder.setMaxValue(Double.valueOf(maxString));
-          break;
-        case BIG_DECIMAL:
-          builder.setMinValue(new BigDecimal(minString));
-          builder.setMaxValue(new BigDecimal(maxString));
-          break;
-        case STRING:
-          
builder.setMinValue(CommonsConfigurationUtils.recoverSpecialCharacterInPropertyValue(minString));
-          
builder.setMaxValue(CommonsConfigurationUtils.recoverSpecialCharacterInPropertyValue(maxString));
-          break;
-        case BYTES:
-          builder.setMinValue(BytesUtils.toByteArray(minString));
-          builder.setMaxValue(BytesUtils.toByteArray(maxString));
-          break;
-        default:
-          throw new IllegalStateException("Unsupported data type: " + dataType 
+ " for column: " + column);
-      }
+    // Set min/max value if available
+    if (minString != null) {
+      builder.setMinValue(builder.parseValue(storedType, column, minString));
+    }
+
+    if (maxString != null) {
+      builder.setMaxValue(builder.parseValue(storedType, column, maxString));
     }
     builder.setMinMaxValueInvalid(config.getBoolean(Column.getKeyFor(column, 
Column.MIN_MAX_VALUE_INVALID), false));
 
@@ -443,5 +417,26 @@ public class ColumnMetadataImpl implements ColumnMetadata {
           _minMaxValueInvalid, _hasDictionary, _columnMaxLength, 
_bitsPerElement, _maxNumberOfMultiValues,
           _totalNumberOfEntries, _partitionFunction, _partitions, 
_indexSizeMap, _autoGenerated);
     }
+
+    private Comparable<?> parseValue(DataType storedType, String column, 
String valueString) {
+      switch (storedType) {
+        case INT:
+          return Integer.valueOf(valueString);
+        case LONG:
+          return Long.valueOf(valueString);
+        case FLOAT:
+          return Float.valueOf(valueString);
+        case DOUBLE:
+          return Double.valueOf(valueString);
+        case BIG_DECIMAL:
+          return new BigDecimal(valueString);
+        case STRING:
+          return 
CommonsConfigurationUtils.recoverSpecialCharacterInPropertyValue(valueString);
+        case BYTES:
+          return BytesUtils.toByteArray(valueString);
+        default:
+          throw new IllegalStateException("Unsupported data type: " + 
storedType + " for column: " + column);
+      }
+    }
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to