This is an automated email from the ASF dual-hosted git repository.
gortiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 6323ee9683 Honor the column max length property while populating
min/max values for column metadata (#13897)
6323ee9683 is described below
commit 6323ee9683563a17a070d26da37384898feff898
Author: RAGHVENDRA KUMAR YADAV <[email protected]>
AuthorDate: Wed Aug 28 00:16:09 2024 -0700
Honor the column max length property while populating min/max values for
column metadata (#13897)
If minString for a column is longer than default max limit 512 then
minValue was set to null. This causes the segment to be preProcessed every time
segment is reloaded.
---
.../creator/SegmentGenerationWithMinMaxTest.java | 44 ++++++++++++++++
.../index/loader/SegmentPreProcessorTest.java | 35 +++++++++++++
.../spi/index/metadata/ColumnMetadataImpl.java | 61 ++++++++++------------
3 files changed, 107 insertions(+), 33 deletions(-)
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/SegmentGenerationWithMinMaxTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/SegmentGenerationWithMinMaxTest.java
index be2d64951b..1d6900a0c0 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/SegmentGenerationWithMinMaxTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/SegmentGenerationWithMinMaxTest.java
@@ -28,6 +28,7 @@ import
org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.readers.GenericRow;
@@ -42,6 +43,7 @@ import org.testng.annotations.Test;
*/
public class SegmentGenerationWithMinMaxTest {
private static final String STRING_COLUMN = "col1";
+
private static final String[] STRING_VALUES_WITH_COMMA_CHARACTER = {"A,,",
",B,", "C,Z,", "D,", "E,"};
private static final String[] STRING_VALUES_WITH_WHITESPACE_CHARACTERS = {"A
", " B ", " Z ", " \r D", "E"};
private static final String[] STRING_VALUES_VALID = {"A", "B", "C", "D",
"E"};
@@ -63,6 +65,39 @@ public class SegmentGenerationWithMinMaxTest {
.addMetric(LONG_COLUMN, FieldSpec.DataType.LONG).build();
}
+ @Test
+ public void testMinMaxlength() throws Exception {
+ //Test1: Min String length is greater than default 512
+ DimensionFieldSpec d = new DimensionFieldSpec(STRING_COLUMN,
FieldSpec.DataType.STRING, true, 15000, "null");
+ _schema = new Schema.SchemaBuilder().addField(d)
+ .addMetric(LONG_COLUMN, FieldSpec.DataType.LONG).build();
+
+ FileUtils.deleteQuietly(new File(SEGMENT_DIR_NAME));
+ //minLong stRING
+ String longString = generateLongString(15000, true);
+ String[] longValues = {"{}", "dd", "ff", "ee", longString};
+ File segmentDir = buildSegment(_tableConfig, _schema, longValues);
+ SegmentMetadataImpl metadata = new SegmentMetadataImpl(segmentDir);
+ Assert.assertEquals(metadata.getTotalDocs(), 5);
+
Assert.assertFalse(metadata.getColumnMetadataFor("col1").isMinMaxValueInvalid());
+ Assert.assertNull(metadata.getColumnMetadataFor("col1").getMinValue());
+ Assert.assertEquals(metadata.getColumnMetadataFor("col1").getMaxValue(),
"{}");
+
+ FileUtils.deleteQuietly(new File(SEGMENT_DIR_NAME));
+
+ //maxLong String
+ longString = generateLongString(15000, false);
+ longValues = new String[]{"aa", "dd", "ff", "ee", longString};
+ segmentDir = buildSegment(_tableConfig, _schema, longValues);
+ metadata = new SegmentMetadataImpl(segmentDir);
+ Assert.assertEquals(metadata.getTotalDocs(), 5);
+
Assert.assertFalse(metadata.getColumnMetadataFor("col1").isMinMaxValueInvalid());
+ Assert.assertEquals(metadata.getColumnMetadataFor("col1").getMinValue(),
"aa");
+ Assert.assertNull(metadata.getColumnMetadataFor("col1").getMaxValue());
+
+ FileUtils.deleteQuietly(new File(SEGMENT_DIR_NAME));
+ }
+
@Test
public void testMinMaxInMetadata()
throws Exception {
@@ -111,4 +146,13 @@ public class SegmentGenerationWithMinMaxTest {
driver.getOutputDirectory().deleteOnExit();
return driver.getOutputDirectory();
}
+
+ private String generateLongString(int length, boolean isMin) {
+ char repeatingChar = isMin ? 'a' : 'z';
+ StringBuilder sb = new StringBuilder(length);
+ for (int i = 0; i < length; i++) {
+ sb.append(repeatingChar);
+ }
+ return sb.toString();
+ }
}
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
index 6e46127ff3..a02efd77d4 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
@@ -3008,4 +3008,39 @@ public class SegmentPreProcessorTest {
_indexLoadingConfig.removeNoDictionaryColumns(EXISTING_FORWARD_INDEX_DISABLED_COL_SV,
EXISTING_FORWARD_INDEX_DISABLED_COL_MV, EXISTING_STRING_COL_DICT);
}
+
+ @Test
+ public void testNeedAddMinMaxLength()
+ throws Exception {
+
+ String longString = generateLongString(15000, true);
+ String[] stringValuesValid = {"B", "C", "D", "E", longString};
+ long[] longValues = {1588316400000L, 1588489200000L, 1588662000000L,
1588834800000L, 1589007600000L};
+ TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").build();
+ Schema schema = new
Schema.SchemaBuilder().addSingleValueDimension("stringCol",
FieldSpec.DataType.STRING)
+ .addMetric("longCol", FieldSpec.DataType.LONG).build();
+
+ FileUtils.deleteQuietly(INDEX_DIR);
+
+ // build good segment, no needPreprocess
+ File segment = buildTestSegment(tableConfig, schema, "validSegment",
stringValuesValid, longValues);
+ SegmentDirectory segmentDirectory =
SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader()
+ .load(segment.toURI(),
+ new
SegmentDirectoryLoaderContext.Builder().setSegmentDirectoryConfigs(_configuration).build());
+ IndexLoadingConfig indexLoadingConfig = getDefaultIndexLoadingConfig();
+
indexLoadingConfig.setColumnMinMaxValueGeneratorMode(ColumnMinMaxValueGeneratorMode.ALL);
+ SegmentPreProcessor processor = new SegmentPreProcessor(segmentDirectory,
indexLoadingConfig, schema);
+ assertFalse(processor.needProcess());
+
+ FileUtils.deleteQuietly(INDEX_DIR);
+ }
+
+ private String generateLongString(int length, boolean isMin) {
+ char repeatingChar = isMin ? 'a' : 'z';
+ StringBuilder sb = new StringBuilder(length);
+ for (int i = 0; i < length; i++) {
+ sb.append(repeatingChar);
+ }
+ return sb.toString();
+ }
}
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
index c599ae707e..fdd1f5c290 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
@@ -264,39 +264,13 @@ public class ColumnMetadataImpl implements ColumnMetadata
{
// TODO: Use getProperty() for other properties as well to avoid the
overhead of variable substitution
String minString = (String) config.getProperty(Column.getKeyFor(column,
Column.MIN_VALUE));
String maxString = (String) config.getProperty(Column.getKeyFor(column,
Column.MAX_VALUE));
- if (minString != null && maxString != null) {
- switch (storedType) {
- case INT:
- builder.setMinValue(Integer.valueOf(minString));
- builder.setMaxValue(Integer.valueOf(maxString));
- break;
- case LONG:
- builder.setMinValue(Long.valueOf(minString));
- builder.setMaxValue(Long.valueOf(maxString));
- break;
- case FLOAT:
- builder.setMinValue(Float.valueOf(minString));
- builder.setMaxValue(Float.valueOf(maxString));
- break;
- case DOUBLE:
- builder.setMinValue(Double.valueOf(minString));
- builder.setMaxValue(Double.valueOf(maxString));
- break;
- case BIG_DECIMAL:
- builder.setMinValue(new BigDecimal(minString));
- builder.setMaxValue(new BigDecimal(maxString));
- break;
- case STRING:
-
builder.setMinValue(CommonsConfigurationUtils.recoverSpecialCharacterInPropertyValue(minString));
-
builder.setMaxValue(CommonsConfigurationUtils.recoverSpecialCharacterInPropertyValue(maxString));
- break;
- case BYTES:
- builder.setMinValue(BytesUtils.toByteArray(minString));
- builder.setMaxValue(BytesUtils.toByteArray(maxString));
- break;
- default:
- throw new IllegalStateException("Unsupported data type: " + dataType
+ " for column: " + column);
- }
+ // Set min/max value if available
+ if (minString != null) {
+ builder.setMinValue(builder.parseValue(storedType, column, minString));
+ }
+
+ if (maxString != null) {
+ builder.setMaxValue(builder.parseValue(storedType, column, maxString));
}
builder.setMinMaxValueInvalid(config.getBoolean(Column.getKeyFor(column,
Column.MIN_MAX_VALUE_INVALID), false));
@@ -443,5 +417,26 @@ public class ColumnMetadataImpl implements ColumnMetadata {
_minMaxValueInvalid, _hasDictionary, _columnMaxLength,
_bitsPerElement, _maxNumberOfMultiValues,
_totalNumberOfEntries, _partitionFunction, _partitions,
_indexSizeMap, _autoGenerated);
}
+
+ private Comparable<?> parseValue(DataType storedType, String column,
String valueString) {
+ switch (storedType) {
+ case INT:
+ return Integer.valueOf(valueString);
+ case LONG:
+ return Long.valueOf(valueString);
+ case FLOAT:
+ return Float.valueOf(valueString);
+ case DOUBLE:
+ return Double.valueOf(valueString);
+ case BIG_DECIMAL:
+ return new BigDecimal(valueString);
+ case STRING:
+ return
CommonsConfigurationUtils.recoverSpecialCharacterInPropertyValue(valueString);
+ case BYTES:
+ return BytesUtils.toByteArray(valueString);
+ default:
+ throw new IllegalStateException("Unsupported data type: " +
storedType + " for column: " + column);
+ }
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]