This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git.
The following commit(s) were added to refs/heads/master by this push: new 4442d4199e Fix range index on raw TIMESTAMP column (#15589) 4442d4199e is described below commit 4442d4199e843451896ca31c7878aca2596bd2f7 Author: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com> AuthorDate: Fri Apr 18 18:28:38 2025 -0600 Fix range index on raw TIMESTAMP column (#15589) --- .../pinot/core/data/manager/TableIndexingTest.java | 19 ++++------ .../src/test/resources/TableIndexingTest.csv | 42 ++++++++++++++++------ .../impl/inv/BitSlicedRangeIndexCreator.java | 15 ++++---- .../creator/impl/inv/RangeIndexCreator.java | 2 +- .../loader/invertedindex/RangeIndexHandler.java | 4 +-- .../local/segment/index/range/RangeIndexType.java | 13 +++---- .../creator/CombinedInvertedIndexCreator.java | 27 +++++++++----- 7 files changed, 74 insertions(+), 48 deletions(-) diff --git a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java index 6def6bc101..90b7e4c0c4 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java @@ -149,7 +149,7 @@ public class TableIndexingTest { String schemaName = enc + "_" + cardType + "_" + dataType; TestCase testCase = _testCaseMap.get(new TestCase(schemaName, -1, indexType)); if (testCase == null) { - throw new AssertionError("Expected testCase not found: " + testCase); + Assert.fail("Expected testCase not found: " + schemaName); } else { testCase._expectedSuccess = Boolean.valueOf(result); testCase._expectedMessage = error; @@ -164,19 +164,13 @@ public class TableIndexingTest { } for (String encoding : List.of("raw", "dict")) { - if (type == DataType.BOOLEAN && "dict".equals(encoding)) { - // pinot doesn't support dictionary encoding for boolean type - continue; - } - if (type == DataType.TIMESTAMP) { 
//create separate tables for all data types _schemas.add(new Schema.SchemaBuilder().setSchemaName(encoding + "_sv_" + type.name()) .addDateTime(COLUMN_NAME, type, "1:MILLISECONDS:TIMESTAMP", "1:MILLISECONDS") .build()); - _schemas.add(new Schema.SchemaBuilder().setSchemaName(encoding + "_mv_" + type.name()) - .addDateTime(COLUMN_NAME, type, "1:MILLISECONDS:TIMESTAMP", "1:MILLISECONDS") + .addMultiValueDimension(COLUMN_NAME, type) .build()); } else { _schemas.add(new Schema.SchemaBuilder().setSchemaName(encoding + "_sv_" + type.name()) @@ -509,13 +503,12 @@ public class TableIndexingTest { } if (testCase._expectedSuccess == null) { - throw new AssertionError("No expected status found for test case: " + testCase); + Assert.fail("No expected status found for test case: " + testCase); } else if (testCase._expectedSuccess && testCase._error != null) { - throw new AssertionError("Expected success for test case: " + testCase + " but got error: " + testCase._error); + Assert.fail("Expected success for test case: " + testCase + " but got error: " + testCase._error); } else if (!testCase._expectedSuccess && !testCase.getErrorMessage().equals(testCase._expectedMessage)) { - throw new AssertionError( - "Expected error: \"" + testCase._expectedMessage + "\" for test case: " + testCase + " but got: \"" - + testCase.getErrorMessage() + " \""); + Assert.fail("Expected error: \"" + testCase._expectedMessage + "\" for test case: " + testCase + " but got: \"" + + testCase.getErrorMessage() + "\""); } } diff --git a/pinot-core/src/test/resources/TableIndexingTest.csv b/pinot-core/src/test/resources/TableIndexingTest.csv index d3b046a908..9ad8a25c30 100644 --- a/pinot-core/src/test/resources/TableIndexingTest.csv +++ b/pinot-core/src/test/resources/TableIndexingTest.csv @@ -205,7 +205,7 @@ BOOLEAN;sv;raw;inverted_index;false;Cannot create inverted index for raw index c BOOLEAN;sv;raw;json_index;false;Json index is currently only supported on STRING columns 
BOOLEAN;sv;raw;native_text_index;false;Cannot create text index on column: col, it can only be applied to string columns BOOLEAN;sv;raw;text_index;false;Cannot create text index on column: col, it can only be applied to string columns -BOOLEAN;sv;raw;range_index;false;Unsupported data type BOOLEAN for range index +BOOLEAN;sv;raw;range_index;true; BOOLEAN;sv;raw;startree_index;false;Dimension: col does not have dictionary BOOLEAN;sv;raw;vector_index;false;Vector index is currently only supported on float array columns BOOLEAN;mv;raw;timestamp_index;false;Caught exception while reading data @@ -216,9 +216,31 @@ BOOLEAN;mv;raw;inverted_index;false;Cannot create inverted index for raw index c BOOLEAN;mv;raw;json_index;false;Json index is currently only supported on single-value columns BOOLEAN;mv;raw;native_text_index;false;Cannot create text index on column: col, it can only be applied to string columns BOOLEAN;mv;raw;text_index;false;Cannot create text index on column: col, it can only be applied to string columns -BOOLEAN;mv;raw;range_index;false;Range index is not supported for columns of data type:BOOLEAN +BOOLEAN;mv;raw;range_index;true; BOOLEAN;mv;raw;startree_index;false;Column Name col defined in StarTreeIndex Config must be a single value column BOOLEAN;mv;raw;vector_index;false;Vector index is currently only supported on float array columns +BOOLEAN;sv;dict;timestamp_index;true; +BOOLEAN;sv;dict;bloom_filter;false;Cannot create a bloom filter on boolean column col +BOOLEAN;sv;dict;fst_index;false;Cannot create FST index on column: col, it can only be applied to dictionary encoded single value string columns +BOOLEAN;sv;dict;h3_index;false;H3 index is currently only supported on BYTES columns +BOOLEAN;sv;dict;inverted_index;true; +BOOLEAN;sv;dict;json_index;false;Json index is currently only supported on STRING columns +BOOLEAN;sv;dict;native_text_index;false;Cannot create text index on column: col, it can only be applied to string columns 
+BOOLEAN;sv;dict;text_index;false;Cannot create text index on column: col, it can only be applied to string columns +BOOLEAN;sv;dict;range_index;true; +BOOLEAN;sv;dict;startree_index;true; +BOOLEAN;sv;dict;vector_index;false;Vector index is currently only supported on float array columns +BOOLEAN;mv;dict;timestamp_index;false;Caught exception while reading data +BOOLEAN;mv;dict;bloom_filter;false;Cannot create a bloom filter on boolean column col +BOOLEAN;mv;dict;fst_index;false;Cannot create FST index on column: col, it can only be applied to dictionary encoded single value string columns +BOOLEAN;mv;dict;h3_index;false;H3 index is currently only supported on single-value columns +BOOLEAN;mv;dict;inverted_index;true; +BOOLEAN;mv;dict;json_index;false;Json index is currently only supported on single-value columns +BOOLEAN;mv;dict;native_text_index;false;Cannot create text index on column: col, it can only be applied to string columns +BOOLEAN;mv;dict;text_index;false;Cannot create text index on column: col, it can only be applied to string columns +BOOLEAN;mv;dict;range_index;true; +BOOLEAN;mv;dict;startree_index;false;Column Name col defined in StarTreeIndex Config must be a single value column +BOOLEAN;mv;dict;vector_index;false;Vector index is currently only supported on float array columns TIMESTAMP;sv;raw;timestamp_index;true; TIMESTAMP;sv;raw;bloom_filter;true; TIMESTAMP;sv;raw;fst_index;false;Cannot create FST index on column: col, it can only be applied to dictionary encoded single value string columns @@ -227,19 +249,19 @@ TIMESTAMP;sv;raw;inverted_index;false;Cannot create inverted index for raw index TIMESTAMP;sv;raw;json_index;false;Json index is currently only supported on STRING columns TIMESTAMP;sv;raw;native_text_index;false;Cannot create text index on column: col, it can only be applied to string columns TIMESTAMP;sv;raw;text_index;false;Cannot create text index on column: col, it can only be applied to string columns 
-TIMESTAMP;sv;raw;range_index;false;Unsupported data type TIMESTAMP for range index +TIMESTAMP;sv;raw;range_index;true; TIMESTAMP;sv;raw;startree_index;false;Dimension: col does not have dictionary TIMESTAMP;sv;raw;vector_index;false;Vector index is currently only supported on float array columns TIMESTAMP;mv;raw;timestamp_index;true; TIMESTAMP;mv;raw;bloom_filter;true; TIMESTAMP;mv;raw;fst_index;false;Cannot create FST index on column: col, it can only be applied to dictionary encoded single value string columns -TIMESTAMP;mv;raw;h3_index;false;H3 index is currently only supported on BYTES columns +TIMESTAMP;mv;raw;h3_index;false;H3 index is currently only supported on single-value columns TIMESTAMP;mv;raw;inverted_index;false;Cannot create inverted index for raw index column: col -TIMESTAMP;mv;raw;json_index;false;Json index is currently only supported on STRING columns +TIMESTAMP;mv;raw;json_index;false;Json index is currently only supported on single-value columns TIMESTAMP;mv;raw;native_text_index;false;Cannot create text index on column: col, it can only be applied to string columns TIMESTAMP;mv;raw;text_index;false;Cannot create text index on column: col, it can only be applied to string columns -TIMESTAMP;mv;raw;range_index;false;Unsupported data type TIMESTAMP for range index -TIMESTAMP;mv;raw;startree_index;false;Dimension: col does not have dictionary +TIMESTAMP;mv;raw;range_index;true; +TIMESTAMP;mv;raw;startree_index;false;Column Name col defined in StarTreeIndex Config must be a single value column TIMESTAMP;mv;raw;vector_index;false;Vector index is currently only supported on float array columns TIMESTAMP;sv;dict;timestamp_index;true; TIMESTAMP;sv;dict;bloom_filter;true; @@ -255,13 +277,13 @@ TIMESTAMP;sv;dict;vector_index;false;Vector index is currently only supported on TIMESTAMP;mv;dict;timestamp_index;true; TIMESTAMP;mv;dict;bloom_filter;true; TIMESTAMP;mv;dict;fst_index;false;Cannot create FST index on column: col, it can only be applied to 
dictionary encoded single value string columns -TIMESTAMP;mv;dict;h3_index;false;H3 index is currently only supported on BYTES columns +TIMESTAMP;mv;dict;h3_index;false;H3 index is currently only supported on single-value columns TIMESTAMP;mv;dict;inverted_index;true; -TIMESTAMP;mv;dict;json_index;false;Json index is currently only supported on STRING columns +TIMESTAMP;mv;dict;json_index;false;Json index is currently only supported on single-value columns TIMESTAMP;mv;dict;native_text_index;false;Cannot create text index on column: col, it can only be applied to string columns TIMESTAMP;mv;dict;text_index;false;Cannot create text index on column: col, it can only be applied to string columns TIMESTAMP;mv;dict;range_index;true; -TIMESTAMP;mv;dict;startree_index;true; +TIMESTAMP;mv;dict;startree_index;false;Column Name col defined in StarTreeIndex Config must be a single value column TIMESTAMP;mv;dict;vector_index;false;Vector index is currently only supported on float array columns STRING;sv;raw;timestamp_index;false;Caught exception while reading data STRING;sv;raw;bloom_filter;true; diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitSlicedRangeIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitSlicedRangeIndexCreator.java index 296272190a..b9b008fe30 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitSlicedRangeIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitSlicedRangeIndexCreator.java @@ -24,6 +24,7 @@ import java.io.IOException; import org.apache.pinot.segment.local.utils.FPOrdering; import org.apache.pinot.segment.spi.index.creator.CombinedInvertedIndexCreator; import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.FieldSpec.DataType; import org.roaringbitmap.RangeBitmap; import static 
org.apache.pinot.segment.spi.V1Constants.Indexes.BITMAP_RANGE_INDEX_FILE_EXTENSION; @@ -40,10 +41,10 @@ public class BitSlicedRangeIndexCreator implements CombinedInvertedIndexCreator private final RangeBitmap.Appender _appender; private final File _rangeIndexFile; private final long _minValue; - private final FieldSpec.DataType _valueType; + private final DataType _valueType; private BitSlicedRangeIndexCreator(File indexDir, FieldSpec fieldSpec, long minValue, long maxValue, - FieldSpec.DataType valueType) { + DataType valueType) { Preconditions.checkArgument(fieldSpec.isSingleValueField(), "MV columns not supported"); _rangeIndexFile = new File(indexDir, fieldSpec.getName() + BITMAP_RANGE_INDEX_FILE_EXTENSION); _appender = RangeBitmap.appender(maxValue); @@ -58,7 +59,7 @@ public class BitSlicedRangeIndexCreator implements CombinedInvertedIndexCreator * @param cardinality the cardinality of the dictionary */ public BitSlicedRangeIndexCreator(File indexDir, FieldSpec fieldSpec, int cardinality) { - this(indexDir, fieldSpec, 0, cardinality - 1, fieldSpec.getDataType()); + this(indexDir, fieldSpec, 0, cardinality - 1, fieldSpec.getDataType().getStoredType()); } /** @@ -71,11 +72,11 @@ public class BitSlicedRangeIndexCreator implements CombinedInvertedIndexCreator public BitSlicedRangeIndexCreator(File indexDir, FieldSpec fieldSpec, Comparable<?> minValue, Comparable<?> maxValue) { this(indexDir, fieldSpec, minValue(fieldSpec, minValue), maxValue(fieldSpec, minValue, maxValue), - fieldSpec.getDataType()); + fieldSpec.getDataType().getStoredType()); } @Override - public FieldSpec.DataType getDataType() { + public DataType getValueType() { return _valueType; } @@ -140,7 +141,7 @@ public class BitSlicedRangeIndexCreator implements CombinedInvertedIndexCreator } private static long maxValue(FieldSpec fieldSpec, Comparable<?> minValue, Comparable<?> maxValue) { - FieldSpec.DataType storedType = fieldSpec.getDataType().getStoredType(); + DataType storedType = 
fieldSpec.getDataType().getStoredType(); if (storedType == INT || storedType == LONG) { return ((Number) maxValue).longValue() - ((Number) minValue).longValue(); } @@ -154,7 +155,7 @@ public class BitSlicedRangeIndexCreator implements CombinedInvertedIndexCreator } private static long minValue(FieldSpec fieldSpec, Comparable<?> minValue) { - FieldSpec.DataType storedType = fieldSpec.getDataType().getStoredType(); + DataType storedType = fieldSpec.getDataType().getStoredType(); if (storedType == INT || storedType == LONG) { return ((Number) minValue).longValue(); } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/RangeIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/RangeIndexCreator.java index a97c45fec6..1fcb0e21d3 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/RangeIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/RangeIndexCreator.java @@ -162,7 +162,7 @@ public final class RangeIndexCreator implements CombinedInvertedIndexCreator { } @Override - public DataType getDataType() { + public DataType getValueType() { return _valueType; } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/RangeIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/RangeIndexHandler.java index 32af73bbdf..18252ad520 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/RangeIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/RangeIndexHandler.java @@ -189,7 +189,7 @@ public class RangeIndexHandler extends BaseIndexHandler { CombinedInvertedIndexCreator rangeIndexCreator = 
newRangeIndexCreator(columnMetadata)) { if (columnMetadata.isSingleValue()) { // Single-value column. - switch (columnMetadata.getDataType()) { + switch (columnMetadata.getDataType().getStoredType()) { case INT: for (int i = 0; i < numDocs; i++) { rangeIndexCreator.add(forwardIndexReader.getInt(i, readerContext)); @@ -216,7 +216,7 @@ public class RangeIndexHandler extends BaseIndexHandler { } else { // Multi-value column int maxNumValuesPerMVEntry = columnMetadata.getMaxNumberOfMultiValues(); - switch (columnMetadata.getDataType()) { + switch (columnMetadata.getDataType().getStoredType()) { case INT: int[] intValues = new int[maxNumValuesPerMVEntry]; for (int i = 0; i < numDocs; i++) { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/range/RangeIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/range/RangeIndexType.java index 43c6332f5c..cecbc3ec2c 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/range/RangeIndexType.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/range/RangeIndexType.java @@ -103,17 +103,18 @@ public class RangeIndexType @Override public CombinedInvertedIndexCreator createIndexCreator(IndexCreationContext context, RangeIndexConfig indexConfig) throws IOException { - if (indexConfig.getVersion() == BitSlicedRangeIndexCreator.VERSION && context.getFieldSpec().isSingleValueField()) { + FieldSpec fieldSpec = context.getFieldSpec(); + if (indexConfig.getVersion() == BitSlicedRangeIndexCreator.VERSION && fieldSpec.isSingleValueField()) { if (context.hasDictionary()) { - return new BitSlicedRangeIndexCreator(context.getIndexDir(), context.getFieldSpec(), context.getCardinality()); + return new BitSlicedRangeIndexCreator(context.getIndexDir(), fieldSpec, context.getCardinality()); } - return new BitSlicedRangeIndexCreator(context.getIndexDir(), context.getFieldSpec(), 
context.getMinValue(), + return new BitSlicedRangeIndexCreator(context.getIndexDir(), fieldSpec, context.getMinValue(), context.getMaxValue()); } // default to RangeIndexCreator for the time being - return new RangeIndexCreator(context.getIndexDir(), context.getFieldSpec(), - context.hasDictionary() ? FieldSpec.DataType.INT : context.getFieldSpec().getDataType(), -1, - -1, context.getTotalDocs(), context.getTotalNumberOfEntries()); + return new RangeIndexCreator(context.getIndexDir(), fieldSpec, + context.hasDictionary() ? FieldSpec.DataType.INT : fieldSpec.getDataType().getStoredType(), -1, -1, + context.getTotalDocs(), context.getTotalNumberOfEntries()); } @Override diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/CombinedInvertedIndexCreator.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/CombinedInvertedIndexCreator.java index 0011c5b1c8..d75011b14c 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/CombinedInvertedIndexCreator.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/CombinedInvertedIndexCreator.java @@ -18,9 +18,8 @@ */ package org.apache.pinot.segment.spi.index.creator; -import javax.annotation.Nonnull; import javax.annotation.Nullable; -import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.FieldSpec.DataType; /** @@ -29,14 +28,23 @@ import org.apache.pinot.spi.data.FieldSpec; public interface CombinedInvertedIndexCreator extends DictionaryBasedInvertedIndexCreator, RawValueBasedInvertedIndexCreator { - FieldSpec.DataType getDataType(); + @Deprecated + default DataType getDataType() { + throw new UnsupportedOperationException(); + } + + /// Returns the data type of the values in the index. The type returned should be the internal stored type. 
+ default DataType getValueType() { + return getDataType().getStoredType(); + } @Override - default void add(@Nonnull Object value, int dictId) { + default void add(Object value, int dictId) { if (dictId >= 0) { add(dictId); } else { - switch (getDataType()) { + DataType valueType = getValueType(); + switch (valueType) { case INT: add((Integer) value); break; @@ -50,17 +58,18 @@ public interface CombinedInvertedIndexCreator add((Double) value); break; default: - throw new RuntimeException("Unsupported data type " + getDataType() + " for range index"); + throw new RuntimeException("Unsupported data type " + valueType + " for range index"); } } } @Override - default void add(@Nonnull Object[] values, @Nullable int[] dictIds) { + default void add(Object[] values, @Nullable int[] dictIds) { if (dictIds != null) { add(dictIds, dictIds.length); } else { - switch (getDataType()) { + DataType valueType = getValueType(); + switch (valueType) { case INT: int[] intValues = new int[values.length]; for (int i = 0; i < values.length; i++) { @@ -90,7 +99,7 @@ public interface CombinedInvertedIndexCreator add(doubleValues, values.length); break; default: - throw new RuntimeException("Unsupported data type " + getDataType() + " for range index"); + throw new RuntimeException("Unsupported data type " + valueType + " for range index"); } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org