This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 760e952616 Add support for creating raw derived columns during segment reload (#13037) 760e952616 is described below commit 760e952616b79e455536bbf958edb84c8d0d3472 Author: Yash Mayya <yash.ma...@gmail.com> AuthorDate: Thu May 9 13:33:49 2024 +0530 Add support for creating raw derived columns during segment reload (#13037) * Add support for creating raw derived columns during segment reload * Remove createDictionary check for default value columns ColumnIndexCreationInfo; convert arrays of primitive wrapper values to primitive arrays for MV raw index * Minor updates * Fix compilation error caused by rebase on 31ae6a32aa426a6b9364b75e774a5e80c37eb336 * Refactor index creation to use ForwardIndexCreatorFactory for both dictionary and raw cases * Extract common forward index creator stuff into separate method --- .../tests/OfflineClusterIntegrationTest.java | 19 +- .../defaultcolumn/BaseDefaultColumnHandler.java | 202 +++++++++++++++------ .../defaultcolumn/V3DefaultColumnHandler.java | 14 +- .../index/loader/SegmentPreProcessorTest.java | 26 ++- .../src/test/resources/data/newColumnsSchema1.json | 5 + 5 files changed, 206 insertions(+), 60 deletions(-) diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java index 8de4713aed..a90f3ca48c 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java @@ -1501,6 +1501,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet * <li>"NewAddedDerivedMVStringDimension", DIMENSION, STRING, multi-value, split(DestCityName, ', ')</li> * <li>"NewAddedDerivedDivAirportSeqIDs", DIMENSION, 
INT, multi-value, DivAirportSeqIDs</li> * <li>"NewAddedDerivedDivAirportSeqIDsString", DIMENSION, STRING, multi-value, DivAirportSeqIDs</li> + * <li>"NewAddedRawDerivedStringDimension", DIMENSION, STRING, single-value, "null"</li> * </ul> */ @Test(dependsOnMethods = "testAggregateMetadataAPI", dataProvider = "useBothQueryEngines") @@ -1513,7 +1514,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet reloadWithExtraColumns(); JsonNode queryResponse = postQuery(SELECT_STAR_QUERY); assertEquals(queryResponse.get("totalDocs").asLong(), numTotalDocs); - assertEquals(queryResponse.get("resultTable").get("dataSchema").get("columnNames").size(), 100); + assertEquals(queryResponse.get("resultTable").get("dataSchema").get("columnNames").size(), 101); testNewAddedColumns(); testExpressionOverride(); @@ -1593,6 +1594,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet schema.addField(new DimensionFieldSpec("NewAddedDerivedMVStringDimension", DataType.STRING, false)); schema.addField(new DimensionFieldSpec("NewAddedDerivedDivAirportSeqIDs", DataType.INT, false)); schema.addField(new DimensionFieldSpec("NewAddedDerivedDivAirportSeqIDsString", DataType.STRING, false)); + schema.addField(new DimensionFieldSpec("NewAddedRawDerivedStringDimension", DataType.STRING, true)); addSchema(schema); TableConfig tableConfig = getOfflineTableConfig(); @@ -1602,10 +1604,13 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet new TransformConfig("NewAddedDerivedSVBooleanDimension", "ActualElapsedTime > 0"), new TransformConfig("NewAddedDerivedMVStringDimension", "split(DestCityName, ', ')"), new TransformConfig("NewAddedDerivedDivAirportSeqIDs", "DivAirportSeqIDs"), - new TransformConfig("NewAddedDerivedDivAirportSeqIDsString", "DivAirportSeqIDs")); + new TransformConfig("NewAddedDerivedDivAirportSeqIDsString", "DivAirportSeqIDs"), + new TransformConfig("NewAddedRawDerivedStringDimension", 
"reverse(DestCityName)")); IngestionConfig ingestionConfig = new IngestionConfig(); ingestionConfig.setTransformConfigs(transformConfigs); tableConfig.setIngestionConfig(ingestionConfig); + // Ensure that we can reload segments with a new raw derived column + tableConfig.getIndexingConfig().getNoDictionaryColumns().add("NewAddedRawDerivedStringDimension"); List<FieldConfig> fieldConfigList = tableConfig.getFieldConfigList(); assertNotNull(fieldConfigList); fieldConfigList.add( @@ -1623,12 +1628,14 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet // Verify the index sizes JsonNode columnIndexSizeMap = JsonUtils.stringToJsonNode(sendGetRequest( getControllerBaseApiUrl() + "/tables/mytable/metadata?columns=DivAirportSeqIDs" - + "&columns=NewAddedDerivedDivAirportSeqIDs&columns=NewAddedDerivedDivAirportSeqIDsString")) + + "&columns=NewAddedDerivedDivAirportSeqIDs&columns=NewAddedDerivedDivAirportSeqIDsString" + + "&columns=NewAddedRawDerivedStringDimension")) .get("columnIndexSizeMap"); - assertEquals(columnIndexSizeMap.size(), 3); + assertEquals(columnIndexSizeMap.size(), 4); JsonNode originalColumnIndexSizes = columnIndexSizeMap.get("DivAirportSeqIDs"); JsonNode derivedColumnIndexSizes = columnIndexSizeMap.get("NewAddedDerivedDivAirportSeqIDs"); JsonNode derivedStringColumnIndexSizes = columnIndexSizeMap.get("NewAddedDerivedDivAirportSeqIDsString"); + JsonNode derivedRawStringColumnIndex = columnIndexSizeMap.get("NewAddedRawDerivedStringDimension"); // Derived int column should have the same dictionary size as the original column double originalColumnDictionarySize = originalColumnIndexSizes.get("dictionary").asDouble(); @@ -1639,6 +1646,9 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet double derivedColumnForwardIndexSize = derivedColumnIndexSizes.get("forward_index").asDouble(); assertTrue(derivedColumnForwardIndexSize < originalColumnIndexSizes.get("forward_index").asDouble()); 
assertEquals(derivedStringColumnIndexSizes.get("forward_index").asDouble(), derivedColumnForwardIndexSize); + + assertTrue(derivedRawStringColumnIndex.has("forward_index")); + assertFalse(derivedRawStringColumnIndex.has("dictionary")); } private void reloadWithMissingColumns() @@ -1647,6 +1657,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet TableConfig tableConfig = getOfflineTableConfig(); tableConfig.setIngestionConfig(null); tableConfig.setFieldConfigList(getFieldConfigs()); + tableConfig.getIndexingConfig().getNoDictionaryColumns().remove("NewAddedRawDerivedStringDimension"); updateTableConfig(tableConfig); // Need to first delete then add the schema because removing columns is backward-incompatible change diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java index 67e92fd63a..e75ff0923e 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java @@ -30,16 +30,15 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.commons.configuration2.PropertiesConfiguration; +import org.apache.commons.lang.ArrayUtils; import org.apache.pinot.common.function.FunctionUtils; import org.apache.pinot.common.utils.PinotDataType; import org.apache.pinot.segment.local.function.FunctionEvaluator; import org.apache.pinot.segment.local.function.FunctionEvaluatorFactory; import org.apache.pinot.segment.local.segment.creator.impl.SegmentColumnarIndexCreator; import org.apache.pinot.segment.local.segment.creator.impl.SegmentDictionaryCreator; -import 
org.apache.pinot.segment.local.segment.creator.impl.fwd.MultiValueEntryDictForwardIndexCreator; import org.apache.pinot.segment.local.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator; import org.apache.pinot.segment.local.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator; -import org.apache.pinot.segment.local.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator; import org.apache.pinot.segment.local.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator; import org.apache.pinot.segment.local.segment.creator.impl.nullvalue.NullValueVectorCreator; import org.apache.pinot.segment.local.segment.creator.impl.stats.BytesColumnPredIndexStatsCollector; @@ -49,6 +48,7 @@ import org.apache.pinot.segment.local.segment.creator.impl.stats.IntColumnPreInd import org.apache.pinot.segment.local.segment.creator.impl.stats.LongColumnPreIndexStatsCollector; import org.apache.pinot.segment.local.segment.creator.impl.stats.StringColumnPreIndexStatsCollector; import org.apache.pinot.segment.local.segment.index.dictionary.DictionaryIndexType; +import org.apache.pinot.segment.local.segment.index.forward.ForwardIndexCreatorFactory; import org.apache.pinot.segment.local.segment.index.forward.ForwardIndexPlugin; import org.apache.pinot.segment.local.segment.index.forward.ForwardIndexType; import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; @@ -57,8 +57,8 @@ import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader; import org.apache.pinot.segment.spi.ColumnMetadata; import org.apache.pinot.segment.spi.SegmentMetadata; import org.apache.pinot.segment.spi.V1Constants; -import org.apache.pinot.segment.spi.compression.DictIdCompressionType; import org.apache.pinot.segment.spi.creator.ColumnIndexCreationInfo; +import org.apache.pinot.segment.spi.creator.IndexCreationContext; import org.apache.pinot.segment.spi.creator.StatsCollectorConfig; import org.apache.pinot.segment.spi.index.FieldIndexConfigs; import 
org.apache.pinot.segment.spi.index.ForwardIndexConfig; @@ -383,15 +383,6 @@ public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler { argumentsMetadata.add(columnMetadata); } - // TODO: Support raw derived column - if (_indexLoadingConfig.getNoDictionaryColumns().contains(column)) { - LOGGER.warn("Skip creating raw derived column: {}", column); - if (errorOnFailure) { - throw new UnsupportedOperationException(String.format("Failed to create raw derived column: %s", column)); - } - return false; - } - // TODO: Support forward index disabled derived column if (_indexLoadingConfig.getForwardIndexDisabledColumns().contains(column)) { LOGGER.warn("Skip creating forward index disabled derived column: {}", column); @@ -487,7 +478,7 @@ public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler { new ColumnIndexCreationInfo(columnStatistics, true/*createDictionary*/, false, true/*isAutoGenerated*/, defaultValue/*defaultNullValue*/); - // Create dictionary. + // We always create a dictionary for default value columns. // We will have only one value in the dictionary. int dictionaryElementSize; try (SegmentDictionaryCreator creator = new SegmentDictionaryCreator(fieldSpec, _indexDir, false)) { @@ -563,7 +554,6 @@ public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler { * Helper method to create the V1 indices (dictionary and forward index) for a column with derived values. 
* TODO: * - Support chained derived column - * - Support raw derived column * - Support forward index disabled derived column */ private void createDerivedColumnV1Indices(String column, FunctionEvaluator functionEvaluator, @@ -595,6 +585,7 @@ public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler { } } + boolean createDictionary = !_indexLoadingConfig.getNoDictionaryColumns().contains(column); FieldSpec fieldSpec = _schema.getFieldSpecFor(column); StatsCollectorConfig statsCollectorConfig = new StatsCollectorConfig(_indexLoadingConfig.getTableConfig(), _schema, null); @@ -621,7 +612,8 @@ public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler { } statsCollector.seal(); indexCreationInfo = - new ColumnIndexCreationInfo(statsCollector, true, false, true, fieldSpec.getDefaultNullValue()); + new ColumnIndexCreationInfo(statsCollector, createDictionary, false, true, + fieldSpec.getDefaultNullValue()); break; } case LONG: { @@ -644,7 +636,8 @@ public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler { } statsCollector.seal(); indexCreationInfo = - new ColumnIndexCreationInfo(statsCollector, true, false, true, fieldSpec.getDefaultNullValue()); + new ColumnIndexCreationInfo(statsCollector, createDictionary, false, true, + fieldSpec.getDefaultNullValue()); break; } case FLOAT: { @@ -667,7 +660,8 @@ public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler { } statsCollector.seal(); indexCreationInfo = - new ColumnIndexCreationInfo(statsCollector, true, false, true, fieldSpec.getDefaultNullValue()); + new ColumnIndexCreationInfo(statsCollector, createDictionary, false, true, + fieldSpec.getDefaultNullValue()); break; } case DOUBLE: { @@ -690,7 +684,8 @@ public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler { } statsCollector.seal(); indexCreationInfo = - new ColumnIndexCreationInfo(statsCollector, true, false, true, fieldSpec.getDefaultNullValue()); + new 
ColumnIndexCreationInfo(statsCollector, createDictionary, false, true, + fieldSpec.getDefaultNullValue()); break; } case BIG_DECIMAL: { @@ -705,7 +700,8 @@ public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler { } statsCollector.seal(); indexCreationInfo = - new ColumnIndexCreationInfo(statsCollector, true, false, true, fieldSpec.getDefaultNullValue()); + new ColumnIndexCreationInfo(statsCollector, createDictionary, false, true, + fieldSpec.getDefaultNullValue()); break; } case STRING: { @@ -727,7 +723,7 @@ public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler { statsCollector.collect(value); } statsCollector.seal(); - indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, true, + indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, createDictionary, _indexLoadingConfig.getVarLengthDictionaryColumns().contains(column), true, fieldSpec.getDefaultNullValue()); break; @@ -757,57 +753,157 @@ public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler { } else { useVarLengthDictionary = _indexLoadingConfig.getVarLengthDictionaryColumns().contains(column); } - indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, true, useVarLengthDictionary, true, - new ByteArray((byte[]) fieldSpec.getDefaultNullValue())); + indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, createDictionary, useVarLengthDictionary, + true, new ByteArray((byte[]) fieldSpec.getDefaultNullValue())); break; } default: throw new IllegalStateException(); } - // Create dictionary - try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(fieldSpec, _indexDir, - indexCreationInfo.isUseVarLengthDictionary())) { - dictionaryCreator.build(indexCreationInfo.getSortedUniqueElementsArray()); + if (createDictionary) { + createDerivedColumnForwardIndexWithDictionary(column, fieldSpec, outputValues, indexCreationInfo); + } else { + createDerivedColumnForwardIndexWithoutDictionary(column, 
fieldSpec, outputValues, indexCreationInfo); + } + } finally { + for (ValueReader valueReader : valueReaders) { + valueReader.close(); + } + } + } + + /** + * Helper method to create the dictionary and forward indices for a column with derived values. + */ + private void createDerivedColumnForwardIndexWithDictionary(String column, FieldSpec fieldSpec, Object[] outputValues, + ColumnIndexCreationInfo indexCreationInfo) throws Exception { + + // Create dictionary + try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(fieldSpec, _indexDir, + indexCreationInfo.isUseVarLengthDictionary())) { + dictionaryCreator.build(indexCreationInfo.getSortedUniqueElementsArray()); + + int numDocs = outputValues.length; - // Create forward index - int cardinality = indexCreationInfo.getDistinctValueCount(); + // Create forward index + boolean isSingleValue = fieldSpec.isSingleValueField(); + + try (ForwardIndexCreator forwardIndexCreator + = getForwardIndexCreator(fieldSpec, indexCreationInfo, numDocs, column, true)) { if (isSingleValue) { - try (ForwardIndexCreator forwardIndexCreator = indexCreationInfo.isSorted() - ? 
new SingleValueSortedForwardIndexCreator(_indexDir, column, cardinality) - : new SingleValueUnsortedForwardIndexCreator(_indexDir, column, cardinality, numDocs)) { - for (int i = 0; i < numDocs; i++) { - forwardIndexCreator.putDictId(dictionaryCreator.indexOfSV(outputValues[i])); - } + for (Object outputValue : outputValues) { + forwardIndexCreator.putDictId(dictionaryCreator.indexOfSV(outputValue)); } } else { - DictIdCompressionType dictIdCompressionType = null; - FieldIndexConfigs fieldIndexConfig = _indexLoadingConfig.getFieldIndexConfig(column); - if (fieldIndexConfig != null) { - ForwardIndexConfig forwardIndexConfig = fieldIndexConfig.getConfig(new ForwardIndexPlugin().getIndexType()); - if (forwardIndexConfig != null) { - dictIdCompressionType = forwardIndexConfig.getDictIdCompressionType(); - } - } - try (ForwardIndexCreator forwardIndexCreator = dictIdCompressionType == DictIdCompressionType.MV_ENTRY_DICT - ? new MultiValueEntryDictForwardIndexCreator(_indexDir, column, cardinality, numDocs) - : new MultiValueUnsortedForwardIndexCreator(_indexDir, column, cardinality, numDocs, - indexCreationInfo.getTotalNumberOfEntries())) { - for (int i = 0; i < numDocs; i++) { - forwardIndexCreator.putDictIdMV(dictionaryCreator.indexOfMV(outputValues[i])); - } + for (Object outputValue : outputValues) { + forwardIndexCreator.putDictIdMV(dictionaryCreator.indexOfMV(outputValue)); } } - // Add the column metadata SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties, column, indexCreationInfo, numDocs, fieldSpec, true, dictionaryCreator.getNumBytesPerEntry()); } - } finally { - for (ValueReader valueReader : valueReaders) { - valueReader.close(); + } + } + + /** + * Helper method to create a forward index for a raw encoded column with derived values. 
+ */ + private void createDerivedColumnForwardIndexWithoutDictionary(String column, FieldSpec fieldSpec, + Object[] outputValues, ColumnIndexCreationInfo indexCreationInfo) + throws Exception { + + // Create forward index + int numDocs = outputValues.length; + boolean isSingleValue = fieldSpec.isSingleValueField(); + + try (ForwardIndexCreator forwardIndexCreator + = getForwardIndexCreator(fieldSpec, indexCreationInfo, numDocs, column, false)) { + if (isSingleValue) { + for (Object outputValue : outputValues) { + switch (fieldSpec.getDataType().getStoredType()) { + // Casts are safe here because we've already done the conversion in createDerivedColumnV1Indices + case INT: + forwardIndexCreator.putInt((int) outputValue); + break; + case LONG: + forwardIndexCreator.putLong((long) outputValue); + break; + case FLOAT: + forwardIndexCreator.putFloat((float) outputValue); + break; + case DOUBLE: + forwardIndexCreator.putDouble((double) outputValue); + break; + case BIG_DECIMAL: + forwardIndexCreator.putBigDecimal((BigDecimal) outputValue); + break; + case STRING: + forwardIndexCreator.putString((String) outputValue); + break; + case BYTES: + forwardIndexCreator.putBytes((byte[]) outputValue); + break; + default: + throw new IllegalStateException(); + } + } + } else { + for (Object outputValue : outputValues) { + switch (fieldSpec.getDataType().getStoredType()) { + // Casts are safe here because we've already done the conversion in createDerivedColumnV1Indices + case INT: + forwardIndexCreator.putIntMV(ArrayUtils.toPrimitive((Integer[]) outputValue)); + break; + case LONG: + forwardIndexCreator.putLongMV(ArrayUtils.toPrimitive((Long[]) outputValue)); + break; + case FLOAT: + forwardIndexCreator.putFloatMV(ArrayUtils.toPrimitive((Float[]) outputValue)); + break; + case DOUBLE: + forwardIndexCreator.putDoubleMV(ArrayUtils.toPrimitive((Double[]) outputValue)); + break; + case STRING: + forwardIndexCreator.putStringMV((String[]) outputValue); + break; + case BYTES: + 
forwardIndexCreator.putBytesMV((byte[][]) outputValue); + break; + default: + throw new IllegalStateException(); + } + } } } + + // Add the column metadata + SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties, column, indexCreationInfo, numDocs, + fieldSpec, false, 0); + } + + private ForwardIndexCreator getForwardIndexCreator(FieldSpec fieldSpec, ColumnIndexCreationInfo indexCreationInfo, + int numDocs, String column, boolean hasDictionary) throws Exception { + + IndexCreationContext indexCreationContext = IndexCreationContext.builder() + .withIndexDir(_indexDir) + .withFieldSpec(fieldSpec) + .withColumnIndexCreationInfo(indexCreationInfo) + .withTotalDocs(numDocs) + .withDictionary(hasDictionary) + .build(); + + ForwardIndexConfig forwardIndexConfig = null; + FieldIndexConfigs fieldIndexConfig = _indexLoadingConfig.getFieldIndexConfig(column); + if (fieldIndexConfig != null) { + forwardIndexConfig = fieldIndexConfig.getConfig(new ForwardIndexPlugin().getIndexType()); + } + if (forwardIndexConfig == null) { + forwardIndexConfig = new ForwardIndexConfig(false, null, null, null, null, null); + } + + return ForwardIndexCreatorFactory.createIndexCreator(indexCreationContext, forwardIndexConfig); } @SuppressWarnings("rawtypes") diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java index 4f8dc7e8be..7fa10155ee 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java @@ -65,27 +65,39 @@ public class V3DefaultColumnHandler extends BaseDefaultColumnHandler { boolean forwardIndexDisabled = !isSingleValue && 
isForwardIndexDisabled(column); File forwardIndexFile = null; File invertedIndexFile = null; + if (isSingleValue) { forwardIndexFile = new File(_indexDir, column + V1Constants.Indexes.SORTED_SV_FORWARD_INDEX_FILE_EXTENSION); if (!forwardIndexFile.exists()) { forwardIndexFile = new File(_indexDir, column + V1Constants.Indexes.UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION); } + if (!forwardIndexFile.exists()) { + forwardIndexFile = new File(_indexDir, column + V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION); + } } else { if (forwardIndexDisabled) { // An inverted index is created instead of forward index for multi-value columns with forward index disabled + // Note that we don't currently support creation of forward index disabled derived columns invertedIndexFile = new File(_indexDir, column + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION); } else { forwardIndexFile = new File(_indexDir, column + V1Constants.Indexes.UNSORTED_MV_FORWARD_INDEX_FILE_EXTENSION); + if (!forwardIndexFile.exists()) { + forwardIndexFile = new File(_indexDir, column + V1Constants.Indexes.RAW_MV_FORWARD_INDEX_FILE_EXTENSION); + } } } + if (forwardIndexFile != null) { LoaderUtils.writeIndexToV3Format(_segmentWriter, column, forwardIndexFile, StandardIndexes.forward()); } if (invertedIndexFile != null) { LoaderUtils.writeIndexToV3Format(_segmentWriter, column, invertedIndexFile, StandardIndexes.inverted()); } + File dictionaryFile = new File(_indexDir, column + V1Constants.Dict.FILE_EXTENSION); - LoaderUtils.writeIndexToV3Format(_segmentWriter, column, dictionaryFile, StandardIndexes.dictionary()); + if (dictionaryFile.exists()) { + LoaderUtils.writeIndexToV3Format(_segmentWriter, column, dictionaryFile, StandardIndexes.dictionary()); + } File nullValueVectorFile = new File(_indexDir, column + V1Constants.Indexes.NULLVALUE_VECTOR_FILE_EXTENSION); if (nullValueVectorFile.exists()) { diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java index acdc679256..80178b3fda 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.segment.local.segment.index.loader; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Sets; import java.io.File; @@ -141,6 +142,7 @@ public class SegmentPreProcessorTest { private static final String NEW_BOOLEAN_SV_DIMENSION_COLUMN_NAME = "newBooleanSVDimension"; private static final String NEW_INT_SV_DIMENSION_COLUMN_NAME = "newIntSVDimension"; private static final String NEW_STRING_MV_DIMENSION_COLUMN_NAME = "newStringMVDimension"; + private static final String NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME = "newRawStringSVDimension"; private static final String NEW_HLL_BYTE_METRIC_COLUMN_NAME = "newHLLByteMetric"; private static final String NEW_TDIGEST_BYTE_METRIC_COLUMN_NAME = "newTDigestByteMetric"; @@ -1101,9 +1103,13 @@ public class SegmentPreProcessorTest { Collections.emptyList()); IngestionConfig ingestionConfig = new IngestionConfig(); ingestionConfig.setTransformConfigs( - Collections.singletonList(new TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, "plus(column1, 1)"))); + ImmutableList.of( + new TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, "plus(column1, 1)"), + new TransformConfig(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME, "reverse(column3)") + )); _tableConfig.setIngestionConfig(ingestionConfig); _indexLoadingConfig.addInvertedIndexColumns(NEW_COLUMN_INVERTED_INDEX); + 
_indexLoadingConfig.addNoDictionaryColumns(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME); checkUpdateDefaultColumns(); // Try to use the third schema and update default value again. @@ -1148,9 +1154,13 @@ public class SegmentPreProcessorTest { IngestionConfig ingestionConfig = new IngestionConfig(); ingestionConfig.setTransformConfigs( - Collections.singletonList(new TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, "plus(column1, 1)"))); + ImmutableList.of( + new TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, "plus(column1, 1)"), + new TransformConfig(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME, "reverse(column3)") + )); _tableConfig.setIngestionConfig(ingestionConfig); _indexLoadingConfig.addInvertedIndexColumns(NEW_COLUMN_INVERTED_INDEX); + _indexLoadingConfig.addNoDictionaryColumns(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME); checkUpdateDefaultColumns(); // Try to use the third schema and update default value again. @@ -1257,6 +1267,15 @@ public class SegmentPreProcessorTest { assertEquals(columnMetadata.getMinValue(), (int) originalColumnMetadata.getMinValue() + 1); assertEquals(columnMetadata.getMaxValue(), (int) originalColumnMetadata.getMaxValue() + 1); + columnMetadata = segmentMetadata.getColumnMetadataFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME); + assertEquals(columnMetadata.getFieldSpec(), + _newColumnsSchema1.getFieldSpecFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME)); + assertTrue(columnMetadata.isAutoGenerated()); + originalColumnMetadata = segmentMetadata.getColumnMetadataFor("column3"); + assertEquals(columnMetadata.getCardinality(), originalColumnMetadata.getCardinality()); + assertEquals(columnMetadata.getBitsPerElement(), originalColumnMetadata.getBitsPerElement()); + assertEquals(columnMetadata.getTotalNumberOfEntries(), originalColumnMetadata.getTotalNumberOfEntries()); + // Check dictionary and forward index exist. 
try (SegmentDirectory segmentDirectory = SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader() .load(_indexDir.toURI(), @@ -1276,6 +1295,9 @@ public class SegmentPreProcessorTest { assertTrue(reader.hasIndexFor(NEW_INT_SV_DIMENSION_COLUMN_NAME, StandardIndexes.forward())); assertTrue(reader.hasIndexFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME, StandardIndexes.dictionary())); assertTrue(reader.hasIndexFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME, StandardIndexes.forward())); + // Dictionary shouldn't be created for raw derived column + assertFalse(reader.hasIndexFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME, StandardIndexes.dictionary())); + assertTrue(reader.hasIndexFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME, StandardIndexes.forward())); assertTrue(reader.hasIndexFor(NEW_INT_METRIC_COLUMN_NAME, StandardIndexes.nullValueVector())); assertTrue(reader.hasIndexFor(NEW_LONG_METRIC_COLUMN_NAME, StandardIndexes.nullValueVector())); diff --git a/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json b/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json index b6ef01c991..32ba7dcf58 100644 --- a/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json +++ b/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json @@ -32,6 +32,11 @@ "dataType": "STRING", "name": "newStringMVDimension", "singleValueField": false + }, + { + "dataType": "STRING", + "name": "newRawStringSVDimension", + "defaultNullValue": "null" } ] } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org