This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 760e952616 Add support for creating raw derived columns during segment reload (#13037)
760e952616 is described below

commit 760e952616b79e455536bbf958edb84c8d0d3472
Author: Yash Mayya <yash.ma...@gmail.com>
AuthorDate: Thu May 9 13:33:49 2024 +0530

    Add support for creating raw derived columns during segment reload (#13037)
    
    * Add support for creating raw derived columns during segment reload
    
    * Remove createDictionary check for default value columns ColumnIndexCreationInfo; convert arrays of primitive wrapper values to primitive arrays for MV raw index
    
    * Minor updates
    
    * Fix compilation error caused by rebase on 31ae6a32aa426a6b9364b75e774a5e80c37eb336
    
    * Refactor index creation to use ForwardIndexCreatorFactory for both dictionary and raw cases
    
    * Extract common forward index creator stuff into separate method
---
 .../tests/OfflineClusterIntegrationTest.java       |  19 +-
 .../defaultcolumn/BaseDefaultColumnHandler.java    | 202 +++++++++++++++------
 .../defaultcolumn/V3DefaultColumnHandler.java      |  14 +-
 .../index/loader/SegmentPreProcessorTest.java      |  26 ++-
 .../src/test/resources/data/newColumnsSchema1.json |   5 +
 5 files changed, 206 insertions(+), 60 deletions(-)

diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
index 8de4713aed..a90f3ca48c 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
@@ -1501,6 +1501,7 @@ public class OfflineClusterIntegrationTest extends 
BaseClusterIntegrationTestSet
    *   <li>"NewAddedDerivedMVStringDimension", DIMENSION, STRING, multi-value, 
split(DestCityName, ', ')</li>
    *   <li>"NewAddedDerivedDivAirportSeqIDs", DIMENSION, INT, multi-value, 
DivAirportSeqIDs</li>
    *   <li>"NewAddedDerivedDivAirportSeqIDsString", DIMENSION, STRING, 
multi-value, DivAirportSeqIDs</li>
+   *   <li>"NewAddedRawDerivedStringDimension", DIMENSION, STRING, 
single-value, "null"</li>
    * </ul>
    */
   @Test(dependsOnMethods = "testAggregateMetadataAPI", dataProvider = 
"useBothQueryEngines")
@@ -1513,7 +1514,7 @@ public class OfflineClusterIntegrationTest extends 
BaseClusterIntegrationTestSet
     reloadWithExtraColumns();
     JsonNode queryResponse = postQuery(SELECT_STAR_QUERY);
     assertEquals(queryResponse.get("totalDocs").asLong(), numTotalDocs);
-    
assertEquals(queryResponse.get("resultTable").get("dataSchema").get("columnNames").size(),
 100);
+    
assertEquals(queryResponse.get("resultTable").get("dataSchema").get("columnNames").size(),
 101);
 
     testNewAddedColumns();
     testExpressionOverride();
@@ -1593,6 +1594,7 @@ public class OfflineClusterIntegrationTest extends 
BaseClusterIntegrationTestSet
     schema.addField(new DimensionFieldSpec("NewAddedDerivedMVStringDimension", 
DataType.STRING, false));
     schema.addField(new DimensionFieldSpec("NewAddedDerivedDivAirportSeqIDs", 
DataType.INT, false));
     schema.addField(new 
DimensionFieldSpec("NewAddedDerivedDivAirportSeqIDsString", DataType.STRING, 
false));
+    schema.addField(new 
DimensionFieldSpec("NewAddedRawDerivedStringDimension", DataType.STRING, true));
     addSchema(schema);
 
     TableConfig tableConfig = getOfflineTableConfig();
@@ -1602,10 +1604,13 @@ public class OfflineClusterIntegrationTest extends 
BaseClusterIntegrationTestSet
             new TransformConfig("NewAddedDerivedSVBooleanDimension", 
"ActualElapsedTime > 0"),
             new TransformConfig("NewAddedDerivedMVStringDimension", 
"split(DestCityName, ', ')"),
             new TransformConfig("NewAddedDerivedDivAirportSeqIDs", 
"DivAirportSeqIDs"),
-            new TransformConfig("NewAddedDerivedDivAirportSeqIDsString", 
"DivAirportSeqIDs"));
+            new TransformConfig("NewAddedDerivedDivAirportSeqIDsString", 
"DivAirportSeqIDs"),
+            new TransformConfig("NewAddedRawDerivedStringDimension", 
"reverse(DestCityName)"));
     IngestionConfig ingestionConfig = new IngestionConfig();
     ingestionConfig.setTransformConfigs(transformConfigs);
     tableConfig.setIngestionConfig(ingestionConfig);
+    // Ensure that we can reload segments with a new raw derived column
+    
tableConfig.getIndexingConfig().getNoDictionaryColumns().add("NewAddedRawDerivedStringDimension");
     List<FieldConfig> fieldConfigList = tableConfig.getFieldConfigList();
     assertNotNull(fieldConfigList);
     fieldConfigList.add(
@@ -1623,12 +1628,14 @@ public class OfflineClusterIntegrationTest extends 
BaseClusterIntegrationTestSet
     // Verify the index sizes
     JsonNode columnIndexSizeMap = JsonUtils.stringToJsonNode(sendGetRequest(
             getControllerBaseApiUrl() + 
"/tables/mytable/metadata?columns=DivAirportSeqIDs"
-                + 
"&columns=NewAddedDerivedDivAirportSeqIDs&columns=NewAddedDerivedDivAirportSeqIDsString"))
+                + 
"&columns=NewAddedDerivedDivAirportSeqIDs&columns=NewAddedDerivedDivAirportSeqIDsString"
+                + "&columns=NewAddedRawDerivedStringDimension"))
         .get("columnIndexSizeMap");
-    assertEquals(columnIndexSizeMap.size(), 3);
+    assertEquals(columnIndexSizeMap.size(), 4);
     JsonNode originalColumnIndexSizes = 
columnIndexSizeMap.get("DivAirportSeqIDs");
     JsonNode derivedColumnIndexSizes = 
columnIndexSizeMap.get("NewAddedDerivedDivAirportSeqIDs");
     JsonNode derivedStringColumnIndexSizes = 
columnIndexSizeMap.get("NewAddedDerivedDivAirportSeqIDsString");
+    JsonNode derivedRawStringColumnIndex = 
columnIndexSizeMap.get("NewAddedRawDerivedStringDimension");
 
     // Derived int column should have the same dictionary size as the original 
column
     double originalColumnDictionarySize = 
originalColumnIndexSizes.get("dictionary").asDouble();
@@ -1639,6 +1646,9 @@ public class OfflineClusterIntegrationTest extends 
BaseClusterIntegrationTestSet
     double derivedColumnForwardIndexSize = 
derivedColumnIndexSizes.get("forward_index").asDouble();
     assertTrue(derivedColumnForwardIndexSize < 
originalColumnIndexSizes.get("forward_index").asDouble());
     
assertEquals(derivedStringColumnIndexSizes.get("forward_index").asDouble(), 
derivedColumnForwardIndexSize);
+
+    assertTrue(derivedRawStringColumnIndex.has("forward_index"));
+    assertFalse(derivedRawStringColumnIndex.has("dictionary"));
   }
 
   private void reloadWithMissingColumns()
@@ -1647,6 +1657,7 @@ public class OfflineClusterIntegrationTest extends 
BaseClusterIntegrationTestSet
     TableConfig tableConfig = getOfflineTableConfig();
     tableConfig.setIngestionConfig(null);
     tableConfig.setFieldConfigList(getFieldConfigs());
+    
tableConfig.getIndexingConfig().getNoDictionaryColumns().remove("NewAddedRawDerivedStringDimension");
     updateTableConfig(tableConfig);
 
     // Need to first delete then add the schema because removing columns is 
backward-incompatible change
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
index 67e92fd63a..e75ff0923e 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
@@ -30,16 +30,15 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import org.apache.commons.configuration2.PropertiesConfiguration;
+import org.apache.commons.lang.ArrayUtils;
 import org.apache.pinot.common.function.FunctionUtils;
 import org.apache.pinot.common.utils.PinotDataType;
 import org.apache.pinot.segment.local.function.FunctionEvaluator;
 import org.apache.pinot.segment.local.function.FunctionEvaluatorFactory;
 import 
org.apache.pinot.segment.local.segment.creator.impl.SegmentColumnarIndexCreator;
 import 
org.apache.pinot.segment.local.segment.creator.impl.SegmentDictionaryCreator;
-import 
org.apache.pinot.segment.local.segment.creator.impl.fwd.MultiValueEntryDictForwardIndexCreator;
 import 
org.apache.pinot.segment.local.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator;
 import 
org.apache.pinot.segment.local.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator;
-import 
org.apache.pinot.segment.local.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator;
 import 
org.apache.pinot.segment.local.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator;
 import 
org.apache.pinot.segment.local.segment.creator.impl.nullvalue.NullValueVectorCreator;
 import 
org.apache.pinot.segment.local.segment.creator.impl.stats.BytesColumnPredIndexStatsCollector;
@@ -49,6 +48,7 @@ import 
org.apache.pinot.segment.local.segment.creator.impl.stats.IntColumnPreInd
 import 
org.apache.pinot.segment.local.segment.creator.impl.stats.LongColumnPreIndexStatsCollector;
 import 
org.apache.pinot.segment.local.segment.creator.impl.stats.StringColumnPreIndexStatsCollector;
 import 
org.apache.pinot.segment.local.segment.index.dictionary.DictionaryIndexType;
+import 
org.apache.pinot.segment.local.segment.index.forward.ForwardIndexCreatorFactory;
 import org.apache.pinot.segment.local.segment.index.forward.ForwardIndexPlugin;
 import org.apache.pinot.segment.local.segment.index.forward.ForwardIndexType;
 import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
@@ -57,8 +57,8 @@ import 
org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
 import org.apache.pinot.segment.spi.ColumnMetadata;
 import org.apache.pinot.segment.spi.SegmentMetadata;
 import org.apache.pinot.segment.spi.V1Constants;
-import org.apache.pinot.segment.spi.compression.DictIdCompressionType;
 import org.apache.pinot.segment.spi.creator.ColumnIndexCreationInfo;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
 import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
 import org.apache.pinot.segment.spi.index.FieldIndexConfigs;
 import org.apache.pinot.segment.spi.index.ForwardIndexConfig;
@@ -383,15 +383,6 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
             argumentsMetadata.add(columnMetadata);
           }
 
-          // TODO: Support raw derived column
-          if (_indexLoadingConfig.getNoDictionaryColumns().contains(column)) {
-            LOGGER.warn("Skip creating raw derived column: {}", column);
-            if (errorOnFailure) {
-              throw new UnsupportedOperationException(String.format("Failed to 
create raw derived column: %s", column));
-            }
-            return false;
-          }
-
           // TODO: Support forward index disabled derived column
           if 
(_indexLoadingConfig.getForwardIndexDisabledColumns().contains(column)) {
             LOGGER.warn("Skip creating forward index disabled derived column: 
{}", column);
@@ -487,7 +478,7 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
         new ColumnIndexCreationInfo(columnStatistics, 
true/*createDictionary*/, false, true/*isAutoGenerated*/,
             defaultValue/*defaultNullValue*/);
 
-    // Create dictionary.
+    // We always create a dictionary for default value columns.
     // We will have only one value in the dictionary.
     int dictionaryElementSize;
     try (SegmentDictionaryCreator creator = new 
SegmentDictionaryCreator(fieldSpec, _indexDir, false)) {
@@ -563,7 +554,6 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
    * Helper method to create the V1 indices (dictionary and forward index) for 
a column with derived values.
    * TODO:
    *   - Support chained derived column
-   *   - Support raw derived column
    *   - Support forward index disabled derived column
    */
   private void createDerivedColumnV1Indices(String column, FunctionEvaluator 
functionEvaluator,
@@ -595,6 +585,7 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
         }
       }
 
+      boolean createDictionary = 
!_indexLoadingConfig.getNoDictionaryColumns().contains(column);
       FieldSpec fieldSpec = _schema.getFieldSpecFor(column);
       StatsCollectorConfig statsCollectorConfig =
           new StatsCollectorConfig(_indexLoadingConfig.getTableConfig(), 
_schema, null);
@@ -621,7 +612,8 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
           }
           statsCollector.seal();
           indexCreationInfo =
-              new ColumnIndexCreationInfo(statsCollector, true, false, true, 
fieldSpec.getDefaultNullValue());
+              new ColumnIndexCreationInfo(statsCollector, createDictionary, 
false, true,
+                  fieldSpec.getDefaultNullValue());
           break;
         }
         case LONG: {
@@ -644,7 +636,8 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
           }
           statsCollector.seal();
           indexCreationInfo =
-              new ColumnIndexCreationInfo(statsCollector, true, false, true, 
fieldSpec.getDefaultNullValue());
+              new ColumnIndexCreationInfo(statsCollector, createDictionary, 
false, true,
+                  fieldSpec.getDefaultNullValue());
           break;
         }
         case FLOAT: {
@@ -667,7 +660,8 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
           }
           statsCollector.seal();
           indexCreationInfo =
-              new ColumnIndexCreationInfo(statsCollector, true, false, true, 
fieldSpec.getDefaultNullValue());
+              new ColumnIndexCreationInfo(statsCollector, createDictionary, 
false, true,
+                  fieldSpec.getDefaultNullValue());
           break;
         }
         case DOUBLE: {
@@ -690,7 +684,8 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
           }
           statsCollector.seal();
           indexCreationInfo =
-              new ColumnIndexCreationInfo(statsCollector, true, false, true, 
fieldSpec.getDefaultNullValue());
+              new ColumnIndexCreationInfo(statsCollector, createDictionary, 
false, true,
+                  fieldSpec.getDefaultNullValue());
           break;
         }
         case BIG_DECIMAL: {
@@ -705,7 +700,8 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
           }
           statsCollector.seal();
           indexCreationInfo =
-              new ColumnIndexCreationInfo(statsCollector, true, false, true, 
fieldSpec.getDefaultNullValue());
+              new ColumnIndexCreationInfo(statsCollector, createDictionary, 
false, true,
+                  fieldSpec.getDefaultNullValue());
           break;
         }
         case STRING: {
@@ -727,7 +723,7 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
             statsCollector.collect(value);
           }
           statsCollector.seal();
-          indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, true,
+          indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, 
createDictionary,
               
_indexLoadingConfig.getVarLengthDictionaryColumns().contains(column), true,
               fieldSpec.getDefaultNullValue());
           break;
@@ -757,57 +753,157 @@ public abstract class BaseDefaultColumnHandler 
implements DefaultColumnHandler {
           } else {
             useVarLengthDictionary = 
_indexLoadingConfig.getVarLengthDictionaryColumns().contains(column);
           }
-          indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, 
true, useVarLengthDictionary, true,
-              new ByteArray((byte[]) fieldSpec.getDefaultNullValue()));
+          indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, 
createDictionary, useVarLengthDictionary,
+              true, new ByteArray((byte[]) fieldSpec.getDefaultNullValue()));
           break;
         }
         default:
           throw new IllegalStateException();
       }
 
-      // Create dictionary
-      try (SegmentDictionaryCreator dictionaryCreator = new 
SegmentDictionaryCreator(fieldSpec, _indexDir,
-          indexCreationInfo.isUseVarLengthDictionary())) {
-        
dictionaryCreator.build(indexCreationInfo.getSortedUniqueElementsArray());
+      if (createDictionary) {
+        createDerivedColumnForwardIndexWithDictionary(column, fieldSpec, 
outputValues, indexCreationInfo);
+      } else {
+        createDerivedColumnForwardIndexWithoutDictionary(column, fieldSpec, 
outputValues, indexCreationInfo);
+      }
+    } finally {
+      for (ValueReader valueReader : valueReaders) {
+        valueReader.close();
+      }
+    }
+  }
+
+  /**
+   * Helper method to create the dictionary and forward indices for a column 
with derived values.
+   */
+  private void createDerivedColumnForwardIndexWithDictionary(String column, 
FieldSpec fieldSpec, Object[] outputValues,
+      ColumnIndexCreationInfo indexCreationInfo) throws Exception {
+
+    // Create dictionary
+    try (SegmentDictionaryCreator dictionaryCreator = new 
SegmentDictionaryCreator(fieldSpec, _indexDir,
+        indexCreationInfo.isUseVarLengthDictionary())) {
+      
dictionaryCreator.build(indexCreationInfo.getSortedUniqueElementsArray());
+
+      int numDocs = outputValues.length;
 
-        // Create forward index
-        int cardinality = indexCreationInfo.getDistinctValueCount();
+      // Create forward index
+      boolean isSingleValue = fieldSpec.isSingleValueField();
+
+      try (ForwardIndexCreator forwardIndexCreator
+          = getForwardIndexCreator(fieldSpec, indexCreationInfo, numDocs, 
column, true)) {
         if (isSingleValue) {
-          try (ForwardIndexCreator forwardIndexCreator = 
indexCreationInfo.isSorted()
-              ? new SingleValueSortedForwardIndexCreator(_indexDir, column, 
cardinality)
-              : new SingleValueUnsortedForwardIndexCreator(_indexDir, column, 
cardinality, numDocs)) {
-            for (int i = 0; i < numDocs; i++) {
-              
forwardIndexCreator.putDictId(dictionaryCreator.indexOfSV(outputValues[i]));
-            }
+          for (Object outputValue : outputValues) {
+            
forwardIndexCreator.putDictId(dictionaryCreator.indexOfSV(outputValue));
           }
         } else {
-          DictIdCompressionType dictIdCompressionType = null;
-          FieldIndexConfigs fieldIndexConfig = 
_indexLoadingConfig.getFieldIndexConfig(column);
-          if (fieldIndexConfig != null) {
-            ForwardIndexConfig forwardIndexConfig = 
fieldIndexConfig.getConfig(new ForwardIndexPlugin().getIndexType());
-            if (forwardIndexConfig != null) {
-              dictIdCompressionType = 
forwardIndexConfig.getDictIdCompressionType();
-            }
-          }
-          try (ForwardIndexCreator forwardIndexCreator = dictIdCompressionType 
== DictIdCompressionType.MV_ENTRY_DICT
-              ? new MultiValueEntryDictForwardIndexCreator(_indexDir, column, 
cardinality, numDocs)
-              : new MultiValueUnsortedForwardIndexCreator(_indexDir, column, 
cardinality, numDocs,
-                  indexCreationInfo.getTotalNumberOfEntries())) {
-            for (int i = 0; i < numDocs; i++) {
-              
forwardIndexCreator.putDictIdMV(dictionaryCreator.indexOfMV(outputValues[i]));
-            }
+          for (Object outputValue : outputValues) {
+            
forwardIndexCreator.putDictIdMV(dictionaryCreator.indexOfMV(outputValue));
           }
         }
-
         // Add the column metadata
         SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties, 
column, indexCreationInfo, numDocs,
             fieldSpec, true, dictionaryCreator.getNumBytesPerEntry());
       }
-    } finally {
-      for (ValueReader valueReader : valueReaders) {
-        valueReader.close();
+    }
+  }
+
+  /**
+   * Helper method to create a forward index for a raw encoded column with 
derived values.
+   */
+  private void createDerivedColumnForwardIndexWithoutDictionary(String column, 
FieldSpec fieldSpec,
+      Object[] outputValues, ColumnIndexCreationInfo indexCreationInfo)
+      throws Exception {
+
+    // Create forward index
+    int numDocs = outputValues.length;
+    boolean isSingleValue = fieldSpec.isSingleValueField();
+
+    try (ForwardIndexCreator forwardIndexCreator
+        = getForwardIndexCreator(fieldSpec, indexCreationInfo, numDocs, 
column, false)) {
+      if (isSingleValue) {
+        for (Object outputValue : outputValues) {
+          switch (fieldSpec.getDataType().getStoredType()) {
+            // Casts are safe here because we've already done the conversion 
in createDerivedColumnV1Indices
+            case INT:
+              forwardIndexCreator.putInt((int) outputValue);
+              break;
+            case LONG:
+              forwardIndexCreator.putLong((long) outputValue);
+              break;
+            case FLOAT:
+              forwardIndexCreator.putFloat((float) outputValue);
+              break;
+            case DOUBLE:
+              forwardIndexCreator.putDouble((double) outputValue);
+              break;
+            case BIG_DECIMAL:
+              forwardIndexCreator.putBigDecimal((BigDecimal) outputValue);
+              break;
+            case STRING:
+              forwardIndexCreator.putString((String) outputValue);
+              break;
+            case BYTES:
+              forwardIndexCreator.putBytes((byte[]) outputValue);
+              break;
+            default:
+              throw new IllegalStateException();
+          }
+        }
+      } else {
+        for (Object outputValue : outputValues) {
+          switch (fieldSpec.getDataType().getStoredType()) {
+            // Casts are safe here because we've already done the conversion 
in createDerivedColumnV1Indices
+            case INT:
+              forwardIndexCreator.putIntMV(ArrayUtils.toPrimitive((Integer[]) 
outputValue));
+              break;
+            case LONG:
+              forwardIndexCreator.putLongMV(ArrayUtils.toPrimitive((Long[]) 
outputValue));
+              break;
+            case FLOAT:
+              forwardIndexCreator.putFloatMV(ArrayUtils.toPrimitive((Float[]) 
outputValue));
+              break;
+            case DOUBLE:
+              
forwardIndexCreator.putDoubleMV(ArrayUtils.toPrimitive((Double[]) outputValue));
+              break;
+            case STRING:
+              forwardIndexCreator.putStringMV((String[]) outputValue);
+              break;
+            case BYTES:
+              forwardIndexCreator.putBytesMV((byte[][]) outputValue);
+              break;
+            default:
+              throw new IllegalStateException();
+          }
+        }
       }
     }
+
+    // Add the column metadata
+    SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties, 
column, indexCreationInfo, numDocs,
+        fieldSpec, false, 0);
+  }
+
+  private ForwardIndexCreator getForwardIndexCreator(FieldSpec fieldSpec, 
ColumnIndexCreationInfo indexCreationInfo,
+      int numDocs, String column, boolean hasDictionary) throws Exception {
+
+    IndexCreationContext indexCreationContext = IndexCreationContext.builder()
+        .withIndexDir(_indexDir)
+        .withFieldSpec(fieldSpec)
+        .withColumnIndexCreationInfo(indexCreationInfo)
+        .withTotalDocs(numDocs)
+        .withDictionary(hasDictionary)
+        .build();
+
+    ForwardIndexConfig forwardIndexConfig = null;
+    FieldIndexConfigs fieldIndexConfig = 
_indexLoadingConfig.getFieldIndexConfig(column);
+    if (fieldIndexConfig != null) {
+      forwardIndexConfig = fieldIndexConfig.getConfig(new 
ForwardIndexPlugin().getIndexType());
+    }
+    if (forwardIndexConfig == null) {
+      forwardIndexConfig = new ForwardIndexConfig(false, null, null, null, 
null, null);
+    }
+
+    return ForwardIndexCreatorFactory.createIndexCreator(indexCreationContext, 
forwardIndexConfig);
   }
 
   @SuppressWarnings("rawtypes")
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
index 4f8dc7e8be..7fa10155ee 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
@@ -65,27 +65,39 @@ public class V3DefaultColumnHandler extends 
BaseDefaultColumnHandler {
     boolean forwardIndexDisabled = !isSingleValue && 
isForwardIndexDisabled(column);
     File forwardIndexFile = null;
     File invertedIndexFile = null;
+
     if (isSingleValue) {
       forwardIndexFile = new File(_indexDir, column + 
V1Constants.Indexes.SORTED_SV_FORWARD_INDEX_FILE_EXTENSION);
       if (!forwardIndexFile.exists()) {
         forwardIndexFile = new File(_indexDir, column + 
V1Constants.Indexes.UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION);
       }
+      if (!forwardIndexFile.exists()) {
+        forwardIndexFile = new File(_indexDir, column + 
V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION);
+      }
     } else {
       if (forwardIndexDisabled) {
         // An inverted index is created instead of forward index for 
multi-value columns with forward index disabled
+        // Note that we don't currently support creation of forward index 
disabled derived columns
         invertedIndexFile = new File(_indexDir, column + 
V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);
       } else {
         forwardIndexFile = new File(_indexDir, column + 
V1Constants.Indexes.UNSORTED_MV_FORWARD_INDEX_FILE_EXTENSION);
+        if (!forwardIndexFile.exists()) {
+          forwardIndexFile = new File(_indexDir, column + 
V1Constants.Indexes.RAW_MV_FORWARD_INDEX_FILE_EXTENSION);
+        }
       }
     }
+
     if (forwardIndexFile != null) {
       LoaderUtils.writeIndexToV3Format(_segmentWriter, column, 
forwardIndexFile, StandardIndexes.forward());
     }
     if (invertedIndexFile != null) {
       LoaderUtils.writeIndexToV3Format(_segmentWriter, column, 
invertedIndexFile, StandardIndexes.inverted());
     }
+
     File dictionaryFile = new File(_indexDir, column + 
V1Constants.Dict.FILE_EXTENSION);
-    LoaderUtils.writeIndexToV3Format(_segmentWriter, column, dictionaryFile, 
StandardIndexes.dictionary());
+    if (dictionaryFile.exists()) {
+      LoaderUtils.writeIndexToV3Format(_segmentWriter, column, dictionaryFile, 
StandardIndexes.dictionary());
+    }
 
     File nullValueVectorFile = new File(_indexDir, column + 
V1Constants.Indexes.NULLVALUE_VECTOR_FILE_EXTENSION);
     if (nullValueVectorFile.exists()) {
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
index acdc679256..80178b3fda 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.segment.local.segment.index.loader;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Sets;
 import java.io.File;
@@ -141,6 +142,7 @@ public class SegmentPreProcessorTest {
   private static final String NEW_BOOLEAN_SV_DIMENSION_COLUMN_NAME = 
"newBooleanSVDimension";
   private static final String NEW_INT_SV_DIMENSION_COLUMN_NAME = 
"newIntSVDimension";
   private static final String NEW_STRING_MV_DIMENSION_COLUMN_NAME = 
"newStringMVDimension";
+  private static final String NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME = 
"newRawStringSVDimension";
   private static final String NEW_HLL_BYTE_METRIC_COLUMN_NAME = 
"newHLLByteMetric";
   private static final String NEW_TDIGEST_BYTE_METRIC_COLUMN_NAME = 
"newTDigestByteMetric";
 
@@ -1101,9 +1103,13 @@ public class SegmentPreProcessorTest {
         Collections.emptyList());
     IngestionConfig ingestionConfig = new IngestionConfig();
     ingestionConfig.setTransformConfigs(
-        Collections.singletonList(new 
TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, "plus(column1, 1)")));
+        ImmutableList.of(
+            new TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, 
"plus(column1, 1)"),
+            new TransformConfig(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME, 
"reverse(column3)")
+        ));
     _tableConfig.setIngestionConfig(ingestionConfig);
     _indexLoadingConfig.addInvertedIndexColumns(NEW_COLUMN_INVERTED_INDEX);
+    
_indexLoadingConfig.addNoDictionaryColumns(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME);
     checkUpdateDefaultColumns();
 
     // Try to use the third schema and update default value again.
@@ -1148,9 +1154,13 @@ public class SegmentPreProcessorTest {
 
     IngestionConfig ingestionConfig = new IngestionConfig();
     ingestionConfig.setTransformConfigs(
-        Collections.singletonList(new 
TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, "plus(column1, 1)")));
+        ImmutableList.of(
+            new TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, 
"plus(column1, 1)"),
+            new TransformConfig(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME, 
"reverse(column3)")
+        ));
     _tableConfig.setIngestionConfig(ingestionConfig);
     _indexLoadingConfig.addInvertedIndexColumns(NEW_COLUMN_INVERTED_INDEX);
+    
_indexLoadingConfig.addNoDictionaryColumns(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME);
     checkUpdateDefaultColumns();
 
     // Try to use the third schema and update default value again.
@@ -1257,6 +1267,15 @@ public class SegmentPreProcessorTest {
     assertEquals(columnMetadata.getMinValue(), (int) 
originalColumnMetadata.getMinValue() + 1);
     assertEquals(columnMetadata.getMaxValue(), (int) 
originalColumnMetadata.getMaxValue() + 1);
 
+    columnMetadata = 
segmentMetadata.getColumnMetadataFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME);
+    assertEquals(columnMetadata.getFieldSpec(),
+        
_newColumnsSchema1.getFieldSpecFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME));
+    assertTrue(columnMetadata.isAutoGenerated());
+    originalColumnMetadata = segmentMetadata.getColumnMetadataFor("column3");
+    assertEquals(columnMetadata.getCardinality(), 
originalColumnMetadata.getCardinality());
+    assertEquals(columnMetadata.getBitsPerElement(), 
originalColumnMetadata.getBitsPerElement());
+    assertEquals(columnMetadata.getTotalNumberOfEntries(), 
originalColumnMetadata.getTotalNumberOfEntries());
+
     // Check dictionary and forward index exist.
     try (SegmentDirectory segmentDirectory = 
SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader()
         .load(_indexDir.toURI(),
@@ -1276,6 +1295,9 @@ public class SegmentPreProcessorTest {
       assertTrue(reader.hasIndexFor(NEW_INT_SV_DIMENSION_COLUMN_NAME, 
StandardIndexes.forward()));
       assertTrue(reader.hasIndexFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME, 
StandardIndexes.dictionary()));
       assertTrue(reader.hasIndexFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME, 
StandardIndexes.forward()));
+      // Dictionary shouldn't be created for raw derived column
+      assertFalse(reader.hasIndexFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME, 
StandardIndexes.dictionary()));
+      assertTrue(reader.hasIndexFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME, 
StandardIndexes.forward()));
 
       assertTrue(reader.hasIndexFor(NEW_INT_METRIC_COLUMN_NAME, 
StandardIndexes.nullValueVector()));
       assertTrue(reader.hasIndexFor(NEW_LONG_METRIC_COLUMN_NAME, 
StandardIndexes.nullValueVector()));
diff --git a/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json 
b/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json
index b6ef01c991..32ba7dcf58 100644
--- a/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json
+++ b/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json
@@ -32,6 +32,11 @@
       "dataType": "STRING",
       "name": "newStringMVDimension",
       "singleValueField": false
+    },
+    {
+      "dataType": "STRING",
+      "name": "newRawStringSVDimension",
+      "defaultNullValue": "null"
     }
   ]
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to