This is an automated email from the ASF dual-hosted git repository.

snlee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 6208e7cb46 Fix the issue with dict -> noDict (noDict -> dict) + 
Startree creation (#11537)
6208e7cb46 is described below

commit 6208e7cb467dd98216bcceafd14c0feb7b3bba6a
Author: Seunghyun Lee <seungh...@startree.ai>
AuthorDate: Thu Sep 7 12:59:19 2023 -0700

    Fix the issue with dict -> noDict (noDict -> dict) + Startree creation 
(#11537)
    
    * Fix the issue with dict -> noDict (noDict -> dict) + Startree creation
    
    - Currently, our segment preprocessor throws the exception when
      dict -> noDict (or noDict -> dict) + startree creation happens
      together.
    - The root cause of the issue is that startree creation process
      load the segment again while we haven't update the currently
      loaded segment properly (we flush the changes on closing the
      writer).
    - The solution is to first process forward index -> other indices,
      close the segment writer to make sure all the changes are applied,
      open the writer again, and process the startree updates.
    
    * Moving min/max update to happen before startree creation
---
 .../segment/index/loader/SegmentPreProcessor.java  | 16 +++--
 .../index/loader/SegmentPreProcessorTest.java      | 75 +++++++++++++++++++++-
 2 files changed, 82 insertions(+), 9 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java
index 4913088fc5..97fcf710e5 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java
@@ -115,7 +115,6 @@ public class SegmentPreProcessor implements AutoCloseable {
       forwardHandler.updateIndices(segmentWriter);
 
       // Now that ForwardIndexHandler.updateIndices has been updated, we can 
run all other indexes in any order
-
       _segmentMetadata = new SegmentMetadataImpl(indexDir);
       _segmentDirectory.reloadMetadata();
 
@@ -131,16 +130,12 @@ public class SegmentPreProcessor implements AutoCloseable 
{
         }
       }
 
-      // Create/modify/remove star-trees if required.
-      processStarTrees(indexDir);
-
-      // Perform post-cleanup operations on the index handlers. This should be 
called after processing the startrees
+      // Perform post-cleanup operations on the index handlers.
       for (IndexHandler handler : indexHandlers) {
         handler.postUpdateIndicesCleanup(segmentWriter);
       }
 
       // Add min/max value to column metadata according to the prune mode.
-      // For star-tree index, because it can only increase the range, so 
min/max value can still be used in pruner.
       ColumnMinMaxValueGeneratorMode columnMinMaxValueGeneratorMode =
           _indexLoadingConfig.getColumnMinMaxValueGeneratorMode();
       if (columnMinMaxValueGeneratorMode != 
ColumnMinMaxValueGeneratorMode.NONE) {
@@ -153,6 +148,15 @@ public class SegmentPreProcessor implements AutoCloseable {
 
       segmentWriter.save();
     }
+
+    // Startree creation will load the segment again, so we need to close and 
re-open the segment writer to make sure
+    // that the other required indices (e.g. forward index) are up-to-date.
+    try (SegmentDirectory.Writer segmentWriter = 
_segmentDirectory.createWriter()) {
+      // Create/modify/remove star-trees if required.
+      processStarTrees(indexDir);
+      _segmentDirectory.reloadMetadata();
+      segmentWriter.save();
+    }
   }
 
   private IndexHandler createHandler(IndexType<?, ?, ?> type) {
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
index 0bdaefca0a..7b2e7ea9ec 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
@@ -64,6 +64,7 @@ import 
org.apache.pinot.segment.spi.utils.SegmentMetadataUtils;
 import org.apache.pinot.spi.config.table.BloomFilterConfig;
 import org.apache.pinot.spi.config.table.IndexConfig;
 import org.apache.pinot.spi.config.table.IndexingConfig;
+import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
 import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.config.table.ingestion.IngestionConfig;
@@ -1774,7 +1775,7 @@ public class SegmentPreProcessorTest {
     FileUtils.deleteQuietly(INDEX_DIR);
 
     // build good segment, no needPreprocess
-    File segment = buildTestSegmentForMinMax(tableConfig, schema, 
"validSegment", stringValuesValid, longValues);
+    File segment = buildTestSegment(tableConfig, schema, "validSegment", 
stringValuesValid, longValues);
     SegmentDirectory segmentDirectory = 
SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader()
         .load(segment.toURI(),
             new 
SegmentDirectoryLoaderContext.Builder().setSegmentDirectoryConfigs(_configuration).build());
@@ -1785,7 +1786,7 @@ public class SegmentPreProcessorTest {
 
     // build bad segment, still no needPreprocess, since minMaxInvalid flag 
should be set
     FileUtils.deleteQuietly(INDEX_DIR);
-    segment = buildTestSegmentForMinMax(tableConfig, schema, "invalidSegment", 
stringValuesInvalid, longValues);
+    segment = buildTestSegment(tableConfig, schema, "invalidSegment", 
stringValuesInvalid, longValues);
     segmentDirectory = 
SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader().load(segment.toURI(),
         new 
SegmentDirectoryLoaderContext.Builder().setSegmentDirectoryConfigs(_configuration).build());
     indexLoadingConfig = getDefaultIndexLoadingConfig();
@@ -1807,7 +1808,75 @@ public class SegmentPreProcessorTest {
     FileUtils.deleteQuietly(INDEX_DIR);
   }
 
-  private File buildTestSegmentForMinMax(final TableConfig tableConfig, final 
Schema schema, String segmentName,
+  @Test
+  public void testStarTreeCreationWithDictionaryChanges()
+      throws Exception {
+    // Build the sample segment
+    String[] stringValues = {"A", "C", "B", "C", "D", "E", "E", "E"};
+    long[] longValues = {2, 1, 2, 3, 4, 5, 3, 2};
+    TableConfig tableConfig = new 
TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").build();
+    Schema schema = new 
Schema.SchemaBuilder().addSingleValueDimension("stringCol", 
FieldSpec.DataType.STRING)
+        .addMetric("longCol", DataType.LONG).build();
+    FileUtils.deleteQuietly(INDEX_DIR);
+
+    // Build good segment, no need for preprocess
+    File segment = buildTestSegment(tableConfig, schema, "validSegment", 
stringValues, longValues);
+    SegmentDirectory segmentDirectory = 
SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader()
+        .load(segment.toURI(),
+            new 
SegmentDirectoryLoaderContext.Builder().setSegmentDirectoryConfigs(_configuration).build());
+
+    IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, 
tableConfig);
+    try (SegmentPreProcessor processor = new 
SegmentPreProcessor(segmentDirectory, indexLoadingConfig, schema)) {
+      assertFalse(processor.needProcess());
+    }
+
+    // Update table config to convert dict to noDict for longCol
+    indexLoadingConfig.setNoDictionaryColumns(Set.of("longCol"));
+    try (SegmentPreProcessor processor = new 
SegmentPreProcessor(segmentDirectory, indexLoadingConfig, schema)) {
+      assertTrue(processor.needProcess());
+      processor.process();
+    }
+
+    // Update table config to convert noDict to dict for longCol
+    indexLoadingConfig.setNoDictionaryColumns(Set.of());
+    try (SegmentPreProcessor processor = new 
SegmentPreProcessor(segmentDirectory, indexLoadingConfig, schema)) {
+      assertTrue(processor.needProcess());
+      processor.process();
+    }
+
+    // Update table config to convert dict to noDict for longCol and add the 
Startree index config
+    StarTreeIndexConfig starTreeIndexConfig =
+        new StarTreeIndexConfig(List.of("stringCol"), null, 
List.of("SUM__longCol"), 1000);
+    
tableConfig.getIndexingConfig().setStarTreeIndexConfigs(List.of(starTreeIndexConfig));
+    tableConfig.getIndexingConfig().setEnableDynamicStarTreeCreation(true);
+    tableConfig.getIndexingConfig().setNoDictionaryColumns(List.of("longCol"));
+    indexLoadingConfig = new IndexLoadingConfig(null, tableConfig);
+    try (SegmentPreProcessor processor = new 
SegmentPreProcessor(segmentDirectory, indexLoadingConfig, schema)) {
+      assertTrue(processor.needProcess());
+      processor.process();
+    }
+
+    // Remove Startree index but keep the no dictionary for longCol
+    tableConfig.getIndexingConfig().setStarTreeIndexConfigs(List.of());
+    tableConfig.getIndexingConfig().setEnableDynamicStarTreeCreation(true);
+    indexLoadingConfig = new IndexLoadingConfig(null, tableConfig);
+    try (SegmentPreProcessor processor = new 
SegmentPreProcessor(segmentDirectory, indexLoadingConfig, schema)) {
+      assertTrue(processor.needProcess());
+      processor.process();
+    }
+
+    // Update table config to convert noDict to dict for longCol and also add 
the Startree index
+    
tableConfig.getIndexingConfig().setStarTreeIndexConfigs(List.of(starTreeIndexConfig));
+    tableConfig.getIndexingConfig().setEnableDynamicStarTreeCreation(true);
+    tableConfig.getIndexingConfig().setNoDictionaryColumns(List.of());
+    indexLoadingConfig = new IndexLoadingConfig(null, tableConfig);
+    try (SegmentPreProcessor processor = new 
SegmentPreProcessor(segmentDirectory, indexLoadingConfig, schema)) {
+      assertTrue(processor.needProcess());
+      processor.process();
+    }
+  }
+
+  private File buildTestSegment(final TableConfig tableConfig, final Schema 
schema, String segmentName,
       String[] stringValues, long[] longValues)
       throws Exception {
     SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, 
schema);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to