This is an automated email from the ASF dual-hosted git repository. snlee pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 6208e7cb46 Fix the issue with dict -> noDict (noDict -> dict) + Startree creation (#11537) 6208e7cb46 is described below commit 6208e7cb467dd98216bcceafd14c0feb7b3bba6a Author: Seunghyun Lee <seungh...@startree.ai> AuthorDate: Thu Sep 7 12:59:19 2023 -0700 Fix the issue with dict -> noDict (noDict -> dict) + Startree creation (#11537) * Fix the issue with dict -> noDict (noDict -> dict) + Startree creation - Currently, our segment preprocessor throws the exception when dict -> noDict (or noDict -> dict) + startree creation happens together. - The root cause of the issue is that startree creation process load the segment again while we haven't update the currently loaded segment properly (we flush the changes on closing the writer). - The solution is to first process forward index -> other indices, close the segment writer to make sure all the changes are applied, open the writer again, and process the startree updates. * Moving min/max update to happen before startree creation --- .../segment/index/loader/SegmentPreProcessor.java | 16 +++-- .../index/loader/SegmentPreProcessorTest.java | 75 +++++++++++++++++++++- 2 files changed, 82 insertions(+), 9 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java index 4913088fc5..97fcf710e5 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java @@ -115,7 +115,6 @@ public class SegmentPreProcessor implements AutoCloseable { forwardHandler.updateIndices(segmentWriter); // Now that ForwardIndexHandler.updateIndices has been updated, we can run all other indexes in any order - _segmentMetadata = new SegmentMetadataImpl(indexDir); _segmentDirectory.reloadMetadata(); @@ -131,16 +130,12 @@ public class SegmentPreProcessor implements AutoCloseable { } } - // Create/modify/remove star-trees if required. - processStarTrees(indexDir); - - // Perform post-cleanup operations on the index handlers. This should be called after processing the startrees + // Perform post-cleanup operations on the index handlers. for (IndexHandler handler : indexHandlers) { handler.postUpdateIndicesCleanup(segmentWriter); } // Add min/max value to column metadata according to the prune mode. - // For star-tree index, because it can only increase the range, so min/max value can still be used in pruner. ColumnMinMaxValueGeneratorMode columnMinMaxValueGeneratorMode = _indexLoadingConfig.getColumnMinMaxValueGeneratorMode(); if (columnMinMaxValueGeneratorMode != ColumnMinMaxValueGeneratorMode.NONE) { @@ -153,6 +148,15 @@ public class SegmentPreProcessor implements AutoCloseable { segmentWriter.save(); } + + // Startree creation will load the segment again, so we need to close and re-open the segment writer to make sure + // that the other required indices (e.g. forward index) are up-to-date. + try (SegmentDirectory.Writer segmentWriter = _segmentDirectory.createWriter()) { + // Create/modify/remove star-trees if required. + processStarTrees(indexDir); + _segmentDirectory.reloadMetadata(); + segmentWriter.save(); + } } private IndexHandler createHandler(IndexType<?, ?, ?> type) { diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java index 0bdaefca0a..7b2e7ea9ec 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java @@ -64,6 +64,7 @@ import org.apache.pinot.segment.spi.utils.SegmentMetadataUtils; import org.apache.pinot.spi.config.table.BloomFilterConfig; import org.apache.pinot.spi.config.table.IndexConfig; import org.apache.pinot.spi.config.table.IndexingConfig; +import org.apache.pinot.spi.config.table.StarTreeIndexConfig; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.config.table.TableType; import org.apache.pinot.spi.config.table.ingestion.IngestionConfig; @@ -1774,7 +1775,7 @@ public class SegmentPreProcessorTest { FileUtils.deleteQuietly(INDEX_DIR); // build good segment, no needPreprocess - File segment = buildTestSegmentForMinMax(tableConfig, schema, "validSegment", stringValuesValid, longValues); + File segment = buildTestSegment(tableConfig, schema, "validSegment", stringValuesValid, longValues); SegmentDirectory segmentDirectory = SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader() .load(segment.toURI(), new SegmentDirectoryLoaderContext.Builder().setSegmentDirectoryConfigs(_configuration).build()); @@ -1785,7 +1786,7 @@ public class SegmentPreProcessorTest { // build bad segment, still no needPreprocess, since minMaxInvalid flag should be set FileUtils.deleteQuietly(INDEX_DIR); - segment = buildTestSegmentForMinMax(tableConfig, schema, "invalidSegment", stringValuesInvalid, longValues); + segment = buildTestSegment(tableConfig, schema, "invalidSegment", stringValuesInvalid, longValues); segmentDirectory = SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader().load(segment.toURI(), new SegmentDirectoryLoaderContext.Builder().setSegmentDirectoryConfigs(_configuration).build()); indexLoadingConfig = getDefaultIndexLoadingConfig(); @@ -1807,7 +1808,75 @@ public class SegmentPreProcessorTest { FileUtils.deleteQuietly(INDEX_DIR); } - private File buildTestSegmentForMinMax(final TableConfig tableConfig, final Schema schema, String segmentName, + @Test + public void testStarTreeCreationWithDictionaryChanges() + throws Exception { + // Build the sample segment + String[] stringValues = {"A", "C", "B", "C", "D", "E", "E", "E"}; + long[] longValues = {2, 1, 2, 3, 4, 5, 3, 2}; + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").build(); + Schema schema = new Schema.SchemaBuilder().addSingleValueDimension("stringCol", FieldSpec.DataType.STRING) + .addMetric("longCol", DataType.LONG).build(); + FileUtils.deleteQuietly(INDEX_DIR); + + // Build good segment, no need for preprocess + File segment = buildTestSegment(tableConfig, schema, "validSegment", stringValues, longValues); + SegmentDirectory segmentDirectory = SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader() + .load(segment.toURI(), + new SegmentDirectoryLoaderContext.Builder().setSegmentDirectoryConfigs(_configuration).build()); + + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, tableConfig); + try (SegmentPreProcessor processor = new SegmentPreProcessor(segmentDirectory, indexLoadingConfig, schema)) { + assertFalse(processor.needProcess()); + } + + // Update table config to convert dict to noDict for longCol + indexLoadingConfig.setNoDictionaryColumns(Set.of("longCol")); + try (SegmentPreProcessor processor = new SegmentPreProcessor(segmentDirectory, indexLoadingConfig, schema)) { + assertTrue(processor.needProcess()); + processor.process(); + } + + // Update table config to convert noDict to dict for longCol + indexLoadingConfig.setNoDictionaryColumns(Set.of()); + try (SegmentPreProcessor processor = new SegmentPreProcessor(segmentDirectory, indexLoadingConfig, schema)) { + assertTrue(processor.needProcess()); + processor.process(); + } + + // Update table config to convert dict to noDict for longCol and add the Startree index config + StarTreeIndexConfig starTreeIndexConfig = + new StarTreeIndexConfig(List.of("stringCol"), null, List.of("SUM__longCol"), 1000); + tableConfig.getIndexingConfig().setStarTreeIndexConfigs(List.of(starTreeIndexConfig)); + tableConfig.getIndexingConfig().setEnableDynamicStarTreeCreation(true); + tableConfig.getIndexingConfig().setNoDictionaryColumns(List.of("longCol")); + indexLoadingConfig = new IndexLoadingConfig(null, tableConfig); + try (SegmentPreProcessor processor = new SegmentPreProcessor(segmentDirectory, indexLoadingConfig, schema)) { + assertTrue(processor.needProcess()); + processor.process(); + } + + // Remove Startree index but keep the no dictionary for longCol + tableConfig.getIndexingConfig().setStarTreeIndexConfigs(List.of()); + tableConfig.getIndexingConfig().setEnableDynamicStarTreeCreation(true); + indexLoadingConfig = new IndexLoadingConfig(null, tableConfig); + try (SegmentPreProcessor processor = new SegmentPreProcessor(segmentDirectory, indexLoadingConfig, schema)) { + assertTrue(processor.needProcess()); + processor.process(); + } + + // Update table config to convert noDict to dict for longCol and also add the Startree index + tableConfig.getIndexingConfig().setStarTreeIndexConfigs(List.of(starTreeIndexConfig)); + tableConfig.getIndexingConfig().setEnableDynamicStarTreeCreation(true); + tableConfig.getIndexingConfig().setNoDictionaryColumns(List.of()); + indexLoadingConfig = new IndexLoadingConfig(null, tableConfig); + try (SegmentPreProcessor processor = new SegmentPreProcessor(segmentDirectory, indexLoadingConfig, schema)) { + assertTrue(processor.needProcess()); + processor.process(); + } + } + + private File buildTestSegment(final TableConfig tableConfig, final Schema schema, String segmentName, String[] stringValues, long[] longValues) throws Exception { SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, schema); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org