Jackie-Jiang commented on a change in pull request #7286: URL: https://github.com/apache/pinot/pull/7286#discussion_r696206001
########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/immutable/ImmutableSegmentLoader.java ########## @@ -57,55 +57,59 @@ private ImmutableSegmentLoader() { private static final Logger LOGGER = LoggerFactory.getLogger(ImmutableSegmentLoader.class); /** - * For tests only. + * load with empty schema and IndexLoadingConfig is used to get a handler of the segment for Review comment: (nit) Capitalize it, same for other javadocs ```suggestion * Loads the segment with... ``` ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/immutable/ImmutableSegmentLoader.java ########## @@ -164,6 +168,29 @@ public static ImmutableSegment load(File indexDir, IndexLoadingConfig indexLoadi return segment; } + private static void tryToConvertSegment(File indexDir, IndexLoadingConfig indexLoadingConfig, + SegmentMetadataImpl localSegmentMetadata) + throws Exception { + SegmentVersion segmentVersionToLoad = indexLoadingConfig.getSegmentVersion(); + if (segmentVersionToLoad == null || SegmentDirectoryPaths.segmentDirectoryFor(indexDir, segmentVersionToLoad) + .isDirectory()) { + return; + } + SegmentVersion segmentVersionOnDisk = localSegmentMetadata.getVersion(); + if (segmentVersionOnDisk == segmentVersionToLoad) { + return; + } + String segmentName = indexDir.getName(); + LOGGER.info("Segment: {} needs to be converted from version: {} to {}", segmentName, segmentVersionOnDisk, + segmentVersionToLoad); + SegmentFormatConverter converter = + SegmentFormatConverterFactory.getConverter(segmentVersionOnDisk, segmentVersionToLoad); + LOGGER.info("Using converter: {} to up-convert segment: {}", converter.getClass().getName(), segmentName); + converter.convert(indexDir); + LOGGER.info("Successfully up-converted segment: {} from version: {} to {}", segmentName, segmentVersionOnDisk, + segmentVersionToLoad); + } + private static void preprocessSegment(File indexDir, IndexLoadingConfig indexLoadingConfig, Schema schema) Review comment: Annotate schema as nullable ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/immutable/ImmutableSegmentLoader.java ########## @@ -57,55 +57,59 @@ private ImmutableSegmentLoader() { private static final Logger LOGGER = LoggerFactory.getLogger(ImmutableSegmentLoader.class); /** - * For tests only. + * load with empty schema and IndexLoadingConfig is used to get a handler of the segment for Review comment: This method should be called when we just want to load the segment without modifying it. Let's make it clear in the javadoc. ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/immutable/ImmutableSegmentLoader.java ########## @@ -164,6 +168,29 @@ public static ImmutableSegment load(File indexDir, IndexLoadingConfig indexLoadi return segment; } + private static void tryToConvertSegment(File indexDir, IndexLoadingConfig indexLoadingConfig, Review comment: Rename it to `convertSegmentFormatIfNeeded` ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/bloomfilter/BloomFilterHandler.java ########## @@ -67,12 +67,20 @@ public BloomFilterHandler(File indexDir, SegmentMetadataImpl segmentMetadata, In _segmentVersion = segmentMetadata.getVersion(); _bloomFilterConfigs = indexLoadingConfig.getBloomFilterConfigs(); - for (String column : _bloomFilterConfigs.keySet()) { + Set<String> columnsInCfg = _bloomFilterConfigs.keySet(); + for (String column : columnsInCfg) { ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column); if (columnMetadata != null) { _bloomFilterColumns.add(columnMetadata); } } + // Remove indices not set in table config any more + Set<String> localColumns = _segmentWriter.toSegmentDirectory().getColumnsWithIndex(ColumnIndexType.BLOOM_FILTER); + for (String column : localColumns) { + if (!columnsInCfg.contains(column)) { + _segmentWriter.removeIndex(column, ColumnIndexType.BLOOM_FILTER); + } + } Review comment: Exclude local columns from the columns to generate. Same for other handlers ```suggestion Set<String> existingColumns = _segmentWriter.toSegmentDirectory().getColumnsWithIndex(ColumnIndexType.BLOOM_FILTER); Set<String> columnsToAdd = new HashSet<>(_bloomFilterConfigs.keySet()); for (String column : existingColumns) { if (!columnsToAdd.remove(column)) { _segmentWriter.removeIndex(column, ColumnIndexType.BLOOM_FILTER); } } for (String column : columnsToAdd) { ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column); if (columnMetadata != null) { _bloomFilterColumns.add(columnMetadata); } } ``` ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java ########## @@ -386,6 +386,15 @@ public void removeIndex(String columnName, ColumnIndexType indexType) { @Override public Set<String> getColumnsWithIndex(ColumnIndexType type) { Set<String> columns = new HashSet<>(); + // TEXT_INDEX is not tracked via _columnEntries, so handled separately. Review comment: Move to the bug fix PR ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java ########## @@ -112,31 +111,23 @@ public void process() rangeIndexHandler.createRangeIndices(); // Create text indices according to the index config. - Set<String> textIndexColumns = _indexLoadingConfig.getTextIndexColumns(); - if (!textIndexColumns.isEmpty()) { - TextIndexHandler textIndexHandler = - new TextIndexHandler(_indexDir, _segmentMetadata, textIndexColumns, segmentWriter); - textIndexHandler.createTextIndexesOnSegmentLoad(); - } + TextIndexHandler textIndexHandler = + new TextIndexHandler(_indexDir, _segmentMetadata, _indexLoadingConfig, segmentWriter); + textIndexHandler.createTextIndexesOnSegmentLoad(); Review comment: We should probably rename the method because it also removes the indices. Probably `updateIndices()`? Same for other handlers ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java ########## @@ -47,39 +45,33 @@ protected boolean updateDefaultColumn(String column, DefaultColumnAction action) throws Exception { LOGGER.info("Starting default column action: {} on column: {}", action, column); - // For V3 segment format, only support ADD action - // For UPDATE and REMOVE action, throw exception to drop and re-download the segment - if (action.isUpdateAction()) { - throw new V3UpdateIndexException( - "Default value indices for column: " + column + " cannot be updated for V3 format segment."); + // For UPDATE and REMOVE action, delete existing dictionary and forward index, and remove column metadata + if (action.isUpdateAction() || action.isRemoveAction()) { + removeColumnIndices(column); } - - if (action.isRemoveAction()) { - throw new V3RemoveIndexException( - "Default value indices for column: " + column + " cannot be removed for V3 format segment."); + if (!action.isAddAction() && !action.isUpdateAction()) { Review comment: ```suggestion if (action.isRemoveAction()) { ``` ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectory.java ########## @@ -108,10 +108,13 @@ public void removeIndex(String columnName, ColumnIndexType indexType) { @Override public Set<String> getColumnsWithIndex(ColumnIndexType type) { + // _indexBuffers is just a cache of index files, thus not reliable as Review comment: Shall we make these bug fixes in a separate PR for easier review? We can merge the bug fix first ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/JsonIndexHandler.java ########## @@ -104,8 +112,9 @@ private void createJsonIndexForColumn(ColumnMetadata columnMetadata) // Create new json index for the column. LOGGER.info("Creating new json index for segment: {}, column: {}", _segmentName, columnName); - Preconditions.checkState(columnMetadata.isSingleValue() && columnMetadata.getDataType() == DataType.STRING, - "Json index can only be applied to single-value STRING columns"); + Preconditions.checkState(columnMetadata.isSingleValue() && (columnMetadata.getDataType() == DataType.STRING Review comment: Move to a separate bug fix PR -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org