vvivekiyer commented on code in PR #9868: URL: https://github.com/apache/pinot/pull/9868#discussion_r1035613727
########## pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java: ########## @@ -500,6 +494,123 @@ public void testEnableDictAndOtherIndexesMV() false, 13, null, false, DataType.INT, 106688); } + @Test + public void testSimpleDisableDictionary() + throws Exception { + // TEST 1. Check running forwardIndexHandler on a V1 segment. No-op for all existing dict columns. + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + checkForwardIndexCreation(EXISTING_STRING_COL_DICT, 9, 4, _schema, false, true, false, 26, null, true, 0, + DataType.STRING, 100000); + validateIndex(ColumnIndexType.FORWARD_INDEX, COLUMN10_NAME, 3960, 12, _schema, false, true, false, 0, true, 0, null, + false, DataType.INT, 100000); + + // Convert the segment to V3. + new SegmentV1V2ToV3FormatConverter().convert(_indexDir); + + // TEST 2: Disable dictionary for EXISTING_STRING_COL_DICT. + _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_STRING_COL_DICT); + checkForwardIndexCreation(EXISTING_STRING_COL_DICT, 9, 4, _schema, false, false, false, 0, ChunkCompressionType.LZ4, + true, 0, DataType.STRING, 100000); + + // TEST 3: Disable dictionary for COLUMN10_NAME + _indexLoadingConfig.getNoDictionaryColumns().add(COLUMN10_NAME); + checkForwardIndexCreation(COLUMN10_NAME, 3960, 12, _schema, false, false, false, 0, ChunkCompressionType.LZ4, true, + 0, DataType.INT, 100000); + } + + @Test + public void testDisableDictAndOtherIndexesSV() + throws Exception { + // Validate No-op. + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + new SegmentV1V2ToV3FormatConverter().convert(_indexDir); + + // TEST 1: Disable dictionary on a column that has inverted index. Should be a no-op and column should still have + // a dictionary. + _indexLoadingConfig.getNoDictionaryColumns().add(COLUMN1_NAME); + checkForwardIndexCreation(COLUMN1_NAME, 51594, 16, _schema, false, true, false, 0, null, true, 0, DataType.INT, + 100000); + + // TEST 2: Disable dictionary. Also remove inverted index on column1. + _indexLoadingConfig.getNoDictionaryColumns().add(COLUMN1_NAME); + _indexLoadingConfig.getInvertedIndexColumns().remove(COLUMN1_NAME); + checkForwardIndexCreation(COLUMN1_NAME, 51594, 16, _schema, false, false, false, 0, null, true, 0, DataType.INT, + 100000); + + // TEST 3: Disable dictionary for a column (Column10) that has range index. Review Comment: I added an additional validation based on rangeIndex size from indexMap. ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java: ########## @@ -677,4 +836,168 @@ private void updateMetadataProperties(File indexDir, Map<String, String> metadat properties.save(); } + + private void disableDictionaryAndCreateRawForwardIndex(String column, SegmentDirectory.Writer segmentWriter, + IndexCreatorProvider indexCreatorProvider) + throws Exception { + ColumnMetadata existingColMetadata = _segmentMetadata.getColumnMetadataFor(column); + boolean isSingleValue = existingColMetadata.isSingleValue(); + + File indexDir = _segmentMetadata.getIndexDir(); + String segmentName = _segmentMetadata.getName(); + File inProgress = new File(indexDir, column + ".fwd.inprogress"); + String fileExtension = isSingleValue ? V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION + : V1Constants.Indexes.RAW_MV_FORWARD_INDEX_FILE_EXTENSION; + File fwdIndexFile = new File(indexDir, column + fileExtension); + + if (!inProgress.exists()) { + // Marker file does not exist, which means last run ended normally. + // Create a marker file. + FileUtils.touch(inProgress); + } else { + // Marker file exists, which means last run was interrupted. + // Remove forward index if exists. + FileUtils.deleteQuietly(fwdIndexFile); + } + + LOGGER.info("Creating raw forward index for segment={} and column={}", segmentName, column); + rewriteDictToRawForwardIndex(column, existingColMetadata, segmentWriter, indexDir, indexCreatorProvider); + + // Remove dictionary and forward index + segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX); + segmentWriter.removeIndex(column, ColumnIndexType.DICTIONARY); + LoaderUtils.writeIndexToV3Format(segmentWriter, column, fwdIndexFile, ColumnIndexType.FORWARD_INDEX); + + LOGGER.info("Created raw forwardIndex. Updating metadata properties for segment={} and column={}", segmentName, + column); + Map<String, String> metadataProperties = new HashMap<>(); + metadataProperties.put(getKeyFor(column, HAS_DICTIONARY), String.valueOf(false)); + metadataProperties.put(getKeyFor(column, DICTIONARY_ELEMENT_SIZE), String.valueOf(0)); + updateMetadataProperties(indexDir, metadataProperties); + + // Remove range index, inverted index and FST index. + removeDictRelatedIndexes(column, segmentWriter); + + // Delete marker file. + FileUtils.deleteQuietly(inProgress); + + LOGGER.info("Created raw based forward index for segment: {}, column: {}", segmentName, column); + } + + private void rewriteDictToRawForwardIndex(String column, ColumnMetadata existingColMetadata, + SegmentDirectory.Writer segmentWriter, File indexDir, IndexCreatorProvider indexCreatorProvider) + throws Exception { + try (ForwardIndexReader reader = LoaderUtils.getForwardIndexReader(segmentWriter, existingColMetadata)) { + Dictionary dictionary = LoaderUtils.getDictionary(segmentWriter, existingColMetadata); + // lengthOfLongestEntry not available for dict columns. Should be computed by reading all values in dictionary. Review Comment: Good point. Fixed. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org