klsince commented on code in PR #10184: URL: https://github.com/apache/pinot/pull/10184#discussion_r1147885247
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java: ########## @@ -133,183 +122,144 @@ public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreatio return; } - Collection<FieldSpec> fieldSpecs = schema.getAllFieldSpecs(); - Set<String> invertedIndexColumns = new HashSet<>(); - for (String columnName : _config.getInvertedIndexCreationColumns()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create inverted index for column: %s because it is not in schema", columnName); - invertedIndexColumns.add(columnName); - } + Map<String, FieldIndexConfigs> indexConfigs = segmentCreationSpec.getIndexConfigsByColName(); - Set<String> bloomFilterColumns = new HashSet<>(); - for (String columnName : _config.getBloomFilterCreationColumns()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create bloom filter for column: %s because it is not in schema", columnName); - bloomFilterColumns.add(columnName); - } - - Set<String> rangeIndexColumns = new HashSet<>(); - for (String columnName : _config.getRangeIndexCreationColumns()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create range index for column: %s because it is not in schema", columnName); - rangeIndexColumns.add(columnName); - } - - Set<String> textIndexColumns = new HashSet<>(); - for (String columnName : _config.getTextIndexCreationColumns()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create text index for column: %s because it is not in schema", columnName); - textIndexColumns.add(columnName); - } - - Set<String> fstIndexColumns = new HashSet<>(); - for (String columnName : _config.getFSTIndexCreationColumns()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create FST index for column: %s because it is not in schema", columnName); - fstIndexColumns.add(columnName); - } - - Map<String, JsonIndexConfig> 
jsonIndexConfigs = _config.getJsonIndexConfigs(); - for (String columnName : jsonIndexConfigs.keySet()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create json index for column: %s because it is not in schema", columnName); - } - - Set<String> forwardIndexDisabledColumns = new HashSet<>(); - for (String columnName : _config.getForwardIndexDisabledColumns()) { - Preconditions.checkState(schema.hasColumn(columnName), String.format("Invalid config. Can't disable " - + "forward index creation for a column: %s that does not exist in schema", columnName)); - forwardIndexDisabledColumns.add(columnName); - } - - Map<String, H3IndexConfig> h3IndexConfigs = _config.getH3IndexConfigs(); - for (String columnName : h3IndexConfigs.keySet()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create H3 index for column: %s because it is not in schema", columnName); - } + _creatorsByColAndIndex = Maps.newHashMapWithExpectedSize(indexConfigs.keySet().size()); - // Initialize creators for dictionary, forward index and inverted index - IndexingConfig indexingConfig = _config.getTableConfig().getIndexingConfig(); - int rangeIndexVersion = indexingConfig.getRangeIndexVersion(); - for (FieldSpec fieldSpec : fieldSpecs) { - // Ignore virtual columns + for (String columnName : indexConfigs.keySet()) { + FieldSpec fieldSpec = schema.getFieldSpecFor(columnName); + if (fieldSpec == null) { + Preconditions.checkState(schema.hasColumn(columnName), + "Cannot create inverted index for column: %s because it is not in schema", columnName); Review Comment: adjust error msg? as I didn't see why it's specific to inverted index here. ########## pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java: ########## @@ -1542,7 +1556,7 @@ public void testLuceneRealtimeWithoutSearcherManager() indexReader3.close(); } - @Test + //@Test Review Comment: un-comment the test? 
########## pinot-compatibility-verifier/pom.xml: ########## @@ -92,6 +92,11 @@ <version>${project.version}</version> <type>test-jar</type> </dependency> + <dependency> + <groupId>org.testng</groupId> + <artifactId>testng</artifactId> Review Comment: nit: it looks like TestUtils.java, this pom.xml and StreamOp.java could be merged first, to trim this PR a little bit. ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverter.java: ########## @@ -75,9 +81,8 @@ public void build(@Nullable SegmentVersion segmentVersion, ServerMetrics serverM // realtime segment generation genConfig.setSegmentTimeValueCheck(false); if (_columnIndicesForRealtimeTable.getInvertedIndexColumns() != null) { - for (String column : _columnIndicesForRealtimeTable.getInvertedIndexColumns()) { - genConfig.createInvertedIndexForColumn(column); - } + genConfig.setIndexOn(InvertedIndexType.INSTANCE, IndexConfig.ENABLED, Review Comment: Q: when should one use StandardIndexes.inverted() vs. InvertedIndexType.INSTANCE? I see both methods mixed in this class. I assume StandardIndexes.inverted() is preferred. ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/immutable/ImmutableSegmentLoader.java: ########## @@ -166,6 +165,8 @@ public static ImmutableSegment load(SegmentDirectory segmentDirectory, IndexLoad segmentMetadata.removeColumn(column); } } + } else { + indexLoadingConfig.addKnownColumns(columnMetadataMap.keySet()); Review Comment: Curious: why do we need to track `_knownColumns` in indexLoadingConfig now? 
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentDictionaryCreator.java: ########## @@ -62,9 +64,20 @@ public class SegmentDictionaryCreator implements Closeable { public SegmentDictionaryCreator(FieldSpec fieldSpec, File indexDir, boolean useVarLengthDictionary) { _columnName = fieldSpec.getName(); _storedType = fieldSpec.getDataType().getStoredType(); - _dictionaryFile = new File(indexDir, _columnName + V1Constants.Dict.FILE_EXTENSION); + _dictionaryFile = new File(indexDir, _columnName + DictionaryIndexType.INSTANCE.getFileExtension()); _useVarLengthDictionary = useVarLengthDictionary; } + @Override + public void add(@Nonnull Object value, int dictId) + throws IOException { + throw new UnsupportedOperationException("Dictionary indexes should not be build as a normal index"); Review Comment: s/Dictionary indexes/Dictionary/ -- It would also help to add a comment here explaining why/how the dictionary is built differently, without using add(). I kind of remember this was discussed in the PEP design doc, but it is still better to leave some comments here, as code iterates much faster than docs. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org