npawar commented on code in PR #10184: URL: https://github.com/apache/pinot/pull/10184#discussion_r1153911248
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitSlicedRangeIndexCreator.java: ########## @@ -40,12 +40,15 @@ public class BitSlicedRangeIndexCreator implements CombinedInvertedIndexCreator private final RangeBitmap.Appender _appender; private final File _rangeIndexFile; private final long _minValue; + private final FieldSpec.DataType _valueType; Review Comment: nit: s/valueType/dataType ? ########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java: ########## @@ -138,7 +138,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet private static final String COLUMN_LENGTH_MAP_KEY = "columnLengthMap"; private static final String COLUMN_CARDINALITY_MAP_KEY = "columnCardinalityMap"; private static final String MAX_NUM_MULTI_VALUES_MAP_KEY = "maxNumMultiValuesMap"; - private static final int DISK_SIZE_IN_BYTES = 20797327; Review Comment: how did we increase metadata in the process of this? ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SameValueForwardIndexCreator.java: ########## @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.segment.creator.impl; + +import java.io.IOException; +import java.math.BigDecimal; +import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator; +import org.apache.pinot.spi.data.FieldSpec; + + +class SameValueForwardIndexCreator implements ForwardIndexCreator { + + private final Object _actualValue; + private final ForwardIndexCreator _delegate; + + public SameValueForwardIndexCreator(Object value, ForwardIndexCreator delegate) { + _actualValue = value; + _delegate = delegate; + } + + @Override + public void seal() + throws IOException { + _delegate.seal(); + } + + @Override + public boolean isDictionaryEncoded() { + return false; + } + + @Override + public boolean isSingleValue() { + return _delegate.isSingleValue(); + } + + @Override + public FieldSpec.DataType getValueType() { + return _delegate.getValueType(); + } + + @Override + public void putInt(int value) { + _delegate.putInt((int) _actualValue); Review Comment: given this was introduced in this PR - can we add some java doc? it confused me too ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java: ########## @@ -134,183 +121,142 @@ public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreatio return; } - Collection<FieldSpec> fieldSpecs = schema.getAllFieldSpecs(); - Set<String> invertedIndexColumns = new HashSet<>(); - for (String columnName : _config.getInvertedIndexCreationColumns()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create inverted index for column: %s because it is not in schema", columnName); - invertedIndexColumns.add(columnName); - } + Map<String, FieldIndexConfigs> indexConfigs = segmentCreationSpec.getIndexConfigsByColName(); - Set<String> bloomFilterColumns = new HashSet<>(); - for (String columnName : _config.getBloomFilterCreationColumns()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create bloom filter for column: %s because it is not in schema", columnName); - bloomFilterColumns.add(columnName); - } - - Set<String> rangeIndexColumns = new HashSet<>(); - for (String columnName : _config.getRangeIndexCreationColumns()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create range index for column: %s because it is not in schema", columnName); - rangeIndexColumns.add(columnName); - } - - Set<String> textIndexColumns = new HashSet<>(); - for (String columnName : _config.getTextIndexCreationColumns()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create text index for column: %s because it is not in schema", columnName); - textIndexColumns.add(columnName); - } - - Set<String> fstIndexColumns = new HashSet<>(); - for (String columnName : _config.getFSTIndexCreationColumns()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create FST index for column: %s because it is not in schema", columnName); - fstIndexColumns.add(columnName); - } - - Map<String, JsonIndexConfig> jsonIndexConfigs = _config.getJsonIndexConfigs(); - for (String columnName : jsonIndexConfigs.keySet()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create json index for column: %s because it is not in schema", columnName); - } - - Set<String> forwardIndexDisabledColumns = new HashSet<>(); - for (String columnName : _config.getForwardIndexDisabledColumns()) { - Preconditions.checkState(schema.hasColumn(columnName), String.format("Invalid config. Can't disable " - + "forward index creation for a column: %s that does not exist in schema", columnName)); - forwardIndexDisabledColumns.add(columnName); - } - - Map<String, H3IndexConfig> h3IndexConfigs = _config.getH3IndexConfigs(); - for (String columnName : h3IndexConfigs.keySet()) { - Preconditions.checkState(schema.hasColumn(columnName), - "Cannot create H3 index for column: %s because it is not in schema", columnName); - } + _creatorsByColAndIndex = Maps.newHashMapWithExpectedSize(indexConfigs.keySet().size()); - // Initialize creators for dictionary, forward index and inverted index - IndexingConfig indexingConfig = _config.getTableConfig().getIndexingConfig(); - int rangeIndexVersion = indexingConfig.getRangeIndexVersion(); - for (FieldSpec fieldSpec : fieldSpecs) { - // Ignore virtual columns + for (String columnName : indexConfigs.keySet()) { + FieldSpec fieldSpec = schema.getFieldSpecFor(columnName); + if (fieldSpec == null) { + Preconditions.checkState(schema.hasColumn(columnName), Review Comment: do we need a precondition check again, if fieldSpec was null? i understand this is checking key, vs that one value, but can column exist with null fieldspec in that map? ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexLoadingConfig.java: ########## @@ -102,6 +118,8 @@ public class IndexLoadingConfig { private String _instanceId; private Map<String, Map<String, String>> _instanceTierConfigs; + private boolean _dirty = true; Review Comment: i know this class is fated to just be removed, but some comments in code for dirty and knownColumns would really help, in case it takes you some time to get to the demolition. it is pretty confusing without this context ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/RangeIndexConfig.java: ########## @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.pinot.segment.spi.index; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import javax.annotation.Nullable; +import org.apache.pinot.spi.config.table.IndexConfig; + + +public class RangeIndexConfig extends IndexConfig { + public static final RangeIndexConfig DEFAULT = new RangeIndexConfig(true, 2); + public static final RangeIndexConfig DISABLED = new RangeIndexConfig(false, null); + + private final int _version; + + public RangeIndexConfig(int version) { + this(true, version); + } + + @JsonCreator + public RangeIndexConfig(@JsonProperty("enabled") Boolean enabled, + @JsonProperty("version") @Nullable Integer version) { + super(enabled != null && enabled && version != null); + _version = version != null ? version : 2; Review Comment: can we take this from the creator/reader or define it here as current version, instead of hardcoding? ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/virtualcolumn/VirtualColumnIndexContainer.java: ########## @@ -46,53 +44,18 @@ public VirtualColumnIndexContainer(ForwardIndexReader<?> forwardIndex, InvertedI _dictionary = dictionary; } + @Nullable @Override - public ForwardIndexReader<?> getForwardIndex() { - return _forwardIndex; - } - - @Override - public InvertedIndexReader<?> getInvertedIndex() { - return _invertedIndex; - } - - @Override - public RangeIndexReader<?> getRangeIndex() { - return null; - } - - @Override - public TextIndexReader getTextIndex() { - return null; - } - - @Override - public TextIndexReader getFSTIndex() { - return null; - } - - @Override - public JsonIndexReader getJsonIndex() { - return null; - } - - @Override - public H3IndexReader getH3Index() { - return null; - } - - @Override - public Dictionary getDictionary() { - return _dictionary; - } - - @Override - public BloomFilterReader getBloomFilter() { - return null; - } - - @Override - public NullValueVectorReader getNullValueVector() { + public <I extends IndexReader, T extends IndexType<?, I, ?>> I getIndex(T indexType) { + if (indexType.equals(StandardIndexes.forward())) { + return (I) _forwardIndex; + } + if (indexType.equals(StandardIndexes.inverted())) { Review Comment: huh i didn't know we could create inverted index on an inverted column! ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/bloom/BloomIndexType.java: ########## @@ -40,27 +46,42 @@ import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.data.Schema; -public class BloomIndexType implements IndexType<BloomFilterConfig, BloomFilterReader, BloomFilterCreator> { - public static final BloomIndexType INSTANCE = new BloomIndexType(); - @Override - public String getId() { - return StandardIndexes.BLOOM_FILTER_ID; +public class BloomIndexType + extends AbstractIndexType<BloomFilterConfig, BloomFilterReader, BloomFilterCreator> + implements ConfigurableFromIndexLoadingConfig<BloomFilterConfig> { + + protected BloomIndexType() { + super(StandardIndexes.BLOOM_FILTER_ID); } @Override public Class<BloomFilterConfig> getIndexConfigClass() { return BloomFilterConfig.class; } + @Override + public Map<String, BloomFilterConfig> fromIndexLoadingConfig(IndexLoadingConfig indexLoadingConfig) { + return indexLoadingConfig.getBloomFilterConfigs(); + } + @Override public BloomFilterConfig getDefaultConfig() { return BloomFilterConfig.DISABLED; } @Override - public BloomFilterConfig getConfig(TableConfig tableConfig, Schema schema) { - throw new UnsupportedOperationException("To be implemented in a future PR"); + public ColumnConfigDeserializer<BloomFilterConfig> createDeserializer() { + return IndexConfigDeserializer.fromIndexes("bloom", getIndexConfigClass()) Review Comment: isn't there a constant you created somewhre that you can use here? (for all *IndexType) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org