This is an automated email from the ASF dual-hosted git repository. rongr pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 9898b913e0 another appraoch (#9712) 9898b913e0 is described below commit 9898b913e03592200e5743c61b1c18972901346a Author: Rong Rong <ro...@apache.org> AuthorDate: Fri Nov 4 11:40:54 2022 -0700 another appraoch (#9712) Co-authored-by: Rong Rong <ro...@startree.ai> --- .../segment/store/SingleFileIndexDirectory.java | 27 ++++-------- .../apache/pinot/segment/spi/ColumnMetadata.java | 4 ++ .../spi/index/metadata/ColumnMetadataImpl.java | 19 ++++++++- .../spi/index/metadata/SegmentMetadataImpl.java | 23 ++++++++++ .../pinot/segment/spi/store/ColumnIndexUtils.java | 49 ++++++++++++++++++++++ 5 files changed, 100 insertions(+), 22 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java index 744dd59e2f..8b0faacb9c 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java @@ -44,6 +44,7 @@ import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl; import org.apache.pinot.segment.spi.memory.PinotDataBuffer; import org.apache.pinot.segment.spi.store.ColumnIndexDirectory; import org.apache.pinot.segment.spi.store.ColumnIndexType; +import org.apache.pinot.segment.spi.store.ColumnIndexUtils; import org.apache.pinot.spi.env.CommonsConfigurationUtils; import org.apache.pinot.spi.utils.ReadMode; import org.slf4j.Logger; @@ -70,9 +71,6 @@ class SingleFileIndexDirectory extends ColumnIndexDirectory { private static final long MAGIC_MARKER = 0xdeadbeefdeafbeadL; private static final int MAGIC_MARKER_SIZE_BYTES = 8; - private static final String MAP_KEY_SEPARATOR = "."; - private static final String MAP_KEY_NAME_START_OFFSET = 
"startOffset"; - private static final String MAP_KEY_NAME_SIZE = "size"; // Max size of buffer we want to allocate // ByteBuffer limits the size to 2GB - (some platform dependent size) @@ -218,29 +216,17 @@ class SingleFileIndexDirectory extends ColumnIndexDirectory { PropertiesConfiguration mapConfig = CommonsConfigurationUtils.fromFile(mapFile); for (String key : CommonsConfigurationUtils.getKeys(mapConfig)) { - // column names can have '.' in it hence scan from backwards - // parsing names like "column.name.dictionary.startOffset" - // or, "column.name.dictionary.endOffset" where column.name is the key - int lastSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR); - Preconditions - .checkState(lastSeparatorPos != -1, "Key separator not found: " + key + ", segment: " + _segmentDirectory); - String propertyName = key.substring(lastSeparatorPos + 1); - - int indexSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR, lastSeparatorPos - 1); - Preconditions.checkState(indexSeparatorPos != -1, - "Index separator not found: " + key + " , segment: " + _segmentDirectory); - String indexName = key.substring(indexSeparatorPos + 1, lastSeparatorPos); - String columnName = key.substring(0, indexSeparatorPos); - IndexKey indexKey = new IndexKey(columnName, ColumnIndexType.getValue(indexName)); + String[] parsedKeys = ColumnIndexUtils.parseIndexMapKeys(key, _segmentDirectory.getPath()); + IndexKey indexKey = new IndexKey(parsedKeys[0], ColumnIndexType.getValue(parsedKeys[1])); IndexEntry entry = _columnEntries.get(indexKey); if (entry == null) { entry = new IndexEntry(indexKey); _columnEntries.put(indexKey, entry); } - if (propertyName.equals(MAP_KEY_NAME_START_OFFSET)) { + if (parsedKeys[2].equals(ColumnIndexUtils.MAP_KEY_NAME_START_OFFSET)) { entry._startOffset = mapConfig.getLong(key); - } else if (propertyName.equals(MAP_KEY_NAME_SIZE)) { + } else if (parsedKeys[2].equals(ColumnIndexUtils.MAP_KEY_NAME_SIZE)) { entry._size = mapConfig.getLong(key); } else { throw new 
ConfigurationException( @@ -439,7 +425,8 @@ class SingleFileIndexDirectory extends ColumnIndexDirectory { } private static String getKey(String column, String indexName, boolean isStartOffset) { - return column + MAP_KEY_SEPARATOR + indexName + MAP_KEY_SEPARATOR + (isStartOffset ? "startOffset" : "size"); + return column + ColumnIndexUtils.MAP_KEY_SEPARATOR + indexName + ColumnIndexUtils.MAP_KEY_SEPARATOR + + (isStartOffset ? "startOffset" : "size"); } @VisibleForTesting diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java index 00975a3c43..4346d1f744 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java @@ -19,9 +19,11 @@ package org.apache.pinot.segment.spi; import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Map; import java.util.Set; import javax.annotation.Nullable; import org.apache.pinot.segment.spi.partition.PartitionFunction; +import org.apache.pinot.segment.spi.store.ColumnIndexType; import org.apache.pinot.spi.annotations.InterfaceAudience; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.FieldSpec.DataType; @@ -90,5 +92,7 @@ public interface ColumnMetadata { @Nullable Set<Integer> getPartitions(); + Map<ColumnIndexType, Long> getIndexSizeMap(); + boolean isAutoGenerated(); } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java index 55b076bfbb..6b0b3d72a9 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java @@ -37,6 +37,7 @@ import 
org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Segment; import org.apache.pinot.segment.spi.partition.PartitionFunction; import org.apache.pinot.segment.spi.partition.PartitionFunctionFactory; import org.apache.pinot.segment.spi.partition.metadata.ColumnPartitionMetadata; +import org.apache.pinot.segment.spi.store.ColumnIndexType; import org.apache.pinot.spi.data.DateTimeFieldSpec; import org.apache.pinot.spi.data.DimensionFieldSpec; import org.apache.pinot.spi.data.FieldSpec; @@ -65,13 +66,14 @@ public class ColumnMetadataImpl implements ColumnMetadata { private final int _totalNumberOfEntries; private final PartitionFunction _partitionFunction; private final Set<Integer> _partitions; + private final Map<ColumnIndexType, Long> _indexSizeMap; private final boolean _autoGenerated; private ColumnMetadataImpl(FieldSpec fieldSpec, int totalDocs, int cardinality, boolean sorted, Comparable<?> minValue, Comparable<?> maxValue, boolean minMaxValueInvalid, boolean hasDictionary, int columnMaxLength, char paddingCharacter, int bitsPerElement, int maxNumberOfMultiValues, int totalNumberOfEntries, @Nullable PartitionFunction partitionFunction, @Nullable Set<Integer> partitions, - boolean autoGenerated) { + Map<ColumnIndexType, Long> indexSizeMap, boolean autoGenerated) { _fieldSpec = fieldSpec; _totalDocs = totalDocs; _cardinality = cardinality; @@ -87,6 +89,7 @@ public class ColumnMetadataImpl implements ColumnMetadata { _totalNumberOfEntries = totalNumberOfEntries; _partitionFunction = partitionFunction; _partitions = partitions; + _indexSizeMap = indexSizeMap; _autoGenerated = autoGenerated; } @@ -166,6 +169,12 @@ public class ColumnMetadataImpl implements ColumnMetadata { return _partitions; } + @Nullable + @Override + public Map<ColumnIndexType, Long> getIndexSizeMap() { + return _indexSizeMap; + } + @Override public boolean isAutoGenerated() { return _autoGenerated; @@ -348,6 +357,7 @@ public class ColumnMetadataImpl implements ColumnMetadata { private 
PartitionFunction _partitionFunction; private Set<Integer> _partitions; private boolean _autoGenerated; + private Map<ColumnIndexType, Long> _indexSizeMap = new HashMap<>(); public Builder setFieldSpec(FieldSpec fieldSpec) { _fieldSpec = fieldSpec; @@ -424,6 +434,10 @@ public class ColumnMetadataImpl implements ColumnMetadata { return this; } + public void setIndexSizeMap(Map<ColumnIndexType, Long> indexSizeMap) { + _indexSizeMap = indexSizeMap; + } + public Builder setAutoGenerated(boolean autoGenerated) { _autoGenerated = autoGenerated; return this; @@ -432,7 +446,8 @@ public class ColumnMetadataImpl implements ColumnMetadata { public ColumnMetadataImpl build() { return new ColumnMetadataImpl(_fieldSpec, _totalDocs, _cardinality, _sorted, _minValue, _maxValue, _minMaxValueInvalid, _hasDictionary, _columnMaxLength, _paddingCharacter, _bitsPerElement, - _maxNumberOfMultiValues, _totalNumberOfEntries, _partitionFunction, _partitions, _autoGenerated); + _maxNumberOfMultiValues, _totalNumberOfEntries, _partitionFunction, _partitions, _indexSizeMap, + _autoGenerated); } } } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java index b17f5d41ea..196ef4ef30 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java @@ -45,10 +45,13 @@ import org.apache.commons.configuration.Configuration; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.pinot.segment.spi.ColumnMetadata; import org.apache.pinot.segment.spi.SegmentMetadata; +import org.apache.pinot.segment.spi.V1Constants; import org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Segment; import org.apache.pinot.segment.spi.creator.SegmentVersion; import 
org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants; import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata; +import org.apache.pinot.segment.spi.store.ColumnIndexType; +import org.apache.pinot.segment.spi.store.ColumnIndexUtils; import org.apache.pinot.segment.spi.store.SegmentDirectoryPaths; import org.apache.pinot.spi.config.table.TimestampIndexGranularity; import org.apache.pinot.spi.data.Schema; @@ -245,6 +248,26 @@ public class SegmentMetadataImpl implements SegmentMetadata { _schema.addField(columnMetadata.getFieldSpec()); } + // Load index metadata + // Support V3 (e.g. SingleFileIndexDirectory only) + if (_segmentVersion == SegmentVersion.v3) { + File indexMapFile = new File(_indexDir, "v3" + File.separator + V1Constants.INDEX_MAP_FILE_NAME); + if (indexMapFile.exists()) { + PropertiesConfiguration mapConfig = CommonsConfigurationUtils.fromFile(indexMapFile); + for (String key : CommonsConfigurationUtils.getKeys(mapConfig)) { + try { + String[] parsedKeys = ColumnIndexUtils.parseIndexMapKeys(key, _indexDir.getPath()); + if (parsedKeys[2].equals(ColumnIndexUtils.MAP_KEY_NAME_SIZE)) { + ColumnIndexType columnIndexType = ColumnIndexType.getValue(parsedKeys[1]); + _columnMetadataMap.get(parsedKeys[0]).getIndexSizeMap().put(columnIndexType, mapConfig.getLong(key)); + } + } catch (Exception e) { + LOGGER.debug("Unable to load index metadata in {} for {}!", indexMapFile, key, e); + } + } + } + } + // Build star-tree v2 metadata int starTreeV2Count = segmentMetadataPropertiesConfiguration.getInt(StarTreeV2Constants.MetadataKey.STAR_TREE_COUNT, 0); diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/ColumnIndexUtils.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/ColumnIndexUtils.java new file mode 100644 index 0000000000..4571b5f85b --- /dev/null +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/ColumnIndexUtils.java @@ -0,0 +1,49 @@ +/** + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.spi.store; + +import com.google.common.base.Preconditions; + + +public class ColumnIndexUtils { + public static final String MAP_KEY_SEPARATOR = "."; + public static final String MAP_KEY_NAME_START_OFFSET = "startOffset"; + public static final String MAP_KEY_NAME_SIZE = "size"; + + private ColumnIndexUtils() { + // do not instantiate. + } + + public static String[] parseIndexMapKeys(String key, String segmentDir) { + // column names can have '.' 
in them, hence scan backwards from the end of the key + // parsing names like "column.name.dictionary.startOffset" + // or "column.name.dictionary.size", where column.name is the column name + int lastSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR); + Preconditions + .checkState(lastSeparatorPos != -1, "Key separator not found: " + key + ", segment: " + segmentDir); + String propertyName = key.substring(lastSeparatorPos + 1); + + int indexSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR, lastSeparatorPos - 1); + Preconditions.checkState(indexSeparatorPos != -1, + "Index separator not found: " + key + " , segment: " + segmentDir); + String indexName = key.substring(indexSeparatorPos + 1, lastSeparatorPos); + String columnName = key.substring(0, indexSeparatorPos); + return new String[]{columnName, indexName, propertyName}; + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org