This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new ec1d187475 Exclude dimensions from star-tree index stored type check (#13355) ec1d187475 is described below commit ec1d1874758a7a3ef30402c12de62e789223f8a0 Author: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com> AuthorDate: Tue Jun 11 10:16:58 2024 -0700 Exclude dimensions from star-tree index stored type check (#13355) --- .../pinot/core/startree/v2/BaseStarTreeV2Test.java | 58 +++++++------- .../segment/store/SegmentLocalFSDirectory.java | 3 +- .../local/segment/store/StarTreeIndexReader.java | 22 ++---- .../startree/v2/builder/MultipleTreesBuilder.java | 16 ++-- .../v2/builder/StarTreeIndexSeparator.java | 55 ++++---------- .../startree/v2/store/StarTreeIndexMapUtils.java | 32 ++++---- .../startree/v2/store/StarTreeLoaderUtils.java | 1 + .../store/ColumnIndexDirectoryTestHelper.java | 1 + .../v2/builder/StarTreeIndexSeparatorTest.java | 88 ++++++++-------------- .../apache/pinot/segment/spi/SegmentMetadata.java | 1 + .../spi/index/metadata/SegmentMetadataImpl.java | 1 + 11 files changed, 115 insertions(+), 163 deletions(-) diff --git a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java index 847d9e1568..732b8bc570 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java @@ -95,40 +95,43 @@ abstract class BaseStarTreeV2Test<R, A> { private static final int MAX_LEAF_RECORDS = RANDOM.nextInt(100) + 1; // Using column names with '__' to make sure regular table columns with '__' in the name aren't wrongly interpreted // as AggregationFunctionColumnPair - private static final String DIMENSION_D1 = "d1__COLUMN_NAME"; - private static final String DIMENSION_D2 = "__d2"; + private static final String DIMENSION1 = "d1__COLUMN_NAME"; + private static final String DIMENSION2 = "DISTINCTCOUNTRAWHLL__d2"; private static final int DIMENSION_CARDINALITY = 100; private static final String METRIC = "m"; // Supported filters - private static final String QUERY_FILTER_AND = " WHERE d1__COLUMN_NAME = 0 AND __d2 < 10"; + private static final String QUERY_FILTER_AND = String.format(" WHERE %1$s = 0 AND %2$s < 10", DIMENSION1, DIMENSION2); // StarTree supports OR predicates only on a single dimension - private static final String QUERY_FILTER_OR = " WHERE d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50"; - private static final String QUERY_FILTER_NOT = " WHERE NOT d1__COLUMN_NAME > 10"; - private static final String QUERY_FILTER_AND_NOT = " WHERE d1__COLUMN_NAME > 10 AND NOT __d2 < 10"; - private static final String QUERY_FILTER_OR_NOT = " WHERE d1__COLUMN_NAME > 50 OR NOT d1__COLUMN_NAME > 10"; - private static final String QUERY_NOT_NOT = " WHERE NOT NOT d1__COLUMN_NAME > 10"; + private static final String QUERY_FILTER_OR = String.format(" WHERE %1$s > 10 OR %1$s < 50", DIMENSION1); + private static final String QUERY_FILTER_NOT = String.format(" WHERE NOT %s > 10", DIMENSION1); + private static final String QUERY_FILTER_AND_NOT = + String.format(" WHERE %1$s > 10 AND NOT %2$s < 10", DIMENSION1, DIMENSION2); + private static final String QUERY_FILTER_OR_NOT = String.format(" WHERE %1$s > 50 OR NOT %1$s > 10", DIMENSION1); + private static final String QUERY_NOT_NOT = String.format(" WHERE NOT NOT %s > 10", DIMENSION1); private static final String QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS = - " WHERE __d2 < 95 AND (NOT d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME > 50)"; + String.format(" WHERE %2$s < 95 AND (NOT %1$s > 10 OR %1$s > 50)", DIMENSION1, DIMENSION2); private static final String QUERY_FILTER_COMPLEX_AND_MULTIPLE_DIMENSIONS_THREE_PREDICATES = - " WHERE __d2 < 95 AND NOT __d2 < 25 AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; + String.format(" WHERE %2$s < 95 AND NOT %2$s < 25 AND (%1$s > 10 OR %1$s < 50)", DIMENSION1, DIMENSION2); private static final String QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS_THREE_PREDICATES = - " WHERE (__d2 > 95 OR __d2 < 25) AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; + String.format(" WHERE (%2$s > 95 OR %2$s < 25) AND (%1$s > 10 OR %1$s < 50)", DIMENSION1, DIMENSION2); private static final String QUERY_FILTER_COMPLEX_OR_SINGLE_DIMENSION = - " WHERE NOT d1__COLUMN_NAME = 95 AND (d1__COLUMN_NAME > 90 OR d1__COLUMN_NAME < 100)"; + String.format(" WHERE NOT %1$s = 95 AND (%1$s > 90 OR %1$s < 100)", DIMENSION1); // Unsupported filters - private static final String QUERY_FILTER_OR_MULTIPLE_DIMENSIONS = " WHERE d1__COLUMN_NAME > 10 OR __d2 < 50"; + private static final String QUERY_FILTER_OR_MULTIPLE_DIMENSIONS = + String.format(" WHERE %1$s > 10 OR %2$s < 50", DIMENSION1, DIMENSION2); private static final String QUERY_FILTER_OR_ON_AND = - " WHERE (d1__COLUMN_NAME > 10 AND d1__COLUMN_NAME < 50) OR d1__COLUMN_NAME < 50"; - private static final String QUERY_FILTER_NOT_ON_AND = " WHERE NOT (d1__COLUMN_NAME > 10 AND d1__COLUMN_NAME < 50)"; - private static final String QUERY_FILTER_NOT_ON_OR = " WHERE NOT (d1__COLUMN_NAME < 10 OR d1__COLUMN_NAME > 50)"; + String.format(" WHERE (%1$s > 10 AND %1$s < 50) OR %1$s < 50", DIMENSION1); + private static final String QUERY_FILTER_NOT_ON_AND = + String.format(" WHERE NOT (%1$s > 10 AND %1$s < 50)", DIMENSION1); + private static final String QUERY_FILTER_NOT_ON_OR = String.format(" WHERE NOT (%1$s < 10 OR %1$s > 50)", DIMENSION1); // Always false filters - private static final String QUERY_FILTER_ALWAYS_FALSE = " WHERE d1__COLUMN_NAME > 100"; - private static final String QUERY_FILTER_OR_ALWAYS_FALSE = " WHERE d1__COLUMN_NAME > 100 OR d1__COLUMN_NAME < 0"; + private static final String QUERY_FILTER_ALWAYS_FALSE = String.format(" WHERE %s > 100", DIMENSION1); + private static final String QUERY_FILTER_OR_ALWAYS_FALSE = String.format(" WHERE %1$s > 100 OR %1$s < 0", DIMENSION1); - private static final String QUERY_GROUP_BY = " GROUP BY __d2"; - private static final String FILTER_AGG_CLAUSE = " FILTER(WHERE d1__COLUMN_NAME > 10)"; + private static final String QUERY_GROUP_BY = " GROUP BY " + DIMENSION2; + private static final String FILTER_AGG_CLAUSE = String.format(" FILTER(WHERE %s > 10)", DIMENSION1); private ValueAggregator _valueAggregator; private DataType _aggregatedValueType; @@ -143,8 +146,8 @@ abstract class BaseStarTreeV2Test<R, A> { _aggregatedValueType = _valueAggregator.getAggregatedValueType(); _aggregation = getAggregation(_valueAggregator.getAggregationType()); - Schema.SchemaBuilder schemaBuilder = new Schema.SchemaBuilder().addSingleValueDimension(DIMENSION_D1, DataType.INT) - .addSingleValueDimension(DIMENSION_D2, DataType.INT); + Schema.SchemaBuilder schemaBuilder = new Schema.SchemaBuilder().addSingleValueDimension(DIMENSION1, DataType.INT) + .addSingleValueDimension(DIMENSION2, DataType.INT); DataType rawValueType = getRawValueType(); // Raw value type will be null for COUNT aggregation function if (rawValueType != null) { @@ -156,8 +159,8 @@ abstract class BaseStarTreeV2Test<R, A> { List<GenericRow> segmentRecords = new ArrayList<>(NUM_SEGMENT_RECORDS); for (int i = 0; i < NUM_SEGMENT_RECORDS; i++) { GenericRow segmentRecord = new GenericRow(); - segmentRecord.putValue(DIMENSION_D1, RANDOM.nextInt(DIMENSION_CARDINALITY)); - segmentRecord.putValue(DIMENSION_D2, RANDOM.nextInt(DIMENSION_CARDINALITY)); + segmentRecord.putValue(DIMENSION1, RANDOM.nextInt(DIMENSION_CARDINALITY)); + segmentRecord.putValue(DIMENSION2, RANDOM.nextInt(DIMENSION_CARDINALITY)); if (rawValueType != null) { segmentRecord.putValue(METRIC, getRandomRawValue(RANDOM)); } @@ -171,10 +174,9 @@ abstract class BaseStarTreeV2Test<R, A> { driver.init(segmentGeneratorConfig, new GenericRowRecordReader(segmentRecords)); driver.build(); - StarTreeIndexConfig starTreeIndexConfig = - new StarTreeIndexConfig(Arrays.asList(DIMENSION_D1, DIMENSION_D2), null, null, Collections.singletonList( - new StarTreeAggregationConfig(METRIC, _valueAggregator.getAggregationType().getName(), - getCompressionCodec(), true, getIndexVersion(), null, null)), MAX_LEAF_RECORDS); + StarTreeIndexConfig starTreeIndexConfig = new StarTreeIndexConfig(Arrays.asList(DIMENSION1, DIMENSION2), null, null, + Collections.singletonList(new StarTreeAggregationConfig(METRIC, _valueAggregator.getAggregationType().getName(), + getCompressionCodec(), true, getIndexVersion(), null, null)), MAX_LEAF_RECORDS); File indexDir = new File(TEMP_DIR, SEGMENT_NAME); // Randomly build star-tree using on-heap or off-heap mode MultipleTreesBuilder.BuildMode buildMode = diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SegmentLocalFSDirectory.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SegmentLocalFSDirectory.java index e158219a08..297bf036f1 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SegmentLocalFSDirectory.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SegmentLocalFSDirectory.java @@ -28,7 +28,6 @@ import java.util.Collections; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import javax.annotation.Nullable; -import org.apache.commons.collections.CollectionUtils; import org.apache.commons.configuration2.ex.ConfigurationException; import org.apache.commons.io.FileUtils; import org.apache.pinot.segment.spi.creator.SegmentVersion; @@ -268,7 +267,7 @@ public class SegmentLocalFSDirectory extends SegmentDirectory { default: break; } - if (CollectionUtils.isNotEmpty(_segmentMetadata.getStarTreeV2MetadataList())) { + if (_segmentMetadata.getStarTreeV2MetadataList() != null) { try { _starTreeIndexReader = new StarTreeIndexReader(_segmentDirectory, _segmentMetadata, _readMode); } catch (ConfigurationException e) { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReader.java index 4c10f46143..537a16d835 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReader.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReader.java @@ -18,7 +18,6 @@ */ package org.apache.pinot.segment.local.segment.store; -import com.google.common.base.Preconditions; import java.io.Closeable; import java.io.File; import java.io.FileInputStream; @@ -51,10 +50,10 @@ public class StarTreeIndexReader implements Closeable { private static final Logger LOGGER = LoggerFactory.getLogger(StarTreeIndexReader.class); private final File _segmentDirectory; - private final SegmentMetadataImpl _segmentMetadata; + private final List<StarTreeV2Metadata> _starTreeMetadataList; + private final int _numStarTrees; private final ReadMode _readMode; private final File _indexFile; - private final int _numStarTrees; // StarTree index can contain multiple index instances, identified by ids like 0, 1, etc. private final List<Map<IndexKey, StarTreeIndexEntry>> _indexColumnEntries; @@ -67,17 +66,11 @@ public class StarTreeIndexReader implements Closeable { */ public StarTreeIndexReader(File segmentDirectory, SegmentMetadataImpl segmentMetadata, ReadMode readMode) throws IOException, ConfigurationException { - Preconditions.checkNotNull(segmentDirectory); - Preconditions.checkArgument(segmentDirectory.exists(), "SegmentDirectory: " + segmentDirectory + " does not exist"); - Preconditions.checkArgument(segmentDirectory.isDirectory(), - "SegmentDirectory: " + segmentDirectory + " is not a directory"); - Preconditions.checkNotNull(segmentMetadata); - Preconditions.checkNotNull(readMode); - _segmentDirectory = segmentDirectory; - _segmentMetadata = segmentMetadata; + _starTreeMetadataList = segmentMetadata.getStarTreeV2MetadataList(); + assert _starTreeMetadataList != null; + _numStarTrees = _starTreeMetadataList.size(); _readMode = readMode; - _numStarTrees = _segmentMetadata.getStarTreeV2MetadataList().size(); _indexFile = new File(_segmentDirectory, StarTreeV2Constants.INDEX_FILE_NAME); _indexColumnEntries = new ArrayList<>(_numStarTrees); load(); @@ -88,7 +81,7 @@ public class StarTreeIndexReader implements Closeable { List<Map<StarTreeIndexMapUtils.IndexKey, StarTreeIndexMapUtils.IndexValue>> indexMapList; try (InputStream inputStream = new FileInputStream( new File(_segmentDirectory, StarTreeV2Constants.INDEX_MAP_FILE_NAME))) { - indexMapList = StarTreeIndexMapUtils.loadFromInputStream(inputStream, _numStarTrees); + indexMapList = StarTreeIndexMapUtils.loadFromInputStream(inputStream, _starTreeMetadataList); } if (_readMode == ReadMode.heap) { _dataBuffer = PinotDataBuffer.loadFile(_indexFile, 0, _indexFile.length(), ByteOrder.LITTLE_ENDIAN, @@ -112,8 +105,7 @@ public class StarTreeIndexReader implements Closeable { columnEntries.put(new IndexKey(String.valueOf(starTreeId), StandardIndexes.inverted()), new StarTreeIndexEntry(indexMap.get(StarTreeIndexMapUtils.STAR_TREE_INDEX_KEY), _dataBuffer, ByteOrder.LITTLE_ENDIAN)); - List<StarTreeV2Metadata> starTreeMetadataList = _segmentMetadata.getStarTreeV2MetadataList(); - StarTreeV2Metadata starTreeMetadata = starTreeMetadataList.get(starTreeId); + StarTreeV2Metadata starTreeMetadata = _starTreeMetadataList.get(starTreeId); // Load dimension forward indexes for (String dimension : starTreeMetadata.getDimensionsSplitOrder()) { columnEntries.put(new IndexKey(dimension, StandardIndexes.forward()), new StarTreeIndexEntry( diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java index ee060d946b..50dfc8b56e 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java @@ -38,8 +38,10 @@ import org.apache.pinot.segment.local.startree.v2.store.StarTreeIndexMapUtils.In import org.apache.pinot.segment.local.startree.v2.store.StarTreeIndexMapUtils.IndexValue; import org.apache.pinot.segment.spi.ImmutableSegment; import org.apache.pinot.segment.spi.V1Constants; +import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl; import org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants; import org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants.MetadataKey; +import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata; import org.apache.pinot.segment.spi.store.SegmentDirectoryPaths; import org.apache.pinot.spi.config.table.StarTreeIndexConfig; import org.apache.pinot.spi.env.CommonsConfigurationUtils; @@ -60,6 +62,7 @@ public class MultipleTreesBuilder implements Closeable { private final List<StarTreeV2BuilderConfig> _builderConfigs; private final BuildMode _buildMode; + private final File _indexDir; private final File _segmentDirectory; private final PropertiesConfiguration _metadataProperties; private final ImmutableSegment _segment; @@ -82,10 +85,10 @@ public class MultipleTreesBuilder implements Closeable { Preconditions.checkArgument(CollectionUtils.isNotEmpty(builderConfigs), "Must provide star-tree builder configs"); _builderConfigs = builderConfigs; _buildMode = buildMode; + _indexDir = indexDir; _segmentDirectory = SegmentDirectoryPaths.findSegmentDirectory(indexDir); _metadataProperties = - CommonsConfigurationUtils.fromFile( - new File(_segmentDirectory, V1Constants.MetadataKeys.METADATA_FILE_NAME)); + CommonsConfigurationUtils.fromFile(new File(_segmentDirectory, V1Constants.MetadataKeys.METADATA_FILE_NAME)); _separator = getSeparator(); _segment = ImmutableSegmentLoader.load(indexDir, ReadMode.mmap); } @@ -104,10 +107,10 @@ public class MultipleTreesBuilder implements Closeable { Preconditions.checkArgument(CollectionUtils.isNotEmpty(indexConfigs) || enableDefaultStarTree, "Must provide star-tree index configs or enable default star-tree"); _buildMode = buildMode; + _indexDir = indexDir; _segmentDirectory = SegmentDirectoryPaths.findSegmentDirectory(indexDir); _metadataProperties = - CommonsConfigurationUtils.fromFile( - new File(_segmentDirectory, V1Constants.MetadataKeys.METADATA_FILE_NAME)); + CommonsConfigurationUtils.fromFile(new File(_segmentDirectory, V1Constants.MetadataKeys.METADATA_FILE_NAME)); Preconditions.checkState(!_metadataProperties.containsKey(MetadataKey.STAR_TREE_COUNT), "Star-tree already exists"); _segment = ImmutableSegmentLoader.load(indexDir, ReadMode.mmap); try { @@ -122,7 +125,8 @@ public class MultipleTreesBuilder implements Closeable { @Nullable private StarTreeIndexSeparator getSeparator() throws Exception { - if (!_metadataProperties.containsKey(MetadataKey.STAR_TREE_COUNT)) { + List<StarTreeV2Metadata> starTreeMetadataList = new SegmentMetadataImpl(_indexDir).getStarTreeV2MetadataList(); + if (starTreeMetadataList == null) { return null; } try { @@ -134,7 +138,7 @@ public class MultipleTreesBuilder implements Closeable { _separatorTempDir, false); StarTreeIndexSeparator separator = new StarTreeIndexSeparator(new File(_separatorTempDir, StarTreeV2Constants.INDEX_MAP_FILE_NAME), - new File(_separatorTempDir, StarTreeV2Constants.INDEX_FILE_NAME), _metadataProperties); + new File(_separatorTempDir, StarTreeV2Constants.INDEX_FILE_NAME), starTreeMetadataList); _metadataProperties.subset(StarTreeV2Constants.MetadataKey.STAR_TREE_SUBSET).clear(); CommonsConfigurationUtils.saveToFile(_metadataProperties, new File(_segmentDirectory, V1Constants.MetadataKeys.METADATA_FILE_NAME)); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparator.java index c017ea4b6a..29e677f5d5 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparator.java @@ -29,8 +29,6 @@ import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.List; import java.util.Map; -import org.apache.commons.configuration2.Configuration; -import org.apache.commons.configuration2.PropertiesConfiguration; import org.apache.commons.configuration2.ex.ConfigurationException; import org.apache.commons.io.FileUtils; import org.apache.pinot.segment.local.startree.v2.store.StarTreeIndexMapUtils; @@ -44,53 +42,26 @@ import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata; * The {@code StarTreeIndexSeparator} pulls out the individual star-trees from the common star-tree index file */ public class StarTreeIndexSeparator implements Closeable { - - private final FileChannel _indexFileChannel; private final List<Map<StarTreeIndexMapUtils.IndexKey, StarTreeIndexMapUtils.IndexValue>> _indexMapList; private final List<StarTreeV2BuilderConfig> _builderConfigList; - private final List<Integer> _totalDocsList; + private final List<Integer> _numDocsList; + private final FileChannel _indexFileChannel; - public StarTreeIndexSeparator(File indexMapFile, File indexFile, PropertiesConfiguration metadataProperties) + public StarTreeIndexSeparator(File indexMapFile, File indexFile, List<StarTreeV2Metadata> starTreeMetadataList) throws IOException { - _indexMapList = - extractIndexMap(indexMapFile, metadataProperties.getInt(StarTreeV2Constants.MetadataKey.STAR_TREE_COUNT)); - _indexFileChannel = new RandomAccessFile(indexFile, "r").getChannel(); - _builderConfigList = extractBuilderConfigs(metadataProperties); - _totalDocsList = extractTotalDocsList(metadataProperties); - } - - private List<Map<StarTreeIndexMapUtils.IndexKey, StarTreeIndexMapUtils.IndexValue>> extractIndexMap(File indexMapFile, - int numStarTrees) { try (InputStream inputStream = new FileInputStream(indexMapFile)) { - return StarTreeIndexMapUtils.loadFromInputStream(inputStream, numStarTrees); - } catch (IOException | ConfigurationException e) { + _indexMapList = StarTreeIndexMapUtils.loadFromInputStream(inputStream, starTreeMetadataList); + } catch (ConfigurationException e) { throw new RuntimeException(e); } - } - - public List<Integer> extractTotalDocsList(PropertiesConfiguration metadataProperties) { - List<Integer> totalDocsList = new ArrayList<>(_indexMapList.size()); - for (int i = 0; i < _indexMapList.size(); i++) { - Configuration metadata = metadataProperties.subset(StarTreeV2Constants.MetadataKey.getStarTreePrefix(i)); - totalDocsList.add(i, metadata.getInt(StarTreeV2Constants.MetadataKey.TOTAL_DOCS)); - } - return totalDocsList; - } - - /** - * Extract the list of {@link StarTreeV2BuilderConfig} for each of the star-tree present in the given metadata - * properties. - * @param metadataProperties index metadata properties - * @return List of {@link StarTreeV2BuilderConfig} - */ - public List<StarTreeV2BuilderConfig> extractBuilderConfigs(PropertiesConfiguration metadataProperties) { - List<StarTreeV2BuilderConfig> builderConfigList = new ArrayList<>(_indexMapList.size()); - for (int i = 0; i < _indexMapList.size(); i++) { - Configuration metadata = metadataProperties.subset(StarTreeV2Constants.MetadataKey.getStarTreePrefix(i)); - StarTreeV2Metadata starTreeV2Metadata = new StarTreeV2Metadata(metadata); - builderConfigList.add(i, StarTreeV2BuilderConfig.fromMetadata(starTreeV2Metadata)); + int numStarTrees = starTreeMetadataList.size(); + _builderConfigList = new ArrayList<>(numStarTrees); + _numDocsList = new ArrayList<>(numStarTrees); + for (StarTreeV2Metadata starTreeMetadata : starTreeMetadataList) { + _builderConfigList.add(StarTreeV2BuilderConfig.fromMetadata(starTreeMetadata)); + _numDocsList.add(starTreeMetadata.getNumDocs()); } - return builderConfigList; + _indexFileChannel = new RandomAccessFile(indexFile, "r").getChannel(); } /** @@ -109,7 +80,7 @@ public class StarTreeIndexSeparator implements Closeable { return -1; } separate(starTreeOutputDir, treeIndex); - return _totalDocsList.get(treeIndex); + return _numDocsList.get(treeIndex); } private void separate(File starTreeOutputDir, int treeIndex) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java index 7ac45d3b18..fdc6e3703b 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java @@ -24,16 +24,18 @@ import java.io.InputStream; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; -import javax.annotation.Nonnull; +import java.util.Set; import javax.annotation.Nullable; import org.apache.commons.configuration2.PropertiesConfiguration; import org.apache.commons.configuration2.ex.ConfigurationException; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair; +import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata; import org.apache.pinot.spi.env.CommonsConfigurationUtils; @@ -119,10 +121,8 @@ public class StarTreeIndexMapUtils { @Override public int compareTo(IndexKey other) { - return Comparator - .comparing((IndexKey i) -> i._column, Comparator.nullsLast(Comparator.naturalOrder())) - .thenComparing((IndexKey i) -> i._indexType) - .compare(this, other); + return Comparator.comparing((IndexKey i) -> i._column, Comparator.nullsLast(Comparator.naturalOrder())) + .thenComparing((IndexKey i) -> i._indexType).compare(this, other); } } @@ -142,7 +142,7 @@ public class StarTreeIndexMapUtils { } @Override - public int compareTo(@Nonnull IndexValue o) { + public int compareTo(IndexValue o) { return Long.compare(_offset, o._offset); } } @@ -171,10 +171,15 @@ public class StarTreeIndexMapUtils { /** * Loads the index maps for multiple star-trees from an input stream. */ - public static List<Map<IndexKey, IndexValue>> loadFromInputStream(InputStream indexMapInputStream, int numStarTrees) + public static List<Map<IndexKey, IndexValue>> loadFromInputStream(InputStream indexMapInputStream, + List<StarTreeV2Metadata> starTreeMetadataList) throws ConfigurationException { + assert starTreeMetadataList != null; + int numStarTrees = starTreeMetadataList.size(); + List<Set<String>> dimensionSets = new ArrayList<>(numStarTrees); List<Map<IndexKey, IndexValue>> indexMaps = new ArrayList<>(numStarTrees); - for (int i = 0; i < numStarTrees; i++) { + for (StarTreeV2Metadata starTreeMetadata : starTreeMetadataList) { + dimensionSets.add(new HashSet<>(starTreeMetadata.getDimensionsSplitOrder())); indexMaps.add(new HashMap<>()); } @@ -182,6 +187,7 @@ public class StarTreeIndexMapUtils { for (String key : CommonsConfigurationUtils.getKeys(configuration)) { String[] split = StringUtils.split(key, KEY_SEPARATOR); int starTreeId = Integer.parseInt(split[0]); + Set<String> dimensionSet = dimensionSets.get(starTreeId); Map<IndexKey, IndexValue> indexMap = indexMaps.get(starTreeId); int columnSplitEndIndex = split.length - 2; @@ -198,13 +204,9 @@ public class StarTreeIndexMapUtils { column = StringUtils.join(split, KEY_SEPARATOR, 1, columnSplitEndIndex); } // Convert metric (function-column pair) to stored name for backward-compatibility - if (column.contains(AggregationFunctionColumnPair.DELIMITER)) { - try { - AggregationFunctionColumnPair functionColumnPair = AggregationFunctionColumnPair.fromColumnName(column); - column = AggregationFunctionColumnPair.resolveToStoredType(functionColumnPair).toColumnName(); - } catch (Exception e) { - // Ignoring this exception for columns that are not metric (function-column pair) - } + if (!dimensionSet.contains(column)) { + AggregationFunctionColumnPair functionColumnPair = AggregationFunctionColumnPair.fromColumnName(column); + column = AggregationFunctionColumnPair.resolveToStoredType(functionColumnPair).toColumnName(); } indexKey = new IndexKey(IndexType.FORWARD_INDEX, column); } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeLoaderUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeLoaderUtils.java index e798f249d0..f7adc1a07d 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeLoaderUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeLoaderUtils.java @@ -55,6 +55,7 @@ public class StarTreeLoaderUtils { SegmentMetadataImpl segmentMetadata, Map<String, ColumnIndexContainer> indexContainerMap) throws IOException { List<StarTreeV2Metadata> starTreeMetadataList = segmentMetadata.getStarTreeV2MetadataList(); + assert starTreeMetadataList != null; int numStarTrees = starTreeMetadataList.size(); List<StarTreeV2> starTrees = new ArrayList<>(numStarTrees); for (int i = 0; i < numStarTrees; i++) { diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/ColumnIndexDirectoryTestHelper.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/ColumnIndexDirectoryTestHelper.java index a18b24f591..b6718ee460 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/ColumnIndexDirectoryTestHelper.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/ColumnIndexDirectoryTestHelper.java @@ -89,6 +89,7 @@ public class ColumnIndexDirectoryTestHelper { static SegmentMetadataImpl writeMetadata(SegmentVersion version) { SegmentMetadataImpl segmentMetadata = Mockito.mock(SegmentMetadataImpl.class); when(segmentMetadata.getVersion()).thenReturn(version); + when(segmentMetadata.getStarTreeV2MetadataList()).thenReturn(null); ColumnMetadata columnMetadata = Mockito.mock(ColumnMetadata.class); when(columnMetadata.isSingleValue()).thenReturn(true); when(columnMetadata.isSorted()).thenReturn(false); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparatorTest.java index b830493b8d..2e87c58e5d 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparatorTest.java @@ -18,85 +18,63 @@ */ package org.apache.pinot.segment.local.startree.v2.builder; -import com.google.common.collect.Lists; import java.io.File; -import java.io.IOException; import java.net.URL; import java.util.Arrays; +import java.util.HashSet; import java.util.List; -import java.util.Objects; -import org.apache.commons.configuration2.PropertiesConfiguration; -import org.apache.commons.configuration2.ex.ConfigurationException; +import java.util.Set; import org.apache.commons.io.FileUtils; -import org.apache.pinot.segment.spi.V1Constants; +import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl; import org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants; import org.apache.pinot.spi.config.table.StarTreeIndexConfig; -import org.apache.pinot.spi.env.CommonsConfigurationUtils; +import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import static org.apache.pinot.segment.spi.V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION; import static org.apache.pinot.segment.spi.V1Constants.Indexes.UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION; import static org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants.STAR_TREE_INDEX_FILE_NAME; -import static org.testng.AssertJUnit.assertEquals; -import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.Assert.assertNotNull; import static org.testng.AssertJUnit.assertTrue; public class StarTreeIndexSeparatorTest { - private static final String SEGMENT_PATH = "data/startree/segment"; - private static final String TOTAL_DOCS_KEY = "startree.v2.0.total.docs"; - - private StarTreeIndexSeparator _separator; - private PropertiesConfiguration _metadataProperties; - private final StarTreeV2BuilderConfig _builderConfig = StarTreeV2BuilderConfig.fromIndexConfig( - new StarTreeIndexConfig( - Lists.newArrayList("AirlineID", "Origin", "Dest"), - Lists.newArrayList(), - Lists.newArrayList("count__*", "max__ArrDelay"), - null, - 10)); + private static final StarTreeV2BuilderConfig BUILDER_CONFIG = StarTreeV2BuilderConfig.fromIndexConfig( + new StarTreeIndexConfig(List.of("AirlineID", "Origin", "Dest"), List.of(), List.of("count__*", "max__ArrDelay"), + null, 10)); + private static final File TEMP_DIR = new File(FileUtils.getTempDirectory(), "StarTreeIndexSeparatorTest"); @BeforeClass - public void setup() - throws IOException, ConfigurationException { - ClassLoader classLoader = getClass().getClassLoader(); - URL segmentUrl = classLoader.getResource(SEGMENT_PATH); - File segmentDir = new File(segmentUrl.getFile()); - _metadataProperties = CommonsConfigurationUtils.fromFile( - new File(segmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME)); - _separator = new StarTreeIndexSeparator( - new File(segmentDir, StarTreeV2Constants.INDEX_MAP_FILE_NAME), - new File(segmentDir, StarTreeV2Constants.INDEX_FILE_NAME), - _metadataProperties); - } - - @Test - public void extractTotalDocsListTest() { - assertNotNull(_separator); - List<Integer> docsList = _separator.extractTotalDocsList(_metadataProperties); - assertNotNull(docsList); - assertEquals(docsList, Lists.newArrayList(_metadataProperties.getInt(TOTAL_DOCS_KEY))); + public void setUp() { + FileUtils.deleteQuietly(TEMP_DIR); } - @Test - public void extractBuilderConfigsTest() { - List<StarTreeV2BuilderConfig> builderConfigList = _separator.extractBuilderConfigs(_metadataProperties); - assertEquals(builderConfigList, Lists.newArrayList(_builderConfig)); + @AfterClass + public void tearDown() { + FileUtils.deleteQuietly(TEMP_DIR); } @Test - public void separateTest() - throws IOException { - File tempDir = new File(FileUtils.getTempDirectory(), "separateTest"); - _separator.separate(tempDir, _builderConfig); - List<String> files = Arrays.asList(Objects.requireNonNull(tempDir.list())); - assertTrue(files.contains(STAR_TREE_INDEX_FILE_NAME)); - _builderConfig.getDimensionsSplitOrder() - .forEach(dimension -> assertTrue(files.contains(dimension + UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION))); - _builderConfig.getFunctionColumnPairs() - .forEach(dimension -> assertTrue(files.contains(dimension + RAW_SV_FORWARD_INDEX_FILE_EXTENSION))); - FileUtils.forceDelete(tempDir); + public void testSeparate() + throws Exception { + URL segmentUrl = getClass().getClassLoader().getResource(SEGMENT_PATH); + assertNotNull(segmentUrl); + File segmentDir = new File(segmentUrl.getFile()); + try (StarTreeIndexSeparator separator = new StarTreeIndexSeparator( + new File(segmentDir, StarTreeV2Constants.INDEX_MAP_FILE_NAME), + new File(segmentDir, StarTreeV2Constants.INDEX_FILE_NAME), + new SegmentMetadataImpl(segmentDir).getStarTreeV2MetadataList())) { + separator.separate(TEMP_DIR, BUILDER_CONFIG); + } + String[] fileNames = TEMP_DIR.list(); + assertNotNull(fileNames); + Set<String> fileNameSet = new HashSet<>(Arrays.asList(fileNames)); + assertTrue(fileNameSet.contains(STAR_TREE_INDEX_FILE_NAME)); + BUILDER_CONFIG.getDimensionsSplitOrder() + .forEach(dimension -> assertTrue(fileNameSet.contains(dimension + UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION))); + BUILDER_CONFIG.getFunctionColumnPairs() + .forEach(dimension -> assertTrue(fileNameSet.contains(dimension + RAW_SV_FORWARD_INDEX_FILE_EXTENSION))); } } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/SegmentMetadata.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/SegmentMetadata.java index 63f897e0a7..b9a1d23700 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/SegmentMetadata.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/SegmentMetadata.java @@ -92,6 +92,7 @@ public interface SegmentMetadata { */ long getLatestIngestionTimestamp(); + @Nullable List<StarTreeV2Metadata> getStarTreeV2MetadataList(); Map<String, String> getCustomMap(); diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java index b0bb5d04f0..d0b7c84d34 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java @@ -388,6 +388,7 @@ public class SegmentMetadataImpl implements SegmentMetadata { return Long.MIN_VALUE; } + @Nullable @Override public List<StarTreeV2Metadata> getStarTreeV2MetadataList() { return _starTreeV2MetadataList; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org