This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new b284154731 StarTree non deterministic ordering of functions in startree_index_map and other places (#10231) b284154731 is described below commit b284154731d2f658eb9ddfc14d25ea509c76919d Author: Saurabh Dubey <saurabhd...@gmail.com> AuthorDate: Thu Feb 9 00:42:22 2023 +0530 StarTree non deterministic ordering of functions in startree_index_map and other places (#10231) --- .../startree/v2/builder/MultipleTreesBuilder.java | 4 +-- .../startree/v2/builder/StarTreeIndexCombiner.java | 15 ++++---- .../v2/builder/StarTreeV2BuilderConfig.java | 8 ++--- .../startree/v2/store/StarTreeIndexMapUtils.java | 19 +++++++--- .../segment/store/StarTreeIndexReaderTest.java | 41 +++++++++++----------- .../startree/AggregationFunctionColumnPair.java | 10 +++++- 6 files changed, 57 insertions(+), 40 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java index 50f04abe0b..ec40df5f97 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java @@ -24,12 +24,12 @@ import java.io.File; import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.List; -import java.util.Map; import javax.annotation.Nullable; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.configuration.Configuration; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader; import org.apache.pinot.segment.local.startree.StarTreeBuilderUtils; import org.apache.pinot.segment.local.startree.v2.store.StarTreeIndexMapUtils; @@ -130,7 +130,7 @@ public class MultipleTreesBuilder implements Closeable { File starTreeIndexDir = new File(_segmentDirectory, StarTreeV2Constants.STAR_TREE_TEMP_DIR); FileUtils.forceMkdir(starTreeIndexDir); _metadataProperties.addProperty(MetadataKey.STAR_TREE_COUNT, numStarTrees); - List<Map<IndexKey, IndexValue>> indexMaps = new ArrayList<>(numStarTrees); + List<List<Pair<IndexKey, IndexValue>>> indexMaps = new ArrayList<>(numStarTrees); // Build all star-trees for (int i = 0; i < numStarTrees; i++) { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexCombiner.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexCombiner.java index dbe44c7a13..4d4c805d39 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexCombiner.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexCombiner.java @@ -24,9 +24,10 @@ import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.channels.FileChannel; -import java.util.HashMap; -import java.util.Map; +import java.util.ArrayList; +import java.util.List; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.segment.spi.V1Constants; import org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair; import org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants; @@ -52,19 +53,19 @@ public class StarTreeIndexCombiner implements Closeable { /** * Combines the index files inside the given directory into the single index file, then cleans the directory. */ - public Map<IndexKey, IndexValue> combine(StarTreeV2BuilderConfig builderConfig, File starTreeIndexDir) + public List<Pair<IndexKey, IndexValue>> combine(StarTreeV2BuilderConfig builderConfig, File starTreeIndexDir) throws IOException { - Map<IndexKey, IndexValue> indexMap = new HashMap<>(); + List<Pair<IndexKey, IndexValue>> indexMap = new ArrayList<>(); // Write star-tree index File starTreeIndexFile = new File(starTreeIndexDir, StarTreeV2Constants.STAR_TREE_INDEX_FILE_NAME); - indexMap.put(STAR_TREE_INDEX_KEY, writeFile(starTreeIndexFile)); + indexMap.add(Pair.of(STAR_TREE_INDEX_KEY, writeFile(starTreeIndexFile))); // Write dimension indexes for (String dimension : builderConfig.getDimensionsSplitOrder()) { File dimensionIndexFile = new File(starTreeIndexDir, dimension + V1Constants.Indexes.UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION); - indexMap.put(new IndexKey(IndexType.FORWARD_INDEX, dimension), writeFile(dimensionIndexFile)); + indexMap.add(Pair.of(new IndexKey(IndexType.FORWARD_INDEX, dimension), writeFile(dimensionIndexFile))); } // Write metric (function-column pair) indexes @@ -72,7 +73,7 @@ public class StarTreeIndexCombiner implements Closeable { String metric = functionColumnPair.toColumnName(); File metricIndexFile = new File(starTreeIndexDir, metric + V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION); - indexMap.put(new IndexKey(IndexType.FORWARD_INDEX, metric), writeFile(metricIndexFile)); + indexMap.add(Pair.of(new IndexKey(IndexType.FORWARD_INDEX, metric), writeFile(metricIndexFile))); } FileUtils.cleanDirectory(starTreeIndexDir); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java index 87ea5214cd..56e778a38b 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java @@ -21,10 +21,10 @@ package org.apache.pinot.segment.local.startree.v2.builder; import com.google.common.base.Preconditions; import java.util.ArrayList; import java.util.Collections; -import java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.TreeSet; import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.pinot.segment.spi.AggregationFunctionType; @@ -56,7 +56,7 @@ public class StarTreeV2BuilderConfig { Set<String> skipStarNodeCreationForDimensions; if (indexConfig.getSkipStarNodeCreationForDimensions() != null) { - skipStarNodeCreationForDimensions = new HashSet<>(indexConfig.getSkipStarNodeCreationForDimensions()); + skipStarNodeCreationForDimensions = new TreeSet<>(indexConfig.getSkipStarNodeCreationForDimensions()); Preconditions.checkArgument(dimensionsSplitOrder.containsAll(skipStarNodeCreationForDimensions), "Can not skip star-node creation for dimensions not in the split order, dimensionsSplitOrder: %s, " + "skipStarNodeCreationForDimensions: %s", @@ -65,7 +65,7 @@ public class StarTreeV2BuilderConfig { skipStarNodeCreationForDimensions = Collections.emptySet(); } - Set<AggregationFunctionColumnPair> functionColumnPairs = new HashSet<>(); + Set<AggregationFunctionColumnPair> functionColumnPairs = new TreeSet<>(); for (String functionColumnPair : indexConfig.getFunctionColumnPairs()) { functionColumnPairs.add(AggregationFunctionColumnPair.fromColumnName(functionColumnPair)); } @@ -145,7 +145,7 @@ public class StarTreeV2BuilderConfig { } Preconditions.checkState(!dimensionsSplitOrder.isEmpty(), "No qualified dimension found for star-tree split order"); - Set<AggregationFunctionColumnPair> functionColumnPairs = new HashSet<>(); + Set<AggregationFunctionColumnPair> functionColumnPairs = new TreeSet<>(); functionColumnPairs.add(AggregationFunctionColumnPair.COUNT_STAR); for (String numericMetric : numericMetrics) { functionColumnPairs.add(new AggregationFunctionColumnPair(AggregationFunctionType.SUM, numericMetric)); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java index d50e507372..3b989d3c79 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java @@ -22,6 +22,7 @@ import com.google.common.base.Preconditions; import java.io.File; import java.io.InputStream; import java.util.ArrayList; +import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -30,9 +31,9 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.spi.env.CommonsConfigurationUtils; - /** * The {@code StarTreeIndexMapUtils} class is a utility class to store/load star-tree index map to/from file. * <p> @@ -81,7 +82,7 @@ public class StarTreeIndexMapUtils { /** * Key of the index map. */ - public static class IndexKey { + public static class IndexKey implements Comparable<IndexKey> { public final IndexType _indexType; // For star-tree index, column will be null public final String _column; @@ -112,6 +113,14 @@ public class StarTreeIndexMapUtils { return false; } } + + @Override + public int compareTo(IndexKey other) { + return Comparator + .comparing((IndexKey i) -> i._column, Comparator.nullsLast(Comparator.naturalOrder())) + .thenComparing((IndexKey i) -> i._indexType) + .compare(this, other); + } } /** @@ -138,14 +147,14 @@ public class StarTreeIndexMapUtils { /** * Stores the index maps for multiple star-trees into a file. */ - public static void storeToFile(List<Map<IndexKey, IndexValue>> indexMaps, File indexMapFile) { + public static void storeToFile(List<List<Pair<IndexKey, IndexValue>>> indexMaps, File indexMapFile) { Preconditions.checkState(!indexMapFile.exists(), "Star-tree index map file already exists"); PropertiesConfiguration configuration = CommonsConfigurationUtils.fromFile(indexMapFile); int numStarTrees = indexMaps.size(); for (int i = 0; i < numStarTrees; i++) { - Map<IndexKey, IndexValue> indexMap = indexMaps.get(i); - for (Map.Entry<IndexKey, IndexValue> entry : indexMap.entrySet()) { + List<Pair<IndexKey, IndexValue>> indexMap = indexMaps.get(i); + for (Pair<IndexKey, IndexValue> entry : indexMap) { IndexKey key = entry.getKey(); IndexValue value = entry.getValue(); configuration.addProperty(key.getPropertyName(i, OFFSET_SUFFIX), value._offset); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReaderTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReaderTest.java index c2eaba1142..8a60975940 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReaderTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReaderTest.java @@ -24,10 +24,9 @@ import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.List; -import java.util.Map; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.segment.local.startree.v2.store.StarTreeIndexMapUtils; import org.apache.pinot.segment.spi.AggregationFunctionType; import org.apache.pinot.segment.spi.creator.SegmentVersion; @@ -88,26 +87,26 @@ public class StarTreeIndexReaderTest { Collections.singleton(new AggregationFunctionColumnPair(AggregationFunctionType.SUM, "dimX"))); when(_segmentMetadata.getStarTreeV2MetadataList()).thenReturn(Arrays.asList(stMeta1, stMeta2)); // Mock the offset/sizes for the index buffers. - List<Map<StarTreeIndexMapUtils.IndexKey, StarTreeIndexMapUtils.IndexValue>> indexMaps = new ArrayList<>(); - Map<StarTreeIndexMapUtils.IndexKey, StarTreeIndexMapUtils.IndexValue> indexMap = new HashMap<>(); - indexMap.put(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.STAR_TREE, null), - new StarTreeIndexMapUtils.IndexValue(0, 1)); - indexMap.put(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, "dim0"), - new StarTreeIndexMapUtils.IndexValue(1, 1)); - indexMap.put(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, "dim1"), - new StarTreeIndexMapUtils.IndexValue(2, 1)); - indexMap.put(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, "count__*"), - new StarTreeIndexMapUtils.IndexValue(3, 1)); + List<List<Pair<StarTreeIndexMapUtils.IndexKey, StarTreeIndexMapUtils.IndexValue>>> indexMaps = new ArrayList<>(); + List<Pair<StarTreeIndexMapUtils.IndexKey, StarTreeIndexMapUtils.IndexValue>> indexMap = new ArrayList<>(); + indexMap.add(Pair.of(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.STAR_TREE, null), + new StarTreeIndexMapUtils.IndexValue(0, 1))); + indexMap.add(Pair.of(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, "dim0"), + new StarTreeIndexMapUtils.IndexValue(1, 1))); + indexMap.add(Pair.of(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, "dim1"), + new StarTreeIndexMapUtils.IndexValue(2, 1))); + indexMap.add(Pair.of(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, "count__*"), + new StarTreeIndexMapUtils.IndexValue(3, 1))); indexMaps.add(indexMap); - indexMap = new HashMap<>(); - indexMap.put(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.STAR_TREE, null), - new StarTreeIndexMapUtils.IndexValue(10, 3)); - indexMap.put(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, "dimX"), - new StarTreeIndexMapUtils.IndexValue(13, 3)); - indexMap.put(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, "dimY"), - new StarTreeIndexMapUtils.IndexValue(16, 3)); - indexMap.put(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, "sum__dimX"), - new StarTreeIndexMapUtils.IndexValue(19, 3)); + indexMap = new ArrayList<>(); + indexMap.add(Pair.of(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.STAR_TREE, null), + new StarTreeIndexMapUtils.IndexValue(10, 3))); + indexMap.add(Pair.of(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, "dimX"), + new StarTreeIndexMapUtils.IndexValue(13, 3))); + indexMap.add(Pair.of(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, "dimY"), + new StarTreeIndexMapUtils.IndexValue(16, 3))); + indexMap.add(Pair.of(new StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, "sum__dimX"), + new StarTreeIndexMapUtils.IndexValue(19, 3))); indexMaps.add(indexMap); File indexMapFile = new File(TEMP_DIR, StarTreeV2Constants.INDEX_MAP_FILE_NAME); StarTreeIndexMapUtils.storeToFile(indexMaps, indexMapFile); diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/startree/AggregationFunctionColumnPair.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/startree/AggregationFunctionColumnPair.java index 86f0cc7cd1..e77c65d028 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/startree/AggregationFunctionColumnPair.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/startree/AggregationFunctionColumnPair.java @@ -18,10 +18,11 @@ */ package org.apache.pinot.segment.spi.index.startree; +import java.util.Comparator; import org.apache.pinot.segment.spi.AggregationFunctionType; -public class AggregationFunctionColumnPair { +public class AggregationFunctionColumnPair implements Comparable<AggregationFunctionColumnPair> { public static final String DELIMITER = "__"; public static final String STAR = "*"; public static final AggregationFunctionColumnPair COUNT_STAR = @@ -86,4 +87,11 @@ public class AggregationFunctionColumnPair { public String toString() { return toColumnName(); } + + @Override + public int compareTo(AggregationFunctionColumnPair other) { + return Comparator.comparing((AggregationFunctionColumnPair o) -> o._column) + .thenComparing((AggregationFunctionColumnPair o) -> o._functionType) + .compare(this, other); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org