This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new b284154731 StarTree non deterministic ordering of functions in 
startree_index_map and other places (#10231)
b284154731 is described below

commit b284154731d2f658eb9ddfc14d25ea509c76919d
Author: Saurabh Dubey <saurabhd...@gmail.com>
AuthorDate: Thu Feb 9 00:42:22 2023 +0530

    StarTree non deterministic ordering of functions in startree_index_map and 
other places (#10231)
---
 .../startree/v2/builder/MultipleTreesBuilder.java  |  4 +--
 .../startree/v2/builder/StarTreeIndexCombiner.java | 15 ++++----
 .../v2/builder/StarTreeV2BuilderConfig.java        |  8 ++---
 .../startree/v2/store/StarTreeIndexMapUtils.java   | 19 +++++++---
 .../segment/store/StarTreeIndexReaderTest.java     | 41 +++++++++++-----------
 .../startree/AggregationFunctionColumnPair.java    | 10 +++++-
 6 files changed, 57 insertions(+), 40 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
index 50f04abe0b..ec40df5f97 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
@@ -24,12 +24,12 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Map;
 import javax.annotation.Nullable;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.configuration.Configuration;
 import org.apache.commons.configuration.PropertiesConfiguration;
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.tuple.Pair;
 import 
org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
 import org.apache.pinot.segment.local.startree.StarTreeBuilderUtils;
 import org.apache.pinot.segment.local.startree.v2.store.StarTreeIndexMapUtils;
@@ -130,7 +130,7 @@ public class MultipleTreesBuilder implements Closeable {
       File starTreeIndexDir = new File(_segmentDirectory, 
StarTreeV2Constants.STAR_TREE_TEMP_DIR);
       FileUtils.forceMkdir(starTreeIndexDir);
       _metadataProperties.addProperty(MetadataKey.STAR_TREE_COUNT, 
numStarTrees);
-      List<Map<IndexKey, IndexValue>> indexMaps = new 
ArrayList<>(numStarTrees);
+      List<List<Pair<IndexKey, IndexValue>>> indexMaps = new 
ArrayList<>(numStarTrees);
 
       // Build all star-trees
       for (int i = 0; i < numStarTrees; i++) {
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexCombiner.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexCombiner.java
index dbe44c7a13..4d4c805d39 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexCombiner.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexCombiner.java
@@ -24,9 +24,10 @@ import java.io.File;
 import java.io.IOException;
 import java.io.RandomAccessFile;
 import java.nio.channels.FileChannel;
-import java.util.HashMap;
-import java.util.Map;
+import java.util.ArrayList;
+import java.util.List;
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.pinot.segment.spi.V1Constants;
 import 
org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair;
 import org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants;
@@ -52,19 +53,19 @@ public class StarTreeIndexCombiner implements Closeable {
   /**
    * Combines the index files inside the given directory into the single index 
file, then cleans the directory.
    */
-  public Map<IndexKey, IndexValue> combine(StarTreeV2BuilderConfig 
builderConfig, File starTreeIndexDir)
+  public List<Pair<IndexKey, IndexValue>> combine(StarTreeV2BuilderConfig 
builderConfig, File starTreeIndexDir)
       throws IOException {
-    Map<IndexKey, IndexValue> indexMap = new HashMap<>();
+    List<Pair<IndexKey, IndexValue>> indexMap = new ArrayList<>();
 
     // Write star-tree index
     File starTreeIndexFile = new File(starTreeIndexDir, 
StarTreeV2Constants.STAR_TREE_INDEX_FILE_NAME);
-    indexMap.put(STAR_TREE_INDEX_KEY, writeFile(starTreeIndexFile));
+    indexMap.add(Pair.of(STAR_TREE_INDEX_KEY, writeFile(starTreeIndexFile)));
 
     // Write dimension indexes
     for (String dimension : builderConfig.getDimensionsSplitOrder()) {
       File dimensionIndexFile =
           new File(starTreeIndexDir, dimension + 
V1Constants.Indexes.UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION);
-      indexMap.put(new IndexKey(IndexType.FORWARD_INDEX, dimension), 
writeFile(dimensionIndexFile));
+      indexMap.add(Pair.of(new IndexKey(IndexType.FORWARD_INDEX, dimension), 
writeFile(dimensionIndexFile)));
     }
 
     // Write metric (function-column pair) indexes
@@ -72,7 +73,7 @@ public class StarTreeIndexCombiner implements Closeable {
       String metric = functionColumnPair.toColumnName();
       File metricIndexFile =
           new File(starTreeIndexDir, metric + 
V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION);
-      indexMap.put(new IndexKey(IndexType.FORWARD_INDEX, metric), 
writeFile(metricIndexFile));
+      indexMap.add(Pair.of(new IndexKey(IndexType.FORWARD_INDEX, metric), 
writeFile(metricIndexFile)));
     }
 
     FileUtils.cleanDirectory(starTreeIndexDir);
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
index 87ea5214cd..56e778a38b 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
@@ -21,10 +21,10 @@ package org.apache.pinot.segment.local.startree.v2.builder;
 import com.google.common.base.Preconditions;
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Objects;
 import java.util.Set;
+import java.util.TreeSet;
 import org.apache.commons.lang3.builder.ToStringBuilder;
 import org.apache.commons.lang3.builder.ToStringStyle;
 import org.apache.pinot.segment.spi.AggregationFunctionType;
@@ -56,7 +56,7 @@ public class StarTreeV2BuilderConfig {
 
     Set<String> skipStarNodeCreationForDimensions;
     if (indexConfig.getSkipStarNodeCreationForDimensions() != null) {
-      skipStarNodeCreationForDimensions = new 
HashSet<>(indexConfig.getSkipStarNodeCreationForDimensions());
+      skipStarNodeCreationForDimensions = new 
TreeSet<>(indexConfig.getSkipStarNodeCreationForDimensions());
       
Preconditions.checkArgument(dimensionsSplitOrder.containsAll(skipStarNodeCreationForDimensions),
           "Can not skip star-node creation for dimensions not in the split 
order, dimensionsSplitOrder: %s, "
               + "skipStarNodeCreationForDimensions: %s",
@@ -65,7 +65,7 @@ public class StarTreeV2BuilderConfig {
       skipStarNodeCreationForDimensions = Collections.emptySet();
     }
 
-    Set<AggregationFunctionColumnPair> functionColumnPairs = new HashSet<>();
+    Set<AggregationFunctionColumnPair> functionColumnPairs = new TreeSet<>();
     for (String functionColumnPair : indexConfig.getFunctionColumnPairs()) {
       
functionColumnPairs.add(AggregationFunctionColumnPair.fromColumnName(functionColumnPair));
     }
@@ -145,7 +145,7 @@ public class StarTreeV2BuilderConfig {
     }
     Preconditions.checkState(!dimensionsSplitOrder.isEmpty(), "No qualified 
dimension found for star-tree split order");
 
-    Set<AggregationFunctionColumnPair> functionColumnPairs = new HashSet<>();
+    Set<AggregationFunctionColumnPair> functionColumnPairs = new TreeSet<>();
     functionColumnPairs.add(AggregationFunctionColumnPair.COUNT_STAR);
     for (String numericMetric : numericMetrics) {
       functionColumnPairs.add(new 
AggregationFunctionColumnPair(AggregationFunctionType.SUM, numericMetric));
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java
index d50e507372..3b989d3c79 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java
@@ -22,6 +22,7 @@ import com.google.common.base.Preconditions;
 import java.io.File;
 import java.io.InputStream;
 import java.util.ArrayList;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -30,9 +31,9 @@ import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 import org.apache.commons.configuration.PropertiesConfiguration;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.pinot.spi.env.CommonsConfigurationUtils;
 
-
 /**
  * The {@code StarTreeIndexMapUtils} class is a utility class to store/load 
star-tree index map to/from file.
  * <p>
@@ -81,7 +82,7 @@ public class StarTreeIndexMapUtils {
   /**
    * Key of the index map.
    */
-  public static class IndexKey {
+  public static class IndexKey implements Comparable<IndexKey> {
     public final IndexType _indexType;
     // For star-tree index, column will be null
     public final String _column;
@@ -112,6 +113,14 @@ public class StarTreeIndexMapUtils {
         return false;
       }
     }
+
+    @Override
+    public int compareTo(IndexKey other) {
+      return Comparator
+          .comparing((IndexKey i) -> i._column, 
Comparator.nullsLast(Comparator.naturalOrder()))
+          .thenComparing((IndexKey i) -> i._indexType)
+          .compare(this, other);
+    }
   }
 
   /**
@@ -138,14 +147,14 @@ public class StarTreeIndexMapUtils {
   /**
    * Stores the index maps for multiple star-trees into a file.
    */
-  public static void storeToFile(List<Map<IndexKey, IndexValue>> indexMaps, 
File indexMapFile) {
+  public static void storeToFile(List<List<Pair<IndexKey, IndexValue>>> 
indexMaps, File indexMapFile) {
     Preconditions.checkState(!indexMapFile.exists(), "Star-tree index map file 
already exists");
 
     PropertiesConfiguration configuration = 
CommonsConfigurationUtils.fromFile(indexMapFile);
     int numStarTrees = indexMaps.size();
     for (int i = 0; i < numStarTrees; i++) {
-      Map<IndexKey, IndexValue> indexMap = indexMaps.get(i);
-      for (Map.Entry<IndexKey, IndexValue> entry : indexMap.entrySet()) {
+      List<Pair<IndexKey, IndexValue>> indexMap = indexMaps.get(i);
+      for (Pair<IndexKey, IndexValue> entry : indexMap) {
         IndexKey key = entry.getKey();
         IndexValue value = entry.getValue();
         configuration.addProperty(key.getPropertyName(i, OFFSET_SUFFIX), 
value._offset);
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReaderTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReaderTest.java
index c2eaba1142..8a60975940 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReaderTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReaderTest.java
@@ -24,10 +24,9 @@ import java.nio.ByteOrder;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.pinot.segment.local.startree.v2.store.StarTreeIndexMapUtils;
 import org.apache.pinot.segment.spi.AggregationFunctionType;
 import org.apache.pinot.segment.spi.creator.SegmentVersion;
@@ -88,26 +87,26 @@ public class StarTreeIndexReaderTest {
         Collections.singleton(new 
AggregationFunctionColumnPair(AggregationFunctionType.SUM, "dimX")));
     
when(_segmentMetadata.getStarTreeV2MetadataList()).thenReturn(Arrays.asList(stMeta1,
 stMeta2));
     // Mock the offset/sizes for the index buffers.
-    List<Map<StarTreeIndexMapUtils.IndexKey, 
StarTreeIndexMapUtils.IndexValue>> indexMaps = new ArrayList<>();
-    Map<StarTreeIndexMapUtils.IndexKey, StarTreeIndexMapUtils.IndexValue> 
indexMap = new HashMap<>();
-    indexMap.put(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.STAR_TREE, null),
-        new StarTreeIndexMapUtils.IndexValue(0, 1));
-    indexMap.put(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, 
"dim0"),
-        new StarTreeIndexMapUtils.IndexValue(1, 1));
-    indexMap.put(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, 
"dim1"),
-        new StarTreeIndexMapUtils.IndexValue(2, 1));
-    indexMap.put(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, 
"count__*"),
-        new StarTreeIndexMapUtils.IndexValue(3, 1));
+    List<List<Pair<StarTreeIndexMapUtils.IndexKey, 
StarTreeIndexMapUtils.IndexValue>>> indexMaps = new ArrayList<>();
+    List<Pair<StarTreeIndexMapUtils.IndexKey, 
StarTreeIndexMapUtils.IndexValue>> indexMap = new ArrayList<>();
+    indexMap.add(Pair.of(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.STAR_TREE, null),
+        new StarTreeIndexMapUtils.IndexValue(0, 1)));
+    indexMap.add(Pair.of(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, 
"dim0"),
+        new StarTreeIndexMapUtils.IndexValue(1, 1)));
+    indexMap.add(Pair.of(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, 
"dim1"),
+        new StarTreeIndexMapUtils.IndexValue(2, 1)));
+    indexMap.add(Pair.of(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, 
"count__*"),
+        new StarTreeIndexMapUtils.IndexValue(3, 1)));
     indexMaps.add(indexMap);
-    indexMap = new HashMap<>();
-    indexMap.put(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.STAR_TREE, null),
-        new StarTreeIndexMapUtils.IndexValue(10, 3));
-    indexMap.put(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, 
"dimX"),
-        new StarTreeIndexMapUtils.IndexValue(13, 3));
-    indexMap.put(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, 
"dimY"),
-        new StarTreeIndexMapUtils.IndexValue(16, 3));
-    indexMap.put(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, 
"sum__dimX"),
-        new StarTreeIndexMapUtils.IndexValue(19, 3));
+    indexMap = new ArrayList<>();
+    indexMap.add(Pair.of(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.STAR_TREE, null),
+        new StarTreeIndexMapUtils.IndexValue(10, 3)));
+    indexMap.add(Pair.of(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, 
"dimX"),
+        new StarTreeIndexMapUtils.IndexValue(13, 3)));
+    indexMap.add(Pair.of(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, 
"dimY"),
+        new StarTreeIndexMapUtils.IndexValue(16, 3)));
+    indexMap.add(Pair.of(new 
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX, 
"sum__dimX"),
+        new StarTreeIndexMapUtils.IndexValue(19, 3)));
     indexMaps.add(indexMap);
     File indexMapFile = new File(TEMP_DIR, 
StarTreeV2Constants.INDEX_MAP_FILE_NAME);
     StarTreeIndexMapUtils.storeToFile(indexMaps, indexMapFile);
diff --git 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/startree/AggregationFunctionColumnPair.java
 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/startree/AggregationFunctionColumnPair.java
index 86f0cc7cd1..e77c65d028 100644
--- 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/startree/AggregationFunctionColumnPair.java
+++ 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/startree/AggregationFunctionColumnPair.java
@@ -18,10 +18,11 @@
  */
 package org.apache.pinot.segment.spi.index.startree;
 
+import java.util.Comparator;
 import org.apache.pinot.segment.spi.AggregationFunctionType;
 
 
-public class AggregationFunctionColumnPair {
+public class AggregationFunctionColumnPair implements 
Comparable<AggregationFunctionColumnPair> {
   public static final String DELIMITER = "__";
   public static final String STAR = "*";
   public static final AggregationFunctionColumnPair COUNT_STAR =
@@ -86,4 +87,11 @@ public class AggregationFunctionColumnPair {
   public String toString() {
     return toColumnName();
   }
+
+  @Override
+  public int compareTo(AggregationFunctionColumnPair other) {
+    return Comparator.comparing((AggregationFunctionColumnPair o) -> o._column)
+        .thenComparing((AggregationFunctionColumnPair o) -> o._functionType)
+        .compare(this, other);
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to