This is an automated email from the ASF dual-hosted git repository.

jiaguo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new e23d17aab4 Dynamic evaluation of GroupBy Initial Capacity (#14001)
e23d17aab4 is described below

commit e23d17aab416fbfb6e5362983a8574fc58b07913
Author: praveenc7 <praveenkchagan...@gmail.com>
AuthorDate: Thu Oct 31 12:42:44 2024 -0700

    Dynamic evaluation of GroupBy Initial Capacity (#14001)
    
    * Dynamic evaluation of GroupBy Initial Capacity
    
    * Add test
    
    * fix build
    
    * nested predicate
    
    * more test
    
    * fix linter
    
    * test
    
    * Query option
    
    * remove file
    
    * Bounded & unbounded solution
    
    * unit test
    
    * address review comments
    
    * address review comments
    
    * review comments
---
 .../common/utils/config/QueryOptionsUtils.java     |  4 ++
 .../groupby/DefaultGroupByExecutor.java            | 66 +++++++++++++++++++++-
 .../groupby/DictionaryBasedGroupKeyGenerator.java  | 34 ++++++++++-
 .../NoDictionaryMultiColumnGroupKeyGenerator.java  | 23 +++++++-
 .../NoDictionarySingleColumnGroupKeyGenerator.java | 11 +++-
 .../DictionaryBasedGroupKeyGeneratorTest.java      | 61 +++++++++++++++++---
 .../groupby/NoDictionaryGroupKeyGeneratorTest.java |  4 +-
 .../apache/pinot/spi/utils/CommonConstants.java    |  6 ++
 8 files changed, 188 insertions(+), 21 deletions(-)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java
index 0ec3374aea..bcc82efbf5 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java
@@ -261,6 +261,10 @@ public class QueryOptionsUtils {
     return checkedParseInt(QueryOptionKey.MAX_INITIAL_RESULT_HOLDER_CAPACITY, 
maxInitResultCap);
   }
 
+  public static boolean 
optimizeMaxInitialResultHolderCapacityEnabled(Map<String, String> queryOptions) 
{
+    return 
Boolean.parseBoolean(queryOptions.get(QueryOptionKey.OPTIMIZE_MAX_INITIAL_RESULT_HOLDER_CAPACITY));
+  }
+
   @Nullable
   public static Integer getGroupTrimThreshold(Map<String, String> 
queryOptions) {
     String groupByTrimThreshold = 
queryOptions.get(QueryOptionKey.GROUP_TRIM_THRESHOLD);
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java
index 133385a524..9134c0476c 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java
@@ -19,9 +19,16 @@
 package org.apache.pinot.core.query.aggregation.groupby;
 
 import java.util.Collection;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
 import javax.annotation.Nullable;
 import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.FilterContext;
+import org.apache.pinot.common.request.context.predicate.InPredicate;
+import org.apache.pinot.common.request.context.predicate.Predicate;
+import org.apache.pinot.common.utils.config.QueryOptionsUtils;
 import org.apache.pinot.core.common.BlockValSet;
 import org.apache.pinot.core.data.table.IntermediateRecord;
 import org.apache.pinot.core.data.table.TableResizer;
@@ -88,6 +95,11 @@ public class DefaultGroupByExecutor implements 
GroupByExecutor {
     // Initialize group key generator
     int numGroupsLimit = queryContext.getNumGroupsLimit();
     int maxInitialResultHolderCapacity = 
queryContext.getMaxInitialResultHolderCapacity();
+    Map<ExpressionContext, Integer> groupByExpressionSizesFromPredicates = 
null;
+    if (queryContext.getQueryOptions() != null
+        && 
QueryOptionsUtils.optimizeMaxInitialResultHolderCapacityEnabled(queryContext.getQueryOptions()))
 {
+      groupByExpressionSizesFromPredicates = 
getGroupByExpressionSizesFromPredicates(queryContext);
+    }
     if (groupKeyGenerator != null) {
       _groupKeyGenerator = groupKeyGenerator;
     } else {
@@ -96,15 +108,15 @@ public class DefaultGroupByExecutor implements 
GroupByExecutor {
           // TODO(nhejazi): support MV and dictionary based when null handling 
is enabled.
           _groupKeyGenerator =
               new NoDictionarySingleColumnGroupKeyGenerator(projectOperator, 
groupByExpressions[0], numGroupsLimit,
-                  _nullHandlingEnabled);
+                  _nullHandlingEnabled, groupByExpressionSizesFromPredicates);
         } else {
           _groupKeyGenerator =
               new NoDictionaryMultiColumnGroupKeyGenerator(projectOperator, 
groupByExpressions, numGroupsLimit,
-                  _nullHandlingEnabled);
+                  _nullHandlingEnabled, groupByExpressionSizesFromPredicates);
         }
       } else {
         _groupKeyGenerator = new 
DictionaryBasedGroupKeyGenerator(projectOperator, groupByExpressions, 
numGroupsLimit,
-            maxInitialResultHolderCapacity);
+            maxInitialResultHolderCapacity, 
groupByExpressionSizesFromPredicates);
       }
     }
 
@@ -127,6 +139,54 @@ public class DefaultGroupByExecutor implements 
GroupByExecutor {
     }
   }
 
+  /**
+   * Retrieve the sizes of GroupBy expressions from IN and EQ predicates found 
in the filter context, if available.
+   * 1. If the filter context is null or lacks GroupBy expressions, return 
null.
+   * 2. Ensure the top-level filter context consists solely of AND-type 
filters; for other types (e.g. OR) we cannot
+   *    guarantee deterministic sizes for GroupBy expressions.
+   */
+  private Map<ExpressionContext, Integer> 
getGroupByExpressionSizesFromPredicates(QueryContext queryContext) {
+    FilterContext filterContext = queryContext.getFilter();
+    if (filterContext == null || queryContext.getGroupByExpressions() == null) 
{
+      return null;
+    }
+
+    Set<Predicate> predicateColumns = new HashSet<>();
+    if (filterContext.getType() == FilterContext.Type.AND) {
+      for (FilterContext child : filterContext.getChildren()) {
+        FilterContext.Type type = child.getType();
+        if (type != FilterContext.Type.PREDICATE && type != 
FilterContext.Type.AND) {
+          return null;
+        } else if (child.getPredicate() != null) {
+          predicateColumns.add(child.getPredicate());
+        }
+      }
+    } else if (filterContext.getPredicate() != null) {
+      predicateColumns.add(filterContext.getPredicate());
+    } else {
+      return null;
+    }
+
+    // Collect IN and EQ predicates and store their sizes
+    Map<ExpressionContext, Integer> predicateSizeMap = 
predicateColumns.stream()
+        .filter(predicate -> predicate.getType() == Predicate.Type.IN || 
predicate.getType() == Predicate.Type.EQ)
+        .collect(Collectors.toMap(
+            Predicate::getLhs,
+            predicate -> (predicate.getType() == Predicate.Type.IN)
+                ? ((InPredicate) predicate).getValues().size()
+                : 1,
+            Integer::min
+        ));
+
+    // Populate the group-by expressions with sizes from the predicate map
+    return queryContext.getGroupByExpressions().stream()
+        .filter(predicateSizeMap::containsKey)
+        .collect(Collectors.toMap(
+            expression -> expression,
+            expression -> predicateSizeMap.getOrDefault(expression, null)
+        ));
+  }
+
   @Override
   public void process(ValueBlock valueBlock) {
     // Generate group keys
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java
index 8650ccad9b..257e95c004 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java
@@ -26,7 +26,11 @@ import it.unimi.dsi.fastutil.objects.Object2IntMap;
 import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
 import it.unimi.dsi.fastutil.objects.ObjectIterator;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.Iterator;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.pinot.common.request.context.ExpressionContext;
 import org.apache.pinot.core.common.BlockValSet;
 import org.apache.pinot.core.operator.BaseProjectOperator;
@@ -99,7 +103,8 @@ public class DictionaryBasedGroupKeyGenerator implements 
GroupKeyGenerator {
   private final RawKeyHolder _rawKeyHolder;
 
   public DictionaryBasedGroupKeyGenerator(BaseProjectOperator<?> 
projectOperator,
-      ExpressionContext[] groupByExpressions, int numGroupsLimit, int 
arrayBasedThreshold) {
+      ExpressionContext[] groupByExpressions, int numGroupsLimit, int 
arrayBasedThreshold,
+      @Nullable Map<ExpressionContext, Integer> 
groupByExpressionSizesFromPredicates) {
     assert numGroupsLimit >= arrayBasedThreshold;
 
     _groupByExpressions = groupByExpressions;
@@ -113,7 +118,7 @@ public class DictionaryBasedGroupKeyGenerator implements 
GroupKeyGenerator {
     // no need to intern dictionary values when there is only one group by 
expression because
     // only one call will be made to the dictionary to extract each raw value.
     _internedDictionaryValues = _numGroupByExpressions > 1 ? new 
Object[_numGroupByExpressions][] : null;
-
+    Map<ExpressionContext, Integer> cardinalityMap = new 
HashMap<>(_numGroupByExpressions);
     long cardinalityProduct = 1L;
     boolean longOverflow = false;
     for (int i = 0; i < _numGroupByExpressions; i++) {
@@ -123,6 +128,7 @@ public class DictionaryBasedGroupKeyGenerator implements 
GroupKeyGenerator {
       assert _dictionaries[i] != null;
       int cardinality = _dictionaries[i].length();
       _cardinalities[i] = cardinality;
+      cardinalityMap.put(groupByExpression, cardinality);
       if (_internedDictionaryValues != null && cardinality < 
MAX_DICTIONARY_INTERN_TABLE_SIZE) {
         _internedDictionaryValues[i] = new Object[cardinality];
       }
@@ -135,6 +141,14 @@ public class DictionaryBasedGroupKeyGenerator implements 
GroupKeyGenerator {
       }
       _isSingleValueColumn[i] = columnContext.isSingleValue();
     }
+    if (groupByExpressionSizesFromPredicates != null) {
+       Pair<Boolean, Long> optimizedCardinality = 
getOptimizedGroupByCardinality(groupByExpressionSizesFromPredicates,
+           cardinalityMap);
+       if (optimizedCardinality.getLeft() && optimizedCardinality.getRight() 
!= null) {
+         longOverflow = false;
+         cardinalityProduct = Math.min(optimizedCardinality.getRight(), 
cardinalityProduct);
+       }
+    }
     // TODO: Clear the holder after processing the query instead of before
     if (longOverflow) {
       // ArrayMapBasedHolder
@@ -171,6 +185,22 @@ public class DictionaryBasedGroupKeyGenerator implements 
GroupKeyGenerator {
     }
   }
 
+  private Pair<Boolean, Long> 
getOptimizedGroupByCardinality(Map<ExpressionContext, Integer> 
groupByExpressionSizes,
+      Map<ExpressionContext, Integer> columnCardinalityMap) {
+    long maxInitialResultHolderCapacity = 1L;
+    for (Map.Entry<ExpressionContext, Integer> entry : 
columnCardinalityMap.entrySet()) {
+      Integer cardinality = entry.getValue();
+      Integer size = groupByExpressionSizes.get(entry.getKey());
+      int minSize = size != null ? Math.min(size, cardinality) : cardinality;
+      if (maxInitialResultHolderCapacity > Long.MAX_VALUE / minSize) {
+        return Pair.of(false, null);
+      } else {
+        maxInitialResultHolderCapacity *= minSize;
+      }
+    }
+    return Pair.of(true, maxInitialResultHolderCapacity);
+  }
+
   @Override
   public int getGlobalGroupKeyUpperBound() {
     return _globalGroupIdUpperBound;
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java
index 9c7cf193a6..26857e2dd2 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java
@@ -24,6 +24,7 @@ import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
 import it.unimi.dsi.fastutil.objects.ObjectIterator;
 import java.util.Arrays;
 import java.util.Iterator;
+import java.util.Map;
 import org.apache.pinot.common.request.context.ExpressionContext;
 import org.apache.pinot.core.common.BlockValSet;
 import org.apache.pinot.core.operator.BaseProjectOperator;
@@ -59,9 +60,11 @@ public class NoDictionaryMultiColumnGroupKeyGenerator 
implements GroupKeyGenerat
   private final boolean[] _isSingleValueExpressions;
   private final int _numGroupsLimit;
   private final boolean _nullHandlingEnabled;
+  private final int _globalGroupIdUpperBound;
 
   public NoDictionaryMultiColumnGroupKeyGenerator(BaseProjectOperator<?> 
projectOperator,
-      ExpressionContext[] groupByExpressions, int numGroupsLimit, boolean 
nullHandlingEnabled) {
+      ExpressionContext[] groupByExpressions, int numGroupsLimit, boolean 
nullHandlingEnabled,
+      Map<ExpressionContext, Integer> groupByExpressionSizesFromPredicates) {
     _groupByExpressions = groupByExpressions;
     _numGroupByExpressions = groupByExpressions.length;
     _storedTypes = new DataType[_numGroupByExpressions];
@@ -69,7 +72,8 @@ public class NoDictionaryMultiColumnGroupKeyGenerator 
implements GroupKeyGenerat
     _onTheFlyDictionaries = new ValueToIdMap[_numGroupByExpressions];
     _isSingleValueExpressions = new boolean[_numGroupByExpressions];
     _nullHandlingEnabled = nullHandlingEnabled;
-
+    int optimizedGroupByUpperBound = 1;
+    boolean canOptimizeGroupByUpperBound = 
groupByExpressionSizesFromPredicates != null;
     for (int i = 0; i < _numGroupByExpressions; i++) {
       ExpressionContext groupByExpression = groupByExpressions[i];
       ColumnContext columnContext = 
projectOperator.getResultColumnContext(groupByExpression);
@@ -80,17 +84,30 @@ public class NoDictionaryMultiColumnGroupKeyGenerator 
implements GroupKeyGenerat
       } else {
         _onTheFlyDictionaries[i] = ValueToIdMapFactory.get(_storedTypes[i]);
       }
+      if (canOptimizeGroupByUpperBound) {
+        Integer size = 
groupByExpressionSizesFromPredicates.get(groupByExpression);
+        if (size == null) {
+          canOptimizeGroupByUpperBound = false;
+        } else {
+          if (optimizedGroupByUpperBound > Integer.MAX_VALUE / size) {
+            optimizedGroupByUpperBound = numGroupsLimit;
+          } else {
+            optimizedGroupByUpperBound *= size;
+          }
+        }
+      }
       _isSingleValueExpressions[i] = columnContext.isSingleValue();
     }
 
     _groupKeyMap = new Object2IntOpenHashMap<>();
     _groupKeyMap.defaultReturnValue(INVALID_ID);
     _numGroupsLimit = numGroupsLimit;
+    _globalGroupIdUpperBound = canOptimizeGroupByUpperBound ? 
optimizedGroupByUpperBound : numGroupsLimit;
   }
 
   @Override
   public int getGlobalGroupKeyUpperBound() {
-    return _numGroupsLimit;
+    return _globalGroupIdUpperBound;
   }
 
   @Override
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionarySingleColumnGroupKeyGenerator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionarySingleColumnGroupKeyGenerator.java
index 768bfec1be..de378b8dcf 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionarySingleColumnGroupKeyGenerator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionarySingleColumnGroupKeyGenerator.java
@@ -33,6 +33,7 @@ import java.math.BigDecimal;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.Map;
+import javax.annotation.Nullable;
 import org.apache.pinot.common.request.context.ExpressionContext;
 import org.apache.pinot.core.common.BlockValSet;
 import org.apache.pinot.core.operator.BaseProjectOperator;
@@ -64,12 +65,18 @@ public class NoDictionarySingleColumnGroupKeyGenerator 
implements GroupKeyGenera
   private int _numGroups = 0;
 
   public NoDictionarySingleColumnGroupKeyGenerator(BaseProjectOperator<?> 
projectOperator,
-      ExpressionContext groupByExpression, int numGroupsLimit, boolean 
nullHandlingEnabled) {
+      ExpressionContext groupByExpression, int numGroupsLimit, boolean 
nullHandlingEnabled,
+      @Nullable Map<ExpressionContext, Integer> 
groupByExpressionSizesFromPredicates) {
     _groupByExpression = groupByExpression;
     ColumnContext columnContext = 
projectOperator.getResultColumnContext(groupByExpression);
     _storedType = columnContext.getDataType().getStoredType();
     _groupKeyMap = createGroupKeyMap(_storedType);
-    _globalGroupIdUpperBound = numGroupsLimit;
+    if (groupByExpressionSizesFromPredicates != null) {
+      Integer size = 
groupByExpressionSizesFromPredicates.get(groupByExpression);
+      _globalGroupIdUpperBound = size != null ? Math.min(size, numGroupsLimit) 
: numGroupsLimit;
+    } else {
+      _globalGroupIdUpperBound = numGroupsLimit;
+    }
     _nullHandlingEnabled = nullHandlingEnabled;
     _isSingleValueExpression = columnContext.isSingleValue();
   }
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java
index a0d61c0e52..c6c5cded9c 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java
@@ -54,6 +54,7 @@ import org.apache.pinot.spi.utils.ReadMode;
 import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
 import static org.testng.Assert.assertEquals;
@@ -167,7 +168,7 @@ public class DictionaryBasedGroupKeyGeneratorTest {
     DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator =
         new DictionaryBasedGroupKeyGenerator(_projectOperator, 
getExpressions(groupByColumns),
             InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT,
-            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY);
+            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), 
UNIQUE_ROWS, _errorMessage);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 
UNIQUE_ROWS, _errorMessage);
 
@@ -187,7 +188,7 @@ public class DictionaryBasedGroupKeyGeneratorTest {
     DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator =
         new DictionaryBasedGroupKeyGenerator(_projectOperator, 
getExpressions(groupByColumns),
             InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT,
-            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY);
+            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(),
         InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 
0, _errorMessage);
@@ -208,7 +209,7 @@ public class DictionaryBasedGroupKeyGeneratorTest {
     DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator =
         new DictionaryBasedGroupKeyGenerator(_projectOperator, 
getExpressions(groupByColumns),
             InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT,
-            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY);
+            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(),
         InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 
0, _errorMessage);
@@ -229,7 +230,7 @@ public class DictionaryBasedGroupKeyGeneratorTest {
     DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator =
         new DictionaryBasedGroupKeyGenerator(_projectOperator, 
getExpressions(groupByColumns),
             InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT,
-            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY);
+            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(),
         InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 
0, _errorMessage);
@@ -264,7 +265,7 @@ public class DictionaryBasedGroupKeyGeneratorTest {
     DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator =
         new DictionaryBasedGroupKeyGenerator(_projectOperator, 
getExpressions(groupByColumns),
             InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT,
-            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY);
+            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null);
     int groupKeyUpperBound = 
dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound();
     
assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 
groupKeyUpperBound, _errorMessage);
 
@@ -285,7 +286,7 @@ public class DictionaryBasedGroupKeyGeneratorTest {
     DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator =
         new DictionaryBasedGroupKeyGenerator(_projectOperator, 
getExpressions(groupByColumns),
             InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT,
-            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY);
+            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(),
         InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 
0, _errorMessage);
@@ -308,7 +309,7 @@ public class DictionaryBasedGroupKeyGeneratorTest {
     DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator =
         new DictionaryBasedGroupKeyGenerator(_projectOperator, 
getExpressions(groupByColumns),
             InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT,
-            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY);
+            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(),
         InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 
0, _errorMessage);
@@ -330,7 +331,7 @@ public class DictionaryBasedGroupKeyGeneratorTest {
     DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator =
         new DictionaryBasedGroupKeyGenerator(_projectOperator, 
getExpressions(groupByColumns),
             InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT,
-            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY);
+            
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(),
         InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 
0, _errorMessage);
@@ -350,7 +351,7 @@ public class DictionaryBasedGroupKeyGeneratorTest {
     // NOTE: arrayBasedThreshold must be smaller or equal to numGroupsLimit
     DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator =
         new DictionaryBasedGroupKeyGenerator(_projectOperator, 
getExpressions(groupByColumns), numGroupsLimit,
-            numGroupsLimit);
+            numGroupsLimit, null);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), 
numGroupsLimit, _errorMessage);
     
assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 
0, _errorMessage);
 
@@ -431,6 +432,48 @@ public class DictionaryBasedGroupKeyGeneratorTest {
         GroupKeyGenerator.INVALID_ID);
   }
 
+  @Test(dataProvider = "groupByResultHolderCapacityDataProvider")
+  public void testGetGroupByResultHolderCapacity(String query, Integer 
expectedCapacity) {
+    query = query + "SET optimizeMaxInitialResultHolderCapacity=true";
+    QueryContext queryContext = 
QueryContextConverterUtils.getQueryContext(query);
+    List<ExpressionContext> expressionContextList = 
queryContext.getGroupByExpressions();
+    ExpressionContext[] expressions =
+        expressionContextList.toArray(new 
ExpressionContext[expressionContextList.size()]);
+    DefaultGroupByExecutor defaultGroupByExecutor =
+        new DefaultGroupByExecutor(queryContext, expressions, 
_projectOperator);
+    
assertEquals(defaultGroupByExecutor.getGroupKeyGenerator().getGlobalGroupKeyUpperBound(),
 expectedCapacity,
+        _errorMessage);
+  }
+
+  @DataProvider(name = "groupByResultHolderCapacityDataProvider")
+  public Object[][] groupByResultHolderCapacityDataProvider() {
+    return new Object[][]{
+        // Single IN predicate
+        {"SELECT COUNT(s9), s1 FROM testTable WHERE s1 IN (1, 2, 3, 4, 5) 
GROUP BY s1 LIMIT 10;", 5},
+        // Multiple IN predicates but only one used in group-by
+        {"SELECT COUNT(s9), s1 FROM testTable WHERE s1 IN (1, 2, 3) AND s2 IN 
(4, 5) GROUP BY s1 LIMIT 10;", 3},
+        // Multiple IN predicates used in group-by
+        {"SELECT COUNT(s9), s1, s3 FROM testTable WHERE s1 IN (1, 2, 3) AND s3 
IN (4, 5) GROUP BY s1, s3 LIMIT 10;", 6},
+        // Single EQ predicate
+        {"SELECT COUNT(s9), s1 FROM testTable WHERE s1 = 1 GROUP BY s1 LIMIT 
10;", 1},
+        // Multiple EQ predicates but only one used in group-by
+        {"SELECT COUNT(s9), s1 FROM testTable WHERE s1 = 1 AND s2 = 4 GROUP BY 
s1 LIMIT 10;", 1},
+        // Mixed predicates
+        {"SELECT COUNT(s9), s1, s3 FROM testTable WHERE s1 IN (1, 2, 3) AND s3 
= 4 GROUP BY s1, s3 LIMIT 10;", 3},
+        {"SELECT COUNT(*), s1, s3 FROM testTable WHERE s1 = 1 AND s3 IN (4, 5) 
GROUP BY s1, s3 LIMIT 10;", 2},
+        // No filter -> s1 has cardinality 100
+        {"SELECT COUNT(s9), s1 FROM testTable GROUP BY s1 LIMIT 1000;", 100},
+        // No matching filter EQ predicate in group-by expression -> s2 has 
cardinality 100
+        {"SELECT COUNT(s9), s2 FROM testTable WHERE s1 = 1 GROUP BY s2 LIMIT 
1000;", 100},
+        // No matching filter IN predicate in group-by expression  -> s2 has 
cardinality 100
+        {"SELECT COUNT(s9), s2 FROM testTable WHERE s1 IN (1, 2, 3) GROUP BY 
s2 LIMIT 1000;", 100},
+        // Only one matching filter predicate in group-by expression  -> (3 
[s1] * 100 [s2]) = 300
+        {"SELECT COUNT(s9), s1, s2 FROM testTable WHERE s1 IN (1, 2, 3) GROUP 
BY s1, s2 LIMIT 1000;", 300},
+        // OR Predicate  -> (100 [s1] * 100 [s2]) = 10000 [Just cardinality 
cross product]
+        {"SELECT COUNT(s9), s1, s2 FROM testTable WHERE s1 IN (1, 2, 3) OR s2 
> 1 GROUP BY s1, s2 LIMIT 20000;", 10000},
+    };
+  }
+
   @AfterClass
   public void tearDown() {
     FileUtils.deleteQuietly(new File(INDEX_DIR_PATH));
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryGroupKeyGeneratorTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryGroupKeyGeneratorTest.java
index fa2882c192..8ac8bc745b 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryGroupKeyGeneratorTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryGroupKeyGeneratorTest.java
@@ -200,14 +200,14 @@ public class NoDictionaryGroupKeyGeneratorTest {
     if (numGroupByColumns == 1) {
       groupKeyGenerator = new 
NoDictionarySingleColumnGroupKeyGenerator(_projectOperator,
           
ExpressionContext.forIdentifier(COLUMNS.get(groupByColumnIndexes[0])),
-          InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, false);
+          InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, false, null);
     } else {
       ExpressionContext[] groupByExpressions = new 
ExpressionContext[numGroupByColumns];
       for (int i = 0; i < numGroupByColumns; i++) {
         groupByExpressions[i] = 
ExpressionContext.forIdentifier(COLUMNS.get(groupByColumnIndexes[i]));
       }
       groupKeyGenerator = new 
NoDictionaryMultiColumnGroupKeyGenerator(_projectOperator, groupByExpressions,
-          InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, false);
+          InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, false, null);
     }
     groupKeyGenerator.generateKeysForBlock(_valueBlock, new int[NUM_RECORDS]);
 
diff --git 
a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
index a89db355b5..e92f1306a4 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
@@ -480,6 +480,12 @@ public class CommonConstants {
         // will be returned - this query option can be set. This is useful for 
performance, since indexes can be used
         // for the aggregation filters and a full scan can be avoided.
         public static final String FILTERED_AGGREGATIONS_SKIP_EMPTY_GROUPS = 
"filteredAggregationsSkipEmptyGroups";
+
+        // When set to true, the max initial result holder capacity will be 
optimized based on the query, rather than
+        // using the default value. This is best-effort for now and returns 
the default value if the optimization is not
+        // possible.
+        public static final String OPTIMIZE_MAX_INITIAL_RESULT_HOLDER_CAPACITY 
=
+            "optimizeMaxInitialResultHolderCapacity";
       }
 
       public static class QueryOptionValue {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to