This is an automated email from the ASF dual-hosted git repository. jiaguo pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new e23d17aab4 Dynamic evaluation of GroupBy Initial Capacity (#14001) e23d17aab4 is described below commit e23d17aab416fbfb6e5362983a8574fc58b07913 Author: praveenc7 <praveenkchagan...@gmail.com> AuthorDate: Thu Oct 31 12:42:44 2024 -0700 Dynamic evaluation of GroupBy Initial Capacity (#14001) * Dynamic evaluation of GroupBy Initial Capacity * Add test * fix build * nested predicate * more test * fix linter * test * Query option * remove file * Bounded & unbounded solution * unit test * address review comments * address review comments * review comments --- .../common/utils/config/QueryOptionsUtils.java | 4 ++ .../groupby/DefaultGroupByExecutor.java | 66 +++++++++++++++++++++- .../groupby/DictionaryBasedGroupKeyGenerator.java | 34 ++++++++++- .../NoDictionaryMultiColumnGroupKeyGenerator.java | 23 +++++++- .../NoDictionarySingleColumnGroupKeyGenerator.java | 11 +++- .../DictionaryBasedGroupKeyGeneratorTest.java | 61 +++++++++++++++++--- .../groupby/NoDictionaryGroupKeyGeneratorTest.java | 4 +- .../apache/pinot/spi/utils/CommonConstants.java | 6 ++ 8 files changed, 188 insertions(+), 21 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java index 0ec3374aea..bcc82efbf5 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java @@ -261,6 +261,10 @@ public class QueryOptionsUtils { return checkedParseInt(QueryOptionKey.MAX_INITIAL_RESULT_HOLDER_CAPACITY, maxInitResultCap); } + public static boolean optimizeMaxInitialResultHolderCapacityEnabled(Map<String, String> queryOptions) { + return Boolean.parseBoolean(queryOptions.get(QueryOptionKey.OPTIMIZE_MAX_INITIAL_RESULT_HOLDER_CAPACITY)); + } + @Nullable public static Integer 
getGroupTrimThreshold(Map<String, String> queryOptions) { String groupByTrimThreshold = queryOptions.get(QueryOptionKey.GROUP_TRIM_THRESHOLD); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java index 133385a524..9134c0476c 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java @@ -19,9 +19,16 @@ package org.apache.pinot.core.query.aggregation.groupby; import java.util.Collection; +import java.util.HashSet; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; import javax.annotation.Nullable; import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.FilterContext; +import org.apache.pinot.common.request.context.predicate.InPredicate; +import org.apache.pinot.common.request.context.predicate.Predicate; +import org.apache.pinot.common.utils.config.QueryOptionsUtils; import org.apache.pinot.core.common.BlockValSet; import org.apache.pinot.core.data.table.IntermediateRecord; import org.apache.pinot.core.data.table.TableResizer; @@ -88,6 +95,11 @@ public class DefaultGroupByExecutor implements GroupByExecutor { // Initialize group key generator int numGroupsLimit = queryContext.getNumGroupsLimit(); int maxInitialResultHolderCapacity = queryContext.getMaxInitialResultHolderCapacity(); + Map<ExpressionContext, Integer> groupByExpressionSizesFromPredicates = null; + if (queryContext.getQueryOptions() != null + && QueryOptionsUtils.optimizeMaxInitialResultHolderCapacityEnabled(queryContext.getQueryOptions())) { + groupByExpressionSizesFromPredicates = getGroupByExpressionSizesFromPredicates(queryContext); + } if (groupKeyGenerator != null) { _groupKeyGenerator = 
groupKeyGenerator; } else { @@ -96,15 +108,15 @@ public class DefaultGroupByExecutor implements GroupByExecutor { // TODO(nhejazi): support MV and dictionary based when null handling is enabled. _groupKeyGenerator = new NoDictionarySingleColumnGroupKeyGenerator(projectOperator, groupByExpressions[0], numGroupsLimit, - _nullHandlingEnabled); + _nullHandlingEnabled, groupByExpressionSizesFromPredicates); } else { _groupKeyGenerator = new NoDictionaryMultiColumnGroupKeyGenerator(projectOperator, groupByExpressions, numGroupsLimit, - _nullHandlingEnabled); + _nullHandlingEnabled, groupByExpressionSizesFromPredicates); } } else { _groupKeyGenerator = new DictionaryBasedGroupKeyGenerator(projectOperator, groupByExpressions, numGroupsLimit, - maxInitialResultHolderCapacity); + maxInitialResultHolderCapacity, groupByExpressionSizesFromPredicates); } } @@ -127,6 +139,54 @@ public class DefaultGroupByExecutor implements GroupByExecutor { } } + /** + * Retrieve the sizes of GroupBy expressions from IN and EQ predicates found in the filter context, if available. + * 1. If the filter context is null or lacks GroupBy expressions, return null. + * 2. Ensure the top-level filter context consists solely of AND-type filters; for other types (for example OR) we cannot + * guarantee deterministic sizes for GroupBy expressions. 
+ */ + private Map<ExpressionContext, Integer> getGroupByExpressionSizesFromPredicates(QueryContext queryContext) { + FilterContext filterContext = queryContext.getFilter(); + if (filterContext == null || queryContext.getGroupByExpressions() == null) { + return null; + } + + Set<Predicate> predicateColumns = new HashSet<>(); + if (filterContext.getType() == FilterContext.Type.AND) { + for (FilterContext child : filterContext.getChildren()) { + FilterContext.Type type = child.getType(); + if (type != FilterContext.Type.PREDICATE && type != FilterContext.Type.AND) { + return null; + } else if (child.getPredicate() != null) { + predicateColumns.add(child.getPredicate()); + } + } + } else if (filterContext.getPredicate() != null) { + predicateColumns.add(filterContext.getPredicate()); + } else { + return null; + } + + // Collect IN and EQ predicates and store their sizes + Map<ExpressionContext, Integer> predicateSizeMap = predicateColumns.stream() + .filter(predicate -> predicate.getType() == Predicate.Type.IN || predicate.getType() == Predicate.Type.EQ) + .collect(Collectors.toMap( + Predicate::getLhs, + predicate -> (predicate.getType() == Predicate.Type.IN) + ? 
((InPredicate) predicate).getValues().size() + : 1, + Integer::min + )); + + // Populate the group-by expressions with sizes from the predicate map + return queryContext.getGroupByExpressions().stream() + .filter(predicateSizeMap::containsKey) + .collect(Collectors.toMap( + expression -> expression, + expression -> predicateSizeMap.getOrDefault(expression, null) + )); + } + @Override public void process(ValueBlock valueBlock) { // Generate group keys diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java index 8650ccad9b..257e95c004 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java @@ -26,7 +26,11 @@ import it.unimi.dsi.fastutil.objects.Object2IntMap; import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; import it.unimi.dsi.fastutil.objects.ObjectIterator; import java.util.Arrays; +import java.util.HashMap; import java.util.Iterator; +import java.util.Map; +import javax.annotation.Nullable; +import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.common.request.context.ExpressionContext; import org.apache.pinot.core.common.BlockValSet; import org.apache.pinot.core.operator.BaseProjectOperator; @@ -99,7 +103,8 @@ public class DictionaryBasedGroupKeyGenerator implements GroupKeyGenerator { private final RawKeyHolder _rawKeyHolder; public DictionaryBasedGroupKeyGenerator(BaseProjectOperator<?> projectOperator, - ExpressionContext[] groupByExpressions, int numGroupsLimit, int arrayBasedThreshold) { + ExpressionContext[] groupByExpressions, int numGroupsLimit, int arrayBasedThreshold, + @Nullable Map<ExpressionContext, Integer> groupByExpressionSizesFromPredicates) { assert 
numGroupsLimit >= arrayBasedThreshold; _groupByExpressions = groupByExpressions; @@ -113,7 +118,7 @@ public class DictionaryBasedGroupKeyGenerator implements GroupKeyGenerator { // no need to intern dictionary values when there is only one group by expression because // only one call will be made to the dictionary to extract each raw value. _internedDictionaryValues = _numGroupByExpressions > 1 ? new Object[_numGroupByExpressions][] : null; - + Map<ExpressionContext, Integer> cardinalityMap = new HashMap<>(_numGroupByExpressions); long cardinalityProduct = 1L; boolean longOverflow = false; for (int i = 0; i < _numGroupByExpressions; i++) { @@ -123,6 +128,7 @@ public class DictionaryBasedGroupKeyGenerator implements GroupKeyGenerator { assert _dictionaries[i] != null; int cardinality = _dictionaries[i].length(); _cardinalities[i] = cardinality; + cardinalityMap.put(groupByExpression, cardinality); if (_internedDictionaryValues != null && cardinality < MAX_DICTIONARY_INTERN_TABLE_SIZE) { _internedDictionaryValues[i] = new Object[cardinality]; } @@ -135,6 +141,14 @@ public class DictionaryBasedGroupKeyGenerator implements GroupKeyGenerator { } _isSingleValueColumn[i] = columnContext.isSingleValue(); } + if (groupByExpressionSizesFromPredicates != null) { + Pair<Boolean, Long> optimizedCardinality = getOptimizedGroupByCardinality(groupByExpressionSizesFromPredicates, + cardinalityMap); + if (optimizedCardinality.getLeft() && optimizedCardinality.getRight() != null) { + longOverflow = false; + cardinalityProduct = Math.min(optimizedCardinality.getRight(), cardinalityProduct); + } + } // TODO: Clear the holder after processing the query instead of before if (longOverflow) { // ArrayMapBasedHolder @@ -171,6 +185,22 @@ public class DictionaryBasedGroupKeyGenerator implements GroupKeyGenerator { } } + private Pair<Boolean, Long> getOptimizedGroupByCardinality(Map<ExpressionContext, Integer> groupByExpressionSizes, + Map<ExpressionContext, Integer> columnCardinalityMap) { + 
long maxInitialResultHolderCapacity = 1L; + for (Map.Entry<ExpressionContext, Integer> entry : columnCardinalityMap.entrySet()) { + Integer cardinality = entry.getValue(); + Integer size = groupByExpressionSizes.get(entry.getKey()); + int minSize = size != null ? Math.min(size, cardinality) : cardinality; + if (maxInitialResultHolderCapacity > Long.MAX_VALUE / minSize) { + return Pair.of(false, null); + } else { + maxInitialResultHolderCapacity *= minSize; + } + } + return Pair.of(true, maxInitialResultHolderCapacity); + } + @Override public int getGlobalGroupKeyUpperBound() { return _globalGroupIdUpperBound; diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java index 9c7cf193a6..26857e2dd2 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java @@ -24,6 +24,7 @@ import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; import it.unimi.dsi.fastutil.objects.ObjectIterator; import java.util.Arrays; import java.util.Iterator; +import java.util.Map; import org.apache.pinot.common.request.context.ExpressionContext; import org.apache.pinot.core.common.BlockValSet; import org.apache.pinot.core.operator.BaseProjectOperator; @@ -59,9 +60,11 @@ public class NoDictionaryMultiColumnGroupKeyGenerator implements GroupKeyGenerat private final boolean[] _isSingleValueExpressions; private final int _numGroupsLimit; private final boolean _nullHandlingEnabled; + private final int _globalGroupIdUpperBound; public NoDictionaryMultiColumnGroupKeyGenerator(BaseProjectOperator<?> projectOperator, - ExpressionContext[] groupByExpressions, int numGroupsLimit, boolean nullHandlingEnabled) { + 
ExpressionContext[] groupByExpressions, int numGroupsLimit, boolean nullHandlingEnabled, + Map<ExpressionContext, Integer> groupByExpressionSizesFromPredicates) { _groupByExpressions = groupByExpressions; _numGroupByExpressions = groupByExpressions.length; _storedTypes = new DataType[_numGroupByExpressions]; @@ -69,7 +72,8 @@ public class NoDictionaryMultiColumnGroupKeyGenerator implements GroupKeyGenerat _onTheFlyDictionaries = new ValueToIdMap[_numGroupByExpressions]; _isSingleValueExpressions = new boolean[_numGroupByExpressions]; _nullHandlingEnabled = nullHandlingEnabled; - + int optimizedGroupByUpperBound = 1; + boolean canOptimizeGroupByUpperBound = groupByExpressionSizesFromPredicates != null; for (int i = 0; i < _numGroupByExpressions; i++) { ExpressionContext groupByExpression = groupByExpressions[i]; ColumnContext columnContext = projectOperator.getResultColumnContext(groupByExpression); @@ -80,17 +84,30 @@ public class NoDictionaryMultiColumnGroupKeyGenerator implements GroupKeyGenerat } else { _onTheFlyDictionaries[i] = ValueToIdMapFactory.get(_storedTypes[i]); } + if (canOptimizeGroupByUpperBound) { + Integer size = groupByExpressionSizesFromPredicates.get(groupByExpression); + if (size == null) { + canOptimizeGroupByUpperBound = false; + } else { + if (optimizedGroupByUpperBound > Integer.MAX_VALUE / size) { + optimizedGroupByUpperBound = numGroupsLimit; + } else { + optimizedGroupByUpperBound *= size; + } + } + } _isSingleValueExpressions[i] = columnContext.isSingleValue(); } _groupKeyMap = new Object2IntOpenHashMap<>(); _groupKeyMap.defaultReturnValue(INVALID_ID); _numGroupsLimit = numGroupsLimit; + _globalGroupIdUpperBound = canOptimizeGroupByUpperBound ? 
optimizedGroupByUpperBound : numGroupsLimit; } @Override public int getGlobalGroupKeyUpperBound() { - return _numGroupsLimit; + return _globalGroupIdUpperBound; } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionarySingleColumnGroupKeyGenerator.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionarySingleColumnGroupKeyGenerator.java index 768bfec1be..de378b8dcf 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionarySingleColumnGroupKeyGenerator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionarySingleColumnGroupKeyGenerator.java @@ -33,6 +33,7 @@ import java.math.BigDecimal; import java.util.Arrays; import java.util.Iterator; import java.util.Map; +import javax.annotation.Nullable; import org.apache.pinot.common.request.context.ExpressionContext; import org.apache.pinot.core.common.BlockValSet; import org.apache.pinot.core.operator.BaseProjectOperator; @@ -64,12 +65,18 @@ public class NoDictionarySingleColumnGroupKeyGenerator implements GroupKeyGenera private int _numGroups = 0; public NoDictionarySingleColumnGroupKeyGenerator(BaseProjectOperator<?> projectOperator, - ExpressionContext groupByExpression, int numGroupsLimit, boolean nullHandlingEnabled) { + ExpressionContext groupByExpression, int numGroupsLimit, boolean nullHandlingEnabled, + @Nullable Map<ExpressionContext, Integer> groupByExpressionSizesFromPredicates) { _groupByExpression = groupByExpression; ColumnContext columnContext = projectOperator.getResultColumnContext(groupByExpression); _storedType = columnContext.getDataType().getStoredType(); _groupKeyMap = createGroupKeyMap(_storedType); - _globalGroupIdUpperBound = numGroupsLimit; + if (groupByExpressionSizesFromPredicates != null) { + Integer size = groupByExpressionSizesFromPredicates.get(groupByExpression); + _globalGroupIdUpperBound = size != null ? 
Math.min(size, numGroupsLimit) : numGroupsLimit; + } else { + _globalGroupIdUpperBound = numGroupsLimit; + } _nullHandlingEnabled = nullHandlingEnabled; _isSingleValueExpression = columnContext.isSingleValue(); } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java index a0d61c0e52..c6c5cded9c 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java @@ -54,6 +54,7 @@ import org.apache.pinot.spi.utils.ReadMode; import org.apache.pinot.spi.utils.builder.TableConfigBuilder; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import static org.testng.Assert.assertEquals; @@ -167,7 +168,7 @@ public class DictionaryBasedGroupKeyGeneratorTest { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), UNIQUE_ROWS, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), UNIQUE_ROWS, _errorMessage); @@ -187,7 +188,7 @@ public class DictionaryBasedGroupKeyGeneratorTest { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - 
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -208,7 +209,7 @@ public class DictionaryBasedGroupKeyGeneratorTest { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -229,7 +230,7 @@ public class DictionaryBasedGroupKeyGeneratorTest { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -264,7 +265,7 @@ public class DictionaryBasedGroupKeyGeneratorTest { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - 
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null); int groupKeyUpperBound = dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), groupKeyUpperBound, _errorMessage); @@ -285,7 +286,7 @@ public class DictionaryBasedGroupKeyGeneratorTest { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -308,7 +309,7 @@ public class DictionaryBasedGroupKeyGeneratorTest { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -330,7 +331,7 @@ public class DictionaryBasedGroupKeyGeneratorTest { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - 
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -350,7 +351,7 @@ public class DictionaryBasedGroupKeyGeneratorTest { // NOTE: arrayBasedThreshold must be smaller or equal to numGroupsLimit DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), numGroupsLimit, - numGroupsLimit); + numGroupsLimit, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), numGroupsLimit, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -431,6 +432,48 @@ public class DictionaryBasedGroupKeyGeneratorTest { GroupKeyGenerator.INVALID_ID); } + @Test(dataProvider = "groupByResultHolderCapacityDataProvider") + public void testGetGroupByResultHolderCapacity(String query, Integer expectedCapacity) { + query = query + "SET optimizeMaxInitialResultHolderCapacity=true"; + QueryContext queryContext = QueryContextConverterUtils.getQueryContext(query); + List<ExpressionContext> expressionContextList = queryContext.getGroupByExpressions(); + ExpressionContext[] expressions = + expressionContextList.toArray(new ExpressionContext[expressionContextList.size()]); + DefaultGroupByExecutor defaultGroupByExecutor = + new DefaultGroupByExecutor(queryContext, expressions, _projectOperator); + assertEquals(defaultGroupByExecutor.getGroupKeyGenerator().getGlobalGroupKeyUpperBound(), expectedCapacity, + _errorMessage); + } + + @DataProvider(name = "groupByResultHolderCapacityDataProvider") + public Object[][] groupByResultHolderCapacityDataProvider() { + return new Object[][]{ + // Single 
IN predicate + {"SELECT COUNT(s9), s1 FROM testTable WHERE s1 IN (1, 2, 3, 4, 5) GROUP BY s1 LIMIT 10;", 5}, + // Multiple IN predicates but only one used in group-by + {"SELECT COUNT(s9), s1 FROM testTable WHERE s1 IN (1, 2, 3) AND s2 IN (4, 5) GROUP BY s1 LIMIT 10;", 3}, + // Multiple IN predicates used in group-by + {"SELECT COUNT(s9), s1, s3 FROM testTable WHERE s1 IN (1, 2, 3) AND s3 IN (4, 5) GROUP BY s1, s3 LIMIT 10;", 6}, + // Single EQ predicate + {"SELECT COUNT(s9), s1 FROM testTable WHERE s1 = 1 GROUP BY s1 LIMIT 10;", 1}, + // Multiple EQ predicates but only one used in group-by + {"SELECT COUNT(s9), s1 FROM testTable WHERE s1 = 1 AND s2 = 4 GROUP BY s1 LIMIT 10;", 1}, + // Mixed predicates + {"SELECT COUNT(s9), s1, s3 FROM testTable WHERE s1 IN (1, 2, 3) AND s3 = 4 GROUP BY s1, s3 LIMIT 10;", 3}, + {"SELECT COUNT(*), s1, s3 FROM testTable WHERE s1 = 1 AND s3 IN (4, 5) GROUP BY s1, s3 LIMIT 10;", 2}, + // No filter -> s1 has cardinality 100 + {"SELECT COUNT(s9), s1 FROM testTable GROUP BY s1 LIMIT 1000;", 100}, + // No matching filter EQ predicate in group-by expression -> s2 has cardinality 100 + {"SELECT COUNT(s9), s2 FROM testTable WHERE s1 = 1 GROUP BY s2 LIMIT 1000;", 100}, + // No matching filter IN predicate in group-by expression -> s2 has cardinality 100 + {"SELECT COUNT(s9), s2 FROM testTable WHERE s1 IN (1, 2, 3) GROUP BY s2 LIMIT 1000;", 100}, + // Only one matching filter predicate in group-by expression -> (3 [s1] * 100 [s2]) = 300 + {"SELECT COUNT(s9), s1, s2 FROM testTable WHERE s1 IN (1, 2, 3) GROUP BY s1, s2 LIMIT 1000;", 300}, + // OR Predicate -> (100 [s1] * 100 [s2]) = 10000 [Just cardinality cross product] + {"SELECT COUNT(s9), s1, s2 FROM testTable WHERE s1 IN (1, 2, 3) OR s2 > 1 GROUP BY s1, s2 LIMIT 20000;", 10000}, + }; + } + @AfterClass public void tearDown() { FileUtils.deleteQuietly(new File(INDEX_DIR_PATH)); diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryGroupKeyGeneratorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryGroupKeyGeneratorTest.java index fa2882c192..8ac8bc745b 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryGroupKeyGeneratorTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryGroupKeyGeneratorTest.java @@ -200,14 +200,14 @@ public class NoDictionaryGroupKeyGeneratorTest { if (numGroupByColumns == 1) { groupKeyGenerator = new NoDictionarySingleColumnGroupKeyGenerator(_projectOperator, ExpressionContext.forIdentifier(COLUMNS.get(groupByColumnIndexes[0])), - InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, false); + InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, false, null); } else { ExpressionContext[] groupByExpressions = new ExpressionContext[numGroupByColumns]; for (int i = 0; i < numGroupByColumns; i++) { groupByExpressions[i] = ExpressionContext.forIdentifier(COLUMNS.get(groupByColumnIndexes[i])); } groupKeyGenerator = new NoDictionaryMultiColumnGroupKeyGenerator(_projectOperator, groupByExpressions, - InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, false); + InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, false, null); } groupKeyGenerator.generateKeysForBlock(_valueBlock, new int[NUM_RECORDS]); diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java index a89db355b5..e92f1306a4 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java @@ -480,6 +480,12 @@ public class CommonConstants { // will be returned - this query option can be set. This is useful for performance, since indexes can be used // for the aggregation filters and a full scan can be avoided. 
public static final String FILTERED_AGGREGATIONS_SKIP_EMPTY_GROUPS = "filteredAggregationsSkipEmptyGroups"; + + // When set to true, the max initial result holder capacity will be optimized based on the query rather than + // using the default value. This is best-effort for now and returns the default value if the optimization is not + // possible. + public static final String OPTIMIZE_MAX_INITIAL_RESULT_HOLDER_CAPACITY = + "optimizeMaxInitialResultHolderCapacity"; } public static class QueryOptionValue { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org