praveenc7 commented on code in PR #14001: URL: https://github.com/apache/pinot/pull/14001#discussion_r1819929596
########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java: ########## @@ -127,6 +139,48 @@ public DefaultGroupByExecutor(QueryContext queryContext, AggregationFunction[] a } } + private Map<ExpressionContext, Integer> getGroupByExpressionSizesFromPredicates(QueryContext queryContext) { + FilterContext filterContext = queryContext.getFilter(); + if (filterContext == null || queryContext.getGroupByExpressions() == null) { + return null; + } + + Set<Predicate> predicateColumns = new HashSet<>(); + if (filterContext.getType() == FilterContext.Type.AND) { Review Comment: Added comment ########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java: ########## @@ -127,6 +139,48 @@ public DefaultGroupByExecutor(QueryContext queryContext, AggregationFunction[] a } } + private Map<ExpressionContext, Integer> getGroupByExpressionSizesFromPredicates(QueryContext queryContext) { + FilterContext filterContext = queryContext.getFilter(); + if (filterContext == null || queryContext.getGroupByExpressions() == null) { + return null; + } + + Set<Predicate> predicateColumns = new HashSet<>(); + if (filterContext.getType() == FilterContext.Type.AND) { + for (FilterContext child : filterContext.getChildren()) { + FilterContext.Type type = child.getType(); + if (type != FilterContext.Type.PREDICATE && type != FilterContext.Type.AND) { + return null; + } else if (child.getPredicate() != null) { + predicateColumns.add(child.getPredicate()); + } + } + } else if (filterContext.getPredicate() != null) { + predicateColumns.add(filterContext.getPredicate()); + } else { + return null; + } + + // Collect IN and EQ predicates and store their sizes + Map<ExpressionContext, Integer> predicateSizeMap = predicateColumns.stream() + .filter(predicate -> predicate.getType() == Predicate.Type.IN || predicate.getType() == Predicate.Type.EQ) + .collect(Collectors.toMap( + Predicate::getLhs, + predicate -> (predicate.getType() == Predicate.Type.IN) + ? ((InPredicate) predicate).getValues().size() + : 1, + Integer::sum Review Comment: We are right, let me fix this ########## pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java: ########## @@ -431,6 +430,51 @@ public void testMapDefaultValue() { GroupKeyGenerator.INVALID_ID); } + @Test(dataProvider = "groupByResultHolderCapacityDataProvider") + public void testGetGroupByResultHolderCapacity(String query, Integer expectedCapacity) { + query = query + "SET optimizeMaxInitialResultHolderCapacity=true"; + QueryContext queryContext = QueryContextConverterUtils.getQueryContext(query); + List<ExpressionContext> expressionContextList = queryContext.getGroupByExpressions(); + ExpressionContext[] expressions = + expressionContextList.toArray(new ExpressionContext[expressionContextList.size()]); + DefaultGroupByExecutor defaultGroupByExecutor = + new DefaultGroupByExecutor(queryContext, expressions, _projectOperator); + assertEquals(defaultGroupByExecutor.getGroupKeyGenerator().getGlobalGroupKeyUpperBound(), expectedCapacity, + _errorMessage); + } + + @DataProvider(name = "groupByResultHolderCapacityDataProvider") + public Object[][] groupByResultHolderCapacityDataProvider() { + return new Object[][]{ + // Single IN predicate + {"SELECT COUNT(s9), s1 FROM testTable WHERE s1 IN (1, 2, 3, 4, 5) GROUP BY s1 LIMIT 10;", 5}, + // Multiple IN predicates but only one used in group-by + {"SELECT COUNT(s9), s1 FROM testTable WHERE s1 IN (1, 2, 3) AND s2 IN (4, 5) GROUP BY s1 LIMIT 10;", 3}, + // Multiple IN predicates used in group-by + {"SELECT COUNT(s9), s1, s3 FROM testTable WHERE s1 IN (1, 2, 3) AND s3 IN (4, 5) GROUP BY s1, s3 LIMIT 10;", 6}, + // Single EQ predicate + {"SELECT COUNT(s9), s1 FROM testTable WHERE s1 = 1 GROUP BY s1 LIMIT 10;", 1}, + // Multiple EQ predicates but only one used in group-by + {"SELECT COUNT(s9), s1 FROM testTable WHERE s1 = 1 AND s2 = 4 GROUP BY s1 LIMIT 10;", 1}, + // Mixed predicates + {"SELECT COUNT(s9), s1, s3 FROM testTable WHERE s1 IN (1, 2, 3) AND s3 = 4 GROUP BY s1, s3 LIMIT 10;", 3}, + {"SELECT COUNT(*), s1, s3 FROM testTable WHERE s1 = 1 AND s3 IN (4, 5) GROUP BY s1, s3 LIMIT 10;", 2}, + // Multiple IN Predicate columns with same column name and different values + {"SELECT COUNT(s9), s1, s2 FROM testTable WHERE s1 IN (1, 2, 3) AND s1 IN (4, 5) AND s2 IN (6, 7)" Review Comment: Let me remove this -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org