bkuang88 commented on a change in pull request #5832: URL: https://github.com/apache/incubator-pinot/pull/5832#discussion_r467344354
########## File path: pinot-core/src/test/java/org/apache/pinot/queries/DistinctCountThetaSketchTest.java ########## @@ -117,60 +121,77 @@ public void testGroupBySql() { testThetaSketches(true, true); } + @Test(expectedExceptions = BadQueryRequestException.class, dataProvider = "badQueries") + public void testInvalidNoPredicates(final String query) { + getBrokerResponseForSqlQuery(query); + } + + @DataProvider(name = "badQueries") + public Object[][] badQueries() { + return new Object[][] { + // need at least 4 arguments in agg func + {"select distinctCountThetaSketch(colTS, 'nominalEntries=123', '$0') from testTable"}, + // substitution arguments should start at $1 + {"select distinctCountThetaSketch(colTS, 'nominalEntries=123', 'colA = 1', '$0') from testTable"}, + // substituting variable has numeric value higher than the number of predicates provided + {"select distinctCountThetaSketch(colTS, 'nominalEntries=123', 'colA = 1', '$5') from testTable"}, + // SET_DIFF requires exactly 2 arguments + {"select distinctCountThetaSketch(colTS, 'nominalEntries=123', 'colA = 1', 'SET_DIFF($1)') from testTable"}, + // invalid merging function + {"select distinctCountThetaSketch(colTS, 'nominalEntries=123', 'colA = 1', 'asdf') from testTable"} + }; + } + private void testThetaSketches(boolean groupBy, boolean sql) { String tsQuery, distinctQuery; String thetaSketchParams = "nominalEntries=1001"; List<String> predicateStrings = Collections.singletonList("colA = 1"); + String substitution = "$1"; String whereClause = Strings.join(predicateStrings, " or "); - tsQuery = buildQuery(whereClause, thetaSketchParams, predicateStrings, whereClause, groupBy, false); + tsQuery = buildQuery(whereClause, thetaSketchParams, predicateStrings, substitution, groupBy, false); Review comment: I think this test wasn't testing any aggregations. It was just selecting a sketch without any aggregations, so I didn't touch it. Would you like me to get rid of this test then? ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -108,35 +136,24 @@ public DistinctCountThetaSketchAggregationFunction(List<ExpressionContext> argum Preconditions.checkArgument(paramsExpression.getType() == ExpressionContext.Type.LITERAL, "Last argument of DistinctCountThetaSketch must be literal (post-aggregation expression)"); _postAggregationExpression = QueryContextConverterUtils - .getFilter(CalciteSqlParser.compileToExpression(postAggregationExpression.getLiteral())); + .getExpression(CalciteSqlParser.compileToExpression(postAggregationExpression.getLiteral())); // Initialize the predicate map _predicateInfoMap = new HashMap<>(); - if (numArguments > 3) { - // Predicates are explicitly specified - for (int i = 2; i < numArguments - 1; i++) { - ExpressionContext predicateExpression = arguments.get(i); - Preconditions.checkArgument(predicateExpression.getType() == ExpressionContext.Type.LITERAL, - "Third to second last argument of DistinctCountThetaSketch must be literal (predicate expression)"); - Predicate predicate = getPredicate(predicateExpression.getLiteral()); - _inputExpressions.add(predicate.getLhs()); - _predicateInfoMap.put(predicate, new PredicateInfo(predicate)); - } - } else { - // Auto-derive predicates from the post-aggregation expression - Stack<FilterContext> stack = new Stack<>(); - stack.push(_postAggregationExpression); - while (!stack.isEmpty()) { - FilterContext filter = stack.pop(); - if (filter.getType() == FilterContext.Type.PREDICATE) { - Predicate predicate = filter.getPredicate(); - _inputExpressions.add(predicate.getLhs()); - _predicateInfoMap.put(predicate, new PredicateInfo(predicate)); - } else { - stack.addAll(filter.getChildren()); - } - } + + // Predicates are explicitly specified Review comment: Wonder if that introduces more confusion than flexibility. Because we currently don't want to support complex predicates right? Wouldn't the users make more mistakes or be confused as to when they can use complex predicates and when they cannot? And if we do start to introduce complex queries in the future, will the syntax be "UNION($1, $2)" or "$1 and $2" or "colA='a' or colB='b'" or "UNION(colA='a', colB='b')"? Given the vast possibilities out there, I'm wondering if it's better to just stick to a single syntax so that it doesn't cause confusion in the future in case we do want to modify the predicate complexities. What do you guys think? ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -57,11 +61,31 @@ * <p>TODO: For performance concern, use {@code List<Sketch>} as the intermediate result. */ public class DistinctCountThetaSketchAggregationFunction implements AggregationFunction<Map<String, Sketch>, Long> { + + public enum MergeFunction { + SET_UNION, SET_INTERSECT, SET_DIFF; Review comment: I think there are some SQL keywords in there. Do we want to mix real SQL keywords with theta sketch merging functions? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org