Jackie-Jiang commented on a change in pull request #5832: URL: https://github.com/apache/incubator-pinot/pull/5832#discussion_r467499977
########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -57,11 +61,31 @@ * <p>TODO: For performance concern, use {@code List<Sketch>} as the intermediate result. */ public class DistinctCountThetaSketchAggregationFunction implements AggregationFunction<Map<String, Sketch>, Long> { + + public enum MergeFunction { Review comment: I feel putting these functions as constant is more readable and easier to use: `public static final String UNION = "UNION";` You can directly `switch` on the `expression.getFunction().getFunctionName().toUpperCase()` and not need to worry about `MergeFunction.valueOf()` throws exception. ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -57,11 +61,31 @@ * <p>TODO: For performance concern, use {@code List<Sketch>} as the intermediate result. */ public class DistinctCountThetaSketchAggregationFunction implements AggregationFunction<Map<String, Sketch>, Long> { + + public enum MergeFunction { + SET_UNION, SET_INTERSECT, SET_DIFF; + + /** SET_INTERSECT($1, $2, $3) => "SET_INTERSECT($1,$2,$3)", useful for unit tests */ + public String apply(final String... args) { Review comment: Let's not have method just for test in production class. In the test you should test different functions instead of the standardized one (e.g. `Intersect($1, $2, $3)`, `INTERSECT($1,$2,$3)` etc.) ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -576,6 +616,69 @@ private SetOperationBuilder getSetOperationBuilder() { : SetOperation.builder().setNominalEntries(_thetaSketchParams.getNominalEntries()); } + /** + * Validates that the function context's substitution parameters ($1, $2, etc) does not exceed the number + * of predicates passed into the post-aggregation function. + * Review comment: Also validate that there are 2 arguments for `DIFF` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -57,11 +61,31 @@ * <p>TODO: For performance concern, use {@code List<Sketch>} as the intermediate result. */ public class DistinctCountThetaSketchAggregationFunction implements AggregationFunction<Map<String, Sketch>, Long> { + + public enum MergeFunction { + SET_UNION, SET_INTERSECT, SET_DIFF; + + /** SET_INTERSECT($1, $2, $3) => "SET_INTERSECT($1,$2,$3)", useful for unit tests */ + public String apply(final String... args) { + final String delimited = String.join(",", args); + return String.format("%s(%s)", name(), delimited); + } + + public static boolean isValid(final String name) { + return SET_UNION.name().equalsIgnoreCase(name) + || SET_INTERSECT.name().equalsIgnoreCase(name) + || SET_DIFF.name().equalsIgnoreCase(name); + } + } + + private static final Pattern ARGUMENT_SUBSTITUTION = Pattern.compile("\\$(\\d+)"); + private final ExpressionContext _thetaSketchColumn; private final ThetaSketchParams _thetaSketchParams; private final SetOperationBuilder _setOperationBuilder; private final List<ExpressionContext> _inputExpressions; - private final FilterContext _postAggregationExpression; + private final ExpressionContext _postAggregationExpression; + private final List<Predicate> _predicateInfoList; Review comment: ```suggestion private final List<Predicate> _predicates; ``` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -526,28 +543,51 @@ private Predicate getPredicate(String predicateString) { * passed to this method to be used when evaluating the expression. * * @param postAggregationExpression Post-aggregation expression to evaluate (modeled as a filter) + * @param expressions list of aggregation function parameters * @param sketchMap Precomputed sketches for predicates that are part of the expression. * @return Overall evaluated sketch for the expression. */ - private Sketch evalPostAggregationExpression(FilterContext postAggregationExpression, - Map<Predicate, Sketch> sketchMap) { - switch (postAggregationExpression.getType()) { - case AND: - Intersection intersection = _setOperationBuilder.buildIntersection(); - for (FilterContext child : postAggregationExpression.getChildren()) { - intersection.update(evalPostAggregationExpression(child, sketchMap)); - } - return intersection.getResult(); - case OR: - Union union = _setOperationBuilder.buildUnion(); - for (FilterContext child : postAggregationExpression.getChildren()) { - union.update(evalPostAggregationExpression(child, sketchMap)); + private Sketch evalPostAggregationExpression( + final ExpressionContext postAggregationExpression, Review comment: (Code convention) Remove `final` and reformat. Same for other methods ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -57,11 +61,31 @@ * <p>TODO: For performance concern, use {@code List<Sketch>} as the intermediate result. */ public class DistinctCountThetaSketchAggregationFunction implements AggregationFunction<Map<String, Sketch>, Long> { + + public enum MergeFunction { + SET_UNION, SET_INTERSECT, SET_DIFF; + + /** SET_INTERSECT($1, $2, $3) => "SET_INTERSECT($1,$2,$3)", useful for unit tests */ + public String apply(final String... args) { + final String delimited = String.join(",", args); + return String.format("%s(%s)", name(), delimited); + } + + public static boolean isValid(final String name) { Review comment: (Code convention) We don't use `final` within method argument or local variables. Same for other places ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -57,11 +61,31 @@ * <p>TODO: For performance concern, use {@code List<Sketch>} as the intermediate result. */ public class DistinctCountThetaSketchAggregationFunction implements AggregationFunction<Map<String, Sketch>, Long> { + + public enum MergeFunction { + SET_UNION, SET_INTERSECT, SET_DIFF; Review comment: `UNION`, `INTERSECT`, `DIFF` for concise and simplicity? ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -576,6 +616,69 @@ private SetOperationBuilder getSetOperationBuilder() { : SetOperation.builder().setNominalEntries(_thetaSketchParams.getNominalEntries()); } + /** + * Validates that the function context's substitution parameters ($1, $2, etc) does not exceed the number + * of predicates passed into the post-aggregation function. + * + * For example, if the post aggregation function is: + * INTERSECT($1, $2, $3) + * + * But there are only 2 arguments passed into the aggregation function, throw an error + * @param context The parsed function context that's a tree structure + * @param numPredicates Max number of predicates available to be substituted + */ + private static void validatePostAggregationExpression(final ExpressionContext context, final int numPredicates) { + if (context.getType() == ExpressionContext.Type.LITERAL) { + throw new IllegalArgumentException("Invalid post-aggregation function expression syntax."); + } + + if (context.getType() == ExpressionContext.Type.IDENTIFIER) { + int id = extractSubstitutionPosition(context.getIdentifier()); + if (id <= 0) + throw new IllegalArgumentException("Argument substitution starts at $1"); + if (id > numPredicates) + throw new IllegalArgumentException("Argument substitution exceeded number of predicates"); + // if none of the invalid conditions are met above, exit out early + return; + } + + if (!MergeFunction.isValid(context.getFunction().getFunctionName())) { + final String allowed = + Arrays.stream(MergeFunction.values()) + .map(MergeFunction::name) + .collect(Collectors.joining(",")); + throw new IllegalArgumentException( + String.format("Invalid Theta Sketch aggregation function. Allowed: [%s]", allowed)); + } + + switch (MergeFunction.valueOf(context.getFunction().getFunctionName().toUpperCase())) { + case SET_DIFF: + // set diff can only have 2 arguments + if (context.getFunction().getArguments().size() != 2) { + throw new IllegalArgumentException("SET_DIFF function can only have 2 arguments."); + } + break; Review comment: Also validate the arguments ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -108,35 +136,24 @@ public DistinctCountThetaSketchAggregationFunction(List<ExpressionContext> argum Preconditions.checkArgument(paramsExpression.getType() == ExpressionContext.Type.LITERAL, "Last argument of DistinctCountThetaSketch must be literal (post-aggregation expression)"); _postAggregationExpression = QueryContextConverterUtils - .getFilter(CalciteSqlParser.compileToExpression(postAggregationExpression.getLiteral())); + .getExpression(CalciteSqlParser.compileToExpression(postAggregationExpression.getLiteral())); // Initialize the predicate map _predicateInfoMap = new HashMap<>(); - if (numArguments > 3) { - // Predicates are explicitly specified - for (int i = 2; i < numArguments - 1; i++) { - ExpressionContext predicateExpression = arguments.get(i); - Preconditions.checkArgument(predicateExpression.getType() == ExpressionContext.Type.LITERAL, - "Third to second last argument of DistinctCountThetaSketch must be literal (predicate expression)"); - Predicate predicate = getPredicate(predicateExpression.getLiteral()); - _inputExpressions.add(predicate.getLhs()); - _predicateInfoMap.put(predicate, new PredicateInfo(predicate)); - } - } else { - // Auto-derive predicates from the post-aggregation expression - Stack<FilterContext> stack = new Stack<>(); - stack.push(_postAggregationExpression); - while (!stack.isEmpty()) { - FilterContext filter = stack.pop(); - if (filter.getType() == FilterContext.Type.PREDICATE) { - Predicate predicate = filter.getPredicate(); - _inputExpressions.add(predicate.getLhs()); - _predicateInfoMap.put(predicate, new PredicateInfo(predicate)); - } else { - stack.addAll(filter.getChildren()); - } - } + + // Predicates are explicitly specified + for (int i = 2; i < numArguments - 1; i++) { + ExpressionContext predicateExpression = arguments.get(i); + Preconditions.checkArgument(predicateExpression.getType() == ExpressionContext.Type.LITERAL, + "Third to second last argument of DistinctCountThetaSketch must be literal (predicate expression)"); + Predicate predicate = getPredicate(predicateExpression.getLiteral()); + _inputExpressions.add(predicate.getLhs()); + _predicateInfoList.add(predicate); + _predicateInfoMap.put(predicate, new PredicateInfo(predicate)); } + + // first expression is the nominal entries parameter Review comment: (Code convention) Capitalize the first character of the comments. Same for other places ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -576,6 +616,69 @@ private SetOperationBuilder getSetOperationBuilder() { : SetOperation.builder().setNominalEntries(_thetaSketchParams.getNominalEntries()); } + /** + * Validates that the function context's substitution parameters ($1, $2, etc) does not exceed the number + * of predicates passed into the post-aggregation function. + * + * For example, if the post aggregation function is: + * INTERSECT($1, $2, $3) + * + * But there are only 2 arguments passed into the aggregation function, throw an error + * @param context The parsed function context that's a tree structure + * @param numPredicates Max number of predicates available to be substituted + */ + private static void validatePostAggregationExpression(final ExpressionContext context, final int numPredicates) { + if (context.getType() == ExpressionContext.Type.LITERAL) { + throw new IllegalArgumentException("Invalid post-aggregation function expression syntax."); + } + + if (context.getType() == ExpressionContext.Type.IDENTIFIER) { + int id = extractSubstitutionPosition(context.getIdentifier()); + if (id <= 0) + throw new IllegalArgumentException("Argument substitution starts at $1"); + if (id > numPredicates) + throw new IllegalArgumentException("Argument substitution exceeded number of predicates"); + // if none of the invalid conditions are met above, exit out early + return; + } + + if (!MergeFunction.isValid(context.getFunction().getFunctionName())) { + final String allowed = + Arrays.stream(MergeFunction.values()) + .map(MergeFunction::name) + .collect(Collectors.joining(",")); + throw new IllegalArgumentException( + String.format("Invalid Theta Sketch aggregation function. Allowed: [%s]", allowed)); + } + + switch (MergeFunction.valueOf(context.getFunction().getFunctionName().toUpperCase())) { + case SET_DIFF: + // set diff can only have 2 arguments + if (context.getFunction().getArguments().size() != 2) { + throw new IllegalArgumentException("SET_DIFF function can only have 2 arguments."); + } + break; + case SET_UNION: + case SET_INTERSECT: Review comment: Check it has more than one argument? Prevent inefficient function such as `UNION()` or `UNION($1)` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -78,9 +102,9 @@ public DistinctCountThetaSketchAggregationFunction(List<ExpressionContext> argum throws SqlParseException { int numArguments = arguments.size(); - // NOTE: This function expects at least 3 arguments: theta-sketch column, parameters, post-aggregation expression. - Preconditions.checkArgument(numArguments >= 3, - "DistinctCountThetaSketch expects at least three arguments (theta-sketch column, parameters, post-aggregation expression), got: ", + // NOTE: This function expects at least 4 arguments: theta-sketch column, parameters, post-aggregation expression. Review comment: +1 ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java ########## @@ -526,28 +543,51 @@ private Predicate getPredicate(String predicateString) { * passed to this method to be used when evaluating the expression. * * @param postAggregationExpression Post-aggregation expression to evaluate (modeled as a filter) + * @param expressions list of aggregation function parameters * @param sketchMap Precomputed sketches for predicates that are part of the expression. * @return Overall evaluated sketch for the expression. */ - private Sketch evalPostAggregationExpression(FilterContext postAggregationExpression, - Map<Predicate, Sketch> sketchMap) { - switch (postAggregationExpression.getType()) { - case AND: - Intersection intersection = _setOperationBuilder.buildIntersection(); - for (FilterContext child : postAggregationExpression.getChildren()) { - intersection.update(evalPostAggregationExpression(child, sketchMap)); - } - return intersection.getResult(); - case OR: - Union union = _setOperationBuilder.buildUnion(); - for (FilterContext child : postAggregationExpression.getChildren()) { - union.update(evalPostAggregationExpression(child, sketchMap)); + private Sketch evalPostAggregationExpression( + final ExpressionContext postAggregationExpression, + final List<Predicate> expressions, Review comment: No need to pass in this argument. Directly use member variable `_predicates` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org