noon-stripe commented on code in PR #8611: URL: https://github.com/apache/pinot/pull/8611#discussion_r875071563
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java: ########## @@ -306,15 +311,71 @@ public static void validateIngestionConfig(TableConfig tableConfig, @Nullable Sc } } + // Aggregation configs + List<AggregationConfig> aggregationConfigs = ingestionConfig.getAggregationConfigs(); + Set<String> aggregationSourceColumns = new HashSet<>(); + if (aggregationConfigs != null) { + Preconditions.checkState( + !tableConfig.getIndexingConfig().isAggregateMetrics(), + "aggregateMetrics cannot be set with AggregationConfig"); + Set<String> aggregationColumns = new HashSet<>(); + for (AggregationConfig aggregationConfig : aggregationConfigs) { + String columnName = aggregationConfig.getColumnName(); + if (schema != null) { + FieldSpec fieldSpec = schema.getFieldSpecFor(columnName); + Preconditions.checkState(fieldSpec != null, "The destination column '" + columnName + + "' of the aggregation function must be present in the schema"); + Preconditions.checkState(fieldSpec.getFieldType() == FieldSpec.FieldType.METRIC, + "The destination column '" + columnName + "' of the aggregation function must be a metric column"); + } + String aggregationFunction = aggregationConfig.getAggregationFunction(); + if (columnName == null || aggregationFunction == null) { + throw new IllegalStateException( + "columnName/aggregationFunction cannot be null in AggregationConfig " + aggregationConfig); + } + + if (!aggregationColumns.add(columnName)) { + throw new IllegalStateException("Duplicate aggregation config found for column '" + columnName + "'"); + } + ExpressionContext expressionContext; + try { + expressionContext = RequestContextUtils.getExpression(aggregationConfig.getAggregationFunction()); + } catch (Exception e) { + throw new IllegalStateException( + "Invalid aggregation function '" + aggregationFunction + "' for column '" + columnName + "'", e); + } + Preconditions.checkState(expressionContext.getType() == ExpressionContext.Type.FUNCTION, + "aggregation function must be a function for: %s", aggregationConfig); + + FunctionContext functionContext = expressionContext.getFunction(); + validateIngestionAggregation(functionContext.getFunctionName()); + Preconditions.checkState(functionContext.getArguments().size() == 1, + "aggregation function can only have one argument: %s", aggregationConfig); + + ExpressionContext argument = functionContext.getArguments().get(0); + Preconditions.checkState(argument.getType() == ExpressionContext.Type.IDENTIFIER, + "aggregator function argument must be a identifier: %s", aggregationConfig); + + aggregationSourceColumns.add(argument.getIdentifier()); + } + if (schema != null) { + Preconditions.checkState(new HashSet<>(schema.getMetricNames()).equals(aggregationColumns), Review Comment: That's a good suggestion, but I actually prefer to have the aggregation config be a requirement. Otherwise, you're altering the data without informing the user. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org