Jackie-Jiang commented on a change in pull request #7664: URL: https://github.com/apache/pinot/pull/7664#discussion_r740447764
########## File path: pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java ########## @@ -1026,6 +1029,39 @@ private String getActualTableName(String tableName) { return tableName; } + private static Set<String> getSegmentPartitionedColumns(TableCache tableCache, String tableName) { + final TableConfig offlineTableConfig = + tableCache.getTableConfig(TableNameBuilder.OFFLINE.tableNameWithType(tableName)); + final TableConfig realtimeTableConfig = + tableCache.getTableConfig(TableNameBuilder.REALTIME.tableNameWithType(tableName)); + if (offlineTableConfig == null) { + return getSegmentPartitionedColumns(realtimeTableConfig); + } + if (realtimeTableConfig == null) { + return getSegmentPartitionedColumns(offlineTableConfig); + } + Set<String> segmentPartitionedColumns = getSegmentPartitionedColumns(offlineTableConfig); + segmentPartitionedColumns.retainAll(getSegmentPartitionedColumns(realtimeTableConfig)); + return segmentPartitionedColumns; + } + + private static Set<String> getSegmentPartitionedColumns(TableConfig tableConfig) { Review comment: (minor) ```suggestion private static Set<String> getSegmentPartitionedColumns(@Nullable TableConfig tableConfig) { ``` ########## File path: pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java ########## @@ -1026,6 +1029,39 @@ private String getActualTableName(String tableName) { return tableName; } + private static Set<String> getSegmentPartitionedColumns(TableCache tableCache, String tableName) { + final TableConfig offlineTableConfig = + tableCache.getTableConfig(TableNameBuilder.OFFLINE.tableNameWithType(tableName)); + final TableConfig realtimeTableConfig = + tableCache.getTableConfig(TableNameBuilder.REALTIME.tableNameWithType(tableName)); + if (offlineTableConfig == null) { + return getSegmentPartitionedColumns(realtimeTableConfig); + } + if (realtimeTableConfig == null) { + return getSegmentPartitionedColumns(offlineTableConfig); + 
} + Set<String> segmentPartitionedColumns = getSegmentPartitionedColumns(offlineTableConfig); + segmentPartitionedColumns.retainAll(getSegmentPartitionedColumns(realtimeTableConfig)); + return segmentPartitionedColumns; + } + + private static Set<String> getSegmentPartitionedColumns(TableConfig tableConfig) { + Set<String> segmentPartitionedColumns = new HashSet<>(); + if (tableConfig == null) { + return segmentPartitionedColumns; + } + List<FieldConfig> fieldConfigs = tableConfig.getFieldConfigList(); + if (fieldConfigs != null) { + for (FieldConfig fieldConfig : fieldConfigs) { + if (fieldConfig.getProperties() != null && "true".equalsIgnoreCase( + fieldConfig.getProperties().getOrDefault(FieldConfig.IS_SEGMENT_PARTITIONED_COLUMN_KEY, "false"))) { Review comment: (minor) ```suggestion if (fieldConfig.getProperties() != null && Boolean.parseBoolean( fieldConfig.getProperties().get(FieldConfig.IS_SEGMENT_PARTITIONED_COLUMN_KEY))) { ``` ########## File path: pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java ########## @@ -1155,6 +1217,58 @@ private static void handleDistinctCountBitmapOverride(BrokerRequest brokerReques } } + /** + * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given SQL query. 
+ */ + @VisibleForTesting + static void handleSegmentPartitionedDistinctCountOverride(PinotQuery pinotQuery, + Set<String> segmentPartitionedColumns) { + if (segmentPartitionedColumns.isEmpty()) { + return; + } + for (Expression expression : pinotQuery.getSelectList()) { + handleSegmentPartitionedDistinctCountOverride(expression, segmentPartitionedColumns); + } + List<Expression> orderByExpressions = pinotQuery.getOrderByList(); + if (orderByExpressions != null) { + for (Expression expression : orderByExpressions) { + // NOTE: Order-by is always a Function with the ordering of the Expression + handleSegmentPartitionedDistinctCountOverride(expression.getFunctionCall().getOperands().get(0), + segmentPartitionedColumns); + } + } + Expression havingExpression = pinotQuery.getHavingExpression(); + if (havingExpression != null) { + handleSegmentPartitionedDistinctCountOverride(havingExpression, segmentPartitionedColumns); + } + } + + /** + * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given SQL expression. + */ + private static void handleSegmentPartitionedDistinctCountOverride(Expression expression, + Set<String> segmentPartitionedColumns) { + Function function = expression.getFunctionCall(); + if (function == null) { + return; + } + if (StringUtils.remove(function.getOperator(), '_') + .equalsIgnoreCase(AggregationFunctionType.DISTINCTCOUNT.name())) { + final Set<String> identifiers = + CalciteSqlParser.extractIdentifiers(expression.getFunctionCall().getOperands(), true); Review comment: (major) This is incorrect. We should not extract all the identifiers, but only check the single top-level identifier. 
There is no guarantee that the output of a UDF is still segment partitioned. ########## File path: pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java ########## @@ -1026,6 +1029,39 @@ private String getActualTableName(String tableName) { return tableName; } + private static Set<String> getSegmentPartitionedColumns(TableCache tableCache, String tableName) { Review comment: Add some Javadoc on why we should intersect the columns? ########## File path: pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java ########## @@ -1155,6 +1218,59 @@ private static void handleDistinctCountBitmapOverride(BrokerRequest brokerReques } } + /** + * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given SQL query. + */ + @VisibleForTesting + static void handleSegmentPartitionedDistinctCountOverride(PinotQuery pinotQuery, + Set<String> segmentPartitionedColumns) { + if (segmentPartitionedColumns.isEmpty()) { + return; + } + for (Expression expression : pinotQuery.getSelectList()) { + handleSegmentPartitionedDistinctCountOverride(expression, segmentPartitionedColumns); + } + List<Expression> orderByExpressions = pinotQuery.getOrderByList(); + if (orderByExpressions != null) { + for (Expression expression : orderByExpressions) { + // NOTE: Order-by is always a Function with the ordering of the Expression + handleSegmentPartitionedDistinctCountOverride(expression.getFunctionCall().getOperands().get(0), + segmentPartitionedColumns); + } + } + Expression havingExpression = pinotQuery.getHavingExpression(); + if (havingExpression != null) { + handleSegmentPartitionedDistinctCountOverride(havingExpression, segmentPartitionedColumns); + } + } + + /** + * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given SQL expression. 
+ */ + private static void handleSegmentPartitionedDistinctCountOverride(Expression expression, + Set<String> segmentPartitionedColumns) { + Function function = expression.getFunctionCall(); + if (function == null) { + return; + } + if (StringUtils.remove(function.getOperator(), '_') + .equalsIgnoreCase(AggregationFunctionType.DISTINCTCOUNT.name())) { + final Set<String> identifiers = + expression.getFunctionCall().getOperands().stream().filter(expr -> expr.isSetIdentifier()) + .map(expr -> expr.getIdentifier().getName()).collect(Collectors.toUnmodifiableSet()); Review comment: Suggest checking if there is a single identifier to avoid unexpected rewrite ```suggestion List<Expression> operands = function.getOperands(); if (operands.size() == 1 && operands.get(0).getType() == IDENTIFIER && segmentPartitionedColumns.contains(operands.get(0).getIdentifier().getName())) { function.setOperator(AggregationFunctionType.SEGMENTPARTITIONEDDISTINCTCOUNT.name()); } ``` ########## File path: pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java ########## @@ -1139,6 +1176,32 @@ static void handleQueryLimitOverride(PinotQuery pinotQuery, int queryLimit) { } } + /** + * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given PQL broker request. 
+ */ + @Deprecated + @VisibleForTesting + static void handleSegmentPartitionedDistinctCountOverride(BrokerRequest brokerRequest, + Set<String> segmentPartitionedColumns) { + if (segmentPartitionedColumns.isEmpty()) { + return; + } + List<AggregationInfo> aggregationsInfo = brokerRequest.getAggregationsInfo(); + if (aggregationsInfo != null) { + for (AggregationInfo aggregationInfo : aggregationsInfo) { + if (StringUtils.remove(aggregationInfo.getAggregationType(), '_') + .equalsIgnoreCase(AggregationFunctionType.DISTINCTCOUNT.name())) { + for (String expr : aggregationInfo.getExpressions()) { Review comment: Suggest checking if there is a single identifier to avoid unexpected rewrite ```suggestion List<String> expressions = aggregationInfo.getExpressions(); if (expressions.size() == 1 && segmentPartitionedColumns.contains(expressions.get(0))) { aggregationInfo.setAggregationType(AggregationFunctionType.SEGMENTPARTITIONEDDISTINCTCOUNT.name()); } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org