Jackie-Jiang commented on a change in pull request #7664:
URL: https://github.com/apache/pinot/pull/7664#discussion_r740447764



##########
File path: 
pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
##########
@@ -1026,6 +1029,39 @@ private String getActualTableName(String tableName) {
     return tableName;
   }
 
+  private static Set<String> getSegmentPartitionedColumns(TableCache 
tableCache, String tableName) {
+    final TableConfig offlineTableConfig =
+        
tableCache.getTableConfig(TableNameBuilder.OFFLINE.tableNameWithType(tableName));
+    final TableConfig realtimeTableConfig =
+        
tableCache.getTableConfig(TableNameBuilder.REALTIME.tableNameWithType(tableName));
+    if (offlineTableConfig == null) {
+      return getSegmentPartitionedColumns(realtimeTableConfig);
+    }
+    if (realtimeTableConfig == null) {
+      return getSegmentPartitionedColumns(offlineTableConfig);
+    }
+    Set<String> segmentPartitionedColumns = 
getSegmentPartitionedColumns(offlineTableConfig);
+    
segmentPartitionedColumns.retainAll(getSegmentPartitionedColumns(realtimeTableConfig));
+    return segmentPartitionedColumns;
+  }
+
+  private static Set<String> getSegmentPartitionedColumns(TableConfig 
tableConfig) {

Review comment:
       (minor)
   ```suggestion
     private static Set<String> getSegmentPartitionedColumns(@Nullable 
TableConfig tableConfig) {
   ```

##########
File path: 
pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
##########
@@ -1026,6 +1029,39 @@ private String getActualTableName(String tableName) {
     return tableName;
   }
 
+  private static Set<String> getSegmentPartitionedColumns(TableCache 
tableCache, String tableName) {
+    final TableConfig offlineTableConfig =
+        
tableCache.getTableConfig(TableNameBuilder.OFFLINE.tableNameWithType(tableName));
+    final TableConfig realtimeTableConfig =
+        
tableCache.getTableConfig(TableNameBuilder.REALTIME.tableNameWithType(tableName));
+    if (offlineTableConfig == null) {
+      return getSegmentPartitionedColumns(realtimeTableConfig);
+    }
+    if (realtimeTableConfig == null) {
+      return getSegmentPartitionedColumns(offlineTableConfig);
+    }
+    Set<String> segmentPartitionedColumns = 
getSegmentPartitionedColumns(offlineTableConfig);
+    
segmentPartitionedColumns.retainAll(getSegmentPartitionedColumns(realtimeTableConfig));
+    return segmentPartitionedColumns;
+  }
+
+  private static Set<String> getSegmentPartitionedColumns(TableConfig 
tableConfig) {
+    Set<String> segmentPartitionedColumns = new HashSet<>();
+    if (tableConfig == null) {
+      return segmentPartitionedColumns;
+    }
+    List<FieldConfig> fieldConfigs = tableConfig.getFieldConfigList();
+    if (fieldConfigs != null) {
+      for (FieldConfig fieldConfig : fieldConfigs) {
+        if (fieldConfig.getProperties() != null && "true".equalsIgnoreCase(
+            
fieldConfig.getProperties().getOrDefault(FieldConfig.IS_SEGMENT_PARTITIONED_COLUMN_KEY,
 "false"))) {

Review comment:
       (minor)
   ```suggestion
           if (fieldConfig.getProperties() != null && Boolean.parseBoolean(
               
fieldConfig.getProperties().get(FieldConfig.IS_SEGMENT_PARTITIONED_COLUMN_KEY)))
 {
   ```

##########
File path: 
pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
##########
@@ -1155,6 +1217,58 @@ private static void 
handleDistinctCountBitmapOverride(BrokerRequest brokerReques
     }
   }
 
+  /**
+   * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given 
SQL query.
+   */
+  @VisibleForTesting
+  static void handleSegmentPartitionedDistinctCountOverride(PinotQuery 
pinotQuery,
+      Set<String> segmentPartitionedColumns) {
+    if (segmentPartitionedColumns.isEmpty()) {
+      return;
+    }
+    for (Expression expression : pinotQuery.getSelectList()) {
+      handleSegmentPartitionedDistinctCountOverride(expression, 
segmentPartitionedColumns);
+    }
+    List<Expression> orderByExpressions = pinotQuery.getOrderByList();
+    if (orderByExpressions != null) {
+      for (Expression expression : orderByExpressions) {
+        // NOTE: Order-by is always a Function with the ordering of the 
Expression
+        
handleSegmentPartitionedDistinctCountOverride(expression.getFunctionCall().getOperands().get(0),
+            segmentPartitionedColumns);
+      }
+    }
+    Expression havingExpression = pinotQuery.getHavingExpression();
+    if (havingExpression != null) {
+      handleSegmentPartitionedDistinctCountOverride(havingExpression, 
segmentPartitionedColumns);
+    }
+  }
+
+  /**
+   * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given 
SQL expression.
+   */
+  private static void handleSegmentPartitionedDistinctCountOverride(Expression 
expression,
+      Set<String> segmentPartitionedColumns) {
+    Function function = expression.getFunctionCall();
+    if (function == null) {
+      return;
+    }
+    if (StringUtils.remove(function.getOperator(), '_')
+        .equalsIgnoreCase(AggregationFunctionType.DISTINCTCOUNT.name())) {
+      final Set<String> identifiers =
+          
CalciteSqlParser.extractIdentifiers(expression.getFunctionCall().getOperands(), 
true);

Review comment:
       (major) This is incorrect. We should not extract all the identifiers, 
but only check the single top-level identifier. When the column is wrapped in 
a UDF, there is no guarantee that the output of the UDF is still segment 
partitioned.

##########
File path: 
pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
##########
@@ -1026,6 +1029,39 @@ private String getActualTableName(String tableName) {
     return tableName;
   }
 
+  private static Set<String> getSegmentPartitionedColumns(TableCache 
tableCache, String tableName) {

Review comment:
       Add some javadoc on why we should intersect the columns?

##########
File path: 
pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
##########
@@ -1155,6 +1218,59 @@ private static void 
handleDistinctCountBitmapOverride(BrokerRequest brokerReques
     }
   }
 
+  /**
+   * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given 
SQL query.
+   */
+  @VisibleForTesting
+  static void handleSegmentPartitionedDistinctCountOverride(PinotQuery 
pinotQuery,
+      Set<String> segmentPartitionedColumns) {
+    if (segmentPartitionedColumns.isEmpty()) {
+      return;
+    }
+    for (Expression expression : pinotQuery.getSelectList()) {
+      handleSegmentPartitionedDistinctCountOverride(expression, 
segmentPartitionedColumns);
+    }
+    List<Expression> orderByExpressions = pinotQuery.getOrderByList();
+    if (orderByExpressions != null) {
+      for (Expression expression : orderByExpressions) {
+        // NOTE: Order-by is always a Function with the ordering of the 
Expression
+        
handleSegmentPartitionedDistinctCountOverride(expression.getFunctionCall().getOperands().get(0),
+            segmentPartitionedColumns);
+      }
+    }
+    Expression havingExpression = pinotQuery.getHavingExpression();
+    if (havingExpression != null) {
+      handleSegmentPartitionedDistinctCountOverride(havingExpression, 
segmentPartitionedColumns);
+    }
+  }
+
+  /**
+   * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given 
SQL expression.
+   */
+  private static void handleSegmentPartitionedDistinctCountOverride(Expression 
expression,
+      Set<String> segmentPartitionedColumns) {
+    Function function = expression.getFunctionCall();
+    if (function == null) {
+      return;
+    }
+    if (StringUtils.remove(function.getOperator(), '_')
+        .equalsIgnoreCase(AggregationFunctionType.DISTINCTCOUNT.name())) {
+      final Set<String> identifiers =
+          expression.getFunctionCall().getOperands().stream().filter(expr -> 
expr.isSetIdentifier())
+              .map(expr -> 
expr.getIdentifier().getName()).collect(Collectors.toUnmodifiableSet());

Review comment:
       Suggest checking that the only operand is a single identifier, to avoid 
an unexpected rewrite
   ```suggestion
         List<Expression> operands = function.getOperands();
         if (operands.size() == 1 && operands.get(0).getType() == IDENTIFIER && 
segmentPartitionedColumns.contains(operands.get(0).getIdentifier().getName())) {
           
function.setOperator(AggregationFunctionType.SEGMENTPARTITIONEDDISTINCTCOUNT.name());
         }
   ```

##########
File path: 
pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
##########
@@ -1139,6 +1176,32 @@ static void handleQueryLimitOverride(PinotQuery 
pinotQuery, int queryLimit) {
     }
   }
 
+  /**
+   * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given 
PQL broker request.
+   */
+  @Deprecated
+  @VisibleForTesting
+  static void handleSegmentPartitionedDistinctCountOverride(BrokerRequest 
brokerRequest,
+      Set<String> segmentPartitionedColumns) {
+    if (segmentPartitionedColumns.isEmpty()) {
+      return;
+    }
+    List<AggregationInfo> aggregationsInfo = 
brokerRequest.getAggregationsInfo();
+    if (aggregationsInfo != null) {
+      for (AggregationInfo aggregationInfo : aggregationsInfo) {
+        if (StringUtils.remove(aggregationInfo.getAggregationType(), '_')
+            .equalsIgnoreCase(AggregationFunctionType.DISTINCTCOUNT.name())) {
+          for (String expr : aggregationInfo.getExpressions()) {

Review comment:
       Suggest checking that the only expression is a single identifier, to 
avoid an unexpected rewrite
   ```suggestion
             List<String> expressions = aggregationInfo.getExpressions();
             if (expressions.size() == 1 && 
segmentPartitionedColumns.contains(expressions.get(0))) {
               
aggregationInfo.setAggregationType(AggregationFunctionType.SEGMENTPARTITIONEDDISTINCTCOUNT.name());
             }
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to