ylcn91 commented on code in PR #31476: URL: https://github.com/apache/doris/pull/31476#discussion_r1503982890
########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalPartitionTopNToPhysicalPartitionTopN.java: ########## @@ -99,6 +107,66 @@ private List<PhysicalPartitionTopN<? extends Plan>> generatePhysicalPartitionTop } } + /** + * check if partition keys' ndv is almost near the total row count. + * if yes, it is not suitable for two phase global partition topn. + */ + private boolean checkTwoPhaseGlobalPartitionTopn(LogicalPartitionTopN<? extends Plan> logicalPartitionTopN) { + logicalPartitionTopN.getGroupExpression().get().getOwnerGroup().getStatistics(); + int globalPartitionTopnThreshold = ConnectContext.get().getSessionVariable().getGlobalPartitionTopNThreshold(); + if (logicalPartitionTopN.getGroupExpression().isPresent()) { + Group group = logicalPartitionTopN.getGroupExpression().get().getOwnerGroup(); + if (group != null && group.getStatistics() != null) { + Statistics stats = group.getStatistics(); + double rowCount = stats.getRowCount(); + List<Expression> partitionKeys = logicalPartitionTopN.getPartitionKeys(); + if (!checkPartitionKeys(partitionKeys)) { + return false; + } + List<ColumnStatistic> partitionByKeyStats = partitionKeys.stream() + .map(partitionKey -> { + ColumnStatistic partitionKeyStats = stats.findColumnStatistics(partitionKey); Review Comment: maybe we can simplify: .map(partitionKey -> stats.findColumnStatistics(partitionKey)) ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalPartitionTopNToPhysicalPartitionTopN.java: ########## @@ -99,6 +107,66 @@ private List<PhysicalPartitionTopN<? extends Plan>> generatePhysicalPartitionTop } } + /** + * check if partition keys' ndv is almost near the total row count. + * if yes, it is not suitable for two phase global partition topn. + */ + private boolean checkTwoPhaseGlobalPartitionTopn(LogicalPartitionTopN<? extends Plan> logicalPartitionTopN) { + logicalPartitionTopN.getGroupExpression().get().getOwnerGroup().getStatistics(); + int globalPartitionTopnThreshold = ConnectContext.get().getSessionVariable().getGlobalPartitionTopNThreshold(); + if (logicalPartitionTopN.getGroupExpression().isPresent()) { + Group group = logicalPartitionTopN.getGroupExpression().get().getOwnerGroup(); + if (group != null && group.getStatistics() != null) { + Statistics stats = group.getStatistics(); + double rowCount = stats.getRowCount(); + List<Expression> partitionKeys = logicalPartitionTopN.getPartitionKeys(); + if (!checkPartitionKeys(partitionKeys)) { + return false; + } + List<ColumnStatistic> partitionByKeyStats = partitionKeys.stream() + .map(partitionKey -> { + ColumnStatistic partitionKeyStats = stats.findColumnStatistics(partitionKey); + if (partitionKeyStats == null || partitionKeyStats.isUnKnown) { + partitionKeyStats = null; + } + return partitionKeyStats; + }).collect(Collectors.toList()); + if (partitionByKeyStats.isEmpty() || partitionByKeyStats.contains(null)) { + return false; + } else { + double maxNdv = partitionByKeyStats.stream().map(s -> s.ndv) + .max(Double::compare).get(); + if (rowCount / maxNdv >= globalPartitionTopnThreshold) { + return true; Review Comment: we can directly return rowCount / maxNdv >= globalPartitionTopnThreshold ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalPartitionTopNToPhysicalPartitionTopN.java: ########## @@ -99,6 +107,66 @@ private List<PhysicalPartitionTopN<? extends Plan>> generatePhysicalPartitionTop } } + /** + * check if partition keys' ndv is almost near the total row count. + * if yes, it is not suitable for two phase global partition topn. + */ + private boolean checkTwoPhaseGlobalPartitionTopn(LogicalPartitionTopN<? extends Plan> logicalPartitionTopN) { + logicalPartitionTopN.getGroupExpression().get().getOwnerGroup().getStatistics(); + int globalPartitionTopnThreshold = ConnectContext.get().getSessionVariable().getGlobalPartitionTopNThreshold(); + if (logicalPartitionTopN.getGroupExpression().isPresent()) { + Group group = logicalPartitionTopN.getGroupExpression().get().getOwnerGroup(); + if (group != null && group.getStatistics() != null) { + Statistics stats = group.getStatistics(); + double rowCount = stats.getRowCount(); + List<Expression> partitionKeys = logicalPartitionTopN.getPartitionKeys(); + if (!checkPartitionKeys(partitionKeys)) { + return false; + } + List<ColumnStatistic> partitionByKeyStats = partitionKeys.stream() + .map(partitionKey -> { + ColumnStatistic partitionKeyStats = stats.findColumnStatistics(partitionKey); + if (partitionKeyStats == null || partitionKeyStats.isUnKnown) { Review Comment: and we can use filter instead of null check with if. .filter(Objects::nonNull) .filter(stat -> !stat.isUnKnown) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org