imay commented on a change in pull request #1709: Make the max recursion depth
of distribution pruner configurable
URL: https://github.com/apache/incubator-doris/pull/1709#discussion_r317951999
##########
File path:
fe/src/main/java/org/apache/doris/planner/HashDistributionPruner.java
##########
@@ -34,43 +35,55 @@
import java.util.Map;
import java.util.Set;
+/*
+ * Prune the distribution by distribution columns' predicate, recursively.
+ * It only supports binary equal predicate and in predicate with AND
combination.
+ * For example:
+ * where a = 1 and b in (2,3,4) and c in (5,6,7)
+ * a/b/c are distribution columns
+ *
+ * the config 'max_distribution_pruner_recursion_depth' will limit the max
recursion depth of pruning.
+ * the recursion depth is calculated as the product of the element counts of all
predicates.
+ * The above example's depth is 9(= 1 * 3 * 3)
+ *
+ * If depth is larger than 'max_distribution_pruner_recursion_depth', all
buckets will be returned without pruning.
+ */
public class HashDistributionPruner implements DistributionPruner {
private static final Logger LOG =
LogManager.getLogger(HashDistributionPruner.class);
// partition list, sort by the hash code
- private List<Long> partitionList;
+ private List<Long> bucketsList;
// partition columns
- private List<Column> partitionColumns;
+ private List<Column> distributionColumns;
// partition column filters
- private Map<String, PartitionColumnFilter> partitionColumnFilters;
+ private Map<String, PartitionColumnFilter> distributionColumnFilters;
private int hashMod;
- HashDistributionPruner(List<Long> partitions, List<Column> columns,
+ HashDistributionPruner(List<Long> bucketsList, List<Column> columns,
Map<String, PartitionColumnFilter> filters, int
hashMod) {
- this.partitionList = partitions;
- this.partitionColumns = columns;
- this.partitionColumnFilters = filters;
+ this.bucketsList = bucketsList;
+ this.distributionColumns = columns;
+ this.distributionColumnFilters = filters;
this.hashMod = hashMod;
}
// columnId: which column to compute
// hashKey: the key which to compute hash value
public Collection<Long> prune(int columnId, PartitionKey hashKey, int
complex) {
- if (columnId == partitionColumns.size()) {
+ if (columnId == distributionColumns.size()) {
// compute Hash Key
long hashValue = hashKey.getHashValue();
- return Lists.newArrayList(
- partitionList.get((int) ((hashValue & 0xffffffff) %
hashMod)));
+ return Lists.newArrayList(bucketsList.get((int) ((hashValue &
0xffffffff) % hashMod)));
}
- Column keyColumn = partitionColumns.get(columnId);
- PartitionColumnFilter filter =
partitionColumnFilters.get(keyColumn.getName());
+ Column keyColumn = distributionColumns.get(columnId);
+ PartitionColumnFilter filter =
distributionColumnFilters.get(keyColumn.getName());
if (null == filter) {
// no filter in this column, no partition Key
// return all subPartition
- return Lists.newArrayList(partitionList);
+ return Lists.newArrayList(bucketsList);
}
InPredicate inPredicate = filter.getInPredicate();
- if (null == inPredicate || inPredicate.getChildren().size() * complex
> 100) {
+ if (null == inPredicate || inPredicate.getChildren().size() * complex
> Config.max_distribution_pruner_recursion_depth) {
Review comment:
Use `children().size() - 1` here instead of `children().size()`.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]