morningman commented on code in PR #27784: URL: https://github.com/apache/doris/pull/27784#discussion_r1419083020
########## fe/fe-core/src/main/java/org/apache/doris/planner/DataPartition.java: ########## @@ -53,18 +54,24 @@ public class DataPartition { public static final DataPartition RANDOM = new DataPartition(TPartitionType.RANDOM); private final TPartitionType type; + private final THashType hashType; // for hash partition: exprs used to compute hash value private ImmutableList<Expr> partitionExprs = ImmutableList.of(); public DataPartition(TPartitionType type, List<Expr> exprs) { + this(type, exprs, THashType.XXHASH64); Review Comment: Previously, the default hash type was CRC32; I think we should not change it. ########## fe/fe-core/src/main/java/org/apache/doris/planner/DataPartition.java: ########## @@ -76,10 +83,15 @@ public DataPartition(TPartitionType type) { Preconditions.checkState(type == TPartitionType.UNPARTITIONED || type == TPartitionType.RANDOM); this.type = type; this.partitionExprs = ImmutableList.of(); + this.hashType = THashType.CRC32; + } + + public static DataPartition hashPartitioned(List<Expr> exprs, THashType hashType) { + return new DataPartition(TPartitionType.HASH_PARTITIONED, exprs, hashType); } public static DataPartition hashPartitioned(List<Expr> exprs) { - return new DataPartition(TPartitionType.HASH_PARTITIONED, exprs); + return new DataPartition(TPartitionType.HASH_PARTITIONED, exprs, THashType.XXHASH64); Review Comment: Previously, the default hash type was CRC32; I think we should not change it. ########## fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java: ########## @@ -423,4 +430,37 @@ protected TFileCompressType getFileCompressType(FileSplit fileSplit) throws User } return compressType; } + + @Override + public DataPartition constructInputPartitionByDistributionInfo() { + if (hmsTable.isBucketedTable()) { + DistributionInfo distributionInfo = hmsTable.getDefaultDistributionInfo(); + if (!(distributionInfo instanceof HashDistributionInfo)) { + return DataPartition.RANDOM; + } + List<Column> distributeColumns = 
((HiveExternalDistributionInfo) distributionInfo).getDistributionColumns(); + List<Expr> dataDistributeExprs = Lists.newArrayList(); + for (Column column : distributeColumns) { + SlotRef slotRef = new SlotRef(desc.getRef().getName(), column.getName()); + dataDistributeExprs.add(slotRef); + } + return DataPartition.hashPartitioned(dataDistributeExprs, THashType.SPARK_MURMUR32); + } + + return DataPartition.RANDOM; + } + + public HMSExternalTable getHiveTable() { + return hmsTable; + } + + @Override + public THashType getHashType() { + if (hmsTable.isBucketedTable() + && hmsTable.getDefaultDistributionInfo() instanceof HashDistributionInfo) { Review Comment: ok -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org