Nitin-Kashyap commented on code in PR #27784:
URL: https://github.com/apache/doris/pull/27784#discussion_r1414247571


##########
fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java:
##########
@@ -438,9 +469,85 @@ public List<Column> initSchema() {
             columns = tmpSchema;
         }
         initPartitionColumns(columns);
+        initBucketingColumns(columns);
         return columns;
     }
 
+    private void initBucketingColumns(List<Column> columns) {
+        List<String> bucketCols = new ArrayList<>(5);
+        int numBuckets = getBucketColums(bucketCols);
+        if (bucketCols.isEmpty()) {
+            bucketColumns = ImmutableList.of();
+            distributionInfo = new RandomDistributionInfo(1, true);
+            return;
+        }
+
+        int bucketingVersion = Integer.valueOf(remoteTable.getParameters().getOrDefault(BUCKETING_VERSION,
+                "2"));
+        ImmutableList.Builder<Column> bucketColBuilder = ImmutableList.builder();
+        for (String colName : bucketCols) {
+            // do not use "getColum()", which will cause dead loop
+            for (Column column : columns) {
+                if (colName.equals(column.getName())) {
+                    // For partition column, if it is string type, change it to varchar(65535)
+                    // to be same as doris managed table.
+                    // This is to avoid some unexpected behavior such as different partition pruning result
+                    // between doris managed table and external table.
+                    if (column.getType().getPrimitiveType() == PrimitiveType.STRING) {
+                        column.setType(ScalarType.createVarcharType(ScalarType.MAX_VARCHAR_LENGTH));
+                    }
+                    bucketColBuilder.add(column);
+                    break;
+                }
+            }
+        }
+
+        bucketColumns = bucketColBuilder.build();
+        distributionInfo = new HiveExternalDistributionInfo(numBuckets, bucketColumns, bucketingVersion);
+        LOG.debug("get {} bucket columns for table: {}", bucketColumns.size(), name);
+    }
+
+    private int getBucketColums(List<String> bucketCols) {
+        StorageDescriptor descriptor = remoteTable.getSd();
+        int numBuckets = -1;
+        if (descriptor.isSetBucketCols() && !descriptor.getBucketCols().isEmpty()) {
+            /* Hive Bucketed Table */
+            bucketCols.addAll(descriptor.getBucketCols());
+            numBuckets = descriptor.getNumBuckets();
+        } else if (remoteTable.isSetParameters()
+                && !Collections.disjoint(SUPPORTED_BUCKET_PROPERTIES, remoteTable.getParameters().keySet())) {
+            Map<String, String> parameters = remoteTable.getParameters();
+            for (String key : SUPPORTED_BUCKET_PROPERTIES) {
+                if (parameters.containsKey(key)) {
+                    switch (key) {
+                        case SPARK_BUCKET + "0":
+                            bucketCols.add(0, parameters.get(key));
+                            break;
+                        case SPARK_BUCKET + "1":
+                            bucketCols.add(1, parameters.get(key));
+                            break;
+                        case SPARK_BUCKET + "2":
+                            bucketCols.add(2, parameters.get(key));
+                            break;
+                        case SPARK_BUCKET + "3":
+                            bucketCols.add(3, parameters.get(key));
+                            break;
+                        case SPARK_BUCKET + "4":

Review Comment:
   No, it can be more.
   I have reworked the code to handle this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to