This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 6d4ba9eb382 [fix](maxcompute)fix maxcompute partition column schema 
order (#48325)
6d4ba9eb382 is described below

commit 6d4ba9eb3826ca1474a2284e8a8f50011d11d076
Author: daidai <changyu...@selectdb.com>
AuthorDate: Wed Feb 26 17:30:12 2025 +0800

    [fix](maxcompute)fix maxcompute partition column schema order (#48325)
    
    ### What problem does this PR solve?
    Problem Summary:
    ```sql
    Create the table in MaxCompute:
    CREATE TABLE `mc_parts` (`id` STRING COMMENT 'Id')
    PARTITIONED BY (
            `ds` STRING ,
            `audit_flag` STRING );
    
    Query in Doris:
    before:
    mysql> desc mc_parts;
    +------------+------+------+------+---------+-------+
    | Field      | Type | Null | Key  | Default | Extra |
    +------------+------+------+------+---------+-------+
    | id         | text | Yes  | true | NULL    |       |
    | audit_flag | text | Yes  | true | NULL    |       |
    | ds         | text | Yes  | true | NULL    |       |
    +------------+------+------+------+---------+-------+
    
    mysql> select * from mc_parts;
    ERROR 1105 (HY000): errCode = 2, detailMessage = java.io.IOException: 
ODPS-0420061: Invalid parameter in HTTP request - The requested columns should 
be in order with table schema
    
    The reason is that the ds column must come before audit_flag, matching 
the order in which the partition columns were declared; the same order is 
also required for partition pruning (e.g. ds=2024-01-01/audit_flag=Y).
    
    after:
    mysql> desc mc_parts;
    +------------+------+------+------+---------+-------+
    | Field      | Type | Null | Key  | Default | Extra |
    +------------+------+------+------+---------+-------+
    | id         | text | Yes  | true | NULL    |       |
    | ds         | text | Yes  | true | NULL    |       |
    | audit_flag | text | Yes  | true | NULL    |       |
    +------------+------+------+------+---------+-------+
    ```
---
 .../maxcompute/MaxComputeExternalTable.java        |  37 ++++++++++-----------
 .../maxcompute/MaxComputeSchemaCacheValue.java     |  21 ++++++------
 .../test_external_catalog_maxcompute.out           | Bin 3747 -> 4745 bytes
 .../test_external_catalog_maxcompute.groovy        |  26 +++++++++++++--
 4 files changed, 52 insertions(+), 32 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
index 96eeb76684e..7f626655442 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
@@ -128,11 +128,11 @@ public class MaxComputeExternalTable extends 
ExternalTable {
     private TablePartitionValues 
loadPartitionValues(MaxComputeSchemaCacheValue schemaCacheValue) {
         List<String> partitionSpecs = schemaCacheValue.getPartitionSpecs();
         List<Type> partitionTypes = schemaCacheValue.getPartitionTypes();
+        List<String> partitionColumnNames = 
schemaCacheValue.getPartitionColumnNames();
         TablePartitionValues partitionValues = new TablePartitionValues();
         partitionValues.addPartitions(partitionSpecs,
                 partitionSpecs.stream()
-                        .map(p -> 
parsePartitionValues(getPartitionColumns().stream().map(c -> 
c.getName()).collect(
-                                Collectors.toList()), p))
+                        .map(p -> parsePartitionValues(partitionColumnNames, 
p))
                         .collect(Collectors.toList()),
                 partitionTypes);
         return partitionValues;
@@ -187,9 +187,21 @@ public class MaxComputeExternalTable extends ExternalTable 
{
         }
 
         List<com.aliyun.odps.Column> partitionColumns = 
odpsTable.getSchema().getPartitionColumns();
+        List<String> partitionColumnNames = new 
ArrayList<>(partitionColumns.size());
+        List<Type> partitionTypes = new ArrayList<>(partitionColumns.size());
 
-        for (com.aliyun.odps.Column partitionColumn : partitionColumns) {
-            columnNameToOdpsColumn.put(partitionColumn.getName(), 
partitionColumn);
+        // Keep partition columns in ODPS schema order so names, types and columns stay aligned.
+        List<Column> partitionDorisColumns = new ArrayList<>();
+        for (com.aliyun.odps.Column partColumn : partitionColumns) {
+            Type partitionType = mcTypeToDorisType(partColumn.getTypeInfo());
+            Column dorisCol = new Column(partColumn.getName(), partitionType, 
true, null,
+                    true, partColumn.getComment(), true, -1);
+
+            columnNameToOdpsColumn.put(partColumn.getName(), partColumn);
+            partitionColumnNames.add(partColumn.getName());
+            partitionDorisColumns.add(dorisCol);
+            partitionTypes.add(partitionType);
+            schema.add(dorisCol);
         }
 
         List<String> partitionSpecs;
@@ -200,22 +212,9 @@ public class MaxComputeExternalTable extends ExternalTable 
{
         } else {
             partitionSpecs = ImmutableList.of();
         }
-        // sort partition columns to align partitionTypes and partitionName.
-        Map<String, Column> partitionNameToColumns = Maps.newHashMap();
-        for (com.aliyun.odps.Column partColumn : partitionColumns) {
-            Column dorisCol = new Column(partColumn.getName(),
-                    mcTypeToDorisType(partColumn.getTypeInfo()), true, null,
-                    true, partColumn.getComment(), true, -1);
-            partitionNameToColumns.put(dorisCol.getName(), dorisCol);
-        }
-        List<Type> partitionTypes = partitionNameToColumns.values()
-                .stream()
-                .map(Column::getType)
-                .collect(Collectors.toList());
 
-        schema.addAll(partitionNameToColumns.values());
-        return Optional.of(new MaxComputeSchemaCacheValue(schema, odpsTable, 
partitionSpecs, partitionNameToColumns,
-                partitionTypes));
+        return Optional.of(new MaxComputeSchemaCacheValue(schema, odpsTable, 
partitionColumnNames,
+                partitionSpecs, partitionDorisColumns, partitionTypes));
     }
 
     private Type mcTypeToDorisType(TypeInfo typeInfo) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java
index b8337d96120..0d0fb69a3e4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java
@@ -22,36 +22,35 @@ import org.apache.doris.catalog.Type;
 import org.apache.doris.datasource.SchemaCacheValue;
 
 import com.aliyun.odps.Table;
-import com.google.common.collect.Lists;
 import lombok.Getter;
 import lombok.Setter;
 
 import java.util.List;
-import java.util.Map;
-import java.util.Set;
 
 @Getter
 @Setter
 public class MaxComputeSchemaCacheValue extends SchemaCacheValue {
     private Table odpsTable;
+    private List<String> partitionColumnNames;
     private List<String> partitionSpecs;
-    private Map<String, Column> partitionNameToColumns;
+    private List<Column> partitionColumns;
     private List<Type> partitionTypes;
 
-    public MaxComputeSchemaCacheValue(List<Column> schema, Table odpsTable, 
List<String> partitionSpecs,
-            Map<String, Column> partitionNameToColumns, List<Type> 
partitionTypes) {
+    public MaxComputeSchemaCacheValue(List<Column> schema, Table odpsTable, 
List<String> partitionColumnNames,
+            List<String> partitionSpecs, List<Column> partitionColumns, 
List<Type> partitionTypes) {
         super(schema);
         this.odpsTable = odpsTable;
         this.partitionSpecs = partitionSpecs;
-        this.partitionNameToColumns = partitionNameToColumns;
+        this.partitionColumnNames = partitionColumnNames;
+        this.partitionColumns = partitionColumns;
         this.partitionTypes = partitionTypes;
     }
 
-    public Set<String> getPartitionColNames() {
-        return partitionNameToColumns.keySet();
+    public List<Column> getPartitionColumns() {
+        return partitionColumns;
     }
 
-    public List<Column> getPartitionColumns() {
-        return Lists.newArrayList(partitionNameToColumns.values());
+    public List<String> getPartitionColumnNames() {
+        return partitionColumnNames;
     }
 }
diff --git 
a/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out
 
b/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out
index 63677f14720..c388e4eddf6 100644
Binary files 
a/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out
 and 
b/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out
 differ
diff --git 
a/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy
 
b/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy
index 81133270fb6..6843a586d86 100644
--- 
a/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy
+++ 
b/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy
@@ -314,8 +314,17 @@
     INSERT INTO `other_db_mc_parts` PARTITION (dt='e') VALUES
     (1005, 'Sample data 5');
 
-
-
+  CREATE TABLE `mc_parts2` (
+    `id` STRING COMMENT 'Id'
+  )
+  PARTITIONED BY (
+    `ds` STRING ,
+    `audit_flag` STRING 
+  );
+  INSERT INTO TABLE `mc_parts2`  PARTITION (`ds`='2024-01-01', 
`audit_flag`='Y') values ('1');
+  INSERT INTO TABLE `mc_parts2`  PARTITION (`ds`='2024-04-04', 
`audit_flag`='N') values ('2');
+  INSERT INTO TABLE `mc_parts2`  PARTITION (`ds`='2027-01-09', 
`audit_flag`='Y') values ('3');
+  INSERT INTO TABLE `mc_parts2`  PARTITION (`ds`='2024-01-01', 
`audit_flag`='N') values ('4');
  */
 suite("test_external_catalog_maxcompute", 
"p2,external,maxcompute,external_remote,external_remote_maxcompute") {
     String enabled = context.config.otherConfigs.get("enableMaxComputeTest")
@@ -353,6 +362,7 @@ suite("test_external_catalog_maxcompute", 
"p2,external,maxcompute,external_remot
             order_qt_q5 """ select * from mc_parts where dt > '2023-08-03' 
order by mc_bigint """
             order_qt_q6 """ select * from mc_parts where dt > '2023-08-03' and 
mc_bigint > 1002 """
             order_qt_q7 """ select * from mc_parts where dt < '2023-08-03' or 
(mc_bigint > 1003 and dt > '2023-08-04') order by mc_bigint, dt; """
+            qt_q8 """ desc mc_parts """
         }
 
         sql """ switch `${mc_catalog_name}`; """
@@ -380,6 +390,8 @@ suite("test_external_catalog_maxcompute", 
"p2,external,maxcompute,external_remot
         sql """ refresh catalog ${mc_catalog_name} """
         sql """ switch `${mc_catalog_name}`; """
         sql """ use `${mc_db}`; """
+
+        qt_multi_partition_q1 """ desc multi_partitions """
         order_qt_multi_partition_q1 """ show partitions from multi_partitions; 
"""
         order_qt_multi_partition_q2 """ select pt, create_time, yy, mm, dd 
from multi_partitions where pt>-1 and yy > '' and mm > '' and dd >'' order by 
pt , dd; """
         order_qt_multi_partition_q3 """ select sum(pt), create_time, yy, mm, 
dd from multi_partitions where yy > '' and mm > '' and dd >'' group by 
create_time, yy, mm, dd order by create_time,dd ; """
@@ -425,5 +437,15 @@ suite("test_external_catalog_maxcompute", 
"p2,external,maxcompute,external_remot
         order_qt_show_partition """ show partitions from  mc_parts """ 
         
 
+        order_qt_part2_q1 """ select * from mc_parts2 """
+        order_qt_part2_q2 """ SELECT * FROM `mc_parts2` WHERE `ds` = 
'2024-01-01' AND `audit_flag` = 'Y'; """ 
+        order_qt_part2_q3 """ SELECT * FROM `mc_parts2` WHERE `audit_flag` = 
'Y';""" 
+        order_qt_part2_q4 """ SELECT * FROM `mc_parts2` WHERE `ds` BETWEEN 
'2024-01-01' AND '2027-01-01';""" 
+        order_qt_part2_q5 """ SELECT ds FROM `mc_parts2` WHERE `ds` != 
'2027-01-09';""" 
+        order_qt_part2_q6 """ SELECT ds,audit_flag,id FROM `mc_parts2` WHERE 
`ds` != '2027-01-09';""" 
+        order_qt_part2_q7 """ SELECT audit_flag,ds,ds,id,id,id FROM 
`mc_parts2`;""" 
+        order_qt_part2_q8 """ SELECT audit_flag FROM `mc_parts2` WHERE `ds` != 
'2027-01-09';""" 
+        qt_part2_q9 """ desc  mc_parts2 """
+
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to