This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 6d4ba9eb382 [fix](maxcompute)fix maxcompute partition column schema order (#48325) 6d4ba9eb382 is described below commit 6d4ba9eb3826ca1474a2284e8a8f50011d11d076 Author: daidai <changyu...@selectdb.com> AuthorDate: Wed Feb 26 17:30:12 2025 +0800 [fix](maxcompute)fix maxcompute partition column schema order (#48325) ### What problem does this PR solve? Problem Summary: ```sql create table in MaxCompute : CREATE TABLE `mc_parts` (`id` STRING COMMENT 'Id') PARTITIONED BY ( `ds` STRING , `audit_flag` STRING ); query in doris : before: mysql> desc mc_parts; +------------+------+------+------+---------+-------+ | Field | Type | Null | Key | Default | Extra | +------------+------+------+------+---------+-------+ | id | text | Yes | true | NULL | | | audit_flag | text | Yes | true | NULL | | | ds | text | Yes | true | NULL | | +------------+------+------+------+---------+-------+ mysql> select * from mc_parts; ERROR 1105 (HY000): errCode = 2, detailMessage = java.io.IOException: ODPS-0420061: Invalid parameter in HTTP request - The requested columns should be in order with table schema The reason is that the ds column must come before audit_flag, i.e. the partition columns must keep the order in which they are declared in the MaxCompute table; the same order is also required for partition pruning (e.g. ds=2024-01-01/audit_flag=Y). 
after: mysql> desc mc_parts; +------------+------+------+------+---------+-------+ | Field | Type | Null | Key | Default | Extra | +------------+------+------+------+---------+-------+ | id | text | Yes | true | NULL | | | ds | text | Yes | true | NULL | | | audit_flag | text | Yes | true | NULL | | +------------+------+------+------+---------+-------+ ``` --- .../maxcompute/MaxComputeExternalTable.java | 37 ++++++++++----------- .../maxcompute/MaxComputeSchemaCacheValue.java | 21 ++++++------ .../test_external_catalog_maxcompute.out | Bin 3747 -> 4745 bytes .../test_external_catalog_maxcompute.groovy | 26 +++++++++++++-- 4 files changed, 52 insertions(+), 32 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java index 96eeb76684e..7f626655442 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java @@ -128,11 +128,11 @@ public class MaxComputeExternalTable extends ExternalTable { private TablePartitionValues loadPartitionValues(MaxComputeSchemaCacheValue schemaCacheValue) { List<String> partitionSpecs = schemaCacheValue.getPartitionSpecs(); List<Type> partitionTypes = schemaCacheValue.getPartitionTypes(); + List<String> partitionColumnNames = schemaCacheValue.getPartitionColumnNames(); TablePartitionValues partitionValues = new TablePartitionValues(); partitionValues.addPartitions(partitionSpecs, partitionSpecs.stream() - .map(p -> parsePartitionValues(getPartitionColumns().stream().map(c -> c.getName()).collect( - Collectors.toList()), p)) + .map(p -> parsePartitionValues(partitionColumnNames, p)) .collect(Collectors.toList()), partitionTypes); return partitionValues; @@ -187,9 +187,21 @@ public class MaxComputeExternalTable extends ExternalTable { } 
List<com.aliyun.odps.Column> partitionColumns = odpsTable.getSchema().getPartitionColumns(); + List<String> partitionColumnNames = new ArrayList<>(partitionColumns.size()); + List<Type> partitionTypes = new ArrayList<>(partitionColumns.size()); - for (com.aliyun.odps.Column partitionColumn : partitionColumns) { - columnNameToOdpsColumn.put(partitionColumn.getName(), partitionColumn); + // sort partition columns to align partitionTypes and partitionName. + List<Column> partitionDorisColumns = new ArrayList<>(); + for (com.aliyun.odps.Column partColumn : partitionColumns) { + Type partitionType = mcTypeToDorisType(partColumn.getTypeInfo()); + Column dorisCol = new Column(partColumn.getName(), partitionType, true, null, + true, partColumn.getComment(), true, -1); + + columnNameToOdpsColumn.put(partColumn.getName(), partColumn); + partitionColumnNames.add(partColumn.getName()); + partitionDorisColumns.add(dorisCol); + partitionTypes.add(partitionType); + schema.add(dorisCol); } List<String> partitionSpecs; @@ -200,22 +212,9 @@ public class MaxComputeExternalTable extends ExternalTable { } else { partitionSpecs = ImmutableList.of(); } - // sort partition columns to align partitionTypes and partitionName. 
- Map<String, Column> partitionNameToColumns = Maps.newHashMap(); - for (com.aliyun.odps.Column partColumn : partitionColumns) { - Column dorisCol = new Column(partColumn.getName(), - mcTypeToDorisType(partColumn.getTypeInfo()), true, null, - true, partColumn.getComment(), true, -1); - partitionNameToColumns.put(dorisCol.getName(), dorisCol); - } - List<Type> partitionTypes = partitionNameToColumns.values() - .stream() - .map(Column::getType) - .collect(Collectors.toList()); - schema.addAll(partitionNameToColumns.values()); - return Optional.of(new MaxComputeSchemaCacheValue(schema, odpsTable, partitionSpecs, partitionNameToColumns, - partitionTypes)); + return Optional.of(new MaxComputeSchemaCacheValue(schema, odpsTable, partitionColumnNames, + partitionSpecs, partitionDorisColumns, partitionTypes)); } private Type mcTypeToDorisType(TypeInfo typeInfo) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java index b8337d96120..0d0fb69a3e4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java @@ -22,36 +22,35 @@ import org.apache.doris.catalog.Type; import org.apache.doris.datasource.SchemaCacheValue; import com.aliyun.odps.Table; -import com.google.common.collect.Lists; import lombok.Getter; import lombok.Setter; import java.util.List; -import java.util.Map; -import java.util.Set; @Getter @Setter public class MaxComputeSchemaCacheValue extends SchemaCacheValue { private Table odpsTable; + private List<String> partitionColumnNames; private List<String> partitionSpecs; - private Map<String, Column> partitionNameToColumns; + private List<Column> partitionColumns; private List<Type> partitionTypes; - public MaxComputeSchemaCacheValue(List<Column> schema, Table 
odpsTable, List<String> partitionSpecs, - Map<String, Column> partitionNameToColumns, List<Type> partitionTypes) { + public MaxComputeSchemaCacheValue(List<Column> schema, Table odpsTable, List<String> partitionColumnNames, + List<String> partitionSpecs, List<Column> partitionColumns, List<Type> partitionTypes) { super(schema); this.odpsTable = odpsTable; this.partitionSpecs = partitionSpecs; - this.partitionNameToColumns = partitionNameToColumns; + this.partitionColumnNames = partitionColumnNames; + this.partitionColumns = partitionColumns; this.partitionTypes = partitionTypes; } - public Set<String> getPartitionColNames() { - return partitionNameToColumns.keySet(); + public List<Column> getPartitionColumns() { + return partitionColumns; } - public List<Column> getPartitionColumns() { - return Lists.newArrayList(partitionNameToColumns.values()); + public List<String> getPartitionColumnNames() { + return partitionColumnNames; } } diff --git a/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out b/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out index 63677f14720..c388e4eddf6 100644 Binary files a/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out and b/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out differ diff --git a/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy b/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy index 81133270fb6..6843a586d86 100644 --- a/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy +++ b/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy @@ -314,8 +314,17 @@ INSERT INTO `other_db_mc_parts` PARTITION (dt='e') VALUES (1005, 'Sample data 5'); - - + CREATE TABLE `mc_parts2` ( + `id` STRING COMMENT 'Id' + ) + PARTITIONED BY ( + `ds` 
STRING , + `audit_flag` STRING + ); + INSERT INTO TABLE `mc_parts2` PARTITION (`ds`='2024-01-01', `audit_flag`='Y') values ('1'); + INSERT INTO TABLE `mc_parts2` PARTITION (`ds`='2024-04-04', `audit_flag`='N') values ('2'); + INSERT INTO TABLE `mc_parts2` PARTITION (`ds`='2027-01-09', `audit_flag`='Y') values ('3'); + INSERT INTO TABLE `mc_parts2` PARTITION (`ds`='2024-01-01', `audit_flag`='N') values ('4'); */ suite("test_external_catalog_maxcompute", "p2,external,maxcompute,external_remote,external_remote_maxcompute") { String enabled = context.config.otherConfigs.get("enableMaxComputeTest") @@ -353,6 +362,7 @@ suite("test_external_catalog_maxcompute", "p2,external,maxcompute,external_remot order_qt_q5 """ select * from mc_parts where dt > '2023-08-03' order by mc_bigint """ order_qt_q6 """ select * from mc_parts where dt > '2023-08-03' and mc_bigint > 1002 """ order_qt_q7 """ select * from mc_parts where dt < '2023-08-03' or (mc_bigint > 1003 and dt > '2023-08-04') order by mc_bigint, dt; """ + qt_q8 """ desc mc_parts """ } sql """ switch `${mc_catalog_name}`; """ @@ -380,6 +390,8 @@ suite("test_external_catalog_maxcompute", "p2,external,maxcompute,external_remot sql """ refresh catalog ${mc_catalog_name} """ sql """ switch `${mc_catalog_name}`; """ sql """ use `${mc_db}`; """ + + qt_multi_partition_q1 """ desc multi_partitions """ order_qt_multi_partition_q1 """ show partitions from multi_partitions; """ order_qt_multi_partition_q2 """ select pt, create_time, yy, mm, dd from multi_partitions where pt>-1 and yy > '' and mm > '' and dd >'' order by pt , dd; """ order_qt_multi_partition_q3 """ select sum(pt), create_time, yy, mm, dd from multi_partitions where yy > '' and mm > '' and dd >'' group by create_time, yy, mm, dd order by create_time,dd ; """ @@ -425,5 +437,15 @@ suite("test_external_catalog_maxcompute", "p2,external,maxcompute,external_remot order_qt_show_partition """ show partitions from mc_parts """ + order_qt_part2_q1 """ select * from mc_parts2 """ 
+ order_qt_part2_q2 """ SELECT * FROM `mc_parts2` WHERE `ds` = '2024-01-01' AND `audit_flag` = 'Y'; """ + order_qt_part2_q3 """ SELECT * FROM `mc_parts2` WHERE `audit_flag` = 'Y';""" + order_qt_part2_q4 """ SELECT * FROM `mc_parts2` WHERE `ds` BETWEEN '2024-01-01' AND '2027-01-01';""" + order_qt_part2_q5 """ SELECT ds FROM `mc_parts2` WHERE `ds` != '2027-01-09';""" + order_qt_part2_q6 """ SELECT ds,audit_flag,id FROM `mc_parts2` WHERE `ds` != '2027-01-09';""" + order_qt_part2_q7 """ SELECT audit_flag,ds,ds,id,id,id FROM `mc_parts2`;""" + order_qt_part2_q8 """ SELECT audit_flag FROM `mc_parts2` WHERE `ds` != '2027-01-09';""" + qt_part2_q9 """ desc mc_parts2 """ + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org