This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c334a01575a [fix](external) check duplicate column names for external table schema (#52315)
c334a01575a is described below

commit c334a01575afb307e0cb119982aedd2ff21d27a3
Author: Socrates <[email protected]>
AuthorDate: Mon Jul 7 09:09:46 2025 +0800

    [fix](external) check duplicate column names for external table schema (#52315)
    
    ### What problem does this PR solve?
    
    Problem Summary:
    Flink treats column names as case-sensitive, so it can create a Paimon
    table whose column names differ only by case, for example:
    ```sql
    create table dup_columns_table(id int, ID int);
    ```
    Doris, however, treats column names as case-insensitive, so these two
    columns clash and looking up the table fails with an error.
---
 ...ta-72d4d52a-eca9-4542-a2af-cc17499731e6-0.parquet | Bin 0 -> 504 bytes
 .../manifest-5cae1365-d123-4172-9a89-4fbc02bee658-0  | Bin 0 -> 1942 bytes
 ...ifest-list-391058a9-952c-4aa9-892f-df3334e4109b-0 | Bin 0 -> 884 bytes
 ...ifest-list-391058a9-952c-4aa9-892f-df3334e4109b-1 | Bin 0 -> 989 bytes
 .../paimon1/db1.db/dup_columns_table/schema/schema-0 |  19 +++++++++++++++++++
 .../db1.db/dup_columns_table/snapshot/EARLIEST       |   1 +
 .../paimon1/db1.db/dup_columns_table/snapshot/LATEST |   1 +
 .../db1.db/dup_columns_table/snapshot/snapshot-1     |  19 +++++++++++++++++++
 .../apache/doris/datasource/ExternalSchemaCache.java |   3 +++
 .../apache/doris/datasource/SchemaCacheValue.java    |  12 ++++++++++++
 .../paimon/test_paimon_catalog.groovy                |   6 ++++++
 11 files changed, 61 insertions(+)

diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/bucket-0/data-72d4d52a-eca9-4542-a2af-cc17499731e6-0.parquet
 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/bucket-0/data-72d4d52a-eca9-4542-a2af-cc17499731e6-0.parquet
new file mode 100644
index 00000000000..4ced9cc1688
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/bucket-0/data-72d4d52a-eca9-4542-a2af-cc17499731e6-0.parquet
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-5cae1365-d123-4172-9a89-4fbc02bee658-0
 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-5cae1365-d123-4172-9a89-4fbc02bee658-0
new file mode 100644
index 00000000000..0aade3101c0
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-5cae1365-d123-4172-9a89-4fbc02bee658-0
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-0
 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-0
new file mode 100644
index 00000000000..34bd4182196
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-0
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-1
 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-1
new file mode 100644
index 00000000000..b4d11e9dba4
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-1
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/schema/schema-0
 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/schema/schema-0
new file mode 100644
index 00000000000..4578d38066e
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/schema/schema-0
@@ -0,0 +1,19 @@
+{
+  "version" : 3,
+  "id" : 0,
+  "fields" : [ {
+    "id" : 0,
+    "name" : "id",
+    "type" : "INT"
+  }, {
+    "id" : 1,
+    "name" : "ID",
+    "type" : "INT"
+  } ],
+  "highestFieldId" : 1,
+  "partitionKeys" : [ ],
+  "primaryKeys" : [ ],
+  "options" : { },
+  "comment" : "",
+  "timeMillis" : 1750851313662
+}
\ No newline at end of file
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/EARLIEST
 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/EARLIEST
new file mode 100644
index 00000000000..56a6051ca2b
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/EARLIEST
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/LATEST
 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/LATEST
new file mode 100644
index 00000000000..56a6051ca2b
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/LATEST
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/snapshot-1
 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/snapshot-1
new file mode 100644
index 00000000000..14de268c592
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/snapshot-1
@@ -0,0 +1,19 @@
+{
+  "version" : 3,
+  "id" : 1,
+  "schemaId" : 0,
+  "baseManifestList" : "manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-0",
+  "baseManifestListSize" : 884,
+  "deltaManifestList" : "manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-1",
+  "deltaManifestListSize" : 989,
+  "changelogManifestList" : null,
+  "commitUser" : "67873442-3b91-4ce9-983c-ae4df219a769",
+  "commitIdentifier" : 9223372036854775807,
+  "commitKind" : "APPEND",
+  "timeMillis" : 1750851330423,
+  "logOffsets" : { },
+  "totalRecordCount" : 2,
+  "deltaRecordCount" : 2,
+  "changelogRecordCount" : 0,
+  "watermark" : -9223372036854775808
+}
\ No newline at end of file
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
index 73b96198ccf..85e6fe4e41f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
@@ -76,6 +76,9 @@ public class ExternalSchemaCache {
 
     private Optional<SchemaCacheValue> loadSchema(SchemaCacheKey key) {
         Optional<SchemaCacheValue> schema = catalog.getSchema(key);
+        if (schema.isPresent()) {
+            schema.get().validateSchema();
+        }
         if (LOG.isDebugEnabled()) {
             LOG.debug("load schema for {} in catalog {}", key, 
catalog.getName());
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/SchemaCacheValue.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/SchemaCacheValue.java
index b02b8bda840..4611a0d9816 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/SchemaCacheValue.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/SchemaCacheValue.java
@@ -19,7 +19,9 @@ package org.apache.doris.datasource;
 
 import org.apache.doris.catalog.Column;
 
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 /**
  * The cache value of ExternalSchemaCache.
@@ -37,4 +39,14 @@ public class SchemaCacheValue {
     public List<Column> getSchema() {
         return schema;
     }
+
+    public void validateSchema() throws IllegalArgumentException {
+        Set<String> columnNames = new HashSet<>();
+        for (Column column : schema) {
+            if (!columnNames.add(column.getName().toLowerCase())) {
+                throw new IllegalArgumentException("Duplicate column name 
found: " + column.getName());
+            }
+            // Add more validation logic if needed
+        }
+    }
 }
diff --git 
a/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy 
b/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy
index 41afb02e0f9..35d36c9a5c3 100644
--- a/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy
+++ b/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy
@@ -295,6 +295,12 @@ suite("test_paimon_catalog", 
"p0,external,doris,external_docker,external_docker_
             test_cases("false", "true")
             test_cases("true", "false")
             test_cases("true", "true")
+
+            test {
+                sql """select * from dup_columns_table;"""
+                exception "Duplicate column name found: id"
+            }
+
             sql """ set force_jni_scanner=false; """
 
             // test view from jion paimon


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to