This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new bf219503488 [fix](hive) fix write hive partition by Doris (#50864)
bf219503488 is described below

commit bf219503488b89341fde7428a80018dad5789871
Author: Socrates <suxiaogang...@icloud.com>
AuthorDate: Wed May 14 23:25:50 2025 +0800

    [fix](hive) fix write hive partition by Doris (#50864)
    
    ### What problem does this PR solve?
    Problem Summary:
    After writing to a Hive partitioned table and adding a new partition
    using Doris, writing data to the same partition using Hive results in an
    error: "Partition column xxx conflicts with table columns."
---
 .../docker-compose/hive/hadoop-hive-2x.env.tpl     |   2 ++
 .../docker-compose/hive/hadoop-hive-3x.env.tpl     |   2 +-
 .../docker-compose/hive/hadoop-hive.env.tpl        |   1 +
 .../scripts/create_preinstalled_scripts/run77.hql  |  30 +++++++++++++++++++++
 .../doris/datasource/hive/HMSTransaction.java      |  12 +++------
 .../datasource/paimon/source/PaimonScanNode.java   |   2 +-
 .../hive/write/test_hive_write_partitions.out      | Bin 73239 -> 74043 bytes
 .../hive/write/test_hive_write_partitions.groovy   |  22 ++++++++++++---
 8 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive-2x.env.tpl b/docker/thirdparties/docker-compose/hive/hadoop-hive-2x.env.tpl
index 6222972176a..f622f28bc03 100644
--- a/docker/thirdparties/docker-compose/hive/hadoop-hive-2x.env.tpl
+++ b/docker/thirdparties/docker-compose/hive/hadoop-hive-2x.env.tpl
@@ -15,3 +15,5 @@
 # limitations under the License.
 #
 
+
+HIVE_SITE_CONF_hive_stats_column_autogather=false
diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl b/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
index 84bfce1754f..4d92bab5351 100644
--- a/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
+++ b/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
@@ -19,4 +19,4 @@
 HIVE_SITE_CONF_hive_metastore_event_db_notification_api_auth=false
 HIVE_SITE_CONF_hive_metastore_dml_events=true
 HIVE_SITE_CONF_hive_metastore_transactional_event_listeners=org.apache.hive.hcatalog.listener.DbNotificationListener
-
+HIVE_SITE_CONF_hive_stats_column_autogather=false
diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl b/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
index 0e074228410..7db18ab998f 100644
--- a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
+++ b/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
@@ -28,6 +28,7 @@ HIVE_SITE_CONF_hive_server2_webui_port=0
 HIVE_SITE_CONF_hive_compactor_initiator_on=true
 HIVE_SITE_CONF_hive_compactor_worker_threads=2
 HIVE_SITE_CONF_metastore_storage_schema_reader_impl=org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader
+HIVE_SITE_CONF_hive_stats_column_autogather=false
 
 CORE_CONF_fs_defaultFS=hdfs://${IP_HOST}:${FS_PORT}
 CORE_CONF_hadoop_http_staticuser_user=root
diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run77.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run77.hql
new file mode 100755
index 00000000000..209981b60d3
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run77.hql
@@ -0,0 +1,30 @@
+create database if not exists write_test;
+use write_test;
+
+DROP TABLE IF EXISTS test_doris_write_hive_partition_table_original; 
+CREATE TABLE test_doris_write_hive_partition_table_original (
+  `v1` decimal(3,0), 
+  `v2` string )
+PARTITIONED BY ( 
+  `test_date` string, 
+  `v3` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';
+
+INSERT INTO TABLE test_doris_write_hive_partition_table_original PARTITION (test_date='2025-05-01', v3='project1')
+VALUES (1, 'test1'),
+       (2, 'test2');
+
+INSERT INTO TABLE test_doris_write_hive_partition_table_original PARTITION (test_date='2025-05-01', v3='project2')
+VALUES (3, 'test3');
+
+INSERT INTO TABLE test_doris_write_hive_partition_table_original PARTITION (test_date='2025-05-02', v3='project1')
+VALUES (4, 'test4');
+
+INSERT INTO TABLE test_doris_write_hive_partition_table_original PARTITION (test_date='2025-05-02', v3='project2')
+VALUES (5, 'test5'),
+       (6, 'test6');
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
index 9b88f7a8dea..dce7cc7cdd7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
@@ -279,7 +279,7 @@ public class HMSTransaction implements Transaction {
                                 Maps.newHashMap(),
                                 sd.getOutputFormat(),
                                 sd.getSerdeInfo().getSerializationLib(),
-                                getTableColumns(tableInfo)
+                                sd.getCols()
                         );
                         if (updateMode == TUpdateMode.OVERWRITE) {
                             dropPartition(tableInfo, hivePartition.getPartitionValues(), true);
@@ -436,7 +436,7 @@ public class HMSTransaction implements Transaction {
                         partition.getParameters(),
                         sd.getOutputFormat(),
                         sd.getSerdeInfo().getSerializationLib(),
-                        getTableColumns(tableInfo)
+                        sd.getCols()
                 );
 
                 partitionActionsForTable.put(
@@ -919,11 +919,6 @@ public class HMSTransaction implements Transaction {
         throw new RuntimeException("Not Found table: " + tableInfo);
     }
 
-    public synchronized List<FieldSchema> getTableColumns(SimpleTableInfo tableInfo) {
-        return tableColumns.computeIfAbsent(tableInfo,
-                key -> hiveOps.getClient().getSchema(tableInfo.getDbName(), tableInfo.getTbName()));
-    }
-
     public synchronized void finishChangingExistingTable(
             ActionType actionType,
             SimpleTableInfo tableInfo,
@@ -1282,7 +1277,7 @@ public class HMSTransaction implements Transaction {
                     Maps.newHashMap(),
                     sd.getOutputFormat(),
                     sd.getSerdeInfo().getSerializationLib(),
-                    getTableColumns(tableInfo)
+                    sd.getCols()
             );
 
             HivePartitionWithStatistics partitionWithStats =
@@ -1654,4 +1649,3 @@ public class HMSTransaction implements Transaction {
         }
     }
 }
-
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
index 023bbe9cd4d..b5990e9f0b2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
@@ -412,7 +412,7 @@ public class PaimonScanNode extends FileQueryScanNode {
     @Override
     public List<String> getPathPartitionKeys() throws DdlException, MetaNotFoundException {
         // return new ArrayList<>(source.getPaimonTable().partitionKeys());
-        // Paymon is not aware of partitions and bypasses some existing logic by
+        // Paimon is not aware of partitions and bypasses some existing logic by
         // returning an empty list
         return new ArrayList<>();
     }
diff --git a/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out b/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out
index 209315d1a9e..4d68899e605 100644
Binary files a/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out and b/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out differ
diff --git a/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy b/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy
index 7e3f070636e..bf59f5e3d55 100644
--- a/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy
+++ b/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy
@@ -189,13 +189,30 @@ suite("test_hive_write_partitions", "p0,external,hive,external_docker,external_d
         hive_docker """ DROP TABLE IF EXISTS all_partition_types1_${format_compression}_${catalog_name}_q04; """
     }
 
+    def test_doris_write_hive_partition_table = { String catalog_name ->
+        // After writing to a Hive partitioned table and adding a new partition using Doris,
+        // writing data to the same partition using Hive results in an error: "Partition column xxx conflicts with table columns."
+        String tableName = "test_doris_write_hive_partition_table"
+        String originalTableName = "test_doris_write_hive_partition_table_original"
+        hive_docker """ drop table if exists ${tableName}; """
+        hive_docker """ create table ${tableName} like ${originalTableName}; """
+        sql """ refresh catalog ${catalog_name};"""
+        // Insert data and add new partitions by doris
+        sql """ insert into ${tableName} select * from ${originalTableName} where test_date between '2025-05-01' and '2025-05-02'; """
+        order_qt_test_doris_write_hive_partition_table1 """ select * from ${tableName}; """
+        // Overwrite the partition by hive, this will cause error before fix pr
+        hive_docker """ insert overwrite table ${tableName} partition(test_date='2025-05-01',v3='project1') values(7, 'test7');"""
+        sql """ refresh catalog ${catalog_name};"""
+        order_qt_test_doris_write_hive_partition_table2 """ select * from ${tableName}; """
+    }
+
     String enabled = context.config.otherConfigs.get("enableHiveTest")
     if (enabled == null || !enabled.equalsIgnoreCase("true")) {
         logger.info("disable Hive test.")
         return;
     }
 
-    for (String hivePrefix : ["hive3"]) {
+    for (String hivePrefix : ["hive2", "hive3"]) {
         setHivePrefix(hivePrefix)
         try {
             String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
@@ -222,10 +239,9 @@ suite("test_hive_write_partitions", "p0,external,hive,external_docker,external_d
                 q03(format_compression, catalog_name)
                 q04(format_compression, catalog_name)
             }
+            test_doris_write_hive_partition_table(catalog_name)
             sql """drop catalog if exists ${catalog_name}"""
         } finally {
         }
     }
 }
-
-


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to