This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new bf219503488 [fix](hive) fix write hive partition by Doris (#50864)
bf219503488 is described below

commit bf219503488b89341fde7428a80018dad5789871
Author: Socrates <suxiaogang...@icloud.com>
AuthorDate: Wed May 14 23:25:50 2025 +0800

    [fix](hive) fix write hive partition by Doris (#50864)

    ### What problem does this PR solve?

    Problem Summary:
    After writing to a Hive partitioned table and adding a new partition using
    Doris, writing data to the same partition using Hive results in an error:
    "Partition column xxx conflicts with table columns."
---
 .../docker-compose/hive/hadoop-hive-2x.env.tpl     |   2 ++
 .../docker-compose/hive/hadoop-hive-3x.env.tpl     |   2 +-
 .../docker-compose/hive/hadoop-hive.env.tpl        |   1 +
 .../scripts/create_preinstalled_scripts/run77.hql  |  30 +++++++++++++++++++++
 .../doris/datasource/hive/HMSTransaction.java      |  12 +++------
 .../datasource/paimon/source/PaimonScanNode.java   |   2 +-
 .../hive/write/test_hive_write_partitions.out      | Bin 73239 -> 74043 bytes
 .../hive/write/test_hive_write_partitions.groovy   |  22 ++++++++++++---
 8 files changed, 57 insertions(+), 14 deletions(-)
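For context on the change below: the removed getTableColumns() helper fetched the full table schema from the metastore (the data columns plus the partition keys), and that list was attached to every partition Doris registered. Hive then saw the partition keys duplicated in the partition's column list and rejected later writes with "Partition column xxx conflicts with table columns." The fix attaches sd.getCols() instead, i.e. only the data columns carried by the table's StorageDescriptor. The sketch below shows the same column-list rule against the plain Hive Metastore client API; it is a minimal illustration, not the Doris code, and the class name and the conf, db, tbl, partValues, and location parameters are assumptions made up for this example.

    import java.util.List;

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.FieldSchema;
    import org.apache.hadoop.hive.metastore.api.Partition;
    import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
    import org.apache.hadoop.hive.metastore.api.Table;

    public class AddPartitionSketch {
        // Hypothetical helper: register one new partition of an existing partitioned table.
        public static void addPartition(HiveConf conf, String db, String tbl,
                                        List<String> partValues, String location) throws Exception {
            HiveMetaStoreClient client = new HiveMetaStoreClient(conf);
            try {
                Table table = client.getTable(db, tbl);

                // getSchema() returns the data columns *plus* the partition keys
                // (v1, v2, test_date, v3 for the table created in run77.hql).
                List<FieldSchema> schemaWithPartitionKeys = client.getSchema(db, tbl);

                // The partition's StorageDescriptor should carry only the data columns.
                // table.getSd().getCols() excludes the partition keys; registering the
                // partition with schemaWithPartitionKeys instead is the kind of column
                // list that later makes Hive complain about conflicting partition columns.
                StorageDescriptor sd = table.getSd().deepCopy();
                sd.setCols(table.getSd().getCols());
                sd.setLocation(location);

                Partition partition = new Partition();
                partition.setDbName(db);
                partition.setTableName(tbl);
                partition.setValues(partValues);   // e.g. ["2025-05-03", "project3"]
                partition.setSd(sd);
                client.add_partition(partition);
            } finally {
                client.close();
            }
        }
    }

Doris registers its partitions through its own HMSTransaction/HivePartition classes rather than this raw client call, but the column-list rule is the same one the diff applies by passing sd.getCols().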
string, + `v3` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; + +INSERT INTO TABLE test_doris_write_hive_partition_table_original PARTITION (test_date='2025-05-01', v3='project1') +VALUES (1, 'test1'), + (2, 'test2'); + +INSERT INTO TABLE test_doris_write_hive_partition_table_original PARTITION (test_date='2025-05-01', v3='project2') +VALUES (3, 'test3'); + +INSERT INTO TABLE test_doris_write_hive_partition_table_original PARTITION (test_date='2025-05-02', v3='project1') +VALUES (4, 'test4'); + +INSERT INTO TABLE test_doris_write_hive_partition_table_original PARTITION (test_date='2025-05-02', v3='project2') +VALUES (5, 'test5'), + (6, 'test6'); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java index 9b88f7a8dea..dce7cc7cdd7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java @@ -279,7 +279,7 @@ public class HMSTransaction implements Transaction { Maps.newHashMap(), sd.getOutputFormat(), sd.getSerdeInfo().getSerializationLib(), - getTableColumns(tableInfo) + sd.getCols() ); if (updateMode == TUpdateMode.OVERWRITE) { dropPartition(tableInfo, hivePartition.getPartitionValues(), true); @@ -436,7 +436,7 @@ public class HMSTransaction implements Transaction { partition.getParameters(), sd.getOutputFormat(), sd.getSerdeInfo().getSerializationLib(), - getTableColumns(tableInfo) + sd.getCols() ); partitionActionsForTable.put( @@ -919,11 +919,6 @@ public class HMSTransaction implements Transaction { throw new RuntimeException("Not Found table: " + tableInfo); } - public synchronized List<FieldSchema> getTableColumns(SimpleTableInfo tableInfo) { - return tableColumns.computeIfAbsent(tableInfo, - key -> hiveOps.getClient().getSchema(tableInfo.getDbName(), tableInfo.getTbName())); - } - public synchronized void finishChangingExistingTable( ActionType actionType, SimpleTableInfo tableInfo, @@ -1282,7 +1277,7 @@ public class HMSTransaction implements Transaction { Maps.newHashMap(), sd.getOutputFormat(), sd.getSerdeInfo().getSerializationLib(), - getTableColumns(tableInfo) + sd.getCols() ); HivePartitionWithStatistics partitionWithStats = @@ -1654,4 +1649,3 @@ public class HMSTransaction implements Transaction { } } } - diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java index 023bbe9cd4d..b5990e9f0b2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java @@ -412,7 +412,7 @@ public class PaimonScanNode extends FileQueryScanNode { @Override public List<String> getPathPartitionKeys() throws DdlException, MetaNotFoundException { // return new ArrayList<>(source.getPaimonTable().partitionKeys()); - // Paymon is not aware of partitions and bypasses some existing logic by + // Paimon is not aware of partitions and bypasses some existing logic by // returning an empty list return new ArrayList<>(); } diff --git 
diff --git a/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out b/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out
index 209315d1a9e..4d68899e605 100644
Binary files a/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out and b/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out differ
diff --git a/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy b/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy
index 7e3f070636e..bf59f5e3d55 100644
--- a/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy
+++ b/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy
@@ -189,13 +189,30 @@ suite("test_hive_write_partitions", "p0,external,hive,external_docker,external_d
         hive_docker """ DROP TABLE IF EXISTS all_partition_types1_${format_compression}_${catalog_name}_q04; """
     }
 
+    def test_doris_write_hive_partition_table = { String catalog_name ->
+        // After writing to a Hive partitioned table and adding a new partition using Doris,
+        // writing data to the same partition using Hive results in an error: "Partition column xxx conflicts with table columns."
+        String tableName = "test_doris_write_hive_partition_table"
+        String originalTableName = "test_doris_write_hive_partition_table_original"
+        hive_docker """ drop table if exists ${tableName}; """
+        hive_docker """ create table ${tableName} like ${originalTableName}; """
+        sql """ refresh catalog ${catalog_name};"""
+        // Insert data and add new partitions by doris
+        sql """ insert into ${tableName} select * from ${originalTableName} where test_date between '2025-05-01' and '2025-05-02'; """
+        order_qt_test_doris_write_hive_partition_table1 """ select * from ${tableName}; """
+        // Overwrite the partition by hive, this will cause error before fix pr
+        hive_docker """ insert overwrite table ${tableName} partition(test_date='2025-05-01',v3='project1') values(7, 'test7');"""
+        sql """ refresh catalog ${catalog_name};"""
+        order_qt_test_doris_write_hive_partition_table2 """ select * from ${tableName}; """
+    }
+
     String enabled = context.config.otherConfigs.get("enableHiveTest")
     if (enabled == null || !enabled.equalsIgnoreCase("true")) {
         logger.info("disable Hive test.")
         return;
     }
 
-    for (String hivePrefix : ["hive3"]) {
+    for (String hivePrefix : ["hive2", "hive3"]) {
         setHivePrefix(hivePrefix)
         try {
             String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
@@ -222,10 +239,9 @@ suite("test_hive_write_partitions", "p0,external,hive,external_docker,external_d
                 q03(format_compression, catalog_name)
                 q04(format_compression, catalog_name)
             }
+            test_doris_write_hive_partition_table(catalog_name)
             sql """drop catalog if exists ${catalog_name}"""
         } finally {
         }
     }
 }
-
-

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org