This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit 21184bd26bc2f5b30996403d792a80e0d23b7011 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Tue Jul 18 18:14:56 2023 +0800 [Fix](multi catalog)Fix hive partition value contains special character bug. #21895 Hive partition value may contain special characters, such as '/'. This may cause the partition value splitter to get wrong partition values because it uses '/' to split partition values. So we need to split the encoded partition name and then decode it to the original value. --- .../doris/datasource/hive/HiveMetaStoreCache.java | 14 ++++-- .../hive/test_hive_special_char_partition.out | 51 ++++++++++++++++++++++ .../hive/test_hive_special_char_partition.groovy | 51 ++++++++++++++++++++++ 3 files changed, 113 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index 169ed3727b..06a72c112b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -179,8 +179,9 @@ public class HiveMetaStoreCache { Map<Long, List<UniqueId>> idToUniqueIdsMap = Maps.newHashMapWithExpectedSize(partitionNames.size()); long idx = 0; for (String partitionName : partitionNames) { + String decodedPartitionName; try { - partitionName = URLDecoder.decode(partitionName, StandardCharsets.UTF_8.name()); + decodedPartitionName = URLDecoder.decode(partitionName, StandardCharsets.UTF_8.name()); } catch (UnsupportedEncodingException e) { // It should not be here throw new RuntimeException(e); @@ -188,7 +189,7 @@ long partitionId = idx++; ListPartitionItem listPartitionItem = toListPartitionItem(partitionName, key.types); idToPartitionItem.put(partitionId, listPartitionItem); - partitionNameToIdMap.put(partitionName, partitionId); + 
partitionNameToIdMap.put(decodedPartitionName, partitionId); } Map<UniqueId, Range<PartitionKey>> uidToPartitionRange = null; @@ -219,7 +220,14 @@ public class HiveMetaStoreCache { for (String part : parts) { String[] kv = part.split("="); Preconditions.checkState(kv.length == 2, partitionName); - values.add(new PartitionValue(kv[1], HIVE_DEFAULT_PARTITION.equals(kv[1]))); + String decodedValue = null; + try { + decodedValue = URLDecoder.decode(kv[1], StandardCharsets.UTF_8.name()); + } catch (UnsupportedEncodingException e) { + // It should not be here + throw new RuntimeException(e); + } + values.add(new PartitionValue(decodedValue, HIVE_DEFAULT_PARTITION.equals(decodedValue))); } try { PartitionKey key = PartitionKey.createListPartitionKeyWithTypes(values, types); diff --git a/regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out b/regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out new file mode 100644 index 0000000000..0bd26b1276 --- /dev/null +++ b/regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out @@ -0,0 +1,51 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !1 -- +name# 2023#01#01 +name1 2023/01/01 +name10 2023<01><01> +name11 2023\\01\\01 +name12 2023.01.01 +name2 2023 01 01 +name3 2023:01:01 +name4 2023?01?01 +name5 2023=01=01 +name6 2023%01%01 +name8 2023"01"01 +name9 2023'01'01 + +-- !2 -- +name2 + +-- !3 -- +name1 + +-- !4 -- +name4 2023?01?01 + +-- !5 -- +name12 2023.01.01 + +-- !6 -- +name10 2023<01><01> + +-- !7 -- +name3 2023:01:01 + +-- !8 -- +name5 2023=01=01 + +-- !9 -- +name8 2023"01"01 + +-- !10 -- +name9 2023'01'01 + +-- !11 -- +name11 2023\\01\\01 + +-- !12 -- +name6 2023%01%01 + +-- !13 -- +name# 2023#01#01 + diff --git a/regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy b/regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy new file mode 100644 index 0000000000..cb862469f6 --- /dev/null +++ b/regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hive_special_char_partition", "p2") { + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") + String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") + String catalog_name = "test_hive_special_char_partition" + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hadoop.username' = 'hadoop', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + logger.info("switched to catalog " + catalog_name) + sql """use multi_catalog;""" + qt_1 "select * from special_character_1_partition order by name" + qt_2 "select name from special_character_1_partition where part='2023 01 01'" + qt_3 "select name from special_character_1_partition where part='2023/01/01'" + qt_4 "select * from special_character_1_partition where part='2023?01?01'" + qt_5 "select * from special_character_1_partition where part='2023.01.01'" + qt_6 "select * from special_character_1_partition where part='2023<01><01>'" + qt_7 "select * from special_character_1_partition where part='2023:01:01'" + qt_8 "select * from special_character_1_partition where part='2023=01=01'" + qt_9 "select * from special_character_1_partition where part='2023\"01\"01'" + qt_10 "select * from special_character_1_partition where part='2023\\'01\\'01'" + qt_11 "select * from special_character_1_partition where part='2023\\\\01\\\\01'" + qt_12 "select * from special_character_1_partition where part='2023%01%01'" + qt_13 "select * from special_character_1_partition where part='2023#01#01'" + } +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional 
commands, e-mail: commits-h...@doris.apache.org