This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 21184bd26bc2f5b30996403d792a80e0d23b7011
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Tue Jul 18 18:14:56 2023 +0800

    [Fix](multi catalog) Fix bug when Hive partition values contain special
characters. #21895
    
    Hive partition values may contain special characters, such as '/'. This can
cause the partition value splitter to produce wrong partition values, because it
uses '/' to split the partition name. So we need to split the encoded partition
name first and then decode each part back to its original value.
---
 .../doris/datasource/hive/HiveMetaStoreCache.java  | 14 ++++--
 .../hive/test_hive_special_char_partition.out      | 51 ++++++++++++++++++++++
 .../hive/test_hive_special_char_partition.groovy   | 51 ++++++++++++++++++++++
 3 files changed, 113 insertions(+), 3 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 169ed3727b..06a72c112b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -179,8 +179,9 @@ public class HiveMetaStoreCache {
         Map<Long, List<UniqueId>> idToUniqueIdsMap = 
Maps.newHashMapWithExpectedSize(partitionNames.size());
         long idx = 0;
         for (String partitionName : partitionNames) {
+            String decodedPartitionName;
             try {
-                partitionName = URLDecoder.decode(partitionName, 
StandardCharsets.UTF_8.name());
+                decodedPartitionName = URLDecoder.decode(partitionName, 
StandardCharsets.UTF_8.name());
             } catch (UnsupportedEncodingException e) {
                 // It should not be here
                 throw new RuntimeException(e);
@@ -188,7 +189,7 @@ public class HiveMetaStoreCache {
             long partitionId = idx++;
             ListPartitionItem listPartitionItem = 
toListPartitionItem(partitionName, key.types);
             idToPartitionItem.put(partitionId, listPartitionItem);
-            partitionNameToIdMap.put(partitionName, partitionId);
+            partitionNameToIdMap.put(decodedPartitionName, partitionId);
         }
 
         Map<UniqueId, Range<PartitionKey>> uidToPartitionRange = null;
@@ -219,7 +220,14 @@ public class HiveMetaStoreCache {
         for (String part : parts) {
             String[] kv = part.split("=");
             Preconditions.checkState(kv.length == 2, partitionName);
-            values.add(new PartitionValue(kv[1], 
HIVE_DEFAULT_PARTITION.equals(kv[1])));
+            String decodedValue = null;
+            try {
+                decodedValue = URLDecoder.decode(kv[1], 
StandardCharsets.UTF_8.name());
+            } catch (UnsupportedEncodingException e) {
+                // It should not be here
+                throw new RuntimeException(e);
+            }
+            values.add(new PartitionValue(decodedValue, 
HIVE_DEFAULT_PARTITION.equals(decodedValue)));
         }
         try {
             PartitionKey key = 
PartitionKey.createListPartitionKeyWithTypes(values, types);
diff --git 
a/regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out
 
b/regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out
new file mode 100644
index 0000000000..0bd26b1276
--- /dev/null
+++ 
b/regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out
@@ -0,0 +1,51 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !1 --
+name#  2023#01#01
+name1  2023/01/01
+name10 2023<01><01>
+name11 2023\\01\\01
+name12 2023.01.01
+name2  2023 01 01
+name3  2023:01:01
+name4  2023?01?01
+name5  2023=01=01
+name6  2023%01%01
+name8  2023"01"01
+name9  2023'01'01
+
+-- !2 --
+name2
+
+-- !3 --
+name1
+
+-- !4 --
+name4  2023?01?01
+
+-- !5 --
+name12 2023.01.01
+
+-- !6 --
+name10 2023<01><01>
+
+-- !7 --
+name3  2023:01:01
+
+-- !8 --
+name5  2023=01=01
+
+-- !9 --
+name8  2023"01"01
+
+-- !10 --
+name9  2023'01'01
+
+-- !11 --
+name11 2023\\01\\01
+
+-- !12 --
+name6  2023%01%01
+
+-- !13 --
+name#  2023#01#01
+
diff --git 
a/regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy
 
b/regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy
new file mode 100644
index 0000000000..cb862469f6
--- /dev/null
+++ 
b/regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_special_char_partition", "p2") {
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = 
context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = 
context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "test_hive_special_char_partition"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+        sql """use multi_catalog;"""
+        qt_1 "select * from special_character_1_partition order by name"
+        qt_2 "select name from special_character_1_partition where part='2023 
01 01'"
+        qt_3 "select name from special_character_1_partition where 
part='2023/01/01'"
+        qt_4 "select * from special_character_1_partition where 
part='2023?01?01'"
+        qt_5 "select * from special_character_1_partition where 
part='2023.01.01'"
+        qt_6 "select * from special_character_1_partition where 
part='2023<01><01>'"
+        qt_7 "select * from special_character_1_partition where 
part='2023:01:01'"
+        qt_8 "select * from special_character_1_partition where 
part='2023=01=01'"
+        qt_9 "select * from special_character_1_partition where 
part='2023\"01\"01'"
+        qt_10 "select * from special_character_1_partition where 
part='2023\\'01\\'01'"
+        qt_11 "select * from special_character_1_partition where 
part='2023\\\\01\\\\01'"
+        qt_12 "select * from special_character_1_partition where 
part='2023%01%01'"
+        qt_13 "select * from special_character_1_partition where 
part='2023#01#01'"
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to