This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 94986fc5746 branch-2.1: [fix](multi-catalog) Fix bug: "Can not create a Path from an empty string" (#49382) (#49641)
94986fc5746 is described below

commit 94986fc5746b0751daaf71d79af50dcd6004e39f
Author: Socrates <suyit...@selectdb.com>
AuthorDate: Sat Mar 29 09:13:43 2025 +0800

    branch-2.1: [fix](multi-catalog) Fix bug: "Can not create a Path from an empty string" (#49382) (#49641)
    
    ### What problem does this PR solve?
    Problem Summary:
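    In HiveMetaStoreCache, the function FileInputFormat.setInputPaths is
    used to set the input path of a partition. However, the overload that
    takes a String treats its argument as a comma-separated list of paths
    and splits it on commas, which is not what we want for a single
    partition location. As a result, when partition values contain commas,
    the location is parsed into several incorrect paths, which leads to
    errors such as "Can not create a Path from an empty string".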
    ```java
      public static void setInputPaths(JobConf conf, String commaSeparatedPaths) {
        setInputPaths(conf, StringUtils.stringToPath(
                            getPathStrings(commaSeparatedPaths)));
      }
    ```
    To prevent FileInputFormat.setInputPaths from splitting paths by commas,
    we use another overloaded version of the method. Instead of passing a
    comma-separated string, we explicitly pass a Path object, ensuring that
    partition values containing commas are handled correctly.
    ```java
      public static void setInputPaths(JobConf conf, Path... inputPaths) {
        Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]);
        StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
        for(int i = 1; i < inputPaths.length;i++) {
          str.append(StringUtils.COMMA_STR);
          path = new Path(conf.getWorkingDirectory(), inputPaths[i]);
          str.append(StringUtils.escapeString(path.toString()));
        }
        conf.set(org.apache.hadoop.mapreduce.lib.input.
          FileInputFormat.INPUT_DIR, str.toString());
      }
    ```
    
    ### Release note
    
    None
---
 .../scripts/create_preinstalled_scripts/run74.hql  |  53 +++++++++++++++++++++
 .../partition_col=,/000000_0                       | Bin 0 -> 408 bytes
 .../partition_col=a, b, c/000000_0                 | Bin 0 -> 408 bytes
 .../partition_col=a, b/000000_0                    | Bin 0 -> 408 bytes
 .../doris/datasource/hive/HiveMetaStoreCache.java  |   3 +-
 .../hive/test_hive_partitions.out                  | Bin 3171 -> 3333 bytes
 .../hive/test_hive_partitions.groovy               |   6 ++-
 7 files changed, 59 insertions(+), 3 deletions(-)

diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql
new file mode 100644
index 00000000000..31e98f370d5
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql
@@ -0,0 +1,53 @@
+create database if not exists partition_tables;
+use partition_tables;
+
+CREATE TABLE decimal_partition_table (
+    id INT,
+    name STRING,
+    value FLOAT
+)
+PARTITIONED BY (partition_col DECIMAL(10, 2))
+STORED AS PARQUET
+LOCATION 
'/user/doris/preinstalled_data/partition_tables/decimal_partition_table';
+
+CREATE TABLE int_partition_table (
+    id INT,
+    name STRING,
+    value FLOAT
+)
+PARTITIONED BY (partition_col INT)
+STORED AS PARQUET
+LOCATION '/user/doris/preinstalled_data/partition_tables/int_partition_table';
+
+CREATE TABLE string_partition_table (
+    id INT,
+    name STRING,
+    value FLOAT
+)
+PARTITIONED BY (partition_col STRING)
+STORED AS PARQUET
+LOCATION 
'/user/doris/preinstalled_data/partition_tables/string_partition_table';
+
+CREATE TABLE date_partition_table (
+    id INT,
+    name STRING,
+    value FLOAT
+)
+PARTITIONED BY (partition_col DATE)
+STORED AS PARQUET
+LOCATION '/user/doris/preinstalled_data/partition_tables/date_partition_table';
+
+CREATE TABLE string_partition_table_with_comma (
+    id INT,
+    name STRING,
+    value FLOAT
+)
+PARTITIONED BY (partition_col STRING)
+STORED AS PARQUET
+LOCATION 
'/user/doris/preinstalled_data/partition_tables/string_partition_table_with_comma';
+
+msck repair table decimal_partition_table;
+msck repair table int_partition_table;
+msck repair table string_partition_table;
+msck repair table date_partition_table;
+msck repair table string_partition_table_with_comma;
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=,/000000_0
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=,/000000_0
new file mode 100644
index 00000000000..a93ce013162
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=,/000000_0
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a,
 b, c/000000_0 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a,
 b, c/000000_0
new file mode 100644
index 00000000000..4e6e043ccf5
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a,
 b, c/000000_0 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a,
 b/000000_0 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a,
 b/000000_0
new file mode 100644
index 00000000000..63b7f592e7b
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a,
 b/000000_0 differ
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 48a625c35a7..751919e85f4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -404,7 +404,8 @@ public class HiveMetaStoreCache {
             } catch (Exception e) {
                 LOG.warn("unknown scheme in path: " + finalLocation, e);
             }
-            FileInputFormat.setInputPaths(jobConf, finalLocation.get());
+            // NOTICE: the setInputPaths has 2 overloads, the 2nd arg should 
be Path not String
+            FileInputFormat.setInputPaths(jobConf, finalLocation.getPath());
             try {
                 FileCacheValue result = getFileCache(finalLocation.get(), 
key.inputFormat, jobConf,
                         key.getPartitionValues(), key.bindBrokerName);
diff --git 
a/regression-test/data/external_table_p0/hive/test_hive_partitions.out 
b/regression-test/data/external_table_p0/hive/test_hive_partitions.out
index 904eb6eda30..ea0c8f1518c 100644
Binary files 
a/regression-test/data/external_table_p0/hive/test_hive_partitions.out and 
b/regression-test/data/external_table_p0/hive/test_hive_partitions.out differ
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_partitions.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_partitions.groovy
index 0e41adc3127..cc3425106a5 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_partitions.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_partitions.groovy
@@ -91,6 +91,10 @@ suite("test_hive_partitions", 
"p0,external,hive,external_docker,external_docker_
 
             q01()
 
+            qt_string_partition_table_with_comma """
+                select * from 
partition_tables.string_partition_table_with_comma order by id;
+            """
+
             sql """set num_partitions_in_batch_mode=1"""
             explain {
                 sql ("select * from partition_table")
@@ -99,8 +103,6 @@ suite("test_hive_partitions", 
"p0,external,hive,external_docker,external_docker_
                 contains "(approximate)inputSplitNum=60"
             }
             sql """unset variable num_partitions_in_batch_mode"""
-
-            // sql """drop catalog if exists ${catalog_name}"""
         } finally {
         }
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to