This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 94986fc5746 branch-2.1: [fix](multi-catalog) Fix bug: "Can not create a Path from an empty string" (#49382) (#49641) 94986fc5746 is described below commit 94986fc5746b0751daaf71d79af50dcd6004e39f Author: Socrates <suyit...@selectdb.com> AuthorDate: Sat Mar 29 09:13:43 2025 +0800 branch-2.1: [fix](multi-catalog) Fix bug: "Can not create a Path from an empty string" (#49382) (#49641) ### What problem does this PR solve? Problem Summary: In HiveMetaStoreCache, the function FileInputFormat.setInputPaths is used to set input paths. However, this function splits paths using commas, which is not the expected behavior. As a result, when partition values contain commas, it leads to incorrect path parsing and potential errors. ```java public static void setInputPaths(JobConf conf, String commaSeparatedPaths) { setInputPaths(conf, StringUtils.stringToPath( getPathStrings(commaSeparatedPaths))); } ``` To prevent FileInputFormat.setInputPaths from splitting paths by commas, we use another overloaded version of the method. Instead of passing a comma-separated string, we explicitly pass a Path object, ensuring that partition values containing commas are handled correctly. ```java public static void setInputPaths(JobConf conf, Path... inputPaths) { Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]); StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString())); for(int i = 1; i < inputPaths.length;i++) { str.append(StringUtils.COMMA_STR); path = new Path(conf.getWorkingDirectory(), inputPaths[i]); str.append(StringUtils.escapeString(path.toString())); } conf.set(org.apache.hadoop.mapreduce.lib.input. 
FileInputFormat.INPUT_DIR, str.toString()); } ``` ### Release note None --- .../scripts/create_preinstalled_scripts/run74.hql | 53 +++++++++++++++++++++ .../partition_col=,/000000_0 | Bin 0 -> 408 bytes .../partition_col=a, b, c/000000_0 | Bin 0 -> 408 bytes .../partition_col=a, b/000000_0 | Bin 0 -> 408 bytes .../doris/datasource/hive/HiveMetaStoreCache.java | 3 +- .../hive/test_hive_partitions.out | Bin 3171 -> 3333 bytes .../hive/test_hive_partitions.groovy | 6 ++- 7 files changed, 59 insertions(+), 3 deletions(-) diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql new file mode 100644 index 00000000000..31e98f370d5 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql @@ -0,0 +1,53 @@ +create database if not exists partition_tables; +use partition_tables; + +CREATE TABLE decimal_partition_table ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col DECIMAL(10, 2)) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/decimal_partition_table'; + +CREATE TABLE int_partition_table ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col INT) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/int_partition_table'; + +CREATE TABLE string_partition_table ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col STRING) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/string_partition_table'; + +CREATE TABLE date_partition_table ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col DATE) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/date_partition_table'; + +CREATE TABLE string_partition_table_with_comma ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col STRING) +STORED AS PARQUET 
+LOCATION '/user/doris/preinstalled_data/partition_tables/string_partition_table_with_comma'; + +msck repair table decimal_partition_table; +msck repair table int_partition_table; +msck repair table string_partition_table; +msck repair table date_partition_table; +msck repair table string_partition_table_with_comma; diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=,/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=,/000000_0 new file mode 100644 index 00000000000..a93ce013162 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=,/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a, b, c/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a, b, c/000000_0 new file mode 100644 index 00000000000..4e6e043ccf5 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a, b, c/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a, b/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a, b/000000_0 new file mode 100644 index 00000000000..63b7f592e7b Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a, b/000000_0 differ diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index 48a625c35a7..751919e85f4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -404,7 +404,8 @@ public class HiveMetaStoreCache { } catch (Exception e) { LOG.warn("unknown scheme in path: " + finalLocation, e); } - FileInputFormat.setInputPaths(jobConf, finalLocation.get()); + // NOTICE: the setInputPaths has 2 overloads, the 2nd arg should be Path not String + FileInputFormat.setInputPaths(jobConf, finalLocation.getPath()); try { FileCacheValue result = getFileCache(finalLocation.get(), key.inputFormat, jobConf, key.getPartitionValues(), key.bindBrokerName); diff --git a/regression-test/data/external_table_p0/hive/test_hive_partitions.out b/regression-test/data/external_table_p0/hive/test_hive_partitions.out index 904eb6eda30..ea0c8f1518c 100644 Binary files a/regression-test/data/external_table_p0/hive/test_hive_partitions.out and b/regression-test/data/external_table_p0/hive/test_hive_partitions.out differ diff --git a/regression-test/suites/external_table_p0/hive/test_hive_partitions.groovy b/regression-test/suites/external_table_p0/hive/test_hive_partitions.groovy index 0e41adc3127..cc3425106a5 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_partitions.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_partitions.groovy @@ -91,6 +91,10 @@ suite("test_hive_partitions", "p0,external,hive,external_docker,external_docker_ q01() + qt_string_partition_table_with_comma """ + select * from partition_tables.string_partition_table_with_comma order by id; + """ + sql """set num_partitions_in_batch_mode=1""" explain { sql ("select * from partition_table") @@ -99,8 +103,6 @@ suite("test_hive_partitions", 
"p0,external,hive,external_docker,external_docker_ contains "(approximate)inputSplitNum=60" } sql """unset variable num_partitions_in_batch_mode""" - - // sql """drop catalog if exists ${catalog_name}""" } finally { } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org