This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new f7068b56587 [cherry-pick](branch-2.1) Make doris read hive text table parameters and behavior consistent with hive (#37840) f7068b56587 is described below commit f7068b56587172581d8e248532daec95102e01dc Author: 苏小刚 <suxiaogang...@icloud.com> AuthorDate: Tue Jul 16 22:24:50 2024 +0800 [cherry-pick](branch-2.1) Make doris read hive text table parameters and behavior consistent with hive (#37840) ## Proposed changes pick from master https://github.com/apache/doris/pull/37638 <!--Describe your changes.--> --- .../regression/serde_prop/some_serde_table.hql | 57 ++++++++++++++++++++++ .../datasource/hive/HiveMetaStoreClientHelper.java | 17 +++++++ .../doris/datasource/hive/source/HiveScanNode.java | 44 +++++++++-------- .../hive/test_hive_serde_prop.out | 36 +++++++++++++- .../hive/test_hive_serde_prop.groovy | 6 ++- 5 files changed, 137 insertions(+), 23 deletions(-) diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql index fa6ad791118..13e7cb86e03 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql @@ -30,5 +30,62 @@ TBLPROPERTIES ( 'field.delim'='|' ); +CREATE TABLE `serde_test3`( + `id` int, + `name` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'serialization.format'='g') +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; + + +CREATE TABLE `serde_test4`( + `id` int, + `name` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'field.delim' = 'gg', + "line.delim" = "hh") +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; + +CREATE TABLE `serde_test5`( + `id` int, + `name` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'field.delim' = '16', + "line.delim" = "21") +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; + +CREATE TABLE `serde_test6`( + `id` int, + `name` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'field.delim' = '\16', + "line.delim" = "\21") +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; + + insert into serde_test1 values(1, "abc"),(2, "def"); insert into serde_test2 values(1, "abc"),(2, "def"); +insert into serde_test3 values(1, "abc"),(2, "def"); +insert into serde_test4 values(1, "abc"),(2, "def"); +insert into serde_test5 values(1, "abc"),(2, "def"); +insert into serde_test6 values(1, "abc"),(2, "def"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java index 22bf13755a2..c086172f1f9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java @@ -873,4 +873,21 @@ public class HiveMetaStoreClientHelper { } return defaultValue; } + + /** + * Return the byte value of the number string. + * + * @param altValue + * The string containing a number. + */ + public static String getByte(String altValue) { + if (altValue != null && altValue.length() > 0) { + try { + return String.valueOf((char) ((Byte.parseByte(altValue) + 256) % 256)); + } catch (NumberFormatException e) { + return altValue.substring(0, 1); + } + } + return null; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java index 0214ecc4642..abb8cc8dda3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java @@ -86,6 +86,7 @@ public class HiveScanNode extends FileQueryScanNode { public static final String DEFAULT_LINE_DELIMITER = "\n"; public static final String PROP_SEPARATOR_CHAR = "separatorChar"; public static final String PROP_QUOTE_CHAR = "quoteChar"; + public static final String PROP_SERIALIZATION_FORMAT = "serialization.format"; public static final String PROP_COLLECTION_DELIMITER_HIVE2 = "colelction.delim"; public static final String PROP_COLLECTION_DELIMITER_HIVE3 = "collection.delim"; @@ -447,29 +448,32 @@ public class HiveScanNode extends FileQueryScanNode { TFileTextScanRangeParams textParams = new TFileTextScanRangeParams(); // 1. set column separator - Optional<String> fieldDelim = - HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_FIELD_DELIMITER); - Optional<String> columnSeparator = - HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_SEPARATOR_CHAR); - textParams.setColumnSeparator(HiveMetaStoreClientHelper.firstPresentOrDefault( - DEFAULT_FIELD_DELIMITER, fieldDelim, columnSeparator)); + Optional<String> fieldDelim = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), + PROP_FIELD_DELIMITER); + Optional<String> serFormat = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), + PROP_SERIALIZATION_FORMAT); + Optional<String> columnSeparator = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), + PROP_SEPARATOR_CHAR); + textParams.setColumnSeparator(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault( + DEFAULT_FIELD_DELIMITER, fieldDelim, columnSeparator, serFormat))); // 2. set line delimiter - Optional<String> lineDelim = - HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_LINE_DELIMITER); - textParams.setLineDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault( - DEFAULT_LINE_DELIMITER, lineDelim)); + Optional<String> lineDelim = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), + PROP_LINE_DELIMITER); + textParams.setLineDelimiter(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault( + DEFAULT_LINE_DELIMITER, lineDelim))); // 3. set mapkv delimiter - Optional<String> mapkvDelim = - HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_MAP_KV_DELIMITER); - textParams.setMapkvDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault( - DEFAULT_MAP_KV_DELIMITER, mapkvDelim)); + Optional<String> mapkvDelim = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), + PROP_MAP_KV_DELIMITER); + textParams.setMapkvDelimiter(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault( + DEFAULT_MAP_KV_DELIMITER, mapkvDelim))); // 4. set collection delimiter - Optional<String> collectionDelimHive2 = - HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_COLLECTION_DELIMITER_HIVE2); - Optional<String> collectionDelimHive3 = - HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_COLLECTION_DELIMITER_HIVE3); - textParams.setCollectionDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault( - DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2, collectionDelimHive3)); + Optional<String> collectionDelimHive2 = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), + PROP_COLLECTION_DELIMITER_HIVE2); + Optional<String> collectionDelimHive3 = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), + PROP_COLLECTION_DELIMITER_HIVE3); + textParams.setCollectionDelimiter( + HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault( + DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2, collectionDelimHive3))); // 5. set quote char Map<String, String> serdeParams = hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters(); if (serdeParams.containsKey(PROP_QUOTE_CHAR)) { diff --git a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out index b00eebec49d..38918c3fc6f 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out +++ b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out @@ -7,7 +7,23 @@ b 2.2 1 abc 2 def --- !2 -- +-- !3 -- +1 abc +2 def + +-- !4 -- +1 abc +2 def + +-- !5 -- +1 abc +2 def + +-- !6 -- +1 abc +2 def + +-- !7 -- 1 abc 2 def @@ -19,7 +35,23 @@ b 2.2 1 abc 2 def --- !2 -- +-- !3 -- +1 abc +2 def + +-- !4 -- +1 abc +2 def + +-- !5 -- +1 abc +2 def + +-- !6 -- +1 abc +2 def + +-- !7 -- 1 abc 2 def diff --git a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy index 3ae6b21bbba..0da2eb3160a 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy @@ -40,7 +40,11 @@ suite("test_hive_serde_prop", "external_docker,hive,external_docker_hive,p0,exte qt_2 """select * from ${catalog_name}.regression.serde_test1 order by id;""" - qt_2 """select * from ${catalog_name}.regression.serde_test2 order by id;""" + qt_3 """select * from ${catalog_name}.regression.serde_test2 order by id;""" + qt_4 """select * from ${catalog_name}.regression.serde_test3 order by id;""" + qt_5 """select * from ${catalog_name}.regression.serde_test4 order by id;""" + qt_6 """select * from ${catalog_name}.regression.serde_test5 order by id;""" + qt_7 """select * from ${catalog_name}.regression.serde_test6 order by id;""" } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org