This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 00b065dc24c9615eaf43546632a592b8523d7d87 Author: Socrates <suxiaogang...@icloud.com> AuthorDate: Thu Aug 22 22:45:08 2024 +0800 [fix](hive) report error with escape char and null format (#39700) ## Proposed changes Because BE does not yet process the escape char and null format when reading a Hive text table, FE now reports an error when either of these serde properties is set to a non-default value. --- .../regression/serde_prop/some_serde_table.hql | 27 ++++++++++++++++++++++ .../doris/datasource/hive/source/HiveScanNode.java | 20 ++++++++++++++++ .../hive/test_hive_serde_prop.groovy | 18 +++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql index 13e7cb86e03..b5d963a1c2b 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql @@ -82,6 +82,31 @@ STORED AS INPUTFORMAT OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; +CREATE TABLE `serde_test7`( + `id` int, + `name` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'escape.delim' = '|' +) +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; + +CREATE TABLE `serde_test8`( + `id` int, + `name` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'serialization.null.format' = 'null' +) +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; insert into serde_test1 values(1, "abc"),(2, "def"); insert into serde_test2 values(1, "abc"),(2, "def"); @@ -89,3 +114,5 @@ 
insert into serde_test3 values(1, "abc"),(2, "def"); insert into serde_test4 values(1, "abc"),(2, "def"); insert into serde_test5 values(1, "abc"),(2, "def"); insert into serde_test6 values(1, "abc"),(2, "def"); +insert into serde_test7 values(1, "abc"),(2, "def"); +insert into serde_test8 values(1, "abc"),(2, "def"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java index 6ef551825e2..be722b31c7b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java @@ -95,6 +95,11 @@ public class HiveScanNode extends FileQueryScanNode { public static final String PROP_MAP_KV_DELIMITER = "mapkey.delim"; public static final String DEFAULT_MAP_KV_DELIMITER = "\003"; + public static final String PROP_ESCAPE_DELIMITER = "escape.delim"; + public static final String DEFAULT_ESCAPE_DELIMIER = "\\"; + public static final String PROP_NULL_FORMAT = "serialization.null.format"; + public static final String DEFAULT_NULL_FORMAT = "\\N"; + protected final HMSExternalTable hmsTable; private HiveTransaction hiveTransaction = null; @@ -476,6 +481,21 @@ public class HiveScanNode extends FileQueryScanNode { textParams.setEnclose(serdeParams.get(PROP_QUOTE_CHAR).getBytes()[0]); } + // TODO: support escape char and null format in csv_reader + Optional<String> escapeChar = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), + PROP_ESCAPE_DELIMITER); + if (escapeChar.isPresent() && !escapeChar.get().equals(DEFAULT_ESCAPE_DELIMIER)) { + throw new UserException( + "not support serde prop " + PROP_ESCAPE_DELIMITER + " in hive text reading"); + } + + Optional<String> nullFormat = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), + PROP_NULL_FORMAT); + if (nullFormat.isPresent() && 
!nullFormat.get().equals(DEFAULT_NULL_FORMAT)) { + throw new UserException( + "not support serde prop " + PROP_NULL_FORMAT + " in hive text reading"); + } + TFileAttributes fileAttributes = new TFileAttributes(); fileAttributes.setTextParams(textParams); fileAttributes.setHeaderType(""); diff --git a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy index 0da2eb3160a..8aa97e63123 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy @@ -45,6 +45,24 @@ suite("test_hive_serde_prop", "external_docker,hive,external_docker_hive,p0,exte qt_5 """select * from ${catalog_name}.regression.serde_test4 order by id;""" qt_6 """select * from ${catalog_name}.regression.serde_test5 order by id;""" qt_7 """select * from ${catalog_name}.regression.serde_test6 order by id;""" + + def success = true; + try { + sql """select * from ${catalog_name}.regression.serde_test7 order by id;""" + } catch(Exception e) { + assertTrue(e.getMessage().contains("not support serde prop"), e.getMessage()) + success = false; + } + assertEquals(success, false) + + success = true; + try { + sql """select * from ${catalog_name}.regression.serde_test8 order by id;""" + } catch(Exception e) { + assertTrue(e.getMessage().contains("not support serde prop"), e.getMessage()) + success = false; + } + assertEquals(success, false) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org