This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 00b065dc24c9615eaf43546632a592b8523d7d87
Author: Socrates <suxiaogang...@icloud.com>
AuthorDate: Thu Aug 22 22:45:08 2024 +0800

    [fix](hive) report error with escape char and null format (#39700)
    
    ## Proposed changes
    
    Because BE does not yet handle a custom escape char or null format when
    reading Hive text tables, FE now reports an error when it finds that
    either serde property is set to a non-default value.
---
 .../regression/serde_prop/some_serde_table.hql     | 27 ++++++++++++++++++++++
 .../doris/datasource/hive/source/HiveScanNode.java | 20 ++++++++++++++++
 .../hive/test_hive_serde_prop.groovy               | 18 +++++++++++++++
 3 files changed, 65 insertions(+)

diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
index 13e7cb86e03..b5d963a1c2b 100644
--- 
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
@@ -82,6 +82,31 @@ STORED AS INPUTFORMAT
 OUTPUTFORMAT 
   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
 
+CREATE TABLE `serde_test7`(
+  `id` int, 
+  `name` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+WITH SERDEPROPERTIES (
+  'escape.delim' = '|'
+)
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+CREATE TABLE `serde_test8`(
+  `id` int, 
+  `name` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+WITH SERDEPROPERTIES (
+  'serialization.null.format' = 'null'
+)
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
 
 insert into serde_test1 values(1, "abc"),(2, "def");
 insert into serde_test2 values(1, "abc"),(2, "def");
@@ -89,3 +114,5 @@ insert into serde_test3 values(1, "abc"),(2, "def");
 insert into serde_test4 values(1, "abc"),(2, "def");
 insert into serde_test5 values(1, "abc"),(2, "def");
 insert into serde_test6 values(1, "abc"),(2, "def");
+insert into serde_test7 values(1, "abc"),(2, "def");
+insert into serde_test8 values(1, "abc"),(2, "def");
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 6ef551825e2..be722b31c7b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -95,6 +95,11 @@ public class HiveScanNode extends FileQueryScanNode {
     public static final String PROP_MAP_KV_DELIMITER = "mapkey.delim";
     public static final String DEFAULT_MAP_KV_DELIMITER = "\003";
 
+    public static final String PROP_ESCAPE_DELIMITER = "escape.delim";
+    public static final String DEFAULT_ESCAPE_DELIMIER = "\\";
+    public static final String PROP_NULL_FORMAT = "serialization.null.format";
+    public static final String DEFAULT_NULL_FORMAT = "\\N";
+
     protected final HMSExternalTable hmsTable;
     private HiveTransaction hiveTransaction = null;
 
@@ -476,6 +481,21 @@ public class HiveScanNode extends FileQueryScanNode {
             
textParams.setEnclose(serdeParams.get(PROP_QUOTE_CHAR).getBytes()[0]);
         }
 
+        // TODO: support escape char and null format in csv_reader
+        Optional<String> escapeChar = 
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+                PROP_ESCAPE_DELIMITER);
+        if (escapeChar.isPresent() && 
!escapeChar.get().equals(DEFAULT_ESCAPE_DELIMIER)) {
+            throw new UserException(
+                    "not support serde prop " + PROP_ESCAPE_DELIMITER + " in 
hive text reading");
+        }
+
+        Optional<String> nullFormat = 
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+                PROP_NULL_FORMAT);
+        if (nullFormat.isPresent() && 
!nullFormat.get().equals(DEFAULT_NULL_FORMAT)) {
+            throw new UserException(
+                    "not support serde prop " + PROP_NULL_FORMAT + " in hive 
text reading");
+        }
+
         TFileAttributes fileAttributes = new TFileAttributes();
         fileAttributes.setTextParams(textParams);
         fileAttributes.setHeaderType("");
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
index 0da2eb3160a..8aa97e63123 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
@@ -45,6 +45,24 @@ suite("test_hive_serde_prop", 
"external_docker,hive,external_docker_hive,p0,exte
         qt_5 """select * from ${catalog_name}.regression.serde_test4 order by 
id;"""
         qt_6 """select * from ${catalog_name}.regression.serde_test5 order by 
id;"""
         qt_7 """select * from ${catalog_name}.regression.serde_test6 order by 
id;"""
+
+        def success = true;
+        try {
+            sql """select * from ${catalog_name}.regression.serde_test7 order 
by id;"""
+        } catch(Exception e) {
+            assertTrue(e.getMessage().contains("not support serde prop"), 
e.getMessage())
+            success = false;
+        }
+        assertEquals(success, false)
+
+        success = true;
+        try {
+            sql """select * from ${catalog_name}.regression.serde_test8 order 
by id;"""
+        } catch(Exception e) {
+            assertTrue(e.getMessage().contains("not support serde prop"), 
e.getMessage())
+            success = false;
+        }
+        assertEquals(success, false)
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to