This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new f7068b56587 [cherry-pick](branch-2.1) Make doris read hive text table 
parameters and behavior consistent with hive (#37840)
f7068b56587 is described below

commit f7068b56587172581d8e248532daec95102e01dc
Author: 苏小刚 <suxiaogang...@icloud.com>
AuthorDate: Tue Jul 16 22:24:50 2024 +0800

    [cherry-pick](branch-2.1) Make doris read hive text table parameters and 
behavior consistent with hive (#37840)
    
    ## Proposed changes
    
    pick from master https://github.com/apache/doris/pull/37638
    
    <!--Describe your changes.-->
---
 .../regression/serde_prop/some_serde_table.hql     | 57 ++++++++++++++++++++++
 .../datasource/hive/HiveMetaStoreClientHelper.java | 17 +++++++
 .../doris/datasource/hive/source/HiveScanNode.java | 44 +++++++++--------
 .../hive/test_hive_serde_prop.out                  | 36 +++++++++++++-
 .../hive/test_hive_serde_prop.groovy               |  6 ++-
 5 files changed, 137 insertions(+), 23 deletions(-)

diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
index fa6ad791118..13e7cb86e03 100644
--- 
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
@@ -30,5 +30,62 @@ TBLPROPERTIES (
    'field.delim'='|'
 );
 
+CREATE TABLE `serde_test3`(
+  `id` int, 
+  `name` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+WITH SERDEPROPERTIES (  
+  'serialization.format'='g') 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+
+CREATE TABLE `serde_test4`(
+  `id` int, 
+  `name` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+WITH SERDEPROPERTIES (
+  'field.delim' = 'gg',
+  "line.delim" = "hh")
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+CREATE TABLE `serde_test5`(
+  `id` int, 
+  `name` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+WITH SERDEPROPERTIES (
+  'field.delim' = '16',
+  "line.delim" = "21")
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+CREATE TABLE `serde_test6`(
+  `id` int, 
+  `name` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+WITH SERDEPROPERTIES (
+  'field.delim' = '\16',
+  "line.delim" = "\21")
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+
 insert into serde_test1 values(1, "abc"),(2, "def");
 insert into serde_test2 values(1, "abc"),(2, "def");
+insert into serde_test3 values(1, "abc"),(2, "def");
+insert into serde_test4 values(1, "abc"),(2, "def");
+insert into serde_test5 values(1, "abc"),(2, "def");
+insert into serde_test6 values(1, "abc"),(2, "def");
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
index 22bf13755a2..c086172f1f9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
@@ -873,4 +873,21 @@ public class HiveMetaStoreClientHelper {
         }
         return defaultValue;
     }
+
+    /**
+     * Returns a one-character string: the char for the byte parsed from altValue (mapped
+     * into 0-255), or altValue's first character when it is not a parsable byte number.
+     *
+     * @param altValue the string to convert; a null or empty value makes this return null.
+     */
+    public static String getByte(String altValue) {
+        if (altValue != null && altValue.length() > 0) {
+            try {
+                return String.valueOf((char) ((Byte.parseByte(altValue) + 256) 
% 256));
+            } catch (NumberFormatException e) {
+                return altValue.substring(0, 1);
+            }
+        }
+        return null;
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 0214ecc4642..abb8cc8dda3 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -86,6 +86,7 @@ public class HiveScanNode extends FileQueryScanNode {
     public static final String DEFAULT_LINE_DELIMITER = "\n";
     public static final String PROP_SEPARATOR_CHAR = "separatorChar";
     public static final String PROP_QUOTE_CHAR = "quoteChar";
+    public static final String PROP_SERIALIZATION_FORMAT = 
"serialization.format";
 
     public static final String PROP_COLLECTION_DELIMITER_HIVE2 = 
"colelction.delim";
     public static final String PROP_COLLECTION_DELIMITER_HIVE3 = 
"collection.delim";
@@ -447,29 +448,32 @@ public class HiveScanNode extends FileQueryScanNode {
         TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
 
         // 1. set column separator
-        Optional<String> fieldDelim =
-                
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), 
PROP_FIELD_DELIMITER);
-        Optional<String> columnSeparator =
-                
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), 
PROP_SEPARATOR_CHAR);
-        
textParams.setColumnSeparator(HiveMetaStoreClientHelper.firstPresentOrDefault(
-                DEFAULT_FIELD_DELIMITER, fieldDelim, columnSeparator));
+        Optional<String> fieldDelim = 
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+                PROP_FIELD_DELIMITER);
+        Optional<String> serFormat = 
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+                PROP_SERIALIZATION_FORMAT);
+        Optional<String> columnSeparator = 
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+                PROP_SEPARATOR_CHAR);
+        
textParams.setColumnSeparator(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
+                DEFAULT_FIELD_DELIMITER, fieldDelim, columnSeparator, 
serFormat)));
         // 2. set line delimiter
-        Optional<String> lineDelim =
-                
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), 
PROP_LINE_DELIMITER);
-        
textParams.setLineDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
-                DEFAULT_LINE_DELIMITER, lineDelim));
+        Optional<String> lineDelim = 
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+                PROP_LINE_DELIMITER);
+        
textParams.setLineDelimiter(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
+                DEFAULT_LINE_DELIMITER, lineDelim)));
         // 3. set mapkv delimiter
-        Optional<String> mapkvDelim =
-                
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), 
PROP_MAP_KV_DELIMITER);
-        
textParams.setMapkvDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
-                DEFAULT_MAP_KV_DELIMITER, mapkvDelim));
+        Optional<String> mapkvDelim = 
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+                PROP_MAP_KV_DELIMITER);
+        
textParams.setMapkvDelimiter(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
+                DEFAULT_MAP_KV_DELIMITER, mapkvDelim)));
         // 4. set collection delimiter
-        Optional<String> collectionDelimHive2 =
-                
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), 
PROP_COLLECTION_DELIMITER_HIVE2);
-        Optional<String> collectionDelimHive3 =
-                
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), 
PROP_COLLECTION_DELIMITER_HIVE3);
-        
textParams.setCollectionDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
-                DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2, 
collectionDelimHive3));
+        Optional<String> collectionDelimHive2 = 
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+                PROP_COLLECTION_DELIMITER_HIVE2);
+        Optional<String> collectionDelimHive3 = 
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+                PROP_COLLECTION_DELIMITER_HIVE3);
+        textParams.setCollectionDelimiter(
+                
HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
+                        DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2, 
collectionDelimHive3)));
         // 5. set quote char
         Map<String, String> serdeParams = 
hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters();
         if (serdeParams.containsKey(PROP_QUOTE_CHAR)) {
diff --git 
a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out 
b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
index b00eebec49d..38918c3fc6f 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
@@ -7,7 +7,23 @@ b      2.2
 1      abc
 2      def
 
--- !2 --
+-- !3 --
+1      abc
+2      def
+
+-- !4 --
+1      abc
+2      def
+
+-- !5 --
+1      abc
+2      def
+
+-- !6 --
+1      abc
+2      def
+
+-- !7 --
 1      abc
 2      def
 
@@ -19,7 +35,23 @@ b    2.2
 1      abc
 2      def
 
--- !2 --
+-- !3 --
+1      abc
+2      def
+
+-- !4 --
+1      abc
+2      def
+
+-- !5 --
+1      abc
+2      def
+
+-- !6 --
+1      abc
+2      def
+
+-- !7 --
 1      abc
 2      def
 
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
index 3ae6b21bbba..0da2eb3160a 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
@@ -40,7 +40,11 @@ suite("test_hive_serde_prop", 
"external_docker,hive,external_docker_hive,p0,exte
 
 
         qt_2 """select * from ${catalog_name}.regression.serde_test1 order by 
id;"""
-        qt_2 """select * from ${catalog_name}.regression.serde_test2 order by 
id;"""
+        qt_3 """select * from ${catalog_name}.regression.serde_test2 order by 
id;"""
+        qt_4 """select * from ${catalog_name}.regression.serde_test3 order by 
id;"""
+        qt_5 """select * from ${catalog_name}.regression.serde_test4 order by 
id;"""
+        qt_6 """select * from ${catalog_name}.regression.serde_test5 order by 
id;"""
+        qt_7 """select * from ${catalog_name}.regression.serde_test6 order by 
id;"""
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to