This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 3613413a54b [fix](hive) support find serde info from both tbl properties and serde properties (#37043) (#37188)
3613413a54b is described below

commit 3613413a54b63eb8ec51ab29c676fed778e90d59
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Thu Jul 4 13:55:38 2024 +0800

    [fix](hive) support find serde info from both tbl properties and serde properties (#37043) (#37188)
    
    bp #37043
---
 .../hive/scripts/data/regression/serde_prop/run.sh |  9 ++++
 .../regression/serde_prop/some_serde_table.hql     | 34 +++++++++++++
 .../datasource/hive/HiveMetaStoreClientHelper.java | 26 ++++++++++
 .../doris/datasource/hive/source/HiveScanNode.java | 58 ++++++++++++----------
 .../hive/test_hive_serde_prop.out                  | 16 ++++++
 .../hive/test_hive_serde_prop.groovy               |  4 ++
 6 files changed, 121 insertions(+), 26 deletions(-)

diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/run.sh
new file mode 100755
index 00000000000..ef6538563d5
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/run.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+# create table
+hive -f "${CUR_DIR}"/some_serde_table.hql
+
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
new file mode 100644
index 00000000000..fa6ad791118
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
@@ -0,0 +1,34 @@
+create database if not exists regression;
+use regression;
+
+CREATE TABLE `serde_test1`(
+  `id` int,
+  `name` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+  'field.delim'='',
+  'serialization.format'='')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+CREATE TABLE `serde_test2`(
+  `id` int, 
+  `name` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+WITH SERDEPROPERTIES ( 
+  'field.delim'='', 
+  'serialization.format'='') 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+TBLPROPERTIES (
+   'field.delim'='|'
+);
+
+insert into serde_test1 values(1, "abc"),(2, "def");
+insert into serde_test2 values(1, "abc"),(2, "def");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
index 2e7693619b8..7ad7621f7cc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -79,6 +80,7 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -847,4 +849,28 @@ public class HiveMetaStoreClientHelper {
         }
         return conf;
     }
+
+    public static Optional<String> getSerdeProperty(Table table, String key) {
+        String valueFromSd = table.getSd().getSerdeInfo().getParameters().get(key);
+        String valueFromTbl = table.getParameters().get(key);
+        return firstNonNullable(valueFromTbl, valueFromSd);
+    }
+
+    private static Optional<String> firstNonNullable(String... values) {
+        for (String value : values) {
+            if (!Strings.isNullOrEmpty(value)) {
+                return Optional.of(value);
+            }
+        }
+        return Optional.empty();
+    }
+
+    public static String firstPresentOrDefault(String defaultValue, Optional<String>... values) {
+        for (Optional<String> value : values) {
+            if (value.isPresent()) {
+                return value.get();
+            }
+        }
+        return defaultValue;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 1970a48f2d4..0214ecc4642 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -85,7 +85,7 @@ public class HiveScanNode extends FileQueryScanNode {
     public static final String PROP_LINE_DELIMITER = "line.delim";
     public static final String DEFAULT_LINE_DELIMITER = "\n";
     public static final String PROP_SEPARATOR_CHAR = "separatorChar";
-    public static final String PROP_QUOTA_CHAR = "quoteChar";
+    public static final String PROP_QUOTE_CHAR = "quoteChar";
 
     public static final String PROP_COLLECTION_DELIMITER_HIVE2 = "colelction.delim";
     public static final String PROP_COLLECTION_DELIMITER_HIVE3 = "collection.delim";
@@ -445,32 +445,37 @@ public class HiveScanNode extends FileQueryScanNode {
     @Override
     protected TFileAttributes getFileAttributes() throws UserException {
         TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
-        java.util.Map<String, String> delimiter = hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters();
-        if (delimiter.containsKey(PROP_FIELD_DELIMITER)) {
-            if (delimiter.get(PROP_FIELD_DELIMITER).length() == 0) {
-                textParams.setColumnSeparator(DEFAULT_FIELD_DELIMITER);
-            } else {
-                textParams.setColumnSeparator(delimiter.get(PROP_FIELD_DELIMITER));
-            }
-        } else if (delimiter.containsKey(PROP_SEPARATOR_CHAR)) {
-            textParams.setColumnSeparator(delimiter.get(PROP_SEPARATOR_CHAR));
-        } else {
-            textParams.setColumnSeparator(DEFAULT_FIELD_DELIMITER);
-        }
-        if (delimiter.containsKey(PROP_QUOTA_CHAR)) {
-            textParams.setEnclose(delimiter.get(PROP_QUOTA_CHAR).getBytes()[0]);
-        }
-        textParams.setLineDelimiter(delimiter.getOrDefault(PROP_LINE_DELIMITER, DEFAULT_LINE_DELIMITER));
-        textParams.setMapkvDelimiter(delimiter.getOrDefault(PROP_MAP_KV_DELIMITER, DEFAULT_MAP_KV_DELIMITER));
-
-        //  textParams.collection_delimiter field is map, array and struct delimiter;
-        if (delimiter.get(PROP_COLLECTION_DELIMITER_HIVE2) != null) {
-            textParams.setCollectionDelimiter(delimiter.get(PROP_COLLECTION_DELIMITER_HIVE2));
-        } else if (delimiter.get(PROP_COLLECTION_DELIMITER_HIVE3) != null) {
-            textParams.setCollectionDelimiter(delimiter.get(PROP_COLLECTION_DELIMITER_HIVE3));
-        } else {
-            textParams.setCollectionDelimiter(DEFAULT_COLLECTION_DELIMITER);
+
+        // 1. set column separator
+        Optional<String> fieldDelim =
+                HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_FIELD_DELIMITER);
+        Optional<String> columnSeparator =
+                HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_SEPARATOR_CHAR);
+        textParams.setColumnSeparator(HiveMetaStoreClientHelper.firstPresentOrDefault(
+                DEFAULT_FIELD_DELIMITER, fieldDelim, columnSeparator));
+        // 2. set line delimiter
+        Optional<String> lineDelim =
+                HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_LINE_DELIMITER);
+        textParams.setLineDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
+                DEFAULT_LINE_DELIMITER, lineDelim));
+        // 3. set mapkv delimiter
+        Optional<String> mapkvDelim =
+                HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_MAP_KV_DELIMITER);
+        textParams.setMapkvDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
+                DEFAULT_MAP_KV_DELIMITER, mapkvDelim));
+        // 4. set collection delimiter
+        Optional<String> collectionDelimHive2 =
+                HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_COLLECTION_DELIMITER_HIVE2);
+        Optional<String> collectionDelimHive3 =
+                HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_COLLECTION_DELIMITER_HIVE3);
+        textParams.setCollectionDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
+                DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2, collectionDelimHive3));
+        // 5. set quote char
+        Map<String, String> serdeParams = hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters();
+        if (serdeParams.containsKey(PROP_QUOTE_CHAR)) {
+            textParams.setEnclose(serdeParams.get(PROP_QUOTE_CHAR).getBytes()[0]);
         }
+
         TFileAttributes fileAttributes = new TFileAttributes();
         fileAttributes.setTextParams(textParams);
         fileAttributes.setHeaderType("");
@@ -502,3 +507,4 @@ public class HiveScanNode extends FileQueryScanNode {
         return compressType;
     }
 }
+
diff --git a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
index 818db069d50..b00eebec49d 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
@@ -3,7 +3,23 @@
 a      1.1
 b      2.2
 
+-- !2 --
+1      abc
+2      def
+
+-- !2 --
+1      abc
+2      def
+
 -- !1 --
 a      1.1
 b      2.2
 
+-- !2 --
+1      abc
+2      def
+
+-- !2 --
+1      abc
+2      def
+
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
index 7ac366748b6..3ae6b21bbba 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
@@ -37,6 +37,10 @@ suite("test_hive_serde_prop", "external_docker,hive,external_docker_hive,p0,exte
             );"""
 
                qt_1 """select * from ${catalog_name}.${ex_db_name}.employee_gz order by name;"""
+
+
+        qt_2 """select * from ${catalog_name}.regression.serde_test1 order by id;"""
+        qt_2 """select * from ${catalog_name}.regression.serde_test2 order by id;"""
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to