This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-seq_rc_file_hive
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 6c323af213c45254b4a759a30c7ed98a7351fb13
Author: morningman <morning...@163.com>
AuthorDate: Tue Sep 24 22:45:58 2024 +0800

    [wip] support hive with sequence file and rcfile
---
 .../doris/datasource/hive/HMSExternalTable.java    |  2 ++
 .../datasource/hive/HiveMetaStoreClientHelper.java |  4 ++-
 .../doris/datasource/hive/source/HiveScanNode.java | 29 +++++++++++++++-------
 3 files changed, 25 insertions(+), 10 deletions(-)
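
For context, below is a minimal, self-contained sketch (not Doris code) of the mapping this WIP patch wires up: the table's Hive input-format class selects the scan format, and for RCFile the serde library decides between the text and binary columnar variants. The class, enum, and method names here are illustrative stand-ins, not Doris APIs.

public final class HiveFormatMappingSketch {
    // Illustrative stand-in for the TFileFormatType values touched by this patch.
    enum ScanFormat { PARQUET, ORC, CSV_PLAIN, SEQUENCE, RC_TEXT, RC_BINARY }

    static ScanFormat resolve(String inputFormatClass, String serdeLib) {
        if (inputFormatClass.endsWith("MapredParquetInputFormat")) {
            return ScanFormat.PARQUET;
        } else if (inputFormatClass.endsWith("OrcInputFormat")) {
            return ScanFormat.ORC;
        } else if (inputFormatClass.endsWith("TextInputFormat")) {
            return ScanFormat.CSV_PLAIN;
        } else if (inputFormatClass.endsWith("SequenceFileInputFormat")) {
            return ScanFormat.SEQUENCE;
        } else if (inputFormatClass.endsWith("RCFileInputFormat")) {
            // RCFile rows may be serialized as text (ColumnarSerDe)
            // or binary (LazyBinaryColumnarSerDe).
            if (serdeLib.equals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) {
                return ScanFormat.RC_TEXT;
            }
            if (serdeLib.equals("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) {
                return ScanFormat.RC_BINARY;
            }
            throw new IllegalArgumentException("unsupported RCFile serde: " + serdeLib);
        }
        throw new IllegalArgumentException("unsupported input format: " + inputFormatClass);
    }

    public static void main(String[] args) {
        // Prints RC_BINARY for an RCFile table using the lazy binary columnar serde.
        System.out.println(resolve(
                "org.apache.hadoop.hive.ql.io.RCFileInputFormat",
                "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe"));
    }
}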

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 6179bf5f19c..3934abb3169 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -120,6 +120,8 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI
         // So add to SUPPORTED_HIVE_FILE_FORMATS and treat is as a hive table.
         // Then Doris will just list the files from location and read parquet files directly.
         SUPPORTED_HIVE_FILE_FORMATS.add("org.apache.hudi.hadoop.HoodieParquetInputFormatBase");
+        SUPPORTED_HIVE_FILE_FORMATS.add("org.apache.hadoop.hive.ql.io.RCFileInputFormat");
+        SUPPORTED_HIVE_FILE_FORMATS.add("org.apache.hadoop.mapred.SequenceFileInputFormat");
 
         SUPPORTED_HIVE_TRANSACTIONAL_FILE_FORMATS = Sets.newHashSet();
         SUPPORTED_HIVE_TRANSACTIONAL_FILE_FORMATS.add("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
index db6019eda97..31f3536a66b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
@@ -98,7 +98,9 @@ public class HiveMetaStoreClientHelper {
     public enum HiveFileFormat {
         TEXT_FILE(0, "text"),
         PARQUET(1, "parquet"),
-        ORC(2, "orc");
+        ORC(2, "orc"),
+        RCFILE(3, "rcfile"),
+        SEQUENCE_FILE(4, "sequencefile");
 
         private int index;
         private String desc;
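
For readers following how the two new constants get used: the scan node below resolves the table's input-format class through HiveFileFormat.getFormat() and compares the result against getDesc(). The real getFormat() lives in HiveMetaStoreClientHelper and is not part of this diff; the enum below is only a rough, hypothetical sketch of that contract, with a guessed fallback to TEXT_FILE for unmatched classes.

enum HiveFileFormatSketch {
    TEXT_FILE(0, "text"),
    PARQUET(1, "parquet"),
    ORC(2, "orc"),
    RCFILE(3, "rcfile"),
    SEQUENCE_FILE(4, "sequencefile");

    private final int index;
    private final String desc;

    HiveFileFormatSketch(int index, String desc) {
        this.index = index;
        this.desc = desc;
    }

    public String getDesc() {
        return desc;
    }

    // Hypothetical resolver: map a Hive input-format class name to a desc string.
    public static String getFormat(String inputFormatClass) {
        if (inputFormatClass.contains("SequenceFileInputFormat")) {
            return SEQUENCE_FILE.getDesc();
        } else if (inputFormatClass.contains("RCFileInputFormat")) {
            return RCFILE.getDesc();
        } else if (inputFormatClass.contains("Parquet")) {
            return PARQUET.getDesc();
        } else if (inputFormatClass.contains("Orc")) {
            return ORC.getDesc();
        }
        return TEXT_FILE.getDesc(); // assumption; the real lookup may throw instead
    }
}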
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 634c596c69f..2595a8ce2aa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -37,6 +37,7 @@ import org.apache.doris.datasource.hive.HMSExternalTable;
 import org.apache.doris.datasource.hive.HiveMetaStoreCache;
 import org.apache.doris.datasource.hive.HiveMetaStoreCache.FileCacheValue;
 import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
+import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper.HiveFileFormat;
 import org.apache.doris.datasource.hive.HivePartition;
 import org.apache.doris.datasource.hive.HiveTransaction;
 import org.apache.doris.datasource.hive.source.HiveSplit.HiveSplitCreator;
@@ -410,17 +411,27 @@ public class HiveScanNode extends FileQueryScanNode {
 
     @Override
     public TFileFormatType getFileFormatType() throws UserException {
-        TFileFormatType type = null;
         String inputFormatName = hmsTable.getRemoteTable().getSd().getInputFormat();
-        String hiveFormat = HiveMetaStoreClientHelper.HiveFileFormat.getFormat(inputFormatName);
-        if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.PARQUET.getDesc())) {
-            type = TFileFormatType.FORMAT_PARQUET;
-        } else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.ORC.getDesc())) {
-            type = TFileFormatType.FORMAT_ORC;
-        } else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.TEXT_FILE.getDesc())) {
-            type = TFileFormatType.FORMAT_CSV_PLAIN;
+        String hiveFormat = HiveFileFormat.getFormat(inputFormatName);
+        if (hiveFormat.equals(HiveFileFormat.PARQUET.getDesc())) {
+            return TFileFormatType.FORMAT_PARQUET;
+        } else if (hiveFormat.equals(HiveFileFormat.ORC.getDesc())) {
+            return TFileFormatType.FORMAT_ORC;
+        } else if (hiveFormat.equals(HiveFileFormat.TEXT_FILE.getDesc())) {
+            return TFileFormatType.FORMAT_CSV_PLAIN;
+        } else if (hiveFormat.equals(HiveFileFormat.SEQUENCE_FILE.getDesc())) {
+            return TFileFormatType.FORMAT_SEQUENCE;
+        } else if (hiveFormat.equals(HiveFileFormat.RCFILE.getDesc())) {
+            String serdeLib = hmsTable.getRemoteTable().getSd().getSerdeInfo().getSerializationLib();
+            if (serdeLib.equals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) {
+                return TFileFormatType.FORMAT_RCTEXT;
+            } else if (serdeLib.equals("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) {
+                return TFileFormatType.FORMAT_RCBINARY;
+            } else {
+                throw new UserException("not support RCFile serdeLib: " + serdeLib);
+            }
         }
-        return type;
+        throw new UserException("unsupported hive file format: " + hiveFormat);
     }
 
     @Override

