This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-seq_rc_file_hive
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 6c323af213c45254b4a759a30c7ed98a7351fb13
Author: morningman <morning...@163.com>
AuthorDate: Tue Sep 24 22:45:58 2024 +0800

    [wip] support hive with sequence file and rcfile
---
 .../doris/datasource/hive/HMSExternalTable.java     |  2 ++
 .../datasource/hive/HiveMetaStoreClientHelper.java   |  4 ++-
 .../doris/datasource/hive/source/HiveScanNode.java   | 29 +++++++++++++++-------
 3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 6179bf5f19c..3934abb3169 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -120,6 +120,8 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI
         // So add to SUPPORTED_HIVE_FILE_FORMATS and treat is as a hive table.
         // Then Doris will just list the files from location and read parquet files directly.
         SUPPORTED_HIVE_FILE_FORMATS.add("org.apache.hudi.hadoop.HoodieParquetInputFormatBase");
+        SUPPORTED_HIVE_FILE_FORMATS.add("org.apache.hadoop.hive.ql.io.RCFileInputFormat");
+        SUPPORTED_HIVE_FILE_FORMATS.add("org.apache.hadoop.mapred.SequenceFileInputFormat");
 
         SUPPORTED_HIVE_TRANSACTIONAL_FILE_FORMATS = Sets.newHashSet();
         SUPPORTED_HIVE_TRANSACTIONAL_FILE_FORMATS.add("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
index db6019eda97..31f3536a66b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
@@ -98,7 +98,9 @@ public class HiveMetaStoreClientHelper {
     public enum HiveFileFormat {
         TEXT_FILE(0, "text"),
         PARQUET(1, "parquet"),
-        ORC(2, "orc");
+        ORC(2, "orc"),
+        RCFILE(3, "rcfile"),
+        SEQUENCE_FILE(4, "sequencefile");
 
         private int index;
         private String desc;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 634c596c69f..2595a8ce2aa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -37,6 +37,7 @@ import org.apache.doris.datasource.hive.HMSExternalTable;
 import org.apache.doris.datasource.hive.HiveMetaStoreCache;
 import org.apache.doris.datasource.hive.HiveMetaStoreCache.FileCacheValue;
 import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
+import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper.HiveFileFormat;
 import org.apache.doris.datasource.hive.HivePartition;
 import org.apache.doris.datasource.hive.HiveTransaction;
 import org.apache.doris.datasource.hive.source.HiveSplit.HiveSplitCreator;
@@ -410,17 +411,27 @@ public class HiveScanNode extends FileQueryScanNode {
 
     @Override
     public TFileFormatType getFileFormatType() throws UserException {
-        TFileFormatType type = null;
         String inputFormatName = hmsTable.getRemoteTable().getSd().getInputFormat();
-        String hiveFormat = HiveMetaStoreClientHelper.HiveFileFormat.getFormat(inputFormatName);
-        if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.PARQUET.getDesc())) {
-            type = TFileFormatType.FORMAT_PARQUET;
-        } else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.ORC.getDesc())) {
-            type = TFileFormatType.FORMAT_ORC;
-        } else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.TEXT_FILE.getDesc())) {
-            type = TFileFormatType.FORMAT_CSV_PLAIN;
+        String hiveFormat = HiveFileFormat.getFormat(inputFormatName);
+        if (hiveFormat.equals(HiveFileFormat.PARQUET.getDesc())) {
+            return TFileFormatType.FORMAT_PARQUET;
+        } else if (hiveFormat.equals(HiveFileFormat.ORC.getDesc())) {
+            return TFileFormatType.FORMAT_ORC;
+        } else if (hiveFormat.equals(HiveFileFormat.TEXT_FILE.getDesc())) {
+            return TFileFormatType.FORMAT_CSV_PLAIN;
+        } else if (hiveFormat.equals(HiveFileFormat.SEQUENCE_FILE.getDesc())) {
+            return TFileFormatType.FORMAT_SEQUENCE;
+        } else if (hiveFormat.equals(HiveFileFormat.RCFILE.getDesc())) {
+            String serdeLib = hmsTable.getRemoteTable().getSd().getSerdeInfo().getSerializationLib();
+            if (serdeLib.equals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) {
+                return TFileFormatType.FORMAT_RCTEXT;
+            } else if (serdeLib.equals("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) {
+                return TFileFormatType.FORMAT_RCBINARY;
+            } else {
+                throw new UserException("not support RCFile serdeLib: " + serdeLib);
+            }
         }
-        return type;
+        throw new UserException("unsupported hive file format: " + hiveFormat);
     }
 
     @Override
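The core of the change: SequenceFile maps straight to a sequence reader format, while RCFile needs a second check on the table's serde library, because RCFileInputFormat alone does not say whether column values are text- or binary-encoded. The self-contained sketch below (a hypothetical class HiveFormatMappingSketch, plain strings in place of Doris' TFileFormatType, and a simplified suffix match instead of HiveFileFormat.getFormat()) only illustrates that decision order; it is not the Doris implementation.

// Hypothetical, standalone sketch; not part of the patch. Compiles without Doris or Thrift.
public class HiveFormatMappingSketch {

    // Maps a Hive input format class name (and, for RCFile, the serde library) to a reader format name.
    static String mapFormat(String inputFormat, String serdeLib) {
        if (inputFormat.endsWith("SequenceFileInputFormat")) {
            return "FORMAT_SEQUENCE";
        }
        if (inputFormat.endsWith("RCFileInputFormat")) {
            // RCFile alone does not fix the value encoding; the serde decides text vs binary.
            if ("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe".equals(serdeLib)) {
                return "FORMAT_RCTEXT";
            }
            if ("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe".equals(serdeLib)) {
                return "FORMAT_RCBINARY";
            }
            throw new IllegalArgumentException("not support RCFile serdeLib: " + serdeLib);
        }
        throw new IllegalArgumentException("unsupported hive input format: " + inputFormat);
    }

    public static void main(String[] args) {
        // Expected: FORMAT_SEQUENCE
        System.out.println(mapFormat("org.apache.hadoop.mapred.SequenceFileInputFormat", null));
        // Expected: FORMAT_RCBINARY
        System.out.println(mapFormat("org.apache.hadoop.hive.ql.io.RCFileInputFormat",
                "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe"));
    }
}

In the patch itself the same branches return TFileFormatType.FORMAT_SEQUENCE, FORMAT_RCTEXT, and FORMAT_RCBINARY, and any other serde or input format ends in a UserException.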
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org