This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new b82de68d7e7 [feature][insert]add hive table sink thrift (#32274) (#32360)
b82de68d7e7 is described below

commit b82de68d7e71b10677e1cbb61cb45bb2deef67fb
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Mon Mar 18 10:46:17 2024 +0800

    [feature][insert]add hive table sink thrift (#32274) (#32360)
    
    bp #32274
---
 gensrc/thrift/DataSinks.thrift       | 34 +++++++++++++++++++++-------------
 gensrc/thrift/FrontendService.thrift |  3 +++
 gensrc/thrift/Partitions.thrift      |  8 +++++++-
 gensrc/thrift/PlanNodes.thrift       |  4 +++-
 4 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/gensrc/thrift/DataSinks.thrift b/gensrc/thrift/DataSinks.thrift
index 7c9d5e8f8c2..e7683c25fd4 100644
--- a/gensrc/thrift/DataSinks.thrift
+++ b/gensrc/thrift/DataSinks.thrift
@@ -38,6 +38,7 @@ enum TDataSinkType {
     MULTI_CAST_DATA_STREAM_SINK,
     GROUP_COMMIT_OLAP_TABLE_SINK, // deprecated
     GROUP_COMMIT_BLOCK_SINK,
+    HIVE_TABLE_SINK,
 }
 
 enum TResultSinkType {
@@ -101,7 +102,7 @@ enum TParquetRepetitionType {
 struct TParquetSchema {
     1: optional TParquetRepetitionType schema_repetition_type
     2: optional TParquetDataType schema_data_type
-    3: optional string schema_column_name    
+    3: optional string schema_column_name
     4: optional TParquetDataLogicalType schema_data_logical_type
 }
 
@@ -280,6 +281,7 @@ struct TOlapTableSink {
 struct THiveLocationParams {
   1: optional string write_path
   2: optional string target_path
+  3: optional Types.TFileType file_type
 }
 
 struct TSortedColumn {
@@ -298,11 +300,16 @@ struct THiveBucket {
     4: optional list<TSortedColumn> sorted_by
 }
 
-enum THiveCompressionType {
-    SNAPPY = 3,
-    LZ4 = 4,
-    ZLIB = 6,
-    ZSTD = 7,
+enum THiveColumnType {
+    PARTITION_KEY = 0,
+    REGULAR = 1,
+    SYNTHESIZED = 2
+}
+
+struct THiveColumn {
+  1: optional string name
+  2: optional Types.TTypeDesc data_type
+  3: optional THiveColumnType column_type
 }
 
 struct THivePartition {
@@ -314,13 +321,14 @@ struct THivePartition {
 struct THiveTableSink {
     1: optional string db_name
     2: optional string table_name
-    3: optional list<string> data_column_names
-    4: optional list<string> partition_column_names
-    5: optional list<THivePartition> partitions
-    6: optional list<THiveBucket> buckets
-    7: optional PlanNodes.TFileFormatType file_format
-    8: optional THiveCompressionType compression_type
-    9: optional THiveLocationParams location
+    3: optional list<THiveColumn> columns
+    4: optional list<THivePartition> partitions
+    5: optional THiveBucket bucket_info
+    6: optional PlanNodes.TFileFormatType file_format
+    7: optional PlanNodes.TFileCompressType compression_type
+    8: optional THiveLocationParams location
+    9: optional map<string, string> hadoop_config
+    10: optional bool overwrite
 }
 
 enum TUpdateMode {
diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift
index 9f47037c56d..049c8450b23 100644
--- a/gensrc/thrift/FrontendService.thrift
+++ b/gensrc/thrift/FrontendService.thrift
@@ -29,6 +29,7 @@ include "Exprs.thrift"
 include "RuntimeProfile.thrift"
 include "MasterService.thrift"
 include "AgentService.thrift"
+include "DataSinks.thrift"
 
 // These are supporting structs for JniFrontend.java, which serves as the glue
 // between our C++ execution environment and the Java frontend.
@@ -481,6 +482,8 @@ struct TReportExecStatusParams {
   24: optional TQueryStatistics query_statistics // deprecated
 
  25: optional TReportWorkloadRuntimeStatusParams report_workload_runtime_status
+
+  26: optional list<DataSinks.THivePartitionUpdate> hive_partition_updates
 }
 
 struct TFeResult {
diff --git a/gensrc/thrift/Partitions.thrift b/gensrc/thrift/Partitions.thrift
index 0a7e70c0a4f..4e306c2970b 100644
--- a/gensrc/thrift/Partitions.thrift
+++ b/gensrc/thrift/Partitions.thrift
@@ -43,7 +43,13 @@ enum TPartitionType {
   BUCKET_SHFFULE_HASH_PARTITIONED,
 
   // used for shuffle data by parititon and tablet
-  TABLET_SINK_SHUFFLE_PARTITIONED
+  TABLET_SINK_SHUFFLE_PARTITIONED,
+
+  // used for shuffle data by hive parititon
+  TABLE_SINK_HASH_PARTITIONED,
+
+  // used for hive unparititoned table
+  TABLE_SINK_RANDOM_PARTITIONED
 }
 
 enum TDistributionType {
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index f9459256548..da3643747fb 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -138,7 +138,9 @@ enum TFileCompressType {
     DEFLATE,
     LZOP,
     LZ4BLOCK,
-    SNAPPYBLOCK
+    SNAPPYBLOCK,
+    ZLIB,
+    ZSTD
 }
 
 struct THdfsConf {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to