This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 4888c632f4c [cherry-pick](branch2.1) support escape.delim and serialization.null.format for hive text (#41684)
4888c632f4c is described below

commit 4888c632f4c7190f33f9675055fcda7a0eeb0c56
Author: Socrates <suxiaogang...@icloud.com>
AuthorDate: Tue Oct 15 00:08:23 2024 +0800

    [cherry-pick](branch2.1) support escape.delim and serialization.null.format for hive text (#41684)
    
    ## Proposed changes
    pick from master:
    https://github.com/apache/doris/pull/40291
---
 .../vec/data_types/serde/data_type_array_serde.cpp |  3 ++
 .../vec/data_types/serde/data_type_map_serde.cpp   | 10 ++++--
 .../data_types/serde/data_type_nullable_serde.cpp  |  4 +--
 be/src/vec/data_types/serde/data_type_serde.h      | 17 +++++----
 .../vec/data_types/serde/data_type_string_serde.h  | 42 ++++++++++++++++++++++
 .../data_types/serde/data_type_struct_serde.cpp    |  3 ++
 be/src/vec/exec/format/csv/csv_reader.cpp          | 34 ++++++++++++++++--
 be/src/vec/exec/format/csv/csv_reader.h            | 16 +++++++++
 be/src/vec/runtime/vcsv_transformer.cpp            | 15 ++++++--
 .../scripts/create_preinstalled_scripts/run42.hql  |  1 -
 .../regression/serde_prop/some_serde_table.hql     | 20 +++--------
 .../doris/datasource/hive/source/HiveScanNode.java | 25 +++++++------
 .../org/apache/doris/planner/HiveTableSink.java    | 13 +++++--
 gensrc/thrift/PlanNodes.thrift                     |  1 +
 .../hive/test_hive_basic_type.out                  |  8 ++---
 .../hive/test_hive_serde_prop.out                  | 24 +++++++++++++
 .../hive/test_hive_serde_prop.groovy               | 22 +++---------
 17 files changed, 191 insertions(+), 67 deletions(-)

diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp 
b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index 0c606f4eeb2..872dd84d8c7 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -169,6 +169,9 @@ Status 
DataTypeArraySerDe::deserialize_one_cell_from_hive_text(
     for (int idx = 0, start = 0; idx <= slice.size; idx++) {
         char c = (idx == slice.size) ? collection_delimiter : slice[idx];
         if (c == collection_delimiter) {
+            if (options.escape_char != 0 && idx > 0 && slice[idx - 1] == 
options.escape_char) {
+                continue;
+            }
             slices.emplace_back(slice.data + start, idx - start);
             start = idx + 1;
         }
diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp 
b/be/src/vec/data_types/serde/data_type_map_serde.cpp
index a1a65e7ea5b..2140885942d 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp
@@ -97,13 +97,17 @@ Status 
DataTypeMapSerDe::deserialize_one_cell_from_hive_text(
          *
          *  So i use 'kv <= from' in order to get _map_kv_delimiter that 
appears first.
          * */
-        if (i < slice.size && slice[i] == map_kv_delimiter && kv <= from) {
+        if (i < slice.size && slice[i] == map_kv_delimiter && kv <= from &&
+            (options.escape_char == 0 || i == 0 || slice[i - 1] != 
options.escape_char)) {
             kv = i;
             continue;
         }
         if ((i == slice.size || slice[i] == collection_delimiter) && i >= kv + 
1) {
-            key_slices.push_back({slice.data + from, kv - from});
-            value_slices.push_back({slice.data + kv + 1, i - 1 - kv});
+            if (options.escape_char != 0 && i > 0 && slice[i - 1] == 
options.escape_char) {
+                continue;
+            }
+            key_slices.emplace_back(slice.data + from, kv - from);
+            value_slices.emplace_back(slice.data + kv + 1, i - 1 - kv);
             from = i + 1;
             kv = from;
         }
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp 
b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index 1b26d775d2d..1af85bd040d 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -88,7 +88,7 @@ Status DataTypeNullableSerDe::serialize_one_cell_to_hive_text(
 
     const auto& col_null = assert_cast<const ColumnNullable&>(*ptr);
     if (col_null.is_null_at(row_num)) {
-        bw.write(NULL_IN_CSV_FOR_ORDINARY_TYPE.c_str(), 2);
+        bw.write(options.null_format, options.null_len);
     } else {
         RETURN_IF_ERROR(nested_serde->serialize_one_cell_to_hive_text(
                 col_null.get_nested_column(), row_num, bw, options,
@@ -101,7 +101,7 @@ Status 
DataTypeNullableSerDe::deserialize_one_cell_from_hive_text(
         IColumn& column, Slice& slice, const FormatOptions& options,
         int hive_text_complex_type_delimiter_level) const {
     auto& null_column = assert_cast<ColumnNullable&>(column);
-    if (slice.size == 2 && slice[0] == '\\' && slice[1] == 'N') {
+    if (slice.compare(Slice(options.null_format, options.null_len)) == 0) {
         null_column.insert_data(nullptr, 0);
         return Status::OK();
     }
diff --git a/be/src/vec/data_types/serde/data_type_serde.h 
b/be/src/vec/data_types/serde/data_type_serde.h
index f09cf5a8a49..fd773718000 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -137,6 +137,10 @@ public:
         bool converted_from_string = false;
 
         char escape_char = 0;
+        /**
+         * flags for each byte to indicate if escape is needed.
+         */
+        bool need_escape[256] = {false};
 
         /**
          * only used for export data
@@ -148,8 +152,8 @@ public:
          *      NULL
          *      null
          */
-        const char* null_format;
-        int null_len;
+        const char* null_format = "\\N";
+        int null_len = 2;
 
         /**
          * The wrapper char for string type in nested type.
@@ -166,7 +170,7 @@ public:
             CHECK(0 <= hive_text_complex_type_delimiter_level &&
                   hive_text_complex_type_delimiter_level <= 153);
 
-            char ans = '\002';
+            char ans;
             
//https://github.com/apache/hive/blob/master/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySerDeParameters.java#L250
             //use only control chars that are very unlikely to be part of the 
string
             // the following might/likely to be used in text files for strings
@@ -175,8 +179,9 @@ public:
             // 12 (form feed, FF, \f, ^L),
             // 13 (carriage return, CR, \r, ^M),
             // 27 (escape, ESC, \e [GCC only], ^[).
-
-            if (hive_text_complex_type_delimiter_level == 1) {
+            if (hive_text_complex_type_delimiter_level == 0) {
+                ans = field_delim[0];
+            } else if (hive_text_complex_type_delimiter_level == 1) {
                 ans = collection_delim;
             } else if (hive_text_complex_type_delimiter_level == 2) {
                 ans = map_key_delim;
@@ -192,7 +197,7 @@ public:
             } else if (hive_text_complex_type_delimiter_level <= 25) {
                 // [22, 25] -> [28, 31]
                 ans = hive_text_complex_type_delimiter_level + 6;
-            } else if (hive_text_complex_type_delimiter_level <= 153) {
+            } else {
                 // [26, 153] -> [-128, -1]
                 ans = hive_text_complex_type_delimiter_level + (-26 - 128);
             }
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h 
b/be/src/vec/data_types/serde/data_type_string_serde.h
index d3161c88706..fe09ff615f4 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -96,6 +96,27 @@ public:
         return Status::OK();
     }
 
+    Status serialize_one_cell_to_hive_text(
+            const IColumn& column, int row_num, BufferWritable& bw, 
FormatOptions& options,
+            int hive_text_complex_type_delimiter_level = 1) const override {
+        auto result = check_column_const_set_readability(column, row_num);
+        ColumnPtr ptr = result.first;
+        row_num = result.second;
+        const auto& value = assert_cast<const 
ColumnType&>(*ptr).get_data_at(row_num);
+        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
+            if (options.escape_char != 0) {
+                StringRef str_ref = value;
+                write_with_escaped_char_to_hive_text(str_ref, bw, 
options.escape_char,
+                                                     options.need_escape);
+            } else {
+                bw.write(value.data, value.size);
+            }
+        } else {
+            bw.write(value.data, value.size);
+        }
+        return Status::OK();
+    }
+
     inline void write_with_escaped_char_to_json(StringRef value, 
BufferWritable& bw) const {
         for (char it : value) {
             switch (it) {
@@ -126,6 +147,17 @@ public:
         }
     }
 
+    inline void write_with_escaped_char_to_hive_text(StringRef value, 
BufferWritable& bw,
+                                                     char escape_char,
+                                                     const bool need_escape[]) 
const {
+        for (char it : value) {
+            if (need_escape[it & 0xff]) {
+                bw.write(escape_char);
+            }
+            bw.write(it);
+        }
+    }
+
     Status serialize_column_to_json(const IColumn& column, int start_idx, int 
end_idx,
                                     BufferWritable& bw, FormatOptions& 
options) const override {
         SERIALIZE_COLUMN_TO_JSON();
@@ -154,6 +186,16 @@ public:
         return Status::OK();
     }
 
+    Status deserialize_one_cell_from_hive_text(
+            IColumn& column, Slice& slice, const FormatOptions& options,
+            int hive_text_complex_type_delimiter_level = 1) const override {
+        if (options.escape_char != 0) {
+            escape_string(slice.data, slice.size, options.escape_char);
+        }
+        assert_cast<ColumnType&>(column).insert_data(slice.data, slice.size);
+        return Status::OK();
+    }
+
     Status deserialize_column_from_json_vector(IColumn& column, 
std::vector<Slice>& slices,
                                                int* num_deserialized,
                                                const FormatOptions& options) 
const override {
diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp 
b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
index c28a3acefde..d48f42e2227 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -249,6 +249,9 @@ Status 
DataTypeStructSerDe::deserialize_one_cell_from_hive_text(
     char* data = slice.data;
     for (size_t i = 0, from = 0; i <= slice.size; i++) {
         if (i == slice.size || data[i] == struct_delimiter) {
+            if (options.escape_char != 0 && i > 0 && data[i - 1] == 
options.escape_char) {
+                continue;
+            }
             slices.push_back({data + from, i - from});
             from = i + 1;
         }
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp 
b/be/src/vec/exec/format/csv/csv_reader.cpp
index 02841f8c3f0..d6a9b0c46b3 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -174,6 +174,23 @@ void PlainCsvTextFieldSplitter::do_split(const Slice& 
line, std::vector<Slice>*
     }
 }
 
+void HiveCsvTextFieldSplitter::do_split(const Slice& line, std::vector<Slice>* 
splitted_values) {
+    const char* data = line.data;
+    const size_t size = line.size;
+    size_t value_start = 0;
+    for (size_t i = 0; i < size; ++i) {
+        if (data[i] == _value_sep[0]) {
+            // hive will escape the field separator in string
+            if (_escape_char != 0 && i > 0 && data[i - 1] == _escape_char) {
+                continue;
+            }
+            process_value_func(data, value_start, i - value_start, 
_trimming_char, splitted_values);
+            value_start = i + _value_sep_len;
+        }
+    }
+    process_value_func(data, value_start, size - value_start, _trimming_char, 
splitted_values);
+}
+
 CsvReader::CsvReader(RuntimeState* state, RuntimeProfile* profile, 
ScannerCounter* counter,
                      const TFileScanRangeParams& params, const TFileRangeDesc& 
range,
                      const std::vector<SlotDescriptor*>& file_slot_descs, 
io::IOContext* io_ctx)
@@ -354,6 +371,12 @@ Status CsvReader::init_reader(bool is_load) {
     } else {
         _options.map_key_delim = 
_params.file_attributes.text_params.mapkv_delimiter[0];
     }
+
+    if (_params.file_attributes.text_params.__isset.null_format) {
+        _options.null_format = 
_params.file_attributes.text_params.null_format.data();
+        _options.null_len = 
_params.file_attributes.text_params.null_format.length();
+    }
+
     _use_nullable_string_opt.resize(_file_slot_descs.size());
     for (int i = 0; i < _file_slot_descs.size(); ++i) {
         auto data_type_ptr = _file_slot_descs[i]->get_data_type_ptr();
@@ -378,9 +401,14 @@ Status CsvReader::init_reader(bool is_load) {
     if (_enclose == 0) {
         text_line_reader_ctx = std::make_shared<PlainTextLineReaderCtx>(
                 _line_delimiter, _line_delimiter_length, _keep_cr);
-
-        _fields_splitter = std::make_unique<PlainCsvTextFieldSplitter>(
-                _trim_tailing_spaces, false, _value_separator, 
_value_separator_length, -1);
+        if (_text_serde_type == TTextSerdeType::HIVE_TEXT_SERDE) {
+            _fields_splitter = std::make_unique<HiveCsvTextFieldSplitter>(
+                    _trim_tailing_spaces, false, _value_separator, 
_value_separator_length, -1,
+                    _escape);
+        } else {
+            _fields_splitter = std::make_unique<PlainCsvTextFieldSplitter>(
+                    _trim_tailing_spaces, false, _value_separator, 
_value_separator_length, -1);
+        }
     } else {
         text_line_reader_ctx = std::make_shared<EncloseCsvLineReaderContext>(
                 _line_delimiter, _line_delimiter_length, _value_separator, 
_value_separator_length,
diff --git a/be/src/vec/exec/format/csv/csv_reader.h 
b/be/src/vec/exec/format/csv/csv_reader.h
index 3b600190459..6edabc52ad3 100644
--- a/be/src/vec/exec/format/csv/csv_reader.h
+++ b/be/src/vec/exec/format/csv/csv_reader.h
@@ -171,6 +171,22 @@ private:
     std::string _value_sep;
 };
 
+class HiveCsvTextFieldSplitter : public 
BaseCsvTextFieldSplitter<HiveCsvTextFieldSplitter> {
+public:
+    explicit HiveCsvTextFieldSplitter(bool trim_tailing_space, bool trim_ends,
+                                      const string& value_sep, size_t 
value_sep_len = 1,
+                                      char trimming_char = 0, char escape_char 
= 0)
+            : BaseCsvTextFieldSplitter(trim_tailing_space, trim_ends, 
value_sep_len, trimming_char),
+              _value_sep(value_sep),
+              _escape_char(escape_char) {}
+
+    void do_split(const Slice& line, std::vector<Slice>* splitted_values);
+
+private:
+    std::string _value_sep;
+    char _escape_char;
+};
+
 class CsvReader : public GenericReader {
     ENABLE_FACTORY_CREATOR(CsvReader);
 
diff --git a/be/src/vec/runtime/vcsv_transformer.cpp 
b/be/src/vec/runtime/vcsv_transformer.cpp
index 4bfd342fe76..51ac2eb2729 100644
--- a/be/src/vec/runtime/vcsv_transformer.cpp
+++ b/be/src/vec/runtime/vcsv_transformer.cpp
@@ -63,10 +63,21 @@ VCSVTransformer::VCSVTransformer(RuntimeState* state, 
doris::io::FileWriter* fil
     }
 
     if (_is_text_format) {
+        _options.field_delim = hive_serde_properties->field_delim;
         _options.collection_delim = hive_serde_properties->collection_delim[0];
         _options.map_key_delim = hive_serde_properties->mapkv_delim[0];
-        _options.escape_char = hive_serde_properties->escape_char[0];
-        _options.null_format = hive_serde_properties->null_format.c_str();
+        if (hive_serde_properties->__isset.escape_char) {
+            _options.escape_char = hive_serde_properties->escape_char[0];
+        }
+        _options.null_format = hive_serde_properties->null_format.data();
+        _options.null_len = hive_serde_properties->null_format.length();
+        // The list of separators + escapeChar are the bytes required to be 
escaped.
+        if (_options.escape_char != 0) {
+            _options.need_escape[_options.escape_char & 0xff] = true;
+        }
+        for (int i = 0; i <= 153; i++) {
+            _options.need_escape[_options.get_collection_delimiter(i) & 0xff] 
= true;
+        }
     }
 }
 
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run42.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run42.hql
index dc469fad77b..36b4776dc8f 100755
--- 
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run42.hql
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run42.hql
@@ -32,7 +32,6 @@ CREATE TABLE IF NOT EXISTS `text_all_types`(
 `t_decimal_precision_38` decimal(38,16),
 `t_binary` binary
 )
-ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
 STORED AS TEXTFILE
 LOCATION
   '/user/doris/preinstalled_data/text/text_all_types';
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
index b5d963a1c2b..4de85bc19f0 100644
--- 
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
@@ -88,19 +88,8 @@ CREATE TABLE `serde_test7`(
 ROW FORMAT SERDE 
   'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
 WITH SERDEPROPERTIES (
-  'escape.delim' = '|'
-)
-STORED AS INPUTFORMAT 
-  'org.apache.hadoop.mapred.TextInputFormat' 
-OUTPUTFORMAT 
-  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
-
-CREATE TABLE `serde_test8`(
-  `id` int, 
-  `name` string)
-ROW FORMAT SERDE 
-  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
-WITH SERDEPROPERTIES (
+  'field.delim' = 'a',
+  'escape.delim' = '|',
   'serialization.null.format' = 'null'
 )
 STORED AS INPUTFORMAT 
@@ -108,11 +97,12 @@ STORED AS INPUTFORMAT
 OUTPUTFORMAT 
   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
 
+CREATE TABLE `serde_test8` like `serde_test7`;
+
 insert into serde_test1 values(1, "abc"),(2, "def");
 insert into serde_test2 values(1, "abc"),(2, "def");
 insert into serde_test3 values(1, "abc"),(2, "def");
 insert into serde_test4 values(1, "abc"),(2, "def");
 insert into serde_test5 values(1, "abc"),(2, "def");
 insert into serde_test6 values(1, "abc"),(2, "def");
-insert into serde_test7 values(1, "abc"),(2, "def");
-insert into serde_test8 values(1, "abc"),(2, "def");
+insert into serde_test7 values(1, null),(2, "|||"),(3, "aaa"),(4, "\"null\"");
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 634c596c69f..0dcf4724a7b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -464,21 +464,24 @@ public class HiveScanNode extends FileQueryScanNode {
         if (serdeParams.containsKey(PROP_QUOTE_CHAR)) {
             
textParams.setEnclose(serdeParams.get(PROP_QUOTE_CHAR).getBytes()[0]);
         }
-
-        // TODO: support escape char and null format in csv_reader
-        Optional<String> escapeChar = 
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+        // 6. set escape delimiter
+        Optional<String> escapeDelim = 
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
                 PROP_ESCAPE_DELIMITER);
-        if (escapeChar.isPresent() && 
!escapeChar.get().equals(DEFAULT_ESCAPE_DELIMIER)) {
-            throw new UserException(
-                    "not support serde prop " + PROP_ESCAPE_DELIMITER + " in 
hive text reading");
+        if (escapeDelim.isPresent()) {
+            String escape = HiveMetaStoreClientHelper.getByte(
+                    escapeDelim.get());
+            if (escape != null) {
+                textParams
+                        .setEscape(escape.getBytes()[0]);
+            } else {
+                textParams.setEscape(DEFAULT_ESCAPE_DELIMIER.getBytes()[0]);
+            }
         }
-
+        // 7. set null format
         Optional<String> nullFormat = 
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
                 PROP_NULL_FORMAT);
-        if (nullFormat.isPresent() && 
!nullFormat.get().equals(DEFAULT_NULL_FORMAT)) {
-            throw new UserException(
-                    "not support serde prop " + PROP_NULL_FORMAT + " in hive 
text reading");
-        }
+        
textParams.setNullFormat(HiveMetaStoreClientHelper.firstPresentOrDefault(
+                DEFAULT_NULL_FORMAT, nullFormat));
 
         TFileAttributes fileAttributes = new TFileAttributes();
         fileAttributes.setTextParams(textParams);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java
index cb60b142404..330e0ed4a06 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java
@@ -244,9 +244,16 @@ public class HiveTableSink extends 
BaseExternalTableDataSink {
         // 5. set escape delimiter
         Optional<String> escapeDelim = 
HiveMetaStoreClientHelper.getSerdeProperty(targetTable.getRemoteTable(),
                 PROP_ESCAPE_DELIMITER);
-        serDeProperties
-                
.setEscapeChar(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
-                        DEFAULT_ESCAPE_DELIMIER, escapeDelim)));
+        if (escapeDelim.isPresent()) {
+            String escape = HiveMetaStoreClientHelper.getByte(
+                    escapeDelim.get());
+            if (escape != null) {
+                serDeProperties
+                        .setEscapeChar(escape);
+            } else {
+                serDeProperties.setEscapeChar(DEFAULT_ESCAPE_DELIMIER);
+            }
+        }
         // 6. set null format
         Optional<String> nullFormat = 
HiveMetaStoreClientHelper.getSerdeProperty(targetTable.getRemoteTable(),
                 PROP_NULL_FORMAT);
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index 2c5423fccb5..daf2e28a991 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -258,6 +258,7 @@ struct TFileTextScanRangeParams {
     4: optional string mapkv_delimiter;
     5: optional i8 enclose;
     6: optional i8 escape;
+    7: optional string null_format;
 }
 
 struct TFileScanSlotInfo {
diff --git 
a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out 
b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
index 388b95944e9..cc48c4a5601 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
@@ -24,7 +24,7 @@ true  8       8       8       80      8.8     80.8    7298    
12/31/10        8       2010-12-31T12:08:13.780 2010    12
 ""     "test"
 
 -- !10 --
-\\N\\N\\N\\N\\N\\N\\N\\N\\Ntesttestaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
+\N     \N      \N      \N      \N      \N      \N      \N      \N              
test            test    
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
 
 -- !11 --
 \N     \N      \N      \N      \N      \N      \N      \N      \N              
test            test    
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
@@ -290,7 +290,7 @@ true        8       8       8       80      8.8     80.8    
7298    12/31/10        8       2010-12-31T12:08:13.780 2010    12
 ""     "test"
 
 -- !10 --
-\\N\\N\\N\\N\\N\\N\\N\\N\\Ntesttestaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
+\N     \N      \N      \N      \N      \N      \N      \N      \N              
test            test    
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
 
 -- !11 --
 \N     \N      \N      \N      \N      \N      \N      \N      \N              
test            test    
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
@@ -556,7 +556,7 @@ true        8       8       8       80      8.8     80.8    
7298    12/31/10        8       2010-12-31T12:08:13.780 2010    12
 ""     "test"
 
 -- !10 --
-\\N\\N\\N\\N\\N\\N\\N\\N\\Ntesttestaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
+\N     \N      \N      \N      \N      \N      \N      \N      \N              
test            test    
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
 
 -- !11 --
 \N     \N      \N      \N      \N      \N      \N      \N      \N              
test            test    
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
@@ -822,7 +822,7 @@ true        8       8       8       80      8.8     80.8    
7298    12/31/10        8       2010-12-31T12:08:13.780 2010    12
 ""     "test"
 
 -- !10 --
-\\N\\N\\N\\N\\N\\N\\N\\N\\Ntesttestaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
+\N     \N      \N      \N      \N      \N      \N      \N      \N              
test            test    
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
 
 -- !11 --
 \N     \N      \N      \N      \N      \N      \N      \N      \N              
test            test    
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
diff --git 
a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out 
b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
index 38918c3fc6f..a527c7b687d 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
@@ -27,6 +27,18 @@ b    2.2
 1      abc
 2      def
 
+-- !8 --
+1      null
+2      |||
+3      aaa
+4      "null"
+
+-- !9 --
+1      null
+2      |||
+3      aaa
+4      "null"
+
 -- !1 --
 a      1.1
 b      2.2
@@ -55,3 +67,15 @@ b    2.2
 1      abc
 2      def
 
+-- !8 --
+1      null
+2      |||
+3      aaa
+4      "null"
+
+-- !9 --
+1      null
+2      |||
+3      aaa
+4      "null"
+
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
index 8aa97e63123..d0c191f7c67 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
@@ -23,6 +23,7 @@ suite("test_hive_serde_prop", 
"external_docker,hive,external_docker_hive,p0,exte
     }
 
     for (String hivePrefix : ["hive2", "hive3"]) {
+        setHivePrefix(hivePrefix)
         String catalog_name = "test_${hivePrefix}_serde_prop"
         String ex_db_name = "`stats_test`"
         String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
@@ -45,24 +46,11 @@ suite("test_hive_serde_prop", 
"external_docker,hive,external_docker_hive,p0,exte
         qt_5 """select * from ${catalog_name}.regression.serde_test4 order by 
id;"""
         qt_6 """select * from ${catalog_name}.regression.serde_test5 order by 
id;"""
         qt_7 """select * from ${catalog_name}.regression.serde_test6 order by 
id;"""
+        qt_8 """select * from ${catalog_name}.regression.serde_test7 order by 
id;"""
 
-        def success = true;
-        try {
-            sql """select * from ${catalog_name}.regression.serde_test7 order 
by id;"""
-        } catch(Exception e) {
-            assertTrue(e.getMessage().contains("not support serde prop"), 
e.getMessage())
-            success = false;
-        }
-        assertEquals(success, false)
-
-        success = true;
-        try {
-            sql """select * from ${catalog_name}.regression.serde_test8 order 
by id;"""
-        } catch(Exception e) {
-            assertTrue(e.getMessage().contains("not support serde prop"), 
e.getMessage())
-            success = false;
-        }
-        assertEquals(success, false)
+        hive_docker """truncate table regression.serde_test8;"""
+        sql """insert into ${catalog_name}.regression.serde_test8 select * 
from ${catalog_name}.regression.serde_test7;"""
+        qt_9 """select * from ${catalog_name}.regression.serde_test8 order by 
id;"""
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to