This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 4888c632f4c [cherry-pick](branch2.1) support escape.delim and serialization.null.format for hive text (#41684) 4888c632f4c is described below commit 4888c632f4c7190f33f9675055fcda7a0eeb0c56 Author: Socrates <suxiaogang...@icloud.com> AuthorDate: Tue Oct 15 00:08:23 2024 +0800 [cherry-pick](branch2.1) support escape.delim and serialization.null.format for hive text (#41684) ## Proposed changes pick from master: https://github.com/apache/doris/pull/40291 --- .../vec/data_types/serde/data_type_array_serde.cpp | 3 ++ .../vec/data_types/serde/data_type_map_serde.cpp | 10 ++++-- .../data_types/serde/data_type_nullable_serde.cpp | 4 +-- be/src/vec/data_types/serde/data_type_serde.h | 17 +++++---- .../vec/data_types/serde/data_type_string_serde.h | 42 ++++++++++++++++++++++ .../data_types/serde/data_type_struct_serde.cpp | 3 ++ be/src/vec/exec/format/csv/csv_reader.cpp | 34 ++++++++++++++++-- be/src/vec/exec/format/csv/csv_reader.h | 16 +++++++++ be/src/vec/runtime/vcsv_transformer.cpp | 15 ++++++-- .../scripts/create_preinstalled_scripts/run42.hql | 1 - .../regression/serde_prop/some_serde_table.hql | 20 +++-------- .../doris/datasource/hive/source/HiveScanNode.java | 25 +++++++------ .../org/apache/doris/planner/HiveTableSink.java | 13 +++++-- gensrc/thrift/PlanNodes.thrift | 1 + .../hive/test_hive_basic_type.out | 8 ++--- .../hive/test_hive_serde_prop.out | 24 +++++++++++++ .../hive/test_hive_serde_prop.groovy | 22 +++--------- 17 files changed, 191 insertions(+), 67 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index 0c606f4eeb2..872dd84d8c7 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -169,6 +169,9 @@ Status DataTypeArraySerDe::deserialize_one_cell_from_hive_text( for (int idx = 0, start = 0; idx <= slice.size; idx++) { char c = (idx == slice.size) ? collection_delimiter : slice[idx]; if (c == collection_delimiter) { + if (options.escape_char != 0 && idx > 0 && slice[idx - 1] == options.escape_char) { + continue; + } slices.emplace_back(slice.data + start, idx - start); start = idx + 1; } diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index a1a65e7ea5b..2140885942d 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -97,13 +97,17 @@ Status DataTypeMapSerDe::deserialize_one_cell_from_hive_text( * * So i use 'kv <= from' in order to get _map_kv_delimiter that appears first. * */ - if (i < slice.size && slice[i] == map_kv_delimiter && kv <= from) { + if (i < slice.size && slice[i] == map_kv_delimiter && kv <= from && + (options.escape_char == 0 || i == 0 || slice[i - 1] != options.escape_char)) { kv = i; continue; } if ((i == slice.size || slice[i] == collection_delimiter) && i >= kv + 1) { - key_slices.push_back({slice.data + from, kv - from}); - value_slices.push_back({slice.data + kv + 1, i - 1 - kv}); + if (options.escape_char != 0 && i > 0 && slice[i - 1] == options.escape_char) { + continue; + } + key_slices.emplace_back(slice.data + from, kv - from); + value_slices.emplace_back(slice.data + kv + 1, i - 1 - kv); from = i + 1; kv = from; } diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index 1b26d775d2d..1af85bd040d 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -88,7 +88,7 @@ Status DataTypeNullableSerDe::serialize_one_cell_to_hive_text( const auto& col_null = assert_cast<const ColumnNullable&>(*ptr); if (col_null.is_null_at(row_num)) { - bw.write(NULL_IN_CSV_FOR_ORDINARY_TYPE.c_str(), 2); + bw.write(options.null_format, options.null_len); } else { RETURN_IF_ERROR(nested_serde->serialize_one_cell_to_hive_text( col_null.get_nested_column(), row_num, bw, options, @@ -101,7 +101,7 @@ Status DataTypeNullableSerDe::deserialize_one_cell_from_hive_text( IColumn& column, Slice& slice, const FormatOptions& options, int hive_text_complex_type_delimiter_level) const { auto& null_column = assert_cast<ColumnNullable&>(column); - if (slice.size == 2 && slice[0] == '\\' && slice[1] == 'N') { + if (slice.compare(Slice(options.null_format, options.null_len)) == 0) { null_column.insert_data(nullptr, 0); return Status::OK(); } diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index f09cf5a8a49..fd773718000 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -137,6 +137,10 @@ public: bool converted_from_string = false; char escape_char = 0; + /** + * flags for each byte to indicate if escape is needed. + */ + bool need_escape[256] = {false}; /** * only used for export data @@ -148,8 +152,8 @@ public: * NULL * null */ - const char* null_format; - int null_len; + const char* null_format = "\\N"; + int null_len = 2; /** * The wrapper char for string type in nested type. @@ -166,7 +170,7 @@ public: CHECK(0 <= hive_text_complex_type_delimiter_level && hive_text_complex_type_delimiter_level <= 153); - char ans = '\002'; + char ans; //https://github.com/apache/hive/blob/master/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySerDeParameters.java#L250 //use only control chars that are very unlikely to be part of the string // the following might/likely to be used in text files for strings @@ -175,8 +179,9 @@ public: // 12 (form feed, FF, \f, ^L), // 13 (carriage return, CR, \r, ^M), // 27 (escape, ESC, \e [GCC only], ^[). - - if (hive_text_complex_type_delimiter_level == 1) { + if (hive_text_complex_type_delimiter_level == 0) { + ans = field_delim[0]; + } else if (hive_text_complex_type_delimiter_level == 1) { ans = collection_delim; } else if (hive_text_complex_type_delimiter_level == 2) { ans = map_key_delim; @@ -192,7 +197,7 @@ public: } else if (hive_text_complex_type_delimiter_level <= 25) { // [22, 25] -> [28, 31] ans = hive_text_complex_type_delimiter_level + 6; - } else if (hive_text_complex_type_delimiter_level <= 153) { + } else { // [26, 153] -> [-128, -1] ans = hive_text_complex_type_delimiter_level + (-26 - 128); } diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index d3161c88706..fe09ff615f4 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -96,6 +96,27 @@ public: return Status::OK(); } + Status serialize_one_cell_to_hive_text( + const IColumn& column, int row_num, BufferWritable& bw, FormatOptions& options, + int hive_text_complex_type_delimiter_level = 1) const override { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + const auto& value = assert_cast<const ColumnType&>(*ptr).get_data_at(row_num); + if constexpr (std::is_same_v<ColumnType, ColumnString>) { + if (options.escape_char != 0) { + StringRef str_ref = value; + write_with_escaped_char_to_hive_text(str_ref, bw, options.escape_char, + options.need_escape); + } else { + bw.write(value.data, value.size); + } + } else { + bw.write(value.data, value.size); + } + return Status::OK(); + } + inline void write_with_escaped_char_to_json(StringRef value, BufferWritable& bw) const { for (char it : value) { switch (it) { @@ -126,6 +147,17 @@ public: } } + inline void write_with_escaped_char_to_hive_text(StringRef value, BufferWritable& bw, + char escape_char, + const bool need_escape[]) const { + for (char it : value) { + if (need_escape[it & 0xff]) { + bw.write(escape_char); + } + bw.write(it); + } + } + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, BufferWritable& bw, FormatOptions& options) const override { SERIALIZE_COLUMN_TO_JSON(); @@ -154,6 +186,16 @@ public: return Status::OK(); } + Status deserialize_one_cell_from_hive_text( + IColumn& column, Slice& slice, const FormatOptions& options, + int hive_text_complex_type_delimiter_level = 1) const override { + if (options.escape_char != 0) { + escape_string(slice.data, slice.size, options.escape_char); + } + assert_cast<ColumnType&>(column).insert_data(slice.data, slice.size); + return Status::OK(); + } + Status deserialize_column_from_json_vector(IColumn& column, std::vector<Slice>& slices, int* num_deserialized, const FormatOptions& options) const override { diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp index c28a3acefde..d48f42e2227 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp @@ -249,6 +249,9 @@ Status DataTypeStructSerDe::deserialize_one_cell_from_hive_text( char* data = slice.data; for (size_t i = 0, from = 0; i <= slice.size; i++) { if (i == slice.size || data[i] == struct_delimiter) { + if (options.escape_char != 0 && i > 0 && data[i - 1] == options.escape_char) { + continue; + } slices.push_back({data + from, i - from}); from = i + 1; } diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp index 02841f8c3f0..d6a9b0c46b3 100644 --- a/be/src/vec/exec/format/csv/csv_reader.cpp +++ b/be/src/vec/exec/format/csv/csv_reader.cpp @@ -174,6 +174,23 @@ void PlainCsvTextFieldSplitter::do_split(const Slice& line, std::vector<Slice>* } } +void HiveCsvTextFieldSplitter::do_split(const Slice& line, std::vector<Slice>* splitted_values) { + const char* data = line.data; + const size_t size = line.size; + size_t value_start = 0; + for (size_t i = 0; i < size; ++i) { + if (data[i] == _value_sep[0]) { + // hive will escape the field separator in string + if (_escape_char != 0 && i > 0 && data[i - 1] == _escape_char) { + continue; + } + process_value_func(data, value_start, i - value_start, _trimming_char, splitted_values); + value_start = i + _value_sep_len; + } + } + process_value_func(data, value_start, size - value_start, _trimming_char, splitted_values); +} + CsvReader::CsvReader(RuntimeState* state, RuntimeProfile* profile, ScannerCounter* counter, const TFileScanRangeParams& params, const TFileRangeDesc& range, const std::vector<SlotDescriptor*>& file_slot_descs, io::IOContext* io_ctx) @@ -354,6 +371,12 @@ Status CsvReader::init_reader(bool is_load) { } else { _options.map_key_delim = _params.file_attributes.text_params.mapkv_delimiter[0]; } + + if (_params.file_attributes.text_params.__isset.null_format) { + _options.null_format = _params.file_attributes.text_params.null_format.data(); + _options.null_len = _params.file_attributes.text_params.null_format.length(); + } + _use_nullable_string_opt.resize(_file_slot_descs.size()); for (int i = 0; i < _file_slot_descs.size(); ++i) { auto data_type_ptr = _file_slot_descs[i]->get_data_type_ptr(); @@ -378,9 +401,14 @@ Status CsvReader::init_reader(bool is_load) { if (_enclose == 0) { text_line_reader_ctx = std::make_shared<PlainTextLineReaderCtx>( _line_delimiter, _line_delimiter_length, _keep_cr); - - _fields_splitter = std::make_unique<PlainCsvTextFieldSplitter>( - _trim_tailing_spaces, false, _value_separator, _value_separator_length, -1); + if (_text_serde_type == TTextSerdeType::HIVE_TEXT_SERDE) { + _fields_splitter = std::make_unique<HiveCsvTextFieldSplitter>( + _trim_tailing_spaces, false, _value_separator, _value_separator_length, -1, + _escape); + } else { + _fields_splitter = std::make_unique<PlainCsvTextFieldSplitter>( + _trim_tailing_spaces, false, _value_separator, _value_separator_length, -1); + } } else { text_line_reader_ctx = std::make_shared<EncloseCsvLineReaderContext>( _line_delimiter, _line_delimiter_length, _value_separator, _value_separator_length, diff --git a/be/src/vec/exec/format/csv/csv_reader.h b/be/src/vec/exec/format/csv/csv_reader.h index 3b600190459..6edabc52ad3 100644 --- a/be/src/vec/exec/format/csv/csv_reader.h +++ b/be/src/vec/exec/format/csv/csv_reader.h @@ -171,6 +171,22 @@ private: std::string _value_sep; }; +class HiveCsvTextFieldSplitter : public BaseCsvTextFieldSplitter<HiveCsvTextFieldSplitter> { +public: + explicit HiveCsvTextFieldSplitter(bool trim_tailing_space, bool trim_ends, + const string& value_sep, size_t value_sep_len = 1, + char trimming_char = 0, char escape_char = 0) + : BaseCsvTextFieldSplitter(trim_tailing_space, trim_ends, value_sep_len, trimming_char), + _value_sep(value_sep), + _escape_char(escape_char) {} + + void do_split(const Slice& line, std::vector<Slice>* splitted_values); + +private: + std::string _value_sep; + char _escape_char; +}; + class CsvReader : public GenericReader { ENABLE_FACTORY_CREATOR(CsvReader); diff --git a/be/src/vec/runtime/vcsv_transformer.cpp b/be/src/vec/runtime/vcsv_transformer.cpp index 4bfd342fe76..51ac2eb2729 100644 --- a/be/src/vec/runtime/vcsv_transformer.cpp +++ b/be/src/vec/runtime/vcsv_transformer.cpp @@ -63,10 +63,21 @@ VCSVTransformer::VCSVTransformer(RuntimeState* state, doris::io::FileWriter* fil } if (_is_text_format) { + _options.field_delim = hive_serde_properties->field_delim; _options.collection_delim = hive_serde_properties->collection_delim[0]; _options.map_key_delim = hive_serde_properties->mapkv_delim[0]; - _options.escape_char = hive_serde_properties->escape_char[0]; - _options.null_format = hive_serde_properties->null_format.c_str(); + if (hive_serde_properties->__isset.escape_char) { + _options.escape_char = hive_serde_properties->escape_char[0]; + } + _options.null_format = hive_serde_properties->null_format.data(); + _options.null_len = hive_serde_properties->null_format.length(); + // The list of separators + escapeChar are the bytes required to be escaped. + if (_options.escape_char != 0) { + _options.need_escape[_options.escape_char & 0xff] = true; + } + for (int i = 0; i <= 153; i++) { + _options.need_escape[_options.get_collection_delimiter(i) & 0xff] = true; + } } } diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run42.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run42.hql index dc469fad77b..36b4776dc8f 100755 --- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run42.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run42.hql @@ -32,7 +32,6 @@ CREATE TABLE IF NOT EXISTS `text_all_types`( `t_decimal_precision_38` decimal(38,16), `t_binary` binary ) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE LOCATION '/user/doris/preinstalled_data/text/text_all_types'; diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql index b5d963a1c2b..4de85bc19f0 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql @@ -88,19 +88,8 @@ CREATE TABLE `serde_test7`( ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ( - 'escape.delim' = '|' -) -STORED AS INPUTFORMAT - 'org.apache.hadoop.mapred.TextInputFormat' -OUTPUTFORMAT - 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; - -CREATE TABLE `serde_test8`( - `id` int, - `name` string) -ROW FORMAT SERDE - 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -WITH SERDEPROPERTIES ( + 'field.delim' = 'a', + 'escape.delim' = '|', 'serialization.null.format' = 'null' ) STORED AS INPUTFORMAT @@ -108,11 +97,12 @@ STORED AS INPUTFORMAT OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; +CREATE TABLE `serde_test8` like `serde_test7`; + insert into serde_test1 values(1, "abc"),(2, "def"); insert into serde_test2 values(1, "abc"),(2, "def"); insert into serde_test3 values(1, "abc"),(2, "def"); insert into serde_test4 values(1, "abc"),(2, "def"); insert into serde_test5 values(1, "abc"),(2, "def"); insert into serde_test6 values(1, "abc"),(2, "def"); -insert into serde_test7 values(1, "abc"),(2, "def"); -insert into serde_test8 values(1, "abc"),(2, "def"); +insert into serde_test7 values(1, null),(2, "|||"),(3, "aaa"),(4, "\"null\""); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java index 634c596c69f..0dcf4724a7b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java @@ -464,21 +464,24 @@ public class HiveScanNode extends FileQueryScanNode { if (serdeParams.containsKey(PROP_QUOTE_CHAR)) { textParams.setEnclose(serdeParams.get(PROP_QUOTE_CHAR).getBytes()[0]); } - - // TODO: support escape char and null format in csv_reader - Optional<String> escapeChar = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), + // 6. set escape delimiter + Optional<String> escapeDelim = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_ESCAPE_DELIMITER); - if (escapeChar.isPresent() && !escapeChar.get().equals(DEFAULT_ESCAPE_DELIMIER)) { - throw new UserException( - "not support serde prop " + PROP_ESCAPE_DELIMITER + " in hive text reading"); + if (escapeDelim.isPresent()) { + String escape = HiveMetaStoreClientHelper.getByte( + escapeDelim.get()); + if (escape != null) { + textParams + .setEscape(escape.getBytes()[0]); + } else { + textParams.setEscape(DEFAULT_ESCAPE_DELIMIER.getBytes()[0]); + } } - + // 7. set null format Optional<String> nullFormat = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_NULL_FORMAT); - if (nullFormat.isPresent() && !nullFormat.get().equals(DEFAULT_NULL_FORMAT)) { - throw new UserException( - "not support serde prop " + PROP_NULL_FORMAT + " in hive text reading"); - } + textParams.setNullFormat(HiveMetaStoreClientHelper.firstPresentOrDefault( + DEFAULT_NULL_FORMAT, nullFormat)); TFileAttributes fileAttributes = new TFileAttributes(); fileAttributes.setTextParams(textParams); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java index cb60b142404..330e0ed4a06 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java @@ -244,9 +244,16 @@ public class HiveTableSink extends BaseExternalTableDataSink { // 5. set escape delimiter Optional<String> escapeDelim = HiveMetaStoreClientHelper.getSerdeProperty(targetTable.getRemoteTable(), PROP_ESCAPE_DELIMITER); - serDeProperties - .setEscapeChar(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault( - DEFAULT_ESCAPE_DELIMIER, escapeDelim))); + if (escapeDelim.isPresent()) { + String escape = HiveMetaStoreClientHelper.getByte( + escapeDelim.get()); + if (escape != null) { + serDeProperties + .setEscapeChar(escape); + } else { + serDeProperties.setEscapeChar(DEFAULT_ESCAPE_DELIMIER); + } + } // 6. set null format Optional<String> nullFormat = HiveMetaStoreClientHelper.getSerdeProperty(targetTable.getRemoteTable(), PROP_NULL_FORMAT); diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index 2c5423fccb5..daf2e28a991 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -258,6 +258,7 @@ struct TFileTextScanRangeParams { 4: optional string mapkv_delimiter; 5: optional i8 enclose; 6: optional i8 escape; + 7: optional string null_format; } struct TFileScanSlotInfo { diff --git a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out index 388b95944e9..cc48c4a5601 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out +++ b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out @@ -24,7 +24,7 @@ true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 "" "test" -- !10 -- -\\N\\N\\N\\N\\N\\N\\N\\N\\Ntesttestaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] +\N \N \N \N \N \N \N \N \N test test aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] -- !11 -- \N \N \N \N \N \N \N \N \N test test aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] @@ -290,7 +290,7 @@ true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 "" "test" -- !10 -- -\\N\\N\\N\\N\\N\\N\\N\\N\\Ntesttestaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] +\N \N \N \N \N \N \N \N \N test test aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] -- !11 -- \N \N \N \N \N \N \N \N \N test test aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] @@ -556,7 +556,7 @@ true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 "" "test" -- !10 -- -\\N\\N\\N\\N\\N\\N\\N\\N\\Ntesttestaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] +\N \N \N \N \N \N \N \N \N test test aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] -- !11 -- \N \N \N \N \N \N \N \N \N test test aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] @@ -822,7 +822,7 @@ true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 "" "test" -- !10 -- -\\N\\N\\N\\N\\N\\N\\N\\N\\Ntesttestaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] +\N \N \N \N \N \N \N \N \N test test aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] -- !11 -- \N \N \N \N \N \N \N \N \N test test aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] diff --git a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out index 38918c3fc6f..a527c7b687d 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out +++ b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out @@ -27,6 +27,18 @@ b 2.2 1 abc 2 def +-- !8 -- +1 null +2 ||| +3 aaa +4 "null" + +-- !9 -- +1 null +2 ||| +3 aaa +4 "null" + -- !1 -- a 1.1 b 2.2 @@ -55,3 +67,15 @@ b 2.2 1 abc 2 def +-- !8 -- +1 null +2 ||| +3 aaa +4 "null" + +-- !9 -- +1 null +2 ||| +3 aaa +4 "null" + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy index 8aa97e63123..d0c191f7c67 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy @@ -23,6 +23,7 @@ suite("test_hive_serde_prop", "external_docker,hive,external_docker_hive,p0,exte } for (String hivePrefix : ["hive2", "hive3"]) { + setHivePrefix(hivePrefix) String catalog_name = "test_${hivePrefix}_serde_prop" String ex_db_name = "`stats_test`" String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") @@ -45,24 +46,11 @@ suite("test_hive_serde_prop", "external_docker,hive,external_docker_hive,p0,exte qt_5 """select * from ${catalog_name}.regression.serde_test4 order by id;""" qt_6 """select * from ${catalog_name}.regression.serde_test5 order by id;""" qt_7 """select * from ${catalog_name}.regression.serde_test6 order by id;""" + qt_8 """select * from ${catalog_name}.regression.serde_test7 order by id;""" - def success = true; - try { - sql """select * from ${catalog_name}.regression.serde_test7 order by id;""" - } catch(Exception e) { - assertTrue(e.getMessage().contains("not support serde prop"), e.getMessage()) - success = false; - } - assertEquals(success, false) - - success = true; - try { - sql """select * from ${catalog_name}.regression.serde_test8 order by id;""" - } catch(Exception e) { - assertTrue(e.getMessage().contains("not support serde prop"), e.getMessage()) - success = false; - } - assertEquals(success, false) + hive_docker """truncate table regression.serde_test8;""" + sql """insert into ${catalog_name}.regression.serde_test8 select * from ${catalog_name}.regression.serde_test7;""" + qt_9 """select * from ${catalog_name}.regression.serde_test8 order by id;""" } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org