This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new b9d447741e8 Revert "branch-2.1: [Fix](Serde) Support hive compatible output format #49036" (#49986) b9d447741e8 is described below commit b9d447741e8fe3b050673491f77042bfb1611d46 Author: Mingyu Chen (Rayner) <morning...@163.com> AuthorDate: Fri Apr 11 01:08:02 2025 -0700 Revert "branch-2.1: [Fix](Serde) Support hive compatible output format #49036" (#49986) Reverts apache/doris#49831 --- .../vec/data_types/serde/data_type_array_serde.cpp | 5 +- .../vec/data_types/serde/data_type_map_serde.cpp | 7 +- .../data_types/serde/data_type_number_serde.cpp | 9 +- be/src/vec/data_types/serde/data_type_serde.h | 20 ---- .../data_types/serde/data_type_struct_serde.cpp | 5 +- be/src/vec/sink/vmysql_result_writer.cpp | 16 --- .../org/apache/doris/nereids/NereidsPlanner.java | 1 - .../java/org/apache/doris/qe/SessionVariable.java | 10 +- gensrc/thrift/PaloInternalService.thrift | 3 +- .../datatype_p0/serde/test_serde_dialect_hive.out | Bin 2029 -> 0 bytes .../serde/test_serde_dialect_hive.groovy | 107 --------------------- 11 files changed, 8 insertions(+), 175 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index e5fc7461e45..872dd84d8c7 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -336,8 +336,7 @@ Status DataTypeArraySerDe::_write_column_to_mysql(const IColumn& column, const auto end_arr_element = offsets[row_idx_of_col_arr]; for (int j = begin_arr_element; j < end_arr_element; ++j) { if (j != begin_arr_element) { - if (0 != result.push_string(options.mysql_collection_delim.c_str(), - options.mysql_collection_delim.size())) { + if (0 != result.push_string(", ", 2)) { return Status::InternalError("pack mysql buffer failed."); } } @@ -346,7 +345,6 @@ Status DataTypeArraySerDe::_write_column_to_mysql(const IColumn& column, return Status::InternalError("pack mysql buffer failed."); } } else { - ++options.level; if (is_nested_string && options.wrapper_len > 0) { if (0 != result.push_string(options.nested_string_wrapper, options.wrapper_len)) { return Status::InternalError("pack mysql buffer failed."); @@ -360,7 +358,6 @@ Status DataTypeArraySerDe::_write_column_to_mysql(const IColumn& column, RETURN_IF_ERROR( nested_serde->write_column_to_mysql(data, result, j, false, options)); } - --options.level; } } if (0 != result.push_string("]", 1)) { diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index bf018ce3a80..2140885942d 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -418,8 +418,7 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const IColumn& column, auto& offsets = map_column.get_offsets(); for (auto j = offsets[col_index - 1]; j < offsets[col_index]; ++j) { if (j != offsets[col_index - 1]) { - if (0 != result.push_string(options.mysql_collection_delim.c_str(), - options.mysql_collection_delim.size())) { + if (0 != result.push_string(", ", 2)) { return Status::InternalError("pack mysql buffer failed."); } } @@ -428,7 +427,6 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const IColumn& column, return Status::InternalError("pack mysql buffer failed."); } } else { - ++options.level; if (is_key_string && options.wrapper_len > 0) { if (0 != result.push_string(options.nested_string_wrapper, options.wrapper_len)) { return Status::InternalError("pack mysql buffer failed."); @@ -442,7 +440,6 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const IColumn& column, RETURN_IF_ERROR(key_serde->write_column_to_mysql(nested_keys_column, result, j, false, options)); } - --options.level; } if (0 != result.push_string(&options.map_key_delim, 1)) { return Status::InternalError("pack mysql buffer failed."); @@ -452,7 +449,6 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const IColumn& column, return Status::InternalError("pack mysql buffer failed."); } } else { - ++options.level; if (is_val_string && options.wrapper_len > 0) { if (0 != result.push_string(options.nested_string_wrapper, options.wrapper_len)) { return Status::InternalError("pack mysql buffer failed."); @@ -466,7 +462,6 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const IColumn& column, RETURN_IF_ERROR(value_serde->write_column_to_mysql(nested_values_column, result, j, false, options)); } - --options.level; } } if (0 != result.push_string("}", 1)) { diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index cd8b3d567e9..522cf02c75f 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -277,15 +277,8 @@ Status DataTypeNumberSerDe<T>::_write_column_to_mysql(const IColumn& column, int buf_ret = 0; auto& data = assert_cast<const ColumnType&>(column).get_data(); const auto col_index = index_check_const(row_idx, col_const); - if constexpr (std::is_same_v<T, Int8>) { + if constexpr (std::is_same_v<T, Int8> || std::is_same_v<T, UInt8>) { buf_ret = result.push_tinyint(data[col_index]); - } else if constexpr (std::is_same_v<T, UInt8>) { - if (options.level > 0 && !options.is_bool_value_num) { - std::string bool_value = data[col_index] ? "true" : "false"; - result.push_string(bool_value.c_str(), bool_value.size()); - } else { - buf_ret = result.push_tinyint(data[col_index]); - } } else if constexpr (std::is_same_v<T, Int16> || std::is_same_v<T, UInt16>) { buf_ret = result.push_smallint(data[col_index]); } else if constexpr (std::is_same_v<T, Int32> || std::is_same_v<T, UInt32>) { diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index b23a6a21501..7dedf30ac32 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -165,26 +165,6 @@ public: const char* nested_string_wrapper; int wrapper_len; - /** - * mysql_collection_delim is used to separate elements in collection, such as array, map, struct - * It is used to write to mysql. - */ - std::string mysql_collection_delim = ", "; - - /** - * is_bool_value_num is used to display bool value in collection, such as array, map, struct - * eg, if set to true, the array<true> will be: - * [1] - * if set to false, the array<true> will be: - * [true] - */ - bool is_bool_value_num = true; - - /** - * Indicate the nested level of column. It is used to control some behavior of serde - */ - mutable int level = 0; - [[nodiscard]] char get_collection_delimiter( int hive_text_complex_type_delimiter_level) const { CHECK(0 <= hive_text_complex_type_delimiter_level && diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp index d95682e604c..d48f42e2227 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp @@ -348,8 +348,7 @@ Status DataTypeStructSerDe::_write_column_to_mysql(const IColumn& column, bool begin = true; for (size_t j = 0; j < elem_serdes_ptrs.size(); ++j) { if (!begin) { - if (0 != result.push_string(options.mysql_collection_delim.c_str(), - options.mysql_collection_delim.size())) { + if (0 != result.push_string(", ", 2)) { return Status::InternalError("pack mysql buffer failed."); } } @@ -373,7 +372,6 @@ Status DataTypeStructSerDe::_write_column_to_mysql(const IColumn& column, return Status::InternalError("pack mysql buffer failed."); } } else { - ++options.level; if (remove_nullable(col.get_column_ptr(j))->is_column_string() && options.wrapper_len > 0) { if (0 != result.push_string(options.nested_string_wrapper, options.wrapper_len)) { @@ -388,7 +386,6 @@ Status DataTypeStructSerDe::_write_column_to_mysql(const IColumn& column, RETURN_IF_ERROR(elem_serdes_ptrs[j]->write_column_to_mysql( col.get_column(j), result, col_index, false, options)); } - --options.level; } begin = false; } diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp index 8ad1c276025..0cdf1b34034 100644 --- a/be/src/vec/sink/vmysql_result_writer.cpp +++ b/be/src/vec/sink/vmysql_result_writer.cpp @@ -123,8 +123,6 @@ Status VMysqlResultWriter<is_binary_format>::_set_options( _options.map_key_delim = ':'; _options.null_format = "null"; _options.null_len = 4; - _options.mysql_collection_delim = ", "; - _options.is_bool_value_num = true; break; case TSerdeDialect::PRESTO: // eg: @@ -135,20 +133,6 @@ Status VMysqlResultWriter<is_binary_format>::_set_options( _options.map_key_delim = '='; _options.null_format = "NULL"; _options.null_len = 4; - _options.mysql_collection_delim = ", "; - _options.is_bool_value_num = true; - break; - case TSerdeDialect::HIVE: - // eg: - // array: ["abc","def","",null] - // map: {"k1":null,"k2":"v3"} - _options.nested_string_wrapper = "\""; - _options.wrapper_len = 1; - _options.map_key_delim = ':'; - _options.null_format = "null"; - _options.null_len = 4; - _options.mysql_collection_delim = ","; - _options.is_bool_value_num = false; break; default: return Status::InternalError("unknown serde dialect: {}", serde_dialect); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index 57d88de7a4e..89a9d220be3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -677,7 +677,6 @@ public class NereidsPlanner extends Planner { statementContext.setFormatOptions(FormatOptions.getForPresto()); break; case "doris": - case "hive": statementContext.setFormatOptions(FormatOptions.getDefault()); break; default: diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 7d9b8416618..3f874077f51 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -4348,11 +4348,9 @@ public class SessionVariable implements Serializable, Writable { throw new UnsupportedOperationException("serdeDialect value is empty"); } - if (!serdeDialect.equalsIgnoreCase("doris") - && !serdeDialect.equalsIgnoreCase("presto") - && !serdeDialect.equalsIgnoreCase("trino") - && !serdeDialect.equalsIgnoreCase("hive")) { - LOG.warn("serde dialect value is invalid, the invalid value is {}", serdeDialect); + if (!serdeDialect.equalsIgnoreCase("doris") && !serdeDialect.equalsIgnoreCase("presto") + && !serdeDialect.equalsIgnoreCase("trino")) { + LOG.warn("serdeDialect value is invalid, the invalid value is {}", serdeDialect); throw new UnsupportedOperationException( "sqlDialect value is invalid, the invalid value is " + serdeDialect); } @@ -4514,8 +4512,6 @@ public class SessionVariable implements Serializable, Writable { case "presto": case "trino": return TSerdeDialect.PRESTO; - case "hive": - return TSerdeDialect.HIVE; default: throw new IllegalArgumentException("Unknown serde dialect: " + serdeDialect); } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index ac43d3a3dee..c612826836e 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -83,8 +83,7 @@ struct TResourceLimit { enum TSerdeDialect { DORIS = 0, - PRESTO = 1, - HIVE = 2 + PRESTO = 1 } // Query options that correspond to PaloService.PaloQueryOptions, diff --git a/regression-test/data/datatype_p0/serde/test_serde_dialect_hive.out b/regression-test/data/datatype_p0/serde/test_serde_dialect_hive.out deleted file mode 100644 index 3ea1043cdf6..00000000000 Binary files a/regression-test/data/datatype_p0/serde/test_serde_dialect_hive.out and /dev/null differ diff --git a/regression-test/suites/datatype_p0/serde/test_serde_dialect_hive.groovy b/regression-test/suites/datatype_p0/serde/test_serde_dialect_hive.groovy deleted file mode 100644 index b8e3037d770..00000000000 --- a/regression-test/suites/datatype_p0/serde/test_serde_dialect_hive.groovy +++ /dev/null @@ -1,107 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -suite("test_serde_dialect_hive", "p0") { - - sql """create database if not exists test_serde_dialect_hive;""" - sql """use test_serde_dialect_hive;""" - sql """drop table if exists test_serde_dialect_hive_tbl""" - sql """ - create table if not exists test_serde_dialect_hive_tbl ( - c1 tinyint, - c2 smallint, - c3 int, - c4 bigint, - c5 largeint, - c6 float, - c7 double, - c8 decimal(27, 9), - c9 date, - c10 datetime, - c11 datetime(6), - c12 ipv4, - c13 ipv6, - c14 string, - c15 char(6), - c16 varchar(1024), - c17 boolean, - c18 json, - c19 array<int>, - c20 array<double>, - c21 array<decimal(10, 5)>, - c22 array<string>, - c23 array<map<string, string>>, - c24 array<array<string>>, - c25 array<struct<s_id:int(11), s_name:string, s_address:string>>, - c26 array<struct<s_id:struct<k1:string, k2:decimal(10,2)>, s_name:array<ipv4>, s_address:map<string, ipv6>>>, - c27 map<string, string>, - c28 map<string, array<array<string>>>, - c29 map<int, map<string, array<array<string>>>>, - c30 map<decimal(5, 3), array<struct<s_id:struct<k1:string, k2:decimal(10,2)>, s_name:array<string>, s_address:map<string, string>>>>, - c31 struct<s_id:int(11), s_name:string, s_address:string>, - c32 struct<s_id:int(11), s_name:array<string>, s_address:string>, - c33 array<date>, - c34 array<datetime(3)>, - c35 array<boolean>, - c36 struct<s_id:int(11), s_name:string, s_gender:boolean>, - c37 map<string, boolean> - ) - distributed by random buckets 1 - properties("replication_num" = "1"); - """ - - sql """ - insert into test_serde_dialect_hive_tbl - (c1, c2,c3, c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c27,c28,c29,c31,c32,c33,c34,c35,c36,c37) - values( - 1,2,3,4,5,1.1,2.0000,123456.123456789,"2024-06-30", "2024-06-30 10:10:11", "2024-06-30 10:10:11.123456", - '59.50.185.152', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff', - 'this is a string with , and "', - 'abc ef', - ' 123ndedwdw', - true, - '[1, 2, 3, 4, 5]', - [1,2,3,null,5], - [1.1,2.1,3.1,null,5.00], - [1.1,2.1,3.00000,null,5.12345], - ['abc', 'de, f"', null, ''], - [{'k1': 'v1', 'k2': null, 'k3':'', 'k4':'a , "a'}, {'k1': 'v1', 'k2': null, 'k3 , "abc':'', 'k4':'a , "a'}], - [['abc', 'de, f"', null, ''],[],null], - {'k1': 'v1', 'k2': null, 'k3':'', 'k4':'a , "a'}, - {'k1': [['abc', 'de, f"', null, ''],[],null], 'k2': null}, - {10: {'k1': [['abc', 'de, f"', null, ''],[],null]}, 11: null}, - named_struct('s_id', 100, 's_name', 'abc , "', 's_address', null), - named_struct('s_id', null, 's_name', ['abc', 'de, f"', null, ''], 's_address', ''), - ['2024-06-01',null,'2024-06-03'], - ['2024-06-01 10:10:10',null,'2024-06-03 01:11:23.123'], - [true, true, false, false, true, false, false], - named_struct('s_id', 100, 's_name', 'abc , "', 's_gender', true), - {'k1': false, 'k2': true, 'k3':true, 'k4': false} - ); - """ - - sql """set serde_dialect="doris";""" - qt_sql01 """select * from test_serde_dialect_hive_tbl""" - sql """set serde_dialect="hive";""" - qt_sql01 """select * from test_serde_dialect_hive_tbl""" - - test { - sql """set serde_dialect="invalid"""" - exception "sqlDialect value is invalid" - } -} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org