This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 79662fcc943 [branch-2.1](functions) clean some ip functions code and make IS_IP_ADDRESS_IN_RANGE DEPENDS_ON_ARGUMENT (#45358) 79662fcc943 is described below commit 79662fcc943ee1523bac342eca72edf86983d5da Author: zclllyybb <zhaochan...@selectdb.com> AuthorDate: Tue Dec 17 11:51:07 2024 +0800 [branch-2.1](functions) clean some ip functions code and make IS_IP_ADDRESS_IN_RANGE DEPENDS_ON_ARGUMENT (#45358) Pick https://github.com/apache/doris/pull/35239 and add special logic to support a smooth upgrade. The original PR is https://github.com/apache/doris/pull/35239. On branch-3.0 it was merged in 3.0.0, but it forgot to register the old version of the function. On branch-3.0 this is now fixed in https://github.com/apache/doris/pull/45428, which must be merged in 3.0.4. This PR does the same thing and must be merged in 2.1.8. The resulting upgrade compatibility is then: ``` FROM TO result 217- 218+ ✅ 217- 303- 💥 218+ 303- ✅ 218+ 304+ ✅ 303- 304+ ✅ ``` This is the best result we can achieve. --- be/src/agent/be_exec_version_manager.h | 1 + be/src/runtime/runtime_state.h | 5 + be/src/vec/columns/column.h | 4 +- be/src/vec/exprs/vectorized_fn_call.cpp | 4 +- be/src/vec/functions/function.h | 1 + be/src/vec/functions/function_ip.cpp | 4 + be/src/vec/functions/function_ip.h | 223 +++++++-------------- be/src/vec/functions/simple_function_factory.h | 5 + be/test/vec/function/function_ip_test.cpp | 10 +- .../functions/scalar/Ipv6NumToString.java | 3 +- .../functions/scalar/IsIpAddressInRange.java | 4 +- .../java/org/apache/doris/qe/SessionVariable.java | 9 + gensrc/script/doris_builtins_functions.py | 13 +- gensrc/thrift/PaloInternalService.thrift | 4 + .../test_is_ip_address_in_range_function.out | 13 +- .../test_is_ip_address_in_range_function.groovy | 19 +- 16 files changed, 141 insertions(+), 181 deletions(-) diff --git a/be/src/agent/be_exec_version_manager.h b/be/src/agent/be_exec_version_manager.h index 07593601793..709e101178a 100644 --- a/be/src/agent/be_exec_version_manager.h +++ 
b/be/src/agent/be_exec_version_manager.h @@ -70,6 +70,7 @@ private: * f. shrink some function's nullable mode. * g. do local merge of remote runtime filter * h. "now": ALWAYS_NOT_NULLABLE -> DEPEND_ON_ARGUMENTS + * i. change FunctionIsIPAddressInRange from AlwaysNotNullable to DependOnArguments. controlled by individual session variable. */ constexpr inline int BeExecVersionManager::max_be_exec_version = 6; constexpr inline int BeExecVersionManager::min_be_exec_version = 0; diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index c9813108267..96633f7215e 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -174,6 +174,11 @@ public: return _query_options.__isset.enable_decimal256 && _query_options.enable_decimal256; } + bool new_is_ip_address_in_range() const { + return _query_options.__isset.new_is_ip_address_in_range && + _query_options.new_is_ip_address_in_range; + } + bool enable_common_expr_pushdown() const { return _query_options.__isset.enable_common_expr_pushdown && _query_options.enable_common_expr_pushdown; diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index b6c46ba69e6..ce976a40c86 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -22,10 +22,9 @@ #include <fmt/format.h> #include <glog/logging.h> -#include <stdint.h> #include <sys/types.h> -#include <algorithm> +#include <cstdint> #include <functional> #include <ostream> #include <string> @@ -761,6 +760,7 @@ struct IsMutableColumns<> { static const bool value = true; }; +// prefer assert_cast than check_and_get template <typename Type> const Type* check_and_get_column(const IColumn& column) { return typeid_cast<const Type*>(&column); diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index a08088a9efe..097be4abb72 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -99,7 +99,9 @@ Status 
VectorizedFnCall::prepare(RuntimeState* state, const RowDescriptor& desc, // get the function. won't prepare function. _function = SimpleFunctionFactory::instance().get_function( _fn.name.function_name, argument_template, _data_type, - {.enable_decimal256 = state->enable_decimal256()}, state->be_exec_version()); + {.enable_decimal256 = state->enable_decimal256(), + .new_is_ip_address_in_range = state->new_is_ip_address_in_range()}, + state->be_exec_version()); } if (_function == nullptr) { return Status::InternalError("Could not find function {}, arg {} return {} ", diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index f618cc9a1b0..99775d5ab9e 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -49,6 +49,7 @@ namespace doris::vectorized { struct FunctionAttr { bool enable_decimal256 {false}; + bool new_is_ip_address_in_range {false}; }; #define RETURN_REAL_TYPE_FOR_DATEV2_FUNCTION(TYPE) \ diff --git a/be/src/vec/functions/function_ip.cpp b/be/src/vec/functions/function_ip.cpp index 3e8418acb30..30b31901624 100644 --- a/be/src/vec/functions/function_ip.cpp +++ b/be/src/vec/functions/function_ip.cpp @@ -17,6 +17,8 @@ #include "vec/functions/function_ip.h" +#include "vec/functions/simple_function_factory.h" + namespace doris::vectorized { void register_function_ip(SimpleFunctionFactory& factory) { @@ -45,6 +47,8 @@ void register_function_ip(SimpleFunctionFactory& factory) { factory.register_function<FunctionIsIPString<IPv6>>(); factory.register_function<FunctionIsIPAddressInRange>(); + factory.register_function<FunctionIsIPAddressInRangeOld>(); + /// CIDR part factory.register_function<FunctionIPv4CIDRToRange>(); factory.register_function<FunctionIPv6CIDRToRange>(); diff --git a/be/src/vec/functions/function_ip.h b/be/src/vec/functions/function_ip.h index 1365d5ff665..86b0c85745a 100644 --- a/be/src/vec/functions/function_ip.h +++ b/be/src/vec/functions/function_ip.h @@ -23,7 +23,6 @@ #include <cstddef> 
#include <memory> -#include <vector> #include "vec/columns/column.h" #include "vec/columns/column_const.h" @@ -37,7 +36,6 @@ #include "vec/common/ipv6_to_binary.h" #include "vec/common/unaligned.h" #include "vec/core/column_with_type_and_name.h" -#include "vec/core/columns_with_type_and_name.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_ipv4.h" @@ -48,7 +46,6 @@ #include "vec/data_types/data_type_struct.h" #include "vec/functions/function.h" #include "vec/functions/function_helpers.h" -#include "vec/functions/simple_function_factory.h" #include "vec/runtime/ip_address_cidr.h" namespace doris::vectorized { @@ -60,7 +57,7 @@ private: using ColumnType = ColumnVector<ArgType>; const ColumnPtr& column = argument.column; - if (const ColumnType* col = typeid_cast<const ColumnType*>(column.get())) { + if (const auto* col = typeid_cast<const ColumnType*>(column.get())) { const typename ColumnType::Container& vec_in = col->get_data(); auto col_res = ColumnString::create(); @@ -90,9 +87,10 @@ private: block.replace_by_position( result, ColumnNullable::create(std::move(col_res), std::move(null_map))); return Status::OK(); - } else + } else { return Status::RuntimeError("Illegal column {} of argument of function {}", argument.column->get_name(), get_name()); + } } public: @@ -114,17 +112,21 @@ public: switch (argument.type->get_type_id()) { case TypeIndex::Int8: return execute_type<Int8>(block, argument, result); + break; case TypeIndex::Int16: return execute_type<Int16>(block, argument, result); + break; case TypeIndex::Int32: return execute_type<Int32>(block, argument, result); + break; case TypeIndex::Int64: return execute_type<Int64>(block, argument, result); + break; default: break; } - return Status::RuntimeError( + return Status::InternalError( "Illegal column {} of argument of function {}, expected Int8 or Int16 or Int32 or " "Int64", argument.name, get_name()); @@ -140,13 +142,7 @@ static inline bool 
try_parse_ipv4(const char* pos, Int64& result_value) { template <IPConvertExceptionMode exception_mode, typename ToColumn> ColumnPtr convert_to_ipv4(ColumnPtr column, const PaddedPODArray<UInt8>* null_map = nullptr) { - const ColumnString* column_string = check_and_get_column<ColumnString>(column.get()); - - if (!column_string) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal column {} of argument of function {}, expected String", - column->get_name()); - } + const auto* column_string = assert_cast<const ColumnString*>(column.get()); size_t column_size = column_string->size(); @@ -226,11 +222,6 @@ public: size_t get_number_of_arguments() const override { return 1; } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - if (!is_string(remove_nullable(arguments[0]))) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal type {} of argument of function {}", arguments[0]->get_name(), - get_name()); - } auto result_type = std::make_shared<DataTypeInt64>(); if constexpr (exception_mode == IPConvertExceptionMode::Null) { @@ -273,7 +264,7 @@ void process_ipv6_column(const ColumnPtr& column, size_t input_rows_count, auto* begin = reinterpret_cast<char*>(vec_res.data()); auto* pos = begin; - const auto* col = check_and_get_column<T>(column.get()); + const auto* col = assert_cast<const T*>(column.get()); for (size_t i = 0; i < input_rows_count; ++i) { bool is_empty = false; @@ -282,7 +273,7 @@ void process_ipv6_column(const ColumnPtr& column, size_t input_rows_count, const auto& vec_in = col->get_data(); memcpy(ipv6_address_data, reinterpret_cast<const unsigned char*>(&vec_in[i]), IPV6_BINARY_LENGTH); - } else { + } else { // ColumnString const auto str_ref = col->get_data_at(i); const char* value = str_ref.data; size_t value_size = str_ref.size; @@ -324,26 +315,12 @@ public: size_t get_number_of_arguments() const override { return 1; } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - const auto* 
arg_string = check_and_get_data_type<DataTypeString>(arguments[0].get()); - const auto* arg_ipv6 = check_and_get_data_type<DataTypeIPv6>(arguments[0].get()); - if (!arg_ipv6 && !(arg_string)) - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal type {} of argument of function {}, expected IPv6 or String", - arguments[0]->get_name(), get_name()); - return make_nullable(std::make_shared<DataTypeString>()); } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { const ColumnPtr& column = block.get_by_position(arguments[0]).column; - const auto* col_ipv6 = check_and_get_column<ColumnIPv6>(column.get()); - const auto* col_string = check_and_get_column<ColumnString>(column.get()); - - if (!col_ipv6 && !col_string) - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal column {} of argument of function {}, expected IPv6 or String", - column->get_name(), get_name()); auto col_res = ColumnString::create(); ColumnString::Chars& vec_res = col_res->get_chars(); @@ -355,10 +332,10 @@ public: unsigned char ipv6_address_data[IPV6_BINARY_LENGTH]; - if (col_ipv6) { + if (check_and_get_column<ColumnIPv6>(column.get())) { process_ipv6_column<ColumnIPv6>(column, input_rows_count, vec_res, offsets_res, null_map, ipv6_address_data); - } else { + } else { //ColumnString process_ipv6_column<ColumnString>(column, input_rows_count, vec_res, offsets_res, null_map, ipv6_address_data); } @@ -374,13 +351,6 @@ template <IPConvertExceptionMode exception_mode, typename ToColumn = ColumnIPv6, typename StringColumnType> ColumnPtr convert_to_ipv6(const StringColumnType& string_column, const PaddedPODArray<UInt8>* null_map = nullptr) { - if constexpr (!std::is_same_v<ToColumn, ColumnString> && - !std::is_same_v<ToColumn, ColumnIPv6>) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal return column type {}. 
Expected IPv6 or String", - TypeName<typename ToColumn::ValueType>::get()); - } - const size_t column_size = string_column.size(); ColumnUInt8::MutablePtr col_null_map_to; @@ -522,14 +492,9 @@ ColumnPtr convert_to_ipv6(const StringColumnType& string_column, template <IPConvertExceptionMode exception_mode, typename ToColumn = ColumnIPv6> ColumnPtr convert_to_ipv6(ColumnPtr column, const PaddedPODArray<UInt8>* null_map = nullptr) { - if (const auto* column_input_string = check_and_get_column<ColumnString>(column.get())) { - auto result = - detail::convert_to_ipv6<exception_mode, ToColumn>(*column_input_string, null_map); - return result; - } else { - throw Exception(ErrorCode::INVALID_ARGUMENT, "Illegal column type {}. Expected String", - column->get_name()); - } + const auto* column_input_string = assert_cast<const ColumnString*>(column.get()); + auto result = detail::convert_to_ipv6<exception_mode, ToColumn>(*column_input_string, null_map); + return result; } template <IPConvertExceptionMode exception_mode> @@ -552,12 +517,6 @@ public: bool use_default_implementation_for_nulls() const override { return false; } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - if (!is_string(remove_nullable(arguments[0]))) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal type {} of argument of function {}", arguments[0]->get_name(), - get_name()); - } - auto result_type = std::make_shared<DataTypeString>(); if constexpr (exception_mode == IPConvertExceptionMode::Null) { @@ -605,12 +564,6 @@ public: size_t get_number_of_arguments() const override { return 1; } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - const auto& addr_type = arguments[0]; - if (!is_string(remove_nullable(addr_type))) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal type {} of first argument of function {}, expected String", - addr_type->get_name(), get_name()); - } return std::make_shared<DataTypeUInt8>(); } @@ -652,18 +605,52 
@@ public: size_t get_number_of_arguments() const override { return 2; } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - if (arguments.size() != 2) { - throw Exception( - ErrorCode::INVALID_ARGUMENT, - "Number of arguments for function {} doesn't match: passed {}, should be 2", - get_name(), arguments.size()); - } - const auto& addr_type = arguments[0]; - const auto& cidr_type = arguments[1]; - if (!is_string(remove_nullable(addr_type)) || !is_string(remove_nullable(cidr_type))) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "The arguments of function {} must be String", get_name()); + return std::make_shared<DataTypeUInt8>(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { + const auto& addr_column_with_type_and_name = block.get_by_position(arguments[0]); + const auto& cidr_column_with_type_and_name = block.get_by_position(arguments[1]); + WhichDataType addr_type(addr_column_with_type_and_name.type); + WhichDataType cidr_type(cidr_column_with_type_and_name.type); + const auto& [addr_column, addr_const] = + unpack_if_const(addr_column_with_type_and_name.column); + const auto& [cidr_column, cidr_const] = + unpack_if_const(cidr_column_with_type_and_name.column); + const auto* str_addr_column = assert_cast<const ColumnString*>(addr_column.get()); + const auto* str_cidr_column = assert_cast<const ColumnString*>(cidr_column.get()); + + auto col_res = ColumnUInt8::create(input_rows_count, 0); + auto& col_res_data = col_res->get_data(); + + for (size_t i = 0; i < input_rows_count; ++i) { + auto addr_idx = index_check_const(i, addr_const); + auto cidr_idx = index_check_const(i, cidr_const); + + const auto addr = + IPAddressVariant(str_addr_column->get_data_at(addr_idx).to_string_view()); + const auto cidr = + parse_ip_with_cidr(str_cidr_column->get_data_at(cidr_idx).to_string_view()); + col_res_data[i] = 
is_address_in_range(addr, cidr) ? 1 : 0; } + + block.replace_by_position(result, std::move(col_res)); + return Status::OK(); + } +}; + +// old version throw exception when meet null value +class FunctionIsIPAddressInRangeOld : public IFunction { +public: + static constexpr auto name = "__is_ip_address_in_range_OLD__"; + static FunctionPtr create() { return std::make_shared<FunctionIsIPAddressInRange>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 2; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return std::make_shared<DataTypeUInt8>(); } @@ -687,33 +674,21 @@ public: if (addr_type.is_nullable()) { const auto* addr_column_nullable = assert_cast<const ColumnNullable*>(addr_column.get()); - str_addr_column = - check_and_get_column<ColumnString>(addr_column_nullable->get_nested_column()); + str_addr_column = assert_cast<const ColumnString*>( + addr_column_nullable->get_nested_column_ptr().get()); null_map_addr = &addr_column_nullable->get_null_map_data(); } else { - str_addr_column = check_and_get_column<ColumnString>(addr_column.get()); + str_addr_column = assert_cast<const ColumnString*>(addr_column.get()); } if (cidr_type.is_nullable()) { const auto* cidr_column_nullable = assert_cast<const ColumnNullable*>(cidr_column.get()); - str_cidr_column = - check_and_get_column<ColumnString>(cidr_column_nullable->get_nested_column()); + str_cidr_column = assert_cast<const ColumnString*>( + cidr_column_nullable->get_nested_column_ptr().get()); null_map_cidr = &cidr_column_nullable->get_null_map_data(); } else { - str_cidr_column = check_and_get_column<ColumnString>(cidr_column.get()); - } - - if (!str_addr_column) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal column {} of argument of function {}, expected String", - addr_column->get_name(), get_name()); - } - - if (!str_cidr_column) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal column {} 
of argument of function {}, expected String", - cidr_column->get_name(), get_name()); + str_cidr_column = assert_cast<const ColumnString*>(cidr_column.get()); } auto col_res = ColumnUInt8::create(input_rows_count, 0); @@ -722,12 +697,12 @@ public: for (size_t i = 0; i < input_rows_count; ++i) { auto addr_idx = index_check_const(i, addr_const); auto cidr_idx = index_check_const(i, cidr_const); - if (null_map_addr && (*null_map_addr)[addr_idx]) { + if (null_map_addr && (*null_map_addr)[addr_idx]) [[unlikely]] { throw Exception(ErrorCode::INVALID_ARGUMENT, "The arguments of function {} must be String, not NULL", get_name()); } - if (null_map_cidr && (*null_map_cidr)[cidr_idx]) { + if (null_map_cidr && (*null_map_cidr)[cidr_idx]) [[unlikely]] { throw Exception(ErrorCode::INVALID_ARGUMENT, "The arguments of function {} must be String, not NULL", get_name()); @@ -754,22 +729,7 @@ public: size_t get_number_of_arguments() const override { return 2; } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - WhichDataType first_arg_type = arguments[0]; - if (!(first_arg_type.is_ipv4())) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal type {} of first argument of function {}, expected IPv4", - arguments[0]->get_name(), get_name()); - } - - WhichDataType second_arg_type = arguments[1]; - if (!(second_arg_type.is_int16())) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal type {} of second argument of function {}, expected Int16", - arguments[1]->get_name(), get_name()); - } - DataTypePtr element = std::make_shared<DataTypeIPv4>(); - return std::make_shared<DataTypeStruct>(DataTypes {element, element}, Strings {"min", "max"}); } @@ -782,9 +742,9 @@ public: const auto& [ip_column_ptr, ip_col_const] = unpack_if_const(ip_column.column); const auto& [cidr_column_ptr, cidr_col_const] = unpack_if_const(cidr_column.column); - const auto* col_ip_column = check_and_get_column<ColumnVector<IPv4>>(ip_column_ptr.get()); + const auto* 
col_ip_column = assert_cast<const ColumnVector<IPv4>*>(ip_column_ptr.get()); const auto* col_cidr_column = - check_and_get_column<ColumnVector<Int16>>(cidr_column_ptr.get()); + assert_cast<const ColumnVector<Int16>*>(cidr_column_ptr.get()); const typename ColumnVector<IPv4>::Container& vec_ip_input = col_ip_column->get_data(); const ColumnInt16::Container& vec_cidr_input = col_cidr_column->get_data(); @@ -866,19 +826,6 @@ public: size_t get_number_of_arguments() const override { return 2; } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - const auto& ipv6_type = arguments[0]; - if (!is_string(remove_nullable(ipv6_type)) && !is_ipv6(remove_nullable(ipv6_type))) { - throw Exception( - ErrorCode::INVALID_ARGUMENT, - "Illegal type {} of first argument of function {}, expected String or IPv6", - ipv6_type->get_name(), get_name()); - } - const auto& cidr_type = arguments[1]; - if (!is_int16(remove_nullable(cidr_type))) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal type {} of second argument of function {}, expected Int16", - cidr_type->get_name(), get_name()); - } DataTypePtr element = std::make_shared<DataTypeIPv6>(); return std::make_shared<DataTypeStruct>(DataTypes {element, element}, Strings {"min", "max"}); @@ -899,11 +846,11 @@ public: ColumnPtr col_res = nullptr; if (addr_type.is_ipv6()) { - const auto* ipv6_addr_column = check_and_get_column<ColumnIPv6>(addr_column.get()); + const auto* ipv6_addr_column = assert_cast<const ColumnIPv6*>(addr_column.get()); col_res = execute_impl<ColumnIPv6>(*ipv6_addr_column, *cidr_col, input_rows_count, add_col_const, col_const); } else if (addr_type.is_string()) { - const auto* str_addr_column = check_and_get_column<ColumnString>(addr_column.get()); + const auto* str_addr_column = assert_cast<const ColumnString*>(addr_column.get()); col_res = execute_impl<ColumnString>(*str_addr_column, *cidr_col, input_rows_count, add_col_const, col_const); } else { @@ -1021,12 +968,8 @@ public: 
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { const ColumnPtr& column = block.get_by_position(arguments[0]).column; - const auto* col_in = check_and_get_column<ColumnString>(column.get()); + const auto* col_in = assert_cast<const ColumnString*>(column.get()); - if (!col_in) - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal column {} of argument of function {}, expected String", - column->get_name(), get_name()); size_t col_size = col_in->size(); auto col_res = ColumnUInt8::create(col_size, 0); auto& col_res_data = col_res->get_data(); @@ -1066,12 +1009,8 @@ public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { const ColumnPtr& column = block.get_by_position(arguments[0]).column; - const auto* col_in = check_and_get_column<ColumnString>(column.get()); + const auto* col_in = assert_cast<const ColumnString*>(column.get()); - if (!col_in) - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal column {} of argument of function {}, expected String", - column->get_name(), get_name()); size_t col_size = col_in->size(); auto col_res = ColumnUInt8::create(col_size, 0); auto& col_res_data = col_res->get_data(); @@ -1124,12 +1063,6 @@ public: size_t get_number_of_arguments() const override { return 1; } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - if (!is_string(remove_nullable(arguments[0]))) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Illegal type {} of argument of function {}, expected String", - arguments[0]->get_name(), get_name()); - } - DataTypePtr result_type; if constexpr (std::is_same_v<Type, IPv4>) { @@ -1158,11 +1091,11 @@ public: if (addr_type.is_nullable()) { const auto* addr_column_nullable = assert_cast<const ColumnNullable*>(addr_column.get()); - str_addr_column = - 
check_and_get_column<ColumnString>(addr_column_nullable->get_nested_column()); + str_addr_column = assert_cast<const ColumnString*>( + addr_column_nullable->get_nested_column_ptr().get()); addr_null_map = &addr_column_nullable->get_null_map_data(); } else { - str_addr_column = check_and_get_column<ColumnString>(addr_column.get()); + str_addr_column = assert_cast<const ColumnString*>(addr_column.get()); } auto col_res = ColumnVector<Type>::create(input_rows_count, 0); diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index a859a4685e2..c86d29aaa66 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -162,6 +162,11 @@ public: int be_version = BeExecVersionManager::get_newest_version()) { std::string key_str = name; + // special function replacement + if (key_str == "is_ip_address_in_range" && !attr.new_is_ip_address_in_range) [[unlikely]] { + key_str = "__is_ip_address_in_range_OLD__"; + } + if (function_alias.contains(name)) { key_str = function_alias[name]; } diff --git a/be/test/vec/function/function_ip_test.cpp b/be/test/vec/function/function_ip_test.cpp index 772b1277798..192b3810a98 100644 --- a/be/test/vec/function/function_ip_test.cpp +++ b/be/test/vec/function/function_ip_test.cpp @@ -57,7 +57,7 @@ TEST(FunctionIpTest, FunctionIsIPAddressInRangeTest) { { // vector vs vector InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; - static_cast<void>(check_function<DataTypeUInt8, false>(func_name, input_types, data_set)); + static_cast<void>(check_function<DataTypeUInt8, true>(func_name, input_types, data_set)); } { @@ -65,8 +65,8 @@ TEST(FunctionIpTest, FunctionIsIPAddressInRangeTest) { InputTypeSet input_types = {TypeIndex::String, Consted {TypeIndex::String}}; for (const auto& line : data_set) { DataSet const_cidr_dataset = {line}; - static_cast<void>(check_function<DataTypeUInt8, false>(func_name, input_types, - 
const_cidr_dataset)); + static_cast<void>(check_function<DataTypeUInt8, true>(func_name, input_types, + const_cidr_dataset)); } } @@ -75,8 +75,8 @@ TEST(FunctionIpTest, FunctionIsIPAddressInRangeTest) { InputTypeSet input_types = {Consted {TypeIndex::String}, TypeIndex::String}; for (const auto& line : data_set) { DataSet const_addr_dataset = {line}; - static_cast<void>(check_function<DataTypeUInt8, false>(func_name, input_types, - const_addr_dataset)); + static_cast<void>(check_function<DataTypeUInt8, true>(func_name, input_types, + const_addr_dataset)); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Ipv6NumToString.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Ipv6NumToString.java index ef5ac558ade..056601f369b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Ipv6NumToString.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Ipv6NumToString.java @@ -41,8 +41,7 @@ public class Ipv6NumToString extends ScalarFunction public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT), FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE), - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(IPv6Type.INSTANCE), - FunctionSignature.ret(StringType.INSTANCE).args(IPv6Type.INSTANCE)); + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(IPv6Type.INSTANCE)); public Ipv6NumToString(Expression arg0) { super("ipv6_num_to_string", arg0); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsIpAddressInRange.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsIpAddressInRange.java index d15af4235b7..85fe4dcab78 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsIpAddressInRange.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsIpAddressInRange.java @@ -19,8 +19,8 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar; import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BooleanType; @@ -36,7 +36,7 @@ import java.util.List; * scalar function `is_ip_address_in_range` */ public class IsIpAddressInRange extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable { public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT), diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index de9ba6486fa..411f678490c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -656,6 +656,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_COOLDOWN_REPLICA_AFFINITY = "enable_cooldown_replica_affinity"; + + public static final String NEW_IS_IP_ADDRESS_IN_RANGE = "new_is_ip_address_in_range"; /** * Inserting overwrite for auto 
partition table allows creating partition for * datas which cannot find partition to overwrite. @@ -2214,6 +2216,11 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = ENABLE_COOLDOWN_REPLICA_AFFINITY, needForward = true) public boolean enableCooldownReplicaAffinity = true; + // only to control some function behaviour. not visible or mutable. + @VariableMgr.VarAttr(name = NEW_IS_IP_ADDRESS_IN_RANGE, needForward = true, flag = VariableMgr.INVISIBLE + | VariableMgr.READ_ONLY) + public boolean newIsIpAddressInRange = true; + public void setEnableEsParallelScroll(boolean enableESParallelScroll) { this.enableESParallelScroll = enableESParallelScroll; } @@ -3772,6 +3779,8 @@ public class SessionVariable implements Serializable, Writable { tResult.setOrcMaxMergeDistanceBytes(orcMaxMergeDistanceBytes); tResult.setOrcOnceMaxReadBytes(orcOnceMaxReadBytes); + tResult.setNewIsIpAddressInRange(newIsIpAddressInRange); + return tResult; } diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 74f7b42f7a8..fc4db14bdfe 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -2103,7 +2103,6 @@ visible_functions = { [['ipv6_num_to_string','inet6_ntoa'], 'VARCHAR', ['VARCHAR'], 'ALWAYS_NULLABLE'], [['ipv6_num_to_string','inet6_ntoa'], 'STRING', ['STRING'], 'ALWAYS_NULLABLE'], [['ipv6_num_to_string','inet6_ntoa'], 'VARCHAR', ['IPV6'], 'ALWAYS_NULLABLE'], - [['ipv6_num_to_string','inet6_ntoa'], 'STRING', ['IPV6'], 'ALWAYS_NULLABLE'], [['ipv6_string_to_num'], 'VARCHAR', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], [['ipv6_string_to_num'], 'STRING', ['STRING'], 'ALWAYS_NOT_NULLABLE'], [['ipv6_string_to_num_or_default'], 'VARCHAR', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], @@ -2118,12 +2117,12 @@ visible_functions = { [['is_ipv4_string'], 'BOOLEAN', ['STRING'], ''], [['is_ipv6_string'], 'BOOLEAN', ['VARCHAR'], ''], [['is_ipv6_string'], 'BOOLEAN', ['STRING'], 
''], - [['is_ip_address_in_range'], 'BOOLEAN', ['VARCHAR', 'VARCHAR'], 'ALWAYS_NOT_NULLABLE'], - [['is_ip_address_in_range'], 'BOOLEAN', ['STRING', 'STRING'], 'ALWAYS_NOT_NULLABLE'], - [['ipv4_cidr_to_range'], 'STRUCT<IPV4, IPV4>', ['IPV4', 'SMALLINT'], ''], - [['ipv6_cidr_to_range'], 'STRUCT<IPV6, IPV6>', ['IPV6', 'SMALLINT'], ''], - [['ipv6_cidr_to_range'], 'STRUCT<IPV6, IPV6>', ['VARCHAR', 'SMALLINT'], ''], - [['ipv6_cidr_to_range'], 'STRUCT<IPV6, IPV6>', ['STRING', 'SMALLINT'], ''], + [['is_ip_address_in_range'], 'BOOLEAN', ['VARCHAR', 'VARCHAR'], 'DEPEND_ON_ARGUMENT'], + [['is_ip_address_in_range'], 'BOOLEAN', ['STRING', 'STRING'], 'DEPEND_ON_ARGUMENT'], + [['ipv4_cidr_to_range'], 'STRUCT<IPV4, IPV4>', ['IPV4', 'SMALLINT'], 'DEPEND_ON_ARGUMENT'], + [['ipv6_cidr_to_range'], 'STRUCT<IPV6, IPV6>', ['IPV6', 'SMALLINT'], 'DEPEND_ON_ARGUMENT'], + [['ipv6_cidr_to_range'], 'STRUCT<IPV6, IPV6>', ['VARCHAR', 'SMALLINT'], 'DEPEND_ON_ARGUMENT'], + [['ipv6_cidr_to_range'], 'STRUCT<IPV6, IPV6>', ['STRING', 'SMALLINT'], 'DEPEND_ON_ARGUMENT'], [['to_ipv4'], 'IPV4', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], [['to_ipv4'], 'IPV4', ['STRING'], 'ALWAYS_NOT_NULLABLE'], [['to_ipv4_or_default'], 'IPV4', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 09c053f6487..b0e3ad456fc 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -338,6 +338,10 @@ struct TQueryOptions { 138: optional i64 orc_tiny_stripe_threshold_bytes = 8388608; 139: optional i64 orc_once_max_read_bytes = 8388608; 140: optional i64 orc_max_merge_distance_bytes = 1048576; + + // upgrade options. keep them same in every branch. 
+ 200: optional bool new_is_ip_address_in_range = false; + // For cloud, to control if the content would be written into file cache 1000: optional bool disable_file_cache = false } diff --git a/regression-test/data/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.out b/regression-test/data/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.out index 391ee192b58..285b861b742 100644 --- a/regression-test/data/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.out +++ b/regression-test/data/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.out @@ -1,3 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this -- !sql -- 1 true 2 false @@ -80,4 +81,14 @@ 23 false 24 false 25 false -26 false \ No newline at end of file +26 false + +-- !sql -- +\N + +-- !sql -- +\N + +-- !sql -- +\N + diff --git a/regression-test/suites/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.groovy b/regression-test/suites/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.groovy index e7b496a1408..812bfffeb2f 100644 --- a/regression-test/suites/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.groovy @@ -73,22 +73,9 @@ suite("test_is_ip_address_in_range_function") { // scalar vs vector qt_sql "select id, is_ip_address_in_range('192.168.100.0', cidr) from test_is_ip_address_in_range_function order by id" - test { - sql "SELECT is_ip_address_in_range('::ffff:192.168.0.1', NULL)" - // check exception message contains - exception "The arguments of function is_ip_address_in_range must be String, not NULL" - } + qt_sql "SELECT is_ip_address_in_range('::ffff:192.168.0.1', NULL)" - test { - sql "SELECT is_ip_address_in_range(NULL, '::ffff:192.168.0.4/128')" - // check exception message 
contains - exception "The arguments of function is_ip_address_in_range must be String, not NULL" - } + qt_sql "SELECT is_ip_address_in_range(NULL, '::ffff:192.168.0.4/128')" - test { - sql "SELECT is_ip_address_in_range(NULL, NULL)" - // check exception message contains - exception "The arguments of function is_ip_address_in_range must be String, not NULL" - } - sql """ DROP TABLE IF EXISTS test_is_ip_address_in_range_function """ + qt_sql "SELECT is_ip_address_in_range(NULL, NULL)" } \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org