superdiaodiao commented on code in PR #28361: URL: https://github.com/apache/doris/pull/28361#discussion_r1426068353
########## be/src/vec/functions/function_ip.h: ########## @@ -348,4 +348,254 @@ class FunctionIPv6NumToString : public IFunction { } }; +namespace detail { +template <IPStringToNumExceptionMode exception_mode, typename ToColumn = ColumnIPv6, + typename StringColumnType> +ColumnPtr convertToIPv6(const StringColumnType& string_column, + const PaddedPODArray<UInt8>* null_map = nullptr) { + if constexpr (!std::is_same_v<ToColumn, ColumnString> && + !std::is_same_v<ToColumn, ColumnIPv6>) { + throw Exception(ErrorCode::INVALID_ARGUMENT, + "Illegal return column type {}. Expected IPv6 or String", + TypeName<typename ToColumn::ValueType>::get()); + } + + const size_t column_size = string_column.size(); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container* vec_null_map_to = nullptr; + + if constexpr (exception_mode == IPStringToNumExceptionMode::Null) { + col_null_map_to = ColumnUInt8::create(column_size, false); + vec_null_map_to = &col_null_map_to->get_data(); + } + + /// This is a special treatment for source column of type String + /// to preserve previous behavior when IPv6 was a domain type of String + if constexpr (std::is_same_v<StringColumnType, ColumnString>) { + if (string_column.get_offsets()[0] - 1 == IPV6_BINARY_LENGTH) { + if constexpr (std::is_same_v<ToColumn, ColumnString>) { + auto col_res = ColumnString::create(); + + if constexpr (exception_mode == IPStringToNumExceptionMode::Null) { + col_null_map_to = ColumnUInt8::create(column_size, false); + if (null_map) { + memcpy(col_null_map_to->get_data().data(), null_map->data(), column_size); + } + + return ColumnNullable::create(std::move(col_res), std::move(col_null_map_to)); + } + + return col_res; + } else { + auto col_res = ColumnIPv6::create(); + auto& vec_res = col_res->get_data(); + + vec_res.resize(column_size); + memcpy(vec_res.data(), string_column.get_chars().data(), + column_size * IPV6_BINARY_LENGTH); + + if constexpr (exception_mode == IPStringToNumExceptionMode::Null) { + 
col_null_map_to = ColumnUInt8::create(column_size, false); + if (null_map) { + memcpy(col_null_map_to->get_data().data(), null_map->data(), column_size); + } + return ColumnNullable::create(std::move(col_res), std::move(col_null_map_to)); + } + + return col_res; + } + } + } + + auto column_create = [](size_t column_size) -> typename ToColumn::MutablePtr { + if constexpr (std::is_same_v<ToColumn, ColumnString>) { + auto column_string = ColumnString::create(); + column_string->get_chars().reserve(column_size * IPV6_BINARY_LENGTH); + column_string->get_offsets().reserve(column_size); + return column_string; + } else { + return ColumnIPv6::create(); + } + }; + + auto get_vector = [](auto& col_res, size_t col_size) -> decltype(auto) { + if constexpr (std::is_same_v<ToColumn, ColumnString>) { + auto& vec_res = col_res->get_chars(); + vec_res.resize(col_size * IPV6_BINARY_LENGTH); + return (vec_res); + } else { + auto& vec_res = col_res->get_data(); + vec_res.resize(col_size); + return (vec_res); + } + }; + + auto col_res = column_create(column_size); + auto& vec_res = get_vector(col_res, column_size); + + using Chars = typename StringColumnType::Chars; + const Chars& vec_src = string_column.get_chars(); + + size_t src_offset = 0; + char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] = "::ffff:"; + + /// ColumnString contains not null terminated strings. But functions parseIPv6, parseIPv4 expect null terminated string. 
+ /// TODO fix this - now parseIPv6/parseIPv4 accept end iterator, so can be parsed in-place + std::string string_buffer; + + int offset_inc = 1; + if constexpr (std::is_same_v<ToColumn, ColumnString>) { + offset_inc = IPV6_BINARY_LENGTH; + } + + for (size_t out_offset = 0, i = 0; i < column_size; out_offset += offset_inc, ++i) { + size_t src_next_offset = src_offset; + + const char* src_value = nullptr; + auto* res_value = reinterpret_cast<unsigned char*>(&vec_res[out_offset]); + + if constexpr (std::is_same_v<StringColumnType, ColumnString>) { + src_value = reinterpret_cast<const char*>(&vec_src[src_offset]); + src_next_offset = string_column.get_offsets()[i]; + + string_buffer.assign(src_value, src_next_offset - src_offset); + src_value = string_buffer.c_str(); + } + + if (null_map && (*null_map)[i]) { + std::fill_n(&vec_res[out_offset], offset_inc, 0); + src_offset = src_next_offset; + if constexpr (exception_mode == IPStringToNumExceptionMode::Null) { + (*vec_null_map_to)[i] = true; + if constexpr (std::is_same_v<ToColumn, ColumnString>) { + auto* column_string = assert_cast<ColumnString*>(col_res.get()); + column_string->get_offsets().push_back((i + 1) * IPV6_BINARY_LENGTH); + } + } + continue; + } + + bool parse_result = false; + Int64 dummy_result = 0; + + /// For both cases below: In case of failure, the function parseIPv6 fills vec_res with zero bytes. + + /// If the source IP address is parsable as an IPv4 address, then transform it into a valid IPv6 address. + /// Keeping it simple by just prefixing `::ffff:` to the IPv4 address to represent it as a valid IPv6 address. 
+ size_t string_length = src_next_offset - src_offset; + if (string_length != 0) { + if (tryParseIPv4(src_value, dummy_result)) { + strcat(src_ipv4_buf, src_value); + parse_result = parseIPv6whole(src_ipv4_buf, res_value); + } else { + parse_result = parseIPv6whole(src_value, res_value); + } + } + + if (parse_result && string_length != 0) { + if constexpr (std::is_same_v<ToColumn, ColumnString>) { + auto* column_string = assert_cast<ColumnString*>(col_res.get()); + std::copy(res_value, res_value + IPV6_BINARY_LENGTH, + column_string->get_chars().begin() + i * IPV6_BINARY_LENGTH); + column_string->get_offsets().push_back((i + 1) * IPV6_BINARY_LENGTH); + } else { + col_res->insert_data(reinterpret_cast<const char*>(res_value), IPV6_BINARY_LENGTH); + } + } else { + if (exception_mode == IPStringToNumExceptionMode::Throw) { + throw Exception(ErrorCode::INVALID_ARGUMENT, "Invalid IPv6 value"); + } + std::fill_n(&vec_res[out_offset], offset_inc, 0); + if constexpr (std::is_same_v<ToColumn, ColumnString>) { + auto* column_string = assert_cast<ColumnString*>(col_res.get()); + column_string->get_offsets().push_back((i + 1) * IPV6_BINARY_LENGTH); + } + if constexpr (exception_mode == IPStringToNumExceptionMode::Null) { + (*vec_null_map_to)[i] = true; + } + } + src_offset = src_next_offset; + } + + if constexpr (exception_mode == IPStringToNumExceptionMode::Null) { + return ColumnNullable::create(std::move(col_res), std::move(col_null_map_to)); + } + return col_res; +} +} // namespace detail + +template <IPStringToNumExceptionMode exception_mode, typename ToColumn = ColumnIPv6> +ColumnPtr convertToIPv6(ColumnPtr column, const PaddedPODArray<UInt8>* null_map = nullptr) { + if (const auto* column_input_string = check_and_get_column<ColumnString>(column.get())) { + auto result = + detail::convertToIPv6<exception_mode, ToColumn>(*column_input_string, null_map); + return result; + } else { + throw Exception(ErrorCode::INVALID_ARGUMENT, "Illegal column type {}. 
Expected String", + column->get_name()); + } +} + +template <IPStringToNumExceptionMode exception_mode> +class FunctionIPv6StringToNum : public IFunction { +public: + static constexpr auto name = exception_mode == IPStringToNumExceptionMode::Throw + ? "ipv6stringtonum" + : (exception_mode == IPStringToNumExceptionMode::Default + ? "ipv6stringtonumordefault" + : "ipv6stringtonumornull"); + + static FunctionPtr create() { + return std::make_shared<FunctionIPv6StringToNum<exception_mode>>(); + } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 1; } + + bool use_default_implementation_for_nulls() const override { return false; } Review Comment: This is to keep the behavior consistent with ClickHouse. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org