HappenLee commented on code in PR #28361:
URL: https://github.com/apache/doris/pull/28361#discussion_r1426058017
##########
be/src/vec/functions/function_ip.h:
##########
@@ -348,4 +348,254 @@ class FunctionIPv6NumToString : public IFunction {
}
};
+namespace detail {
+template <IPStringToNumExceptionMode exception_mode, typename ToColumn =
ColumnIPv6,
+ typename StringColumnType>
+ColumnPtr convertToIPv6(const StringColumnType& string_column,
+ const PaddedPODArray<UInt8>* null_map = nullptr) {
+ if constexpr (!std::is_same_v<ToColumn, ColumnString> &&
+ !std::is_same_v<ToColumn, ColumnIPv6>) {
+ throw Exception(ErrorCode::INVALID_ARGUMENT,
+ "Illegal return column type {}. Expected IPv6 or
String",
+ TypeName<typename ToColumn::ValueType>::get());
+ }
+
+ const size_t column_size = string_column.size();
+
+ ColumnUInt8::MutablePtr col_null_map_to;
+ ColumnUInt8::Container* vec_null_map_to = nullptr;
+
+ if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
+ col_null_map_to = ColumnUInt8::create(column_size, false);
+ vec_null_map_to = &col_null_map_to->get_data();
+ }
+
+ /// This is a special treatment for source column of type String
+ /// to preserve previous behavior when IPv6 was a domain type of String
+ if constexpr (std::is_same_v<StringColumnType, ColumnString>) {
+ if (string_column.get_offsets()[0] - 1 == IPV6_BINARY_LENGTH) {
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ auto col_res = ColumnString::create();
+
+ if constexpr (exception_mode ==
IPStringToNumExceptionMode::Null) {
+ col_null_map_to = ColumnUInt8::create(column_size, false);
+ if (null_map) {
+ memcpy(col_null_map_to->get_data().data(),
null_map->data(), column_size);
+ }
+
+ return ColumnNullable::create(std::move(col_res),
std::move(col_null_map_to));
+ }
+
+ return col_res;
+ } else {
+ auto col_res = ColumnIPv6::create();
+ auto& vec_res = col_res->get_data();
+
+ vec_res.resize(column_size);
+ memcpy(vec_res.data(), string_column.get_chars().data(),
+ column_size * IPV6_BINARY_LENGTH);
+
+ if constexpr (exception_mode ==
IPStringToNumExceptionMode::Null) {
+ col_null_map_to = ColumnUInt8::create(column_size, false);
+ if (null_map) {
+ memcpy(col_null_map_to->get_data().data(),
null_map->data(), column_size);
+ }
+ return ColumnNullable::create(std::move(col_res),
std::move(col_null_map_to));
+ }
+
+ return col_res;
+ }
+ }
+ }
+
+ auto column_create = [](size_t column_size) -> typename
ToColumn::MutablePtr {
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ auto column_string = ColumnString::create();
+ column_string->get_chars().reserve(column_size *
IPV6_BINARY_LENGTH);
+ column_string->get_offsets().reserve(column_size);
+ return column_string;
+ } else {
+ return ColumnIPv6::create();
+ }
+ };
+
+ auto get_vector = [](auto& col_res, size_t col_size) -> decltype(auto) {
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ auto& vec_res = col_res->get_chars();
+ vec_res.resize(col_size * IPV6_BINARY_LENGTH);
+ return (vec_res);
+ } else {
+ auto& vec_res = col_res->get_data();
+ vec_res.resize(col_size);
+ return (vec_res);
+ }
+ };
+
+ auto col_res = column_create(column_size);
+ auto& vec_res = get_vector(col_res, column_size);
+
+ using Chars = typename StringColumnType::Chars;
+ const Chars& vec_src = string_column.get_chars();
+
+ size_t src_offset = 0;
+ char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] =
"::ffff:";
+
+ /// ColumnString contains not null terminated strings. But functions
parseIPv6, parseIPv4 expect null terminated string.
+ /// TODO fix this - now parseIPv6/parseIPv4 accept end iterator, so can be
parsed in-place
+ std::string string_buffer;
+
+ int offset_inc = 1;
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ offset_inc = IPV6_BINARY_LENGTH;
+ }
+
+ for (size_t out_offset = 0, i = 0; i < column_size; out_offset +=
offset_inc, ++i) {
+ size_t src_next_offset = src_offset;
+
+ const char* src_value = nullptr;
+ auto* res_value = reinterpret_cast<unsigned
char*>(&vec_res[out_offset]);
+
+ if constexpr (std::is_same_v<StringColumnType, ColumnString>) {
+ src_value = reinterpret_cast<const char*>(&vec_src[src_offset]);
+ src_next_offset = string_column.get_offsets()[i];
+
+ string_buffer.assign(src_value, src_next_offset - src_offset);
+ src_value = string_buffer.c_str();
+ }
+
+ if (null_map && (*null_map)[i]) {
+ std::fill_n(&vec_res[out_offset], offset_inc, 0);
+ src_offset = src_next_offset;
+ if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
+ (*vec_null_map_to)[i] = true;
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ auto* column_string =
assert_cast<ColumnString*>(col_res.get());
+ column_string->get_offsets().push_back((i + 1) *
IPV6_BINARY_LENGTH);
+ }
+ }
+ continue;
+ }
+
+ bool parse_result = false;
+ Int64 dummy_result = 0;
+
+ /// For both cases below: In case of failure, the function parseIPv6
fills vec_res with zero bytes.
+
+ /// If the source IP address is parsable as an IPv4 address, then
transform it into a valid IPv6 address.
+ /// Keeping it simple by just prefixing `::ffff:` to the IPv4 address
to represent it as a valid IPv6 address.
+ size_t string_length = src_next_offset - src_offset;
+ if (string_length != 0) {
+ if (tryParseIPv4(src_value, dummy_result)) {
+ strcat(src_ipv4_buf, src_value);
+ parse_result = parseIPv6whole(src_ipv4_buf, res_value);
+ } else {
+ parse_result = parseIPv6whole(src_value, res_value);
+ }
+ }
+
+ if (parse_result && string_length != 0) {
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ auto* column_string =
assert_cast<ColumnString*>(col_res.get());
+ std::copy(res_value, res_value + IPV6_BINARY_LENGTH,
+ column_string->get_chars().begin() + i *
IPV6_BINARY_LENGTH);
+ column_string->get_offsets().push_back((i + 1) *
IPV6_BINARY_LENGTH);
+ } else {
+ col_res->insert_data(reinterpret_cast<const char*>(res_value),
IPV6_BINARY_LENGTH);
+ }
+ } else {
+ if (exception_mode == IPStringToNumExceptionMode::Throw) {
+ throw Exception(ErrorCode::INVALID_ARGUMENT, "Invalid IPv6
value");
+ }
+ std::fill_n(&vec_res[out_offset], offset_inc, 0);
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ auto* column_string =
assert_cast<ColumnString*>(col_res.get());
+ column_string->get_offsets().push_back((i + 1) *
IPV6_BINARY_LENGTH);
+ }
+ if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
+ (*vec_null_map_to)[i] = true;
+ }
+ }
+ src_offset = src_next_offset;
+ }
+
+ if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
+ return ColumnNullable::create(std::move(col_res),
std::move(col_null_map_to));
+ }
+ return col_res;
+}
+} // namespace detail
+
+template <IPStringToNumExceptionMode exception_mode, typename ToColumn =
ColumnIPv6>
+ColumnPtr convertToIPv6(ColumnPtr column, const PaddedPODArray<UInt8>*
null_map = nullptr) {
+ if (const auto* column_input_string =
check_and_get_column<ColumnString>(column.get())) {
+ auto result =
+ detail::convertToIPv6<exception_mode,
ToColumn>(*column_input_string, null_map);
+ return result;
+ } else {
+ throw Exception(ErrorCode::INVALID_ARGUMENT, "Illegal column type {}.
Expected String",
+ column->get_name());
+ }
+}
+
+template <IPStringToNumExceptionMode exception_mode>
+class FunctionIPv6StringToNum : public IFunction {
+public:
+ static constexpr auto name = exception_mode ==
IPStringToNumExceptionMode::Throw
+ ? "ipv6stringtonum"
+ : (exception_mode ==
IPStringToNumExceptionMode::Default
+ ?
"ipv6stringtonumordefault"
+ : "ipv6stringtonumornull");
+
+ static FunctionPtr create() {
+ return std::make_shared<FunctionIPv6StringToNum<exception_mode>>();
+ }
+
+ String get_name() const override { return name; }
+
+ size_t get_number_of_arguments() const override { return 1; }
+
+ bool use_default_implementation_for_nulls() const override { return false;
}
Review Comment:
Why does `use_default_implementation_for_nulls()` return `false` here?
##########
be/src/vec/functions/function_ip.h:
##########
@@ -348,4 +348,254 @@ class FunctionIPv6NumToString : public IFunction {
}
};
+namespace detail {
+template <IPStringToNumExceptionMode exception_mode, typename ToColumn =
ColumnIPv6,
+ typename StringColumnType>
+ColumnPtr convertToIPv6(const StringColumnType& string_column,
+ const PaddedPODArray<UInt8>* null_map = nullptr) {
+ if constexpr (!std::is_same_v<ToColumn, ColumnString> &&
+ !std::is_same_v<ToColumn, ColumnIPv6>) {
+ throw Exception(ErrorCode::INVALID_ARGUMENT,
+ "Illegal return column type {}. Expected IPv6 or
String",
+ TypeName<typename ToColumn::ValueType>::get());
+ }
+
+ const size_t column_size = string_column.size();
+
+ ColumnUInt8::MutablePtr col_null_map_to;
+ ColumnUInt8::Container* vec_null_map_to = nullptr;
+
+ if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
+ col_null_map_to = ColumnUInt8::create(column_size, false);
+ vec_null_map_to = &col_null_map_to->get_data();
+ }
+
+ /// This is a special treatment for source column of type String
+ /// to preserve previous behavior when IPv6 was a domain type of String
+ if constexpr (std::is_same_v<StringColumnType, ColumnString>) {
+ if (string_column.get_offsets()[0] - 1 == IPV6_BINARY_LENGTH) {
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ auto col_res = ColumnString::create();
+
+ if constexpr (exception_mode ==
IPStringToNumExceptionMode::Null) {
+ col_null_map_to = ColumnUInt8::create(column_size, false);
+ if (null_map) {
+ memcpy(col_null_map_to->get_data().data(),
null_map->data(), column_size);
+ }
+
+ return ColumnNullable::create(std::move(col_res),
std::move(col_null_map_to));
+ }
+
+ return col_res;
+ } else {
+ auto col_res = ColumnIPv6::create();
+ auto& vec_res = col_res->get_data();
+
+ vec_res.resize(column_size);
+ memcpy(vec_res.data(), string_column.get_chars().data(),
+ column_size * IPV6_BINARY_LENGTH);
+
+ if constexpr (exception_mode ==
IPStringToNumExceptionMode::Null) {
+ col_null_map_to = ColumnUInt8::create(column_size, false);
+ if (null_map) {
+ memcpy(col_null_map_to->get_data().data(),
null_map->data(), column_size);
+ }
+ return ColumnNullable::create(std::move(col_res),
std::move(col_null_map_to));
+ }
+
+ return col_res;
+ }
+ }
+ }
+
+ auto column_create = [](size_t column_size) -> typename
ToColumn::MutablePtr {
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ auto column_string = ColumnString::create();
+ column_string->get_chars().reserve(column_size *
IPV6_BINARY_LENGTH);
+ column_string->get_offsets().reserve(column_size);
+ return column_string;
+ } else {
+ return ColumnIPv6::create();
+ }
+ };
+
+ auto get_vector = [](auto& col_res, size_t col_size) -> decltype(auto) {
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ auto& vec_res = col_res->get_chars();
+ vec_res.resize(col_size * IPV6_BINARY_LENGTH);
+ return (vec_res);
+ } else {
+ auto& vec_res = col_res->get_data();
+ vec_res.resize(col_size);
+ return (vec_res);
+ }
+ };
+
+ auto col_res = column_create(column_size);
+ auto& vec_res = get_vector(col_res, column_size);
+
+ using Chars = typename StringColumnType::Chars;
+ const Chars& vec_src = string_column.get_chars();
+
+ size_t src_offset = 0;
+ char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] =
"::ffff:";
+
+ /// ColumnString contains not null terminated strings. But functions
parseIPv6, parseIPv4 expect null terminated string.
+ /// TODO fix this - now parseIPv6/parseIPv4 accept end iterator, so can be
parsed in-place
+ std::string string_buffer;
+
+ int offset_inc = 1;
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ offset_inc = IPV6_BINARY_LENGTH;
+ }
+
+ for (size_t out_offset = 0, i = 0; i < column_size; out_offset +=
offset_inc, ++i) {
+ size_t src_next_offset = src_offset;
+
+ const char* src_value = nullptr;
+ auto* res_value = reinterpret_cast<unsigned
char*>(&vec_res[out_offset]);
+
+ if constexpr (std::is_same_v<StringColumnType, ColumnString>) {
+ src_value = reinterpret_cast<const char*>(&vec_src[src_offset]);
+ src_next_offset = string_column.get_offsets()[i];
+
+ string_buffer.assign(src_value, src_next_offset - src_offset);
+ src_value = string_buffer.c_str();
+ }
+
+ if (null_map && (*null_map)[i]) {
+ std::fill_n(&vec_res[out_offset], offset_inc, 0);
+ src_offset = src_next_offset;
+ if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
+ (*vec_null_map_to)[i] = true;
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ auto* column_string =
assert_cast<ColumnString*>(col_res.get());
+ column_string->get_offsets().push_back((i + 1) *
IPV6_BINARY_LENGTH);
+ }
+ }
+ continue;
+ }
+
+ bool parse_result = false;
+ Int64 dummy_result = 0;
+
+ /// For both cases below: In case of failure, the function parseIPv6
fills vec_res with zero bytes.
+
+ /// If the source IP address is parsable as an IPv4 address, then
transform it into a valid IPv6 address.
+ /// Keeping it simple by just prefixing `::ffff:` to the IPv4 address
to represent it as a valid IPv6 address.
+ size_t string_length = src_next_offset - src_offset;
+ if (string_length != 0) {
+ if (tryParseIPv4(src_value, dummy_result)) {
+ strcat(src_ipv4_buf, src_value);
+ parse_result = parseIPv6whole(src_ipv4_buf, res_value);
+ } else {
+ parse_result = parseIPv6whole(src_value, res_value);
+ }
+ }
+
+ if (parse_result && string_length != 0) {
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ auto* column_string =
assert_cast<ColumnString*>(col_res.get());
+ std::copy(res_value, res_value + IPV6_BINARY_LENGTH,
+ column_string->get_chars().begin() + i *
IPV6_BINARY_LENGTH);
+ column_string->get_offsets().push_back((i + 1) *
IPV6_BINARY_LENGTH);
+ } else {
+ col_res->insert_data(reinterpret_cast<const char*>(res_value),
IPV6_BINARY_LENGTH);
+ }
+ } else {
+ if (exception_mode == IPStringToNumExceptionMode::Throw) {
+ throw Exception(ErrorCode::INVALID_ARGUMENT, "Invalid IPv6
value");
+ }
+ std::fill_n(&vec_res[out_offset], offset_inc, 0);
+ if constexpr (std::is_same_v<ToColumn, ColumnString>) {
+ auto* column_string =
assert_cast<ColumnString*>(col_res.get());
+ column_string->get_offsets().push_back((i + 1) *
IPV6_BINARY_LENGTH);
+ }
+ if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
+ (*vec_null_map_to)[i] = true;
+ }
+ }
+ src_offset = src_next_offset;
+ }
+
+ if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
+ return ColumnNullable::create(std::move(col_res),
std::move(col_null_map_to));
+ }
+ return col_res;
+}
+} // namespace detail
+
+template <IPStringToNumExceptionMode exception_mode, typename ToColumn =
ColumnIPv6>
+ColumnPtr convertToIPv6(ColumnPtr column, const PaddedPODArray<UInt8>*
null_map = nullptr) {
+ if (const auto* column_input_string =
check_and_get_column<ColumnString>(column.get())) {
+ auto result =
+ detail::convertToIPv6<exception_mode,
ToColumn>(*column_input_string, null_map);
+ return result;
+ } else {
+ throw Exception(ErrorCode::INVALID_ARGUMENT, "Illegal column type {}.
Expected String",
+ column->get_name());
+ }
+}
+
+template <IPStringToNumExceptionMode exception_mode>
+class FunctionIPv6StringToNum : public IFunction {
+public:
+ static constexpr auto name = exception_mode ==
IPStringToNumExceptionMode::Throw
+ ? "ipv6stringtonum"
+ : (exception_mode ==
IPStringToNumExceptionMode::Default
+ ?
"ipv6stringtonumordefault"
+ : "ipv6stringtonumornull");
+
+ static FunctionPtr create() {
+ return std::make_shared<FunctionIPv6StringToNum<exception_mode>>();
+ }
+
+ String get_name() const override { return name; }
+
+ size_t get_number_of_arguments() const override { return 1; }
+
+ bool use_default_implementation_for_nulls() const override { return false;
}
Review Comment:
Why does `use_default_implementation_for_nulls()` return `false` here?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]