This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 8ff8d946978a33b927eecf4856a99305933e3bbe Author: yangshijie <[email protected]> AuthorDate: Mon Feb 5 11:21:55 2024 +0800 [fix](ip) change IPv6 to little-endian byte order storage (like IPv4) (#30730) --- be/src/vec/columns/column_vector.h | 21 -- be/src/vec/common/format_ip.cpp | 114 ----------- be/src/vec/common/format_ip.h | 198 ++++++++++++++---- be/src/vec/common/ipv6_to_binary.h | 13 +- be/src/vec/functions/function_ip.h | 62 ++++-- be/src/vec/runtime/ip_address_cidr.h | 14 +- be/src/vec/runtime/ipv4_value.h | 10 +- be/src/vec/runtime/ipv6_value.h | 14 +- be/test/vec/runtime/ip_value_test.cpp | 118 +++++++++++ .../data/datatype_p0/ip/test_ip_basic.out | 225 +++++++++++++++++++++ .../ip_functions/test_ip_functions.out | 17 +- .../suites/datatype_p0/ip/test_ip_basic.groovy | 41 ++++ .../ip_functions/test_ip_functions.groovy | 6 + .../test_ipv6_cidr_to_range_function.groovy | 1 + .../test_is_ip_address_in_range_function.groovy | 55 ++--- 15 files changed, 669 insertions(+), 240 deletions(-) diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index acc8688786f..d9aab68697d 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -67,10 +67,6 @@ namespace doris::vectorized { * Integer values are compared as usual. * Floating-point numbers are compared this way that NaNs always end up at the end * (if you don't do this, the sort would not work at all). - * Due to IPv4 being a Little-Endian storage, comparing UInt32 is equivalent to comparing IPv4. - * However, IPv6 is a Big-Endian storage, and comparing IPv6 is not equivalent to comparing uint128_t. - * So we should use std::memcmp to start comparing from low bytes to high bytes. - * (e.g. :: < ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff) */ template <typename T> struct CompareHelper { @@ -130,23 +126,6 @@ struct CompareHelper<Float32> : public FloatCompareHelper<Float32> {}; template <> struct CompareHelper<Float64> : public FloatCompareHelper<Float64> {}; -struct IPv6CompareHelper { - static bool less(IPv6 a, IPv6 b, int /*nan_direction_hint*/) { - return std::memcmp(&a, &b, sizeof(IPv6)) < 0; - } - - static bool greater(IPv6 a, IPv6 b, int /*nan_direction_hint*/) { - return std::memcmp(&a, &b, sizeof(IPv6)) > 0; - } - - static int compare(IPv6 a, IPv6 b, int /*nan_direction_hint*/) { - return std::memcmp(&a, &b, sizeof(IPv6)); - } -}; - -template <> -struct CompareHelper<IPv6> : public IPv6CompareHelper {}; - /** A template for columns that use a simple array to store. */ template <typename T> diff --git a/be/src/vec/common/format_ip.cpp b/be/src/vec/common/format_ip.cpp index 3e4379872ac..d9f5ffd45dd 100644 --- a/be/src/vec/common/format_ip.cpp +++ b/be/src/vec/common/format_ip.cpp @@ -74,118 +74,4 @@ consteval std::array<std::pair<const char*, size_t>, N> str_make_array() { extern constexpr std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_lookup_table = str_make_array<256>(); -/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base) -static constexpr UInt32 intLog(const UInt32 value, const UInt32 base, const bool carry) { - return value >= base ? 1 + intLog(value / base, base, value % base || carry) - : value % base > 1 || carry; -} - -/// Print integer in desired base, faster than sprintf. -/// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark -/// But it doesn't matter here. -template <UInt32 base, typename T> -static void print_integer(char*& out, T value) { - if (value == 0) { - *out++ = '0'; - } else { - constexpr size_t buffer_size = sizeof(T) * intLog(256, base, false); - - char buf[buffer_size]; - auto ptr = buf; - - while (value > 0) { - *ptr = hex_digit_lowercase(value % base); - ++ptr; - value /= base; - } - - /// Copy to out reversed. - while (ptr != buf) { - --ptr; - *out = *ptr; - ++out; - } - } -} - -void formatIPv6(const unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count) { - struct { - Int64 base, len; - } best {-1, 0}, cur {-1, 0}; - std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {}; - - /** Preprocess: - * Copy the input (bytewise) array into a wordwise array. - * Find the longest run of 0x00's in src[] for :: shorthanding. */ - for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) { - words[i / 2] = (src[i] << 8) | src[i + 1]; - } - - for (size_t i = 0; i < words.size(); i++) { - if (words[i] == 0) { - if (cur.base == -1) { - cur.base = i; - cur.len = 1; - } else { - cur.len++; - } - } else { - if (cur.base != -1) { - if (best.base == -1 || cur.len > best.len) { - best = cur; - } - cur.base = -1; - } - } - } - - if (cur.base != -1) { - if (best.base == -1 || cur.len > best.len) { - best = cur; - } - } - if (best.base != -1 && best.len < 2) { - best.base = -1; - } - - /// Format the result. - for (size_t i = 0; i < words.size(); i++) { - /// Are we inside the best run of 0x00's? - if (best.base != -1) { - auto best_base = static_cast<size_t>(best.base); - if (i >= best_base && i < (best_base + best.len)) { - if (i == best_base) { - *dst++ = ':'; - } - continue; - } - } - /// Are we following an initial run of 0x00s or any real hex? - if (i != 0) { - *dst++ = ':'; - } - /// Is this address an encapsulated IPv4? - if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) { - uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0}; - memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH); - // Due to historical reasons formatIPv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format. -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer)); -#endif - formatIPv4(ipv4_buffer, dst, - std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)), - "0"); - // formatIPv4 has already added a null-terminator for us. - return; - } - print_integer<16>(dst, words[i]); - } - - /// Was it a trailing run of 0x00's? - if (best.base != -1 && - static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) { - *dst++ = ':'; - } -} - } // namespace doris::vectorized diff --git a/be/src/vec/common/format_ip.h b/be/src/vec/common/format_ip.h index 71c7c73b50a..45f90d3bdad 100644 --- a/be/src/vec/common/format_ip.h +++ b/be/src/vec/common/format_ip.h @@ -57,15 +57,15 @@ extern const std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_ * mask_string is NON-NULL, if mask_tail_octets > 0. * * Examples: - * formatIPv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr); + * format_ipv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr); * > dst == "127.0.0.1" - * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx"); + * format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx"); * > dst == "127.0.0.xxx" - * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "0"); + * format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "0"); * > dst == "127.0.0.0" */ -inline void formatIPv4(const unsigned char* src, size_t src_size, char*& dst, - uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") { +inline void format_ipv4(const unsigned char* src, size_t src_size, char*& dst, + uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") { const size_t mask_length = mask_string ? strlen(mask_string) : 0; const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets); const size_t padding = std::min(4 - src_size, limit); @@ -99,9 +99,9 @@ inline void formatIPv4(const unsigned char* src, size_t src_size, char*& dst, dst--; } -inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0, - const char* mask_string = "xxx") { - formatIPv4(src, 4, dst, mask_tail_octets, mask_string); +inline void format_ipv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0, + const char* mask_string = "xxx") { + format_ipv4(src, 4, dst, mask_tail_octets, mask_string); } /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. @@ -124,7 +124,7 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o */ template <typename T, typename EOFfunction> requires(std::is_same<typename std::remove_cv<T>::type, char>::value) -inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { +inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { return false; } @@ -166,8 +166,8 @@ inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t firs } /// returns pointer to the right after parsed sequence or null on failed parsing -inline const char* parseIPv4(const char* src, const char* end, unsigned char* dst) { - if (parseIPv4( +inline const char* parse_ipv4(const char* src, const char* end, unsigned char* dst) { + if (parse_ipv4( src, [&src, end]() { return src == end; }, dst)) { return src; } @@ -175,13 +175,13 @@ inline const char* parseIPv4(const char* src, const char* end, unsigned char* ds } /// returns true if whole buffer was parsed successfully -inline bool parseIPv4whole(const char* src, const char* end, unsigned char* dst) { - return parseIPv4(src, end, dst) == end; +inline bool parse_ipv4_whole(const char* src, const char* end, unsigned char* dst) { + return parse_ipv4(src, end, dst) == end; } /// returns pointer to the right after parsed sequence or null on failed parsing -inline const char* parseIPv4(const char* src, unsigned char* dst) { - if (parseIPv4( +inline const char* parse_ipv4(const char* src, unsigned char* dst) { + if (parse_ipv4( src, []() { return false; }, dst)) { return src; } @@ -189,20 +189,141 @@ inline const char* parseIPv4(const char* src, unsigned char* dst) { } /// returns true if whole null-terminated string was parsed successfully -inline bool parseIPv4whole(const char* src, unsigned char* dst) { - const char* end = parseIPv4(src, dst); +inline bool parse_ipv4_whole(const char* src, unsigned char* dst) { + const char* end = parse_ipv4(src, dst); return end != nullptr && *end == '\0'; } +/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base) +inline constexpr UInt32 int_log(const UInt32 value, const UInt32 base, const bool carry) { + return value >= base ? 1 + int_log(value / base, base, value % base || carry) + : value % base > 1 || carry; +} + +/// Print integer in desired base, faster than sprintf. +/// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark +/// But it doesn't matter here. +template <UInt32 base, typename T> +inline void print_integer(char*& out, T value) { + if (value == 0) { + *out++ = '0'; + } else { + constexpr size_t buffer_size = sizeof(T) * int_log(256, base, false); + + char buf[buffer_size]; + auto ptr = buf; + + while (value > 0) { + *ptr = hex_digit_lowercase(value % base); + ++ptr; + value /= base; + } + + /// Copy to out reversed. + while (ptr != buf) { + --ptr; + *out = *ptr; + ++out; + } + } +} + /** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c - * performs significantly faster than the reference implementation due to the absence of sprintf calls, - * bounds checking, unnecessary string copying and length calculation. + * performs significantly faster than the reference implementation due to the absence of sprintf calls, + * bounds checking, unnecessary string copying and length calculation. + * @param src - pointer to IPv6 (16 bytes) stored in little-endian byte order + * @param dst - where to put format result bytes + * @param zeroed_tail_bytes_count - the parameter is currently not being used */ -void formatIPv6(const unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0); +inline void format_ipv6(unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0) { + struct { + Int64 base, len; + } best {-1, 0}, cur {-1, 0}; + std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {}; + + // the current function logic is processed in big endian manner + // but ipv6 in doris is stored in little-endian byte order + // so transfer to big-endian byte order first + // compatible with parse_ipv6 function in format_ip.h + std::reverse(src, src + IPV6_BINARY_LENGTH); + + /** Preprocess: + * Copy the input (bytewise) array into a wordwise array. + * Find the longest run of 0x00's in src[] for :: shorthanding. */ + for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) { + words[i / 2] = (src[i] << 8) | src[i + 1]; + } + + for (size_t i = 0; i < words.size(); i++) { + if (words[i] == 0) { + if (cur.base == -1) { + cur.base = i; + cur.len = 1; + } else { + cur.len++; + } + } else { + if (cur.base != -1) { + if (best.base == -1 || cur.len > best.len) { + best = cur; + } + cur.base = -1; + } + } + } + + if (cur.base != -1) { + if (best.base == -1 || cur.len > best.len) { + best = cur; + } + } + if (best.base != -1 && best.len < 2) { + best.base = -1; + } + + /// Format the result. + for (size_t i = 0; i < words.size(); i++) { + /// Are we inside the best run of 0x00's? + if (best.base != -1) { + auto best_base = static_cast<size_t>(best.base); + if (i >= best_base && i < (best_base + best.len)) { + if (i == best_base) { + *dst++ = ':'; + } + continue; + } + } + /// Are we following an initial run of 0x00s or any real hex? + if (i != 0) { + *dst++ = ':'; + } + /// Is this address an encapsulated IPv4? + if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) { + uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0}; + memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH); + // Due to historical reasons format_ipv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format. +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer)); +#endif + format_ipv4(ipv4_buffer, dst, + std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)), + "0"); + // format_ipv4 has already added a null-terminator for us. + return; + } + print_integer<16>(dst, words[i]); + } + + /// Was it a trailing run of 0x00's? + if (best.base != -1 && + static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) { + *dst++ = ':'; + } +} /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string. * -* Parses the input string `src` and stores binary big-endian value into buffer pointed by `dst`, +* Parses the input string `src` and stores binary little-endian value into buffer pointed by `dst`, * which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`. * * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) @@ -211,13 +332,13 @@ void formatIPv6(const unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_ * * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. -* @param dst - where to put output bytes, expected to be non-null and at IPV6_BINARY_LENGTH-long. +* @param dst - where to put output bytes in little-endian byte order, expected to be non-null and at IPV6_BINARY_LENGTH-long. * @param first_block - preparsed first block * @return - true if parsed successfully, false otherwise. */ template <typename T, typename EOFfunction> requires(std::is_same<typename std::remove_cv<T>::type, char>::value) -inline bool parseIPv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) { +inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) { const auto clear_dst = [dst]() { std::memset(dst, '\0', IPV6_BINARY_LENGTH); return false; @@ -263,8 +384,8 @@ inline bool parseIPv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t firs return clear_dst(); } - if (*src == '.') /// mixed IPv4 parsing - { + /// mixed IPv4 parsing + if (*src == '.') { if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first return clear_dst(); @@ -288,7 +409,7 @@ inline bool parseIPv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t firs if (num > 255) return clear_dst(); /// parse IPv4 with known first octet - if (!parseIPv4(src, eof, iter, num)) return clear_dst(); + if (!parse_ipv4(src, eof, iter, num)) return clear_dst(); if constexpr (std::endian::native == std::endian::little) std::reverse(iter, iter + IPV4_BINARY_LENGTH); @@ -322,40 +443,45 @@ inline bool parseIPv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t firs /// either all 8 groups or all-zeroes block should be present if (groups < 8 && zptr == nullptr) return clear_dst(); - if (zptr != nullptr) /// process all-zeroes block - { + /// process all-zeroes block + if (zptr != nullptr) { size_t msize = iter - zptr; std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize); std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst)); } + /// the current function logic is processed in big endian manner + /// but ipv6 in doris is stored in little-endian byte order + /// so transfer to little-endian + std::reverse(dst, dst + IPV6_BINARY_LENGTH); + return true; } /// returns pointer to the right after parsed sequence or null on failed parsing -inline const char* parseIPv6(const char* src, const char* end, unsigned char* dst) { - if (parseIPv6( +inline const char* parse_ipv6(const char* src, const char* end, unsigned char* dst) { + if (parse_ipv6( src, [&src, end]() { return src == end; }, dst)) return src; return nullptr; } /// returns true if whole buffer was parsed successfully -inline bool parseIPv6whole(const char* src, const char* end, unsigned char* dst) { - return parseIPv6(src, end, dst) == end; +inline bool parse_ipv6_whole(const char* src, const char* end, unsigned char* dst) { + return parse_ipv6(src, end, dst) == end; } /// returns pointer to the right after parsed sequence or null on failed parsing -inline const char* parseIPv6(const char* src, unsigned char* dst) { - if (parseIPv6( +inline const char* parse_ipv6(const char* src, unsigned char* dst) { + if (parse_ipv6( src, []() { return false; }, dst)) return src; return nullptr; } /// returns true if whole null-terminated string was parsed successfully -inline bool parseIPv6whole(const char* src, unsigned char* dst) { - const char* end = parseIPv6(src, dst); +inline bool parse_ipv6_whole(const char* src, unsigned char* dst) { + const char* end = parse_ipv6(src, dst); return end != nullptr && *end == '\0'; } diff --git a/be/src/vec/common/ipv6_to_binary.h b/be/src/vec/common/ipv6_to_binary.h index 0f22740fbcd..3113a394b61 100644 --- a/be/src/vec/common/ipv6_to_binary.h +++ b/be/src/vec/common/ipv6_to_binary.h @@ -35,15 +35,15 @@ static constexpr RawMaskArrayT generate_bit_mask(size_t prefix) { if (prefix >= arr.size() * 8) { prefix = arr.size() * 8; } - size_t i = 0; - for (; prefix >= 8; ++i, prefix -= 8) { + int8_t i = IPV6_BINARY_LENGTH - 1; + for (; prefix >= 8; --i, prefix -= 8) { arr[i] = 0xff; } if (prefix > 0) { - arr[i++] = ~(0xff >> prefix); + arr[i--] = ~(0xff >> prefix); } - while (i < arr.size()) { - arr[i++] = 0x00; + while (i >= 0) { + arr[i--] = 0x00; } return arr; } @@ -58,9 +58,10 @@ static constexpr std::array<RawMaskArrayT, masksCount> generate_bit_masks() { } /// Returns a reference to 16-byte array containing mask with first `prefix_len` bits set to `1` and `128 - prefix_len` to `0`. +/// Store in little-endian byte order /// The reference is valid during all program execution time. /// Values of prefix_len greater than 128 interpreted as 128 exactly. -const std::array<uint8_t, 16>& get_cidr_mask_ipv6(uint8_t prefix_len) { +inline const std::array<uint8_t, 16>& get_cidr_mask_ipv6(uint8_t prefix_len) { static constexpr auto IPV6_RAW_MASK_ARRAY = generate_bit_masks<RawMaskArrayV6, IPV6_MASKS_COUNT>(); return IPV6_RAW_MASK_ARRAY[prefix_len]; diff --git a/be/src/vec/functions/function_ip.h b/be/src/vec/functions/function_ip.h index 98802a3ee20..4fd1c38282b 100644 --- a/be/src/vec/functions/function_ip.h +++ b/be/src/vec/functions/function_ip.h @@ -78,7 +78,7 @@ private: offsets_res[i] = pos - begin; null_map->get_data()[i] = 1; } else { - formatIPv4(reinterpret_cast<const unsigned char*>(&vec_in[i]), src_size, pos); + format_ipv4(reinterpret_cast<const unsigned char*>(&vec_in[i]), src_size, pos); offsets_res[i] = pos - begin; } } @@ -131,12 +131,12 @@ public: /// Since IPExceptionMode means wider scope, we use more specific name here. enum class IPConvertExceptionMode : uint8_t { Throw, Default, Null }; -static inline bool tryParseIPv4(const char* pos, Int64& result_value) { - return parseIPv4whole(pos, reinterpret_cast<unsigned char*>(&result_value)); +static inline bool try_parse_ipv4(const char* pos, Int64& result_value) { + return parse_ipv4_whole(pos, reinterpret_cast<unsigned char*>(&result_value)); } template <IPConvertExceptionMode exception_mode, typename ToColumn> -ColumnPtr convertToIPv4(ColumnPtr column, const PaddedPODArray<UInt8>* null_map = nullptr) { +ColumnPtr convert_to_ipv4(ColumnPtr column, const PaddedPODArray<UInt8>* null_map = nullptr) { const ColumnString* column_string = check_and_get_column<ColumnString>(column.get()); if (!column_string) { @@ -183,7 +183,7 @@ ColumnPtr convertToIPv4(ColumnPtr column, const PaddedPODArray<UInt8>* null_map size_t src_length = (i < vec_res.size() - 1) ? (offsets_src[i] - prev_offset) : (vec_src.size() - prev_offset); std::string src(src_start, src_length); - bool parse_result = tryParseIPv4(src.c_str(), vec_res[i]); + bool parse_result = try_parse_ipv4(src.c_str(), vec_res[i]); if (!parse_result) { if constexpr (exception_mode == IPConvertExceptionMode::Throw) { @@ -251,7 +251,7 @@ public: null_map = &column_nullable->get_null_map_data(); } - auto col_res = convertToIPv4<exception_mode, ColumnInt64>(column, null_map); + auto col_res = convert_to_ipv4<exception_mode, ColumnInt64>(column, null_map); if (null_map && exception_mode == IPConvertExceptionMode::Null) { block.replace_by_position( @@ -296,7 +296,16 @@ void process_ipv6_column(const ColumnPtr& column, size_t input_rows_count, offsets_res[i] = pos - begin; null_map->get_data()[i] = 1; } else { - formatIPv6(ipv6_address_data, pos); + if constexpr (std::is_same_v<T, ColumnIPv6>) { + // ipv6 is little-endian byte order storage in doris + // so parsing ipv6 in little-endian byte order + format_ipv6(ipv6_address_data, pos); + } else { + // 16 bytes ipv6 string is big-endian byte order storage in doris + // so transfer to little-endian firstly + std::reverse(ipv6_address_data, ipv6_address_data + IPV6_BINARY_LENGTH); + format_ipv6(ipv6_address_data, pos); + } offsets_res[i] = pos - begin; } } @@ -360,8 +369,8 @@ public: namespace detail { template <IPConvertExceptionMode exception_mode, typename ToColumn = ColumnIPv6, typename StringColumnType> -ColumnPtr convertToIPv6(const StringColumnType& string_column, - const PaddedPODArray<UInt8>* null_map = nullptr) { +ColumnPtr convert_to_ipv6(const StringColumnType& string_column, + const PaddedPODArray<UInt8>* null_map = nullptr) { if constexpr (!std::is_same_v<ToColumn, ColumnString> && !std::is_same_v<ToColumn, ColumnIPv6>) { throw Exception(ErrorCode::INVALID_ARGUMENT, @@ -463,15 +472,20 @@ ColumnPtr convertToIPv6(const StringColumnType& string_column, /// Keeping it simple by just prefixing `::ffff:` to the IPv4 address to represent it as a valid IPv6 address. size_t string_length = src_next_offset - src_offset; if (string_length != 0) { - if (tryParseIPv4(src_value, dummy_result)) { + if (try_parse_ipv4(src_value, dummy_result)) { strcat(src_ipv4_buf, src_value); - parse_result = parseIPv6whole(src_ipv4_buf, res_value); + parse_result = parse_ipv6_whole(src_ipv4_buf, res_value); } else { - parse_result = parseIPv6whole(src_value, res_value); + parse_result = parse_ipv6_whole(src_value, res_value); } } if (parse_result && string_length != 0) { + if constexpr (std::is_same_v<ToColumn, ColumnString>) { + // handling 16 bytes ipv6 string in the big-endian byte order + // is aimed at conforming to human reading habits + std::reverse(res_value, res_value + IPV6_BINARY_LENGTH); + } if constexpr (std::is_same_v<ToColumn, ColumnString>) { auto* column_string = assert_cast<ColumnString*>(col_res.get()); std::copy(res_value, res_value + IPV6_BINARY_LENGTH, @@ -504,10 +518,10 @@ ColumnPtr convertToIPv6(const StringColumnType& string_column, } // namespace detail template <IPConvertExceptionMode exception_mode, typename ToColumn = ColumnIPv6> -ColumnPtr convertToIPv6(ColumnPtr column, const PaddedPODArray<UInt8>* null_map = nullptr) { +ColumnPtr convert_to_ipv6(ColumnPtr column, const PaddedPODArray<UInt8>* null_map = nullptr) { if (const auto* column_input_string = check_and_get_column<ColumnString>(column.get())) { auto result = - detail::convertToIPv6<exception_mode, ToColumn>(*column_input_string, null_map); + detail::convert_to_ipv6<exception_mode, ToColumn>(*column_input_string, null_map); return result; } else { throw Exception(ErrorCode::INVALID_ARGUMENT, "Illegal column type {}. Expected String", @@ -563,7 +577,7 @@ public: null_map = &column_nullable->get_null_map_data(); } - auto col_res = convertToIPv6<exception_mode, ColumnString>(column, null_map); + auto col_res = convert_to_ipv6<exception_mode, ColumnString>(column, null_map); if (null_map && exception_mode == IPConvertExceptionMode::Null) { block.replace_by_position( @@ -878,9 +892,18 @@ public: throw Exception(ErrorCode::INVALID_ARGUMENT, "Illegal cidr value '{}'", std::to_string(cidr)); } - apply_cidr_mask(from_column.get_data_at(i).data, - reinterpret_cast<char*>(&vec_res_lower_range[i]), - reinterpret_cast<char*>(&vec_res_upper_range[i]), cidr); + if constexpr (std::is_same_v<FromColumn, ColumnString>) { + // 16 bytes ipv6 string is stored in big-endian byte order + // so transfer to little-endian firstly + auto* src_data = const_cast<char*>(from_column.get_data_at(i).data); + std::reverse(src_data, src_data + IPV6_BINARY_LENGTH); + apply_cidr_mask(src_data, reinterpret_cast<char*>(&vec_res_lower_range[i]), + reinterpret_cast<char*>(&vec_res_upper_range[i]), cidr); + } else { + apply_cidr_mask(from_column.get_data_at(i).data, + reinterpret_cast<char*>(&vec_res_lower_range[i]), + reinterpret_cast<char*>(&vec_res_upper_range[i]), cidr); + } } return ColumnStruct::create( @@ -890,9 +913,10 @@ public: private: static void apply_cidr_mask(const char* __restrict src, char* __restrict dst_lower, char* __restrict dst_upper, UInt8 bits_to_keep) { + // little-endian mask const auto& mask = get_cidr_mask_ipv6(bits_to_keep); - for (size_t i = 0; i < IPV6_BINARY_LENGTH; ++i) { + for (int8_t i = IPV6_BINARY_LENGTH - 1; i >= 0; --i) { dst_lower[i] = src[i] & mask[i]; dst_upper[i] = dst_lower[i] | ~mask[i]; } diff --git a/be/src/vec/runtime/ip_address_cidr.h b/be/src/vec/runtime/ip_address_cidr.h index 721722d5f8b..789d93d40e0 100644 --- a/be/src/vec/runtime/ip_address_cidr.h +++ b/be/src/vec/runtime/ip_address_cidr.h @@ -28,13 +28,14 @@ class IPAddressVariant { public: explicit IPAddressVariant(std::string_view address_str) { vectorized::Int64 v4 = 0; - if (vectorized::parseIPv4whole(address_str.begin(), address_str.end(), - reinterpret_cast<unsigned char*>(&v4))) { + if (vectorized::parse_ipv4_whole(address_str.begin(), address_str.end(), + reinterpret_cast<unsigned char*>(&v4))) { _addr = static_cast<vectorized::UInt32>(v4); } else { _addr = IPv6AddrType(); - if (!vectorized::parseIPv6whole(address_str.begin(), address_str.end(), - std::get<IPv6AddrType>(_addr).data())) { + // parse ipv6 in little-endian + if (!vectorized::parse_ipv6_whole(address_str.begin(), address_str.end(), + std::get<IPv6AddrType>(_addr).data())) { throw Exception(ErrorCode::INVALID_ARGUMENT, "Neither IPv4 nor IPv6 address: '{}'", address_str); } @@ -72,13 +73,14 @@ bool match_ipv4_subnet(uint32_t addr, uint32_t cidr_addr, uint8_t prefix) { return (addr & mask) == (cidr_addr & mask); } +// ipv6 liitle-endian input bool match_ipv6_subnet(const uint8_t* addr, const uint8_t* cidr_addr, uint8_t prefix) { if (prefix > IPV6_BINARY_LENGTH * 8U) { prefix = IPV6_BINARY_LENGTH * 8U; } - size_t i = 0; + size_t i = IPV6_BINARY_LENGTH - 1; - for (; prefix >= 8; ++i, prefix -= 8) { + for (; prefix >= 8; --i, prefix -= 8) { if (addr[i] != cidr_addr[i]) { return false; } diff --git a/be/src/vec/runtime/ipv4_value.h b/be/src/vec/runtime/ipv4_value.h index b82bc3489b9..2a5a5ae91ef 100644 --- a/be/src/vec/runtime/ipv4_value.h +++ b/be/src/vec/runtime/ipv4_value.h @@ -57,8 +57,8 @@ public: while (end > begin && std::isspace(ipv4_str[end])) { --end; } - if (!vectorized::parseIPv4whole(ipv4_str + begin, ipv4_str + end + 1, - reinterpret_cast<unsigned char*>(&parse_value))) { + if (!vectorized::parse_ipv4_whole(ipv4_str + begin, ipv4_str + end + 1, + reinterpret_cast<unsigned char*>(&parse_value))) { return false; } value = static_cast<vectorized::IPv4>(parse_value); @@ -74,7 +74,7 @@ public: char* start = buf; char* end = buf; const auto* src = reinterpret_cast<const unsigned char*>(&value); - vectorized::formatIPv4(src, end); + vectorized::format_ipv4(src, end); size_t len = end - start; return {buf, len}; } @@ -92,8 +92,8 @@ public: while (end > begin && std::isspace(ipv4_str[end])) { --end; } - return vectorized::parseIPv4whole(ipv4_str + begin, ipv4_str + end + 1, - reinterpret_cast<unsigned char*>(&parse_value)); + return vectorized::parse_ipv4_whole(ipv4_str + begin, ipv4_str + end + 1, + reinterpret_cast<unsigned char*>(&parse_value)); } private: diff --git a/be/src/vec/runtime/ipv6_value.h b/be/src/vec/runtime/ipv6_value.h index 58e6be3a8ad..953b71baf84 100644 --- a/be/src/vec/runtime/ipv6_value.h +++ b/be/src/vec/runtime/ipv6_value.h @@ -54,8 +54,9 @@ public: while (end > begin && std::isspace(ipv6_str[end])) { --end; } - return vectorized::parseIPv6whole(ipv6_str + begin, ipv6_str + end + 1, - reinterpret_cast<unsigned char*>(&value)); + // parse and store in little-endian + return vectorized::parse_ipv6_whole(ipv6_str + begin, ipv6_str + end + 1, + reinterpret_cast<unsigned char*>(&value)); } static bool from_string(vectorized::IPv6& value, const std::string& ipv6_str) { @@ -68,8 +69,9 @@ public: char buf[IPV6_MAX_TEXT_LENGTH + 1]; char* start = buf; char* end = buf; - const auto* src = reinterpret_cast<const unsigned char*>(&value); - vectorized::formatIPv6(src, end); + auto* src = reinterpret_cast<unsigned char*>(&value); + // load and format in little-endian + vectorized::format_ipv6(src, end); size_t len = end - start; return {buf, len}; } @@ -87,8 +89,8 @@ public: while (end > begin && std::isspace(ipv6_str[end])) { --end; } - return vectorized::parseIPv6whole(ipv6_str + begin, ipv6_str + end + 1, - reinterpret_cast<unsigned char*>(&value)); + return vectorized::parse_ipv6_whole(ipv6_str + begin, ipv6_str + end + 1, + reinterpret_cast<unsigned char*>(&value)); } private: diff --git a/be/test/vec/runtime/ip_value_test.cpp b/be/test/vec/runtime/ip_value_test.cpp new file mode 100644 index 00000000000..cbeebcf24b6 --- /dev/null +++ b/be/test/vec/runtime/ip_value_test.cpp @@ -0,0 +1,118 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest-message.h> +#include <gtest/gtest-test-part.h> + +#include <cstdint> +#include <iostream> + +#include "gtest/gtest_pred_impl.h" +#include "vec/common/ipv6_to_binary.h" +#include "vec/runtime/ipv4_value.h" +#include "vec/runtime/ipv6_value.h" + +namespace doris { + +class IPv4Value; +class IPv6Value; + +// util function +template <typename T> +static void print_bytes(T num) { + auto* byte_ptr = reinterpret_cast<uint8_t*>(&num); + + std::cout << "low -> "; + + for (size_t i = 0; i < sizeof(T); ++i) { + std::cout << std::hex << static_cast<int>(byte_ptr[i]) << " "; + } + + std::cout << " -> high"; + std::cout << std::dec << std::endl; +} + +TEST(IPValueTest, IPv4ValueTest) { + const std::string ipv4_str1 = "192.168.103.254"; + const std::string ipv4_str2 = "193.168.103.255"; + vectorized::IPv4 ipv4_val1; + vectorized::IPv4 ipv4_val2; + ASSERT_TRUE(IPv4Value::from_string(ipv4_val1, ipv4_str1.c_str(), ipv4_str1.size())); + ASSERT_TRUE(IPv4Value::from_string(ipv4_val2, ipv4_str2.c_str(), ipv4_str2.size())); + ASSERT_TRUE(ipv4_val1 < ipv4_val2); + print_bytes(ipv4_val1); + print_bytes(ipv4_val2); + std::string ipv4_format1 = IPv4Value::to_string(ipv4_val1); + std::string ipv4_format2 = IPv4Value::to_string(ipv4_val2); + ASSERT_EQ(ipv4_str1, ipv4_format1); + ASSERT_EQ(ipv4_str2, ipv4_format2); +} + +TEST(IPValueTest, IPv6ValueTest) { + const std::string ipv6_str1 = "2001:418:0:5000::c2d"; + const std::string ipv6_str2 = "2001:428::205:171:200:230"; + vectorized::IPv6 ipv6_val1; + vectorized::IPv6 ipv6_val2; + ASSERT_TRUE(IPv6Value::from_string(ipv6_val1, ipv6_str1.c_str(), ipv6_str1.size())); + ASSERT_TRUE(IPv6Value::from_string(ipv6_val2, ipv6_str2.c_str(), ipv6_str2.size())); + ASSERT_TRUE(ipv6_val1 < ipv6_val2); + print_bytes(ipv6_val1); + print_bytes(ipv6_val2); + std::string ipv6_format1 = IPv6Value::to_string(ipv6_val1); + std::string ipv6_format2 = IPv6Value::to_string(ipv6_val2); + ASSERT_EQ(ipv6_str1, ipv6_format1); + ASSERT_EQ(ipv6_str2, ipv6_format2); +} + +static void apply_cidr_mask(const char* __restrict src, char* __restrict dst_lower, + char* __restrict dst_upper, vectorized::UInt8 bits_to_keep) { + const auto& mask = vectorized::get_cidr_mask_ipv6(bits_to_keep); + + for (int8_t i = IPV6_BINARY_LENGTH - 1; i >= 0; --i) { + dst_lower[i] = src[i] & mask[i]; + dst_upper[i] = dst_lower[i] | ~mask[i]; + } +} + +TEST(IPValueTest, IPv6CIDRTest) { + const std::string ipv6_str1 = "2001:0db8:0000:85a3:0000:0000:ac1f:8001"; + const std::string ipv6_str2 = "2001:0db8:0000:85a3:ffff:ffff:ffff:ffff"; + vectorized::IPv6 ipv6_val1; // little-endian + vectorized::IPv6 ipv6_val2; // little-endian + ASSERT_TRUE(IPv6Value::from_string(ipv6_val1, ipv6_str1.c_str(), ipv6_str1.size())); + ASSERT_TRUE(IPv6Value::from_string(ipv6_val2, ipv6_str2.c_str(), ipv6_str2.size())); + vectorized::IPv6 min_range1, max_range1; + vectorized::IPv6 min_range2, max_range2; + apply_cidr_mask(reinterpret_cast<const char*>(&ipv6_val1), reinterpret_cast<char*>(&min_range1), + reinterpret_cast<char*>(&max_range1), 0); + apply_cidr_mask(reinterpret_cast<const char*>(&ipv6_val2), reinterpret_cast<char*>(&min_range2), + reinterpret_cast<char*>(&max_range2), 32); + print_bytes(min_range1); + print_bytes(max_range1); + print_bytes(min_range2); + print_bytes(max_range2); + std::string min_range_format1 = IPv6Value::to_string(min_range1); + std::string max_range_format1 = IPv6Value::to_string(max_range1); + std::string min_range_format2 = IPv6Value::to_string(min_range2); + std::string max_range_format2 = IPv6Value::to_string(max_range2); + ASSERT_EQ(min_range_format1, "::"); + ASSERT_EQ(max_range_format1, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"); + ASSERT_EQ(min_range_format2, "2001:db8::"); + ASSERT_EQ(max_range_format2, "2001:db8:ffff:ffff:ffff:ffff:ffff:ffff"); +} + +} // namespace doris \ No newline at end of file diff --git a/regression-test/data/datatype_p0/ip/test_ip_basic.out b/regression-test/data/datatype_p0/ip/test_ip_basic.out index 0262959d83e..6b17d85b832 100644 --- a/regression-test/data/datatype_p0/ip/test_ip_basic.out +++ b/regression-test/data/datatype_p0/ip/test_ip_basic.out @@ -129,3 +129,228 @@ ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 127.0.0.1 9 255.255.255.255 16 +-- !sql15 -- +1.55.138.211 +1.80.2.248 +1.83.241.116 +14.204.253.158 +23.238.237.26 +36.79.88.107 +36.82.224.170 +36.183.59.29 +36.183.126.25 +36.186.75.121 +36.189.46.88 +42.115.43.114 +42.117.228.166 +45.3.47.158 +58.16.171.31 +58.19.84.138 +59.50.185.152 +59.52.3.168 +61.58.96.173 +62.151.203.189 +64.77.240.1 +66.150.171.196 +67.22.254.206 +67.55.90.43 +69.57.193.230 +104.24.237.140 +104.25.84.59 +104.27.46.146 +104.149.148.137 +107.160.215.141 +111.4.229.190 +111.4.242.106 +111.14.182.156 +111.46.39.248 +111.53.182.225 +111.56.27.171 +111.56.188.125 +112.15.128.113 +112.33.13.212 +112.40.38.145 +113.56.43.134 +113.56.44.105 +113.142.1.1 +115.0.24.191 +115.4.78.200 +115.6.31.95 +115.6.63.224 +115.6.104.199 +115.9.240.116 +115.11.21.200 +115.13.39.164 +115.14.93.25 +115.19.192.159 +115.22.20.223 +115.159.103.38 +116.2.211.43 +116.66.238.92 +116.97.76.104 +116.106.34.242 +116.252.54.207 +116.253.40.133 +117.32.14.179 +117.33.191.217 +117.150.98.199 +119.36.22.147 +120.192.122.34 +120.193.17.57 +120.239.82.212 +121.25.82.29 +121.25.160.80 +121.28.97.113 +122.10.93.66 +142.252.102.78 +142.254.161.133 +144.12.164.251 +153.35.83.233 +162.144.2.57 +171.225.130.45 +171.228.0.153 +171.229.231.90 +171.234.78.185 +180.244.18.143 +180.253.57.249 +182.30.107.86 +183.211.172.143 +183.212.25.70 +183.245.137.140 +183.247.232.58 +192.220.125.142 +199.17.84.108 +202.198.37.147 +203.126.212.37 +209.131.3.252 +210.66.18.184 +210.183.19.113 +211.97.110.141 +218.42.159.17 +220.200.1.22 +222.216.51.186 +223.73.153.243 + +-- !sql16 -- +2001:418:0:5000::c2d +2001:428::205:171:200:230 +2001:438:ffff::407d:1bc1 +2001:450:2001:1000:0:40:6924:23 +2001:470:0:1fa::2 +2001:470:0:327::1 +2001:470:1:248::2 +2001:470:1:89e::2 +2001:470:1:946::2 +2001:500:55::1 +2001:550:0:1000::9a18:292a +2001:550:0:1000::9a36:2a61 +2001:550:2:29::2c9:1 +2001:550:2:48::34:1 +2001:578:20::d +2001:578:201:1::601:9 +2001:578:400:4:2000::19 +2001:578:1400:4::9d +2001:579:6f05:500:9934:5b3e:b7fe:1447 +2001:5b0:23ff:fffa::113 +2001:668:0:3::f000:c2 +2001:67c:754::1 +2001:67c:24e4:4::250 +2001:688:0:2:1::9e +2001:920:1833::1 +2001:c20:4800::175 +2001:fb1:fe0:9::8 +2001:13c7:6014::1 +2001:16a0:2:200a::2 +2001:1a98:6677::9d9d:140a +2001:1af8:4040::12 +2001:1b70:a1:610::b102:2 +2001:2000:3018:3b::1 +2001:2000:3080:80::2 +2001:2000:3080:10ca::2 +2001:2000:3080:1351::2 +2001:41a8:400:2::13a +2001:44c8:129:2632:33:0:252:2 +2001:4888::342:1:0:0 +2001:4888:1f:e891:161:26:: +2001:48f8:1000:1::16 +2003:0:1203:4001::1 +2400:bf40::1 +2400:c700:0:158:: +2400:dd00:1:200a::2 +2400:dd0b:1003::2 +2401:7400:8888:2::8 +2402:800:63ff:10::7:2 +2402:800:63ff:40::1 +2402:28c0:100:ffff:ffff:ffff:ffff:ffff +2402:7800:40:2::62 +2403:5000:171:46::2 +2403:e800:200:102::2 +2404:c600:1000:2::1d1 +2405:4800::3221:3621:2 +2405:9800:9800:66::2 +2408:8000:c000::1 +2600:140e:6::1 +2604:7e00::105d +2605:3e00::1:2:2 +2606:2800:602a::1 +2606:2800:602c:b::d004 +2606:2b00::1 +2607:f0d0:2:2::243 +2607:f290::1 +2607:fa70:3:33::2 +2607:fc68:0:4:0:2:2711:21 +2610:18:181:4000::66 +2620:44:a000::1 +2800:630:4010:8::2 +2800:c20:1141::8 +2804:64:0:25::1 +2804:a8:2:c8::d6 +2804:158c::1 +2a00:18e0:0:bb04::82 +2a00:de00::1 +2a01:348::e:1:1 +2a01:8840:16::1 +2a01:b740:a09::1 +2a02:23f0:ffff:8::5 +2a02:2698:5000::1e06 +2a02:2a38:37:5::2 +2a02:2e00:2080:f000:0:261:1:11 +2a02:aa08:e000:3100::2 +2a02:b000:fff::524 +2a02:e980:b::1 +2a02:e980:19::1 +2a02:e980:1e::1 +2a02:e980:83:5b09:ecb8:c669:b336:650e +2a03:4200:441:2::4e +2a03:9d40:fe00:5:: +2a04:2dc0::16d +2a07:a343:f210::1 +2a0a:6f40:2::1 +2a0b:7086:fff0::1 +2a0c:3240::1 +2a0c:f743::1 +2c0f:fa18:0:4::b +2c0f:fc98:1200::2 +2c0f:feb0:1:2::d1 + +-- !sql17 -- +115.13.39.164 2001:c20:4800::175 +115.14.93.25 2400:dd0b:1003::2 +115.19.192.159 2a02:e980:b::1 +115.22.20.223 2001:450:2001:1000:0:40:6924:23 +115.159.103.38 2001:428::205:171:200:230 +116.2.211.43 2a0b:7086:fff0::1 +116.66.238.92 2800:c20:1141::8 +116.97.76.104 2a03:4200:441:2::4e +116.106.34.242 2a01:8840:16::1 +116.252.54.207 2401:7400:8888:2::8 +116.253.40.133 2606:2b00::1 +117.32.14.179 2001:48f8:1000:1::16 +117.33.191.217 2001:67c:754::1 +117.150.98.199 2a01:b740:a09::1 +119.36.22.147 2001:4888:1f:e891:161:26:: +120.192.122.34 2a07:a343:f210::1 +120.193.17.57 2606:2800:602a::1 +120.239.82.212 2a0c:3240::1 +121.25.82.29 2620:44:a000::1 +121.25.160.80 2001:418:0:5000::c2d diff --git a/regression-test/data/query_p0/sql_functions/ip_functions/test_ip_functions.out b/regression-test/data/query_p0/sql_functions/ip_functions/test_ip_functions.out index 6bdc72df39f..799d441a70a 100644 --- a/regression-test/data/query_p0/sql_functions/ip_functions/test_ip_functions.out +++ b/regression-test/data/query_p0/sql_functions/ip_functions/test_ip_functions.out @@ -212,4 +212,19 @@ true \N -- !sql -- -\N \ No newline at end of file +\N + +-- !sql -- +::ffff:192.168.0.1 + +-- !sql -- +::ffff:10.0.5.9 + +-- !sql -- +::ffff:192.168.0.1 + +-- !sql -- +:: + +-- !sql -- +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff diff --git a/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy b/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy index 586f3d15774..f5b2620e967 100644 --- a/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy +++ b/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy @@ -19,10 +19,12 @@ suite("test_ip_basic") { sql """ DROP TABLE IF EXISTS t0 """ sql """ DROP TABLE IF EXISTS t1 """ + sql """ DROP TABLE IF EXISTS t2 """ sql """ SET enable_nereids_planner=true """ sql """ SET enable_fallback_to_original_planner=false """ + // create table t0 sql """ CREATE TABLE `t0` ( `id` int, @@ -36,6 +38,7 @@ suite("test_ip_basic") { ); """ + // create table t1 sql """ CREATE TABLE `t1` ( `id` int, @@ -49,6 +52,21 @@ suite("test_ip_basic") { ); """ + // create table t2 + sql """ + CREATE TABLE `t2` ( + `id` int, + `ip_v4` ipv4, + `ip_v6` ipv6 + ) ENGINE=OLAP + DISTRIBUTED BY HASH(`id`) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_duplicate_without_keys_by_default" = "true" + ); + """ + + // insert data into t0 sql """ insert into t0 values (0, NULL, NULL), @@ -64,6 +82,7 @@ suite("test_ip_basic") { (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'); """ + // insert data into t1 sql """ insert into t1 values (0, NULL, NULL), @@ -79,6 +98,22 @@ suite("test_ip_basic") { (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'); """ + // insert data into t2 + streamLoad { + db 'regression_test_datatype_p0_ip' + table 't2' + + set 'column_separator', ',' + + file 'test_data/test.csv' + + time 10000 // limit inflight 10s + + // stream load action will check result, include Success status, and NumberTotalRows == NumberLoadedRows + } + + sql """sync""" + // order by qt_sql1 "select ip_v4 from t0 order by ip_v4" qt_sql2 "select ip_v6 from t0 order by ip_v6" @@ -101,6 +136,12 @@ suite("test_ip_basic") { // join and group by qt_sql14 "select t0.ip_v4, count(*) as cnt from t0 join t1 on t0.ip_v4=t1.ip_v4 and t0.ip_v6=t1.ip_v6 group by t0.ip_v4 order by cnt" + // order by + qt_sql15 "select ip_v4 from t2 order by ip_v4" + qt_sql16 "select ip_v6 from t2 order by ip_v6" + qt_sql17 "select ip_v4, ip_v6 from t2 order by ip_v4, ip_v6 limit 20 offset 50" + sql "DROP TABLE t0" sql "DROP TABLE t1" + sql "DROP TABLE t2" } diff --git a/regression-test/suites/query_p0/sql_functions/ip_functions/test_ip_functions.groovy b/regression-test/suites/query_p0/sql_functions/ip_functions/test_ip_functions.groovy index 64074ec2e88..333e9e8a4b8 100644 --- a/regression-test/suites/query_p0/sql_functions/ip_functions/test_ip_functions.groovy +++ b/regression-test/suites/query_p0/sql_functions/ip_functions/test_ip_functions.groovy @@ -94,4 +94,10 @@ suite("test_ip_functions", "arrow_flight_sql") { qt_sql "SELECT is_ipv4_mapped(inet6_aton(''));" qt_sql "SELECT is_ipv4_mapped(inet6_aton(NULL));" qt_sql "SELECT is_ipv4_mapped(inet6_aton('KK'));" + + qt_sql "SELECT ipv6_num_to_string(ipv6_string_to_num('192.168.0.1'));" + qt_sql "SELECT ipv6_num_to_string(ipv6_string_to_num('::ffff:10.0.5.9'));" + qt_sql "SELECT ipv6_num_to_string(ipv6_string_to_num('::ffff:c0a8:0001'));" + qt_sql "SELECT ipv6_num_to_string(ipv6_string_to_num('::'));" + qt_sql "SELECT ipv6_num_to_string(ipv6_string_to_num('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'));" } diff --git a/regression-test/suites/query_p0/sql_functions/ip_functions/test_ipv6_cidr_to_range_function.groovy b/regression-test/suites/query_p0/sql_functions/ip_functions/test_ipv6_cidr_to_range_function.groovy index 7b3c5107fa5..9b19c22bc2c 100644 --- a/regression-test/suites/query_p0/sql_functions/ip_functions/test_ipv6_cidr_to_range_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/ip_functions/test_ipv6_cidr_to_range_function.groovy @@ -50,6 +50,7 @@ suite("test_ipv6_cidr_to_range_function") { qt_sql "select id, struct_element(ipv6_cidr_to_range(addr, cidr), 'min') as min_range, struct_element(ipv6_cidr_to_range(addr, cidr), 'max') as max_range from test_ipv6_cidr_to_range_function order by id" sql """ DROP TABLE IF EXISTS test_ipv6_cidr_to_range_function """ + sql """ DROP TABLE IF EXISTS test_str_cidr_to_range_function """ sql """ CREATE TABLE test_str_cidr_to_range_function ( diff --git a/regression-test/suites/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.groovy b/regression-test/suites/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.groovy index d1283bd8391..685ca9154de 100644 --- a/regression-test/suites/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.groovy @@ -34,32 +34,35 @@ suite("test_is_ip_address_in_range_function") { ); """ - sql "insert into test_is_ip_address_in_range_function values(1, '127.0.0.1', '127.0.0.0/8')" - sql "insert into test_is_ip_address_in_range_function values(2, '128.0.0.1', '127.0.0.0/8')" - sql "insert into test_is_ip_address_in_range_function values(3, 'ffff::1', 'ffff::/16')" - sql "insert into test_is_ip_address_in_range_function values(4, 'fffe::1', 'ffff::/16')" - sql "insert into test_is_ip_address_in_range_function values(5, '192.168.99.255', '192.168.100.0/22')" - sql "insert into test_is_ip_address_in_range_function values(6, '192.168.100.1', '192.168.100.0/22')" - sql "insert into test_is_ip_address_in_range_function values(7, '192.168.103.255', '192.168.100.0/22')" - sql "insert into test_is_ip_address_in_range_function values(8, '192.168.104.0', '192.168.100.0/22')" - sql "insert into test_is_ip_address_in_range_function values(9, '::192.168.99.255', '::192.168.100.0/118')" - sql "insert into test_is_ip_address_in_range_function values(10, '::192.168.100.1', '::192.168.100.0/118')" - sql "insert into test_is_ip_address_in_range_function values(11, '::192.168.103.255', '::192.168.100.0/118')" - sql "insert into test_is_ip_address_in_range_function values(12, '::192.168.104.0', '::192.168.100.0/118')" - sql "insert into test_is_ip_address_in_range_function values(13, '192.168.100.1', '192.168.100.0/22')" - sql "insert into test_is_ip_address_in_range_function values(14, '192.168.100.1', '192.168.100.0/24')" - sql "insert into test_is_ip_address_in_range_function values(15, '192.168.100.1', '192.168.100.0/32')" - sql "insert into test_is_ip_address_in_range_function values(16, '::192.168.100.1', '::192.168.100.0/118')" - sql "insert into test_is_ip_address_in_range_function values(17, '::192.168.100.1', '::192.168.100.0/120')" - sql "insert into test_is_ip_address_in_range_function values(18, '::192.168.100.1', '::192.168.100.0/128')" - sql "insert into test_is_ip_address_in_range_function values(19, '192.168.100.1', '192.168.100.0/22')" - sql "insert into test_is_ip_address_in_range_function values(20, '192.168.103.255', '192.168.100.0/24')" - sql "insert into test_is_ip_address_in_range_function values(21, '::192.168.100.1', '::192.168.100.0/118')" - sql "insert into test_is_ip_address_in_range_function values(22, '::192.168.103.255', '::192.168.100.0/120')" - sql "insert into test_is_ip_address_in_range_function values(23, '127.0.0.1', 'ffff::/16')" - sql "insert into test_is_ip_address_in_range_function values(24, '127.0.0.1', '::127.0.0.1/128')" - sql "insert into test_is_ip_address_in_range_function values(25, '::1', '127.0.0.0/8')" - sql "insert into test_is_ip_address_in_range_function values(26, '::127.0.0.1', '127.0.0.1/32')" + sql """ + insert into test_is_ip_address_in_range_function values + (1, '127.0.0.1', '127.0.0.0/8'), + (2, '128.0.0.1', '127.0.0.0/8'), + (3, 'ffff::1', 'ffff::/16'), + (4, 'fffe::1', 'ffff::/16'), + (5, '192.168.99.255', '192.168.100.0/22'), + (6, '192.168.100.1', '192.168.100.0/22'), + (7, '192.168.103.255', '192.168.100.0/22'), + (8, '192.168.104.0', '192.168.100.0/22'), + (9, '::192.168.99.255', '::192.168.100.0/118'), + (10, '::192.168.100.1', '::192.168.100.0/118'), + (11, '::192.168.103.255', '::192.168.100.0/118'), + (12, '::192.168.104.0', '::192.168.100.0/118'), + (13, '192.168.100.1', '192.168.100.0/22'), + (14, '192.168.100.1', '192.168.100.0/24'), + (15, '192.168.100.1', '192.168.100.0/32'), + (16, '::192.168.100.1', '::192.168.100.0/118'), + (17, '::192.168.100.1', '::192.168.100.0/120'), + (18, '::192.168.100.1', '::192.168.100.0/128'), + (19, '192.168.100.1', '192.168.100.0/22'), + (20, '192.168.103.255', '192.168.100.0/24'), + (21, '::192.168.100.1', '::192.168.100.0/118'), + (22, '::192.168.103.255', '::192.168.100.0/120'), + (23, '127.0.0.1', 'ffff::/16'), + (24, '127.0.0.1', '::127.0.0.1/128'), + (25, '::1', '127.0.0.0/8'), + (26, '::127.0.0.1', '127.0.0.1/32') + """ qt_sql "select id, is_ip_address_in_range(addr, cidr) from test_is_ip_address_in_range_function order by id" --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
