[fix](datatype) fix bugs for IPv4/v6 datatype and add some basic regression test cases (#30261)

yiguolei Wed, 31 Jan 2024 07:54:20 -0800

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


commit 221308f78ac5ad18b1a5da0f4b5af8e3fe77199e
Author: yangshijie <sjyang2...@zju.edu.cn>
AuthorDate: Tue Jan 30 17:29:04 2024 +0800

    [fix](datatype) fix bugs for IPv4/v6 datatype and add some basic regression test cases (#30261)
---
 be/src/exec/olap_common.h                          |   6 +
 be/src/olap/types.h                                |  14 +--
 be/src/runtime/primitive_type.h                    |   4 +-
 be/src/runtime/runtime_predicate.cpp               |   8 ++
 be/src/runtime/runtime_predicate.h                 |  10 ++
 .../aggregate_function_min_max.cpp                 |  10 ++
 be/src/vec/columns/column_vector.cpp               |   3 +-
 be/src/vec/columns/column_vector.h                 |  21 ++++
 be/src/vec/core/field.h                            |  20 +++-
 be/src/vec/core/types.h                            |  19 ++-
 be/src/vec/core/wide_integer_impl.h                |   3 +-
 be/src/vec/data_types/convert_field_to_type.cpp    |   1 +
 be/src/vec/data_types/data_type_number_base.cpp    |   6 +-
 .../data_types/serde/data_type_number_serde.cpp    |   9 +-
 .../vec/data_types/serde/data_type_number_serde.h  |   2 +
 be/src/vec/functions/functions_comparison.h        |   4 +
 be/src/vec/io/io_helper.h                          |   6 +-
 be/src/vec/olap/olap_data_convertor.cpp            |   4 +-
 .../sql-manual/sql-reference/Data-Types/IPV4.md    |   1 -
 .../sql-manual/sql-reference/Data-Types/IPV6.md    |   3 +-
 .../sql-manual/sql-reference/Data-Types/IPV4.md    |   1 -
 .../sql-manual/sql-reference/Data-Types/IPV6.md    |   3 +-
 .../data/datatype_p0/ip/test_ip_basic.out          | 131 +++++++++++++++++++++
 .../suites/datatype_p0/ip/test_ip_basic.groovy     | 106 +++++++++++++++++
 24 files changed, 363 insertions(+), 32 deletions(-)

diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index cdca939c6ed..0ea771deb88 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -42,6 +42,8 @@
 #include "runtime/type_limit.h"
 #include "vec/core/types.h"
 #include "vec/io/io_helper.h"
+#include "vec/runtime/ipv4_value.h"
+#include "vec/runtime/ipv6_value.h"
 #include "vec/runtime/vdatetime_value.h"
 
 namespace doris {
@@ -68,6 +70,10 @@ std::string cast_to_string(T value, int scale) {
         std::stringstream ss;
         ss << buf;
         return ss.str();
+    } else if constexpr (primitive_type == TYPE_IPV4) {
+        return IPv4Value::to_string(value);
+    } else if constexpr (primitive_type == TYPE_IPV6) {
+        return IPv6Value::to_string(value);
     } else {
         return boost::lexical_cast<std::string>(value);
     }
diff --git a/be/src/olap/types.h b/be/src/olap/types.h
index 0b2be2c38bb..74aa86fc49e 100644
--- a/be/src/olap/types.h
+++ b/be/src/olap/types.h
@@ -85,7 +85,7 @@ public:
 
     virtual void direct_copy(void* dest, const void* src) const = 0;
 
-    // Use only in zone map to cut 
data.StringParser::string_to_unsigned_int<uint32_t>
+    // Use only in zone map to cut data.
     virtual void direct_copy_may_cut(void* dest, const void* src) const = 0;
 
     virtual Status from_string(void* buf, const std::string& scan_key, const 
int precision = 0,
@@ -790,8 +790,6 @@ struct BaseFieldTypeTraits : public 
CppTypeTraits<field_type> {
     static inline CppType get_cpp_type_value(const void* address) {
         if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT) {
             return get_int128_from_unalign(address);
-        } else if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_IPV6) {
-            return get_int128_from_unalign(address);
         }
         return *reinterpret_cast<const CppType*>(address);
     }
@@ -1011,27 +1009,27 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_IPV6>
         : public BaseFieldTypeTraits<FieldType::OLAP_FIELD_TYPE_IPV6> {
     static Status from_string(void* buf, const std::string& scan_key, const 
int precision,
                               const int scale) {
-        int128_t value;
+        uint128_t value;
         if (!IPv6Value::from_string(value, scan_key)) {
             return Status::Error<ErrorCode::INVALID_ARGUMENT>(
                     "FieldTypeTraits<OLAP_FIELD_TYPE_IPV6>::from_string meet 
PARSE_FAILURE");
         }
-        memcpy(buf, &value, sizeof(int128_t));
+        memcpy(buf, &value, sizeof(uint128_t));
         return Status::OK();
     }
 
     static std::string to_string(const void* src) {
-        int128_t value = *reinterpret_cast<const int128_t*>(src);
+        uint128_t value = *reinterpret_cast<const uint128_t*>(src);
         IPv6Value ipv6_value(value);
         return ipv6_value.to_string();
     }
 
     static void set_to_max(void* buf) {
-        *reinterpret_cast<int128_t*>(buf) = -1; // ::1
+        *reinterpret_cast<int128_t*>(buf) = -1; // 
ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
     }
 
     static void set_to_min(void* buf) {
-        *reinterpret_cast<int128_t*>(buf) = 0; // ::
+        *reinterpret_cast<uint128_t*>(buf) = 0; // ::
     }
 };
 
diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h
index 6040f73388b..d6a4354c9a6 100644
--- a/be/src/runtime/primitive_type.h
+++ b/be/src/runtime/primitive_type.h
@@ -243,13 +243,13 @@ struct PrimitiveTypeTraits<TYPE_LARGEINT> {
 template <>
 struct PrimitiveTypeTraits<TYPE_IPV4> {
     using CppType = vectorized::IPv4;
-    using StorageFieldType = uint32_t;
+    using StorageFieldType = CppType;
     using ColumnType = vectorized::ColumnIPv4;
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_IPV6> {
     using CppType = vectorized::IPv6;
-    using StorageFieldType = uint64_t;
+    using StorageFieldType = CppType;
     using ColumnType = vectorized::ColumnIPv6;
 };
 template <>
diff --git a/be/src/runtime/runtime_predicate.cpp 
b/be/src/runtime/runtime_predicate.cpp
index 627e22364e5..5141b65978d 100644
--- a/be/src/runtime/runtime_predicate.cpp
+++ b/be/src/runtime/runtime_predicate.cpp
@@ -115,6 +115,14 @@ Status RuntimePredicate::init(const PrimitiveType type, 
const bool nulls_first)
         _get_value_fn = get_decimal256_value;
         break;
     }
+    case PrimitiveType::TYPE_IPV4: {
+        _get_value_fn = get_ipv4_value;
+        break;
+    }
+    case PrimitiveType::TYPE_IPV6: {
+        _get_value_fn = get_ipv6_value;
+        break;
+    }
     default:
         return Status::InvalidArgument("unsupported runtime predicate type 
{}", type);
     }
diff --git a/be/src/runtime/runtime_predicate.h 
b/be/src/runtime/runtime_predicate.h
index 80742fa66dc..297d90979ec 100644
--- a/be/src/runtime/runtime_predicate.h
+++ b/be/src/runtime/runtime_predicate.h
@@ -179,6 +179,16 @@ private:
         auto v = field.get<DecimalField<Decimal256>>();
         return cast_to_string<TYPE_DECIMAL256, ValueType>(v.get_value(), 
v.get_scale());
     }
+
+    static std::string get_ipv4_value(const Field& field) {
+        using ValueType = typename PrimitiveTypeTraits<TYPE_IPV4>::CppType;
+        return cast_to_string<TYPE_IPV4, ValueType>(field.get<ValueType>(), 0);
+    }
+
+    static std::string get_ipv6_value(const Field& field) {
+        using ValueType = typename PrimitiveTypeTraits<TYPE_IPV6>::CppType;
+        return cast_to_string<TYPE_IPV6, ValueType>(field.get<ValueType>(), 0);
+    }
 };
 
 } // namespace vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp 
b/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp
index 27afac7789c..8aa8850a314 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp
@@ -68,6 +68,16 @@ AggregateFunctionPtr 
create_aggregate_function_single_value(const String& name,
                 
AggregateFunctionsSingleValue<Data<SingleValueDataFixed<UInt64>>>>(
                 argument_types, result_is_nullable);
     }
+    if (which.idx == TypeIndex::IPv4) {
+        return creator_without_type::create<
+                
AggregateFunctionsSingleValue<Data<SingleValueDataFixed<IPv4>>>>(
+                argument_types, result_is_nullable);
+    }
+    if (which.idx == TypeIndex::IPv6) {
+        return creator_without_type::create<
+                
AggregateFunctionsSingleValue<Data<SingleValueDataFixed<IPv6>>>>(
+                argument_types, result_is_nullable);
+    }
     return nullptr;
 }
 
diff --git a/be/src/vec/columns/column_vector.cpp 
b/be/src/vec/columns/column_vector.cpp
index 7329f898044..05dd3d2ddeb 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -577,7 +577,7 @@ void ColumnVector<T>::replace_column_null_data(const 
uint8_t* __restrict null_ma
 /// Explicit template instantiations - to avoid code bloat in headers.
 template class ColumnVector<UInt8>;
 template class ColumnVector<UInt16>;
-template class ColumnVector<UInt32>;
+template class ColumnVector<UInt32>; // IPv4
 template class ColumnVector<UInt64>;
 template class ColumnVector<UInt128>;
 template class ColumnVector<Int8>;
@@ -587,4 +587,5 @@ template class ColumnVector<Int64>;
 template class ColumnVector<Int128>;
 template class ColumnVector<Float32>;
 template class ColumnVector<Float64>;
+template class ColumnVector<IPv6>; // IPv6
 } // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_vector.h 
b/be/src/vec/columns/column_vector.h
index 384a8daa1c7..c75d7db6054 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -67,6 +67,10 @@ namespace doris::vectorized {
   * Integer values are compared as usual.
   * Floating-point numbers are compared this way that NaNs always end up at 
the end
   *  (if you don't do this, the sort would not work at all).
+  * IPv4 is stored in little-endian order, so comparing the UInt32 values is equivalent to comparing IPv4 addresses.
+  * IPv6, however, is stored in big-endian order, so comparing IPv6 addresses is not equivalent to comparing uint128_t values.
+  * Therefore IPv6 is compared with std::memcmp, which compares bytes from the lowest address to the highest.
+  *  (e.g. :: < ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff)
   */
 template <typename T>
 struct CompareHelper {
@@ -126,6 +130,23 @@ struct CompareHelper<Float32> : public 
FloatCompareHelper<Float32> {};
 template <>
 struct CompareHelper<Float64> : public FloatCompareHelper<Float64> {};
 
+struct IPv6CompareHelper {
+    static bool less(IPv6 a, IPv6 b, int /*nan_direction_hint*/) {
+        return std::memcmp(&a, &b, sizeof(IPv6)) < 0;
+    }
+
+    static bool greater(IPv6 a, IPv6 b, int /*nan_direction_hint*/) {
+        return std::memcmp(&a, &b, sizeof(IPv6)) > 0;
+    }
+
+    static int compare(IPv6 a, IPv6 b, int /*nan_direction_hint*/) {
+        return std::memcmp(&a, &b, sizeof(IPv6));
+    }
+};
+
+template <>
+struct CompareHelper<IPv6> : public IPv6CompareHelper {};
+
 /** A template for columns that use a simple array to store.
  */
 template <typename T>
diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h
index 7897f990ae8..6361df6e788 100644
--- a/be/src/vec/core/field.h
+++ b/be/src/vec/core/field.h
@@ -341,6 +341,7 @@ public:
             UInt128 = 4,
             Int128 = 5,
             FixedLengthObject = 6,
+            IPv6 = 7,
 
             /// Non-POD types.
 
@@ -359,7 +360,7 @@ public:
             HyperLogLog = 28,
             QuantileState = 29,
             Int256 = 30,
-            Decimal256 = 31,
+            Decimal256 = 31
         };
 
         static const int MIN_NON_POD = 16;
@@ -408,6 +409,8 @@ public:
                 return "HyperLogLog";
             case QuantileState:
                 return "QuantileState";
+            case IPv6:
+                return "IPv6";
             default:
                 LOG(FATAL) << "type not supported, type=" << 
Types::to_string(which);
                 break;
@@ -580,6 +583,8 @@ public:
             return get<Int64>() <=> rhs.get<Int64>();
         case Types::Int128:
             return get<Int128>() <=> rhs.get<Int128>();
+        case Types::IPv6:
+            return get<IPv6>() <=> rhs.get<IPv6>();
         case Types::Float64:
             return get<Float64>() < rhs.get<Float64>()    ? 
std::strong_ordering::less
                    : get<Float64>() == rhs.get<Float64>() ? 
std::strong_ordering::equal
@@ -622,6 +627,9 @@ public:
         case Types::Int128:
             f(field.template get<Int128>());
             return;
+        case Types::IPv6:
+            f(field.template get<IPv6>());
+            return;
         case Types::Float64:
             f(field.template get<Float64>());
             return;
@@ -675,7 +683,7 @@ public:
 
 private:
     std::aligned_union_t<DBMS_MIN_FIELD_SIZE - sizeof(Types::Which), Null, 
UInt64, UInt128, Int64,
-                         Int128, Float64, String, JsonbField, Array, Tuple, 
Map, VariantMap,
+                         Int128, IPv6, Float64, String, JsonbField, Array, 
Tuple, Map, VariantMap,
                          DecimalField<Decimal32>, DecimalField<Decimal64>,
                          DecimalField<Decimal128V2>, 
DecimalField<Decimal128V3>,
                          DecimalField<Decimal256>, BitmapValue, HyperLogLog, 
QuantileState>
@@ -835,6 +843,10 @@ struct Field::TypeToEnum<Float64> {
     static constexpr Types::Which value = Types::Float64;
 };
 template <>
+struct Field::TypeToEnum<IPv6> {
+    static constexpr Types::Which value = Types::IPv6;
+};
+template <>
 struct Field::TypeToEnum<String> {
     static constexpr Types::Which value = Types::String;
 };
@@ -919,6 +931,10 @@ struct Field::EnumToType<Field::Types::Float64> {
     using Type = Float64;
 };
 template <>
+struct Field::EnumToType<Field::Types::IPv6> {
+    using Type = IPv6;
+};
+template <>
 struct Field::EnumToType<Field::Types::String> {
     using Type = String;
 };
diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h
index 82bc67a8fc5..8899b6ce017 100644
--- a/be/src/vec/core/types.h
+++ b/be/src/vec/core/types.h
@@ -296,10 +296,21 @@ struct TypeId<String> {
 /// Not a data type in database, defined just for convenience.
 using Strings = std::vector<String>;
 
-using Int128 = __int128;
+using IPv4 = uint32_t;
+using IPv6 = uint128_t;
+
+template <>
+inline constexpr bool IsNumber<IPv6> = true;
+template <>
+struct TypeName<IPv6> {
+    static const char* get() { return "IPv6"; }
+};
+template <>
+struct TypeId<IPv6> {
+    static constexpr const TypeIndex value = TypeIndex::IPv6;
+};
 
-using IPv4 = UInt32;
-using IPv6 = Int128;
+using Int128 = __int128;
 
 template <>
 inline constexpr bool IsNumber<Int128> = true;
@@ -514,6 +525,7 @@ struct Decimal {
     explicit(IsInt256) Decimal(Int32 value) noexcept : value(value) {}
     explicit(IsInt256) Decimal(Int64 value) noexcept : value(value) {}
     explicit(IsInt256) Decimal(Int128 value) noexcept : value(value) {}
+    explicit(IsInt256) Decimal(IPv6 value) noexcept : value(value) {}
     explicit(IsInt256) Decimal(wide::Int256 value) noexcept : value(value) {}
     explicit(IsInt256) Decimal(UInt64 value) noexcept : value(value) {}
     explicit(IsInt256) Decimal(UInt32 value) noexcept : value(value) {}
@@ -658,6 +670,7 @@ struct Decimal128V3 : public Decimal<Int128> {
 
     DECLARE_NUMERIC_CTOR(wide::Int256)
     DECLARE_NUMERIC_CTOR(Int128)
+    DECLARE_NUMERIC_CTOR(IPv6)
     DECLARE_NUMERIC_CTOR(Int32)
     DECLARE_NUMERIC_CTOR(Int64)
     DECLARE_NUMERIC_CTOR(UInt32)
diff --git a/be/src/vec/core/wide_integer_impl.h 
b/be/src/vec/core/wide_integer_impl.h
index 20f0f9d4937..2eb9381bd37 100644
--- a/be/src/vec/core/wide_integer_impl.h
+++ b/be/src/vec/core/wide_integer_impl.h
@@ -257,7 +257,8 @@ struct integer<Bits, Signed>::_impl {
     template <typename Integral>
     constexpr static void wide_integer_from_builtin(integer<Bits, Signed>& 
self,
                                                     Integral rhs) noexcept {
-        if constexpr (std::is_same_v<Integral, __int128>) {
+        if constexpr (std::is_same_v<Integral, __int128> ||
+                      std::is_same_v<Integral, unsigned __int128>) {
             self.items[little(0)] = rhs;
             self.items[little(1)] = rhs >> 64;
             if (rhs < 0) {
diff --git a/be/src/vec/data_types/convert_field_to_type.cpp 
b/be/src/vec/data_types/convert_field_to_type.cpp
index 0196d5ede76..3717abc74e1 100644
--- a/be/src/vec/data_types/convert_field_to_type.cpp
+++ b/be/src/vec/data_types/convert_field_to_type.cpp
@@ -117,6 +117,7 @@ public:
     void operator()(const Int128& x, JsonbWriter* writer) const {
         writer->writeInt128(int128_t(x));
     }
+    void operator()(const IPv6& x, JsonbWriter* writer) const { 
writer->writeInt128(int128_t(x)); }
     void operator()(const Int64& x, JsonbWriter* writer) const { 
writer->writeInt64(x); }
     void operator()(const Float64& x, JsonbWriter* writer) const { 
writer->writeDouble(x); }
     void operator()(const String& x, JsonbWriter* writer) const {
diff --git a/be/src/vec/data_types/data_type_number_base.cpp 
b/be/src/vec/data_types/data_type_number_base.cpp
index 662b8e18734..29032915999 100644
--- a/be/src/vec/data_types/data_type_number_base.cpp
+++ b/be/src/vec/data_types/data_type_number_base.cpp
@@ -147,7 +147,8 @@ std::string DataTypeNumberBase<T>::to_string(const IColumn& 
column, size_t row_n
     ColumnPtr ptr = result.first;
     row_num = result.second;
 
-    if constexpr (std::is_same<T, __int128_t>::value || std::is_same<T, 
UInt128>::value) {
+    if constexpr (std::is_same<T, int128_t>::value || std::is_same<T, 
uint128_t>::value ||
+                  std::is_same<T, UInt128>::value) {
         return int128_to_string(assert_cast<const 
ColumnVector<T>&>(*ptr).get_element(row_num));
     } else if constexpr (std::is_integral<T>::value) {
         return std::to_string(assert_cast<const 
ColumnVector<T>&>(*ptr).get_element(row_num));
@@ -264,7 +265,7 @@ bool 
DataTypeNumberBase<T>::is_value_represented_by_unsigned_integer() const {
 /// Explicit template instantiations - to avoid code bloat in headers.
 template class DataTypeNumberBase<UInt8>;
 template class DataTypeNumberBase<UInt16>;
-template class DataTypeNumberBase<UInt32>;
+template class DataTypeNumberBase<UInt32>; // IPv4
 template class DataTypeNumberBase<UInt64>;
 template class DataTypeNumberBase<UInt128>;
 template class DataTypeNumberBase<Int8>;
@@ -274,5 +275,6 @@ template class DataTypeNumberBase<Int64>;
 template class DataTypeNumberBase<Int128>;
 template class DataTypeNumberBase<Float32>;
 template class DataTypeNumberBase<Float64>;
+template class DataTypeNumberBase<IPv6>; // IPv6
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp 
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index e9dd7ac094a..5d34ea92ae7 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -64,8 +64,8 @@ using DORIS_NUMERIC_ARROW_BUILDER =
                 arrow::UInt16Builder, Int16, arrow::Int16Builder, UInt32, 
arrow::UInt32Builder,
                 Int32, arrow::Int32Builder, UInt64, arrow::UInt64Builder, 
Int64,
                 arrow::Int64Builder, UInt128, arrow::FixedSizeBinaryBuilder, 
Int128,
-                arrow::FixedSizeBinaryBuilder, Float32, arrow::FloatBuilder, 
Float64,
-                arrow::DoubleBuilder, void,
+                arrow::FixedSizeBinaryBuilder, IPv6, 
arrow::FixedSizeBinaryBuilder, Float32,
+                arrow::FloatBuilder, Float64, arrow::DoubleBuilder, void,
                 void // Add this line to represent the end of the TypeMap
                 >;
 
@@ -84,7 +84,7 @@ void DataTypeNumberSerDe<T>::write_column_to_arrow(const 
IColumn& column, const
                                      end - start,
                                      reinterpret_cast<const 
uint8_t*>(arrow_null_map_data)),
                 column.get_name(), array_builder->type()->name());
-    } else if constexpr (std::is_same_v<T, Int128>) {
+    } else if constexpr (std::is_same_v<T, Int128> || std::is_same_v<T, IPv6>) 
{
         auto& string_builder = 
assert_cast<arrow::StringBuilder&>(*array_builder);
         for (size_t i = start; i < end; ++i) {
             auto& data_value = col_data[i];
@@ -341,7 +341,7 @@ Status DataTypeNumberSerDe<T>::write_column_to_orc(const 
std::string& timezone,
 /// Explicit template instantiations - to avoid code bloat in headers.
 template class DataTypeNumberSerDe<UInt8>;
 template class DataTypeNumberSerDe<UInt16>;
-template class DataTypeNumberSerDe<UInt32>;
+template class DataTypeNumberSerDe<UInt32>; // IPv4
 template class DataTypeNumberSerDe<UInt64>;
 template class DataTypeNumberSerDe<UInt128>;
 template class DataTypeNumberSerDe<Int8>;
@@ -351,5 +351,6 @@ template class DataTypeNumberSerDe<Int64>;
 template class DataTypeNumberSerDe<Int128>;
 template class DataTypeNumberSerDe<Float32>;
 template class DataTypeNumberSerDe<Float64>;
+template class DataTypeNumberSerDe<IPv6>; // IPv6
 } // namespace vectorized
 } // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h 
b/be/src/vec/data_types/serde/data_type_number_serde.h
index 806839c1e0f..cea3d451707 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.h
+++ b/be/src/vec/data_types/serde/data_type_number_serde.h
@@ -48,6 +48,8 @@ class Arena;
 //  DataTypeTime => T:Float64
 //  DataTypeDate => T:Int64
 //  DataTypeDateTime => T:Int64
+//  IPv4 => T:UInt32
+//  IPv6 => T:uint128_t
 template <typename T>
 class DataTypeNumberSerDe : public DataTypeSerDe {
     static_assert(IsNumber<T>);
diff --git a/be/src/vec/functions/functions_comparison.h 
b/be/src/vec/functions/functions_comparison.h
index 6a0f3aa634d..09597336c25 100644
--- a/be/src/vec/functions/functions_comparison.h
+++ b/be/src/vec/functions/functions_comparison.h
@@ -343,6 +343,7 @@ private:
                 execute_num_right_type<T0, Int32>(block, result, col_left, 
col_right_untyped) ||
                 execute_num_right_type<T0, Int64>(block, result, col_left, 
col_right_untyped) ||
                 execute_num_right_type<T0, Int128>(block, result, col_left, 
col_right_untyped) ||
+                execute_num_right_type<T0, IPv6>(block, result, col_left, 
col_right_untyped) ||
                 execute_num_right_type<T0, Float32>(block, result, col_left, 
col_right_untyped) ||
                 execute_num_right_type<T0, Float64>(block, result, col_left, 
col_right_untyped))
                 return true;
@@ -371,6 +372,8 @@ private:
                                                         col_right_untyped) ||
                 execute_num_const_right_type<T0, Int128>(block, result, 
col_left_const,
                                                          col_right_untyped) ||
+                execute_num_const_right_type<T0, IPv6>(block, result, 
col_left_const,
+                                                       col_right_untyped) ||
                 execute_num_const_right_type<T0, Float32>(block, result, 
col_left_const,
                                                           col_right_untyped) ||
                 execute_num_const_right_type<T0, Float64>(block, result, 
col_left_const,
@@ -587,6 +590,7 @@ public:
                                                col_right_untyped) ||
                   execute_num_left_type<Int128>(block, result, 
col_left_untyped,
                                                 col_right_untyped) ||
+                  execute_num_left_type<IPv6>(block, result, col_left_untyped, 
col_right_untyped) ||
                   execute_num_left_type<Float32>(block, result, 
col_left_untyped,
                                                  col_right_untyped) ||
                   execute_num_left_type<Float64>(block, result, 
col_left_untyped,
diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h
index 81425e7bc53..1f7a8ffdf18 100644
--- a/be/src/vec/io/io_helper.h
+++ b/be/src/vec/io/io_helper.h
@@ -45,7 +45,11 @@ static constexpr size_t DEFAULT_MAX_STRING_SIZE = 
1073741824; // 1GB
 static constexpr size_t DEFAULT_MAX_JSON_SIZE = 1073741824;   // 1GB
 static constexpr auto WRITE_HELPERS_MAX_INT_WIDTH = 40U;
 
-inline std::string int128_to_string(__int128_t value) {
+inline std::string int128_to_string(int128_t value) {
+    return fmt::format(FMT_COMPILE("{}"), value);
+}
+
+inline std::string int128_to_string(uint128_t value) {
     return fmt::format(FMT_COMPILE("{}"), value);
 }
 
diff --git a/be/src/vec/olap/olap_data_convertor.cpp 
b/be/src/vec/olap/olap_data_convertor.cpp
index d2f1d431daa..3a130a638b8 100644
--- a/be/src/vec/olap/olap_data_convertor.cpp
+++ b/be/src/vec/olap/olap_data_convertor.cpp
@@ -158,10 +158,10 @@ 
OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co
         return 
std::make_unique<OlapColumnDataConvertorSimple<vectorized::Int128>>();
     }
     case FieldType::OLAP_FIELD_TYPE_IPV4: {
-        return 
std::make_unique<OlapColumnDataConvertorSimple<vectorized::UInt32>>();
+        return 
std::make_unique<OlapColumnDataConvertorSimple<vectorized::IPv4>>();
     }
     case FieldType::OLAP_FIELD_TYPE_IPV6: {
-        return 
std::make_unique<OlapColumnDataConvertorSimple<vectorized::Int128>>();
+        return 
std::make_unique<OlapColumnDataConvertorSimple<vectorized::IPv6>>();
     }
     case FieldType::OLAP_FIELD_TYPE_FLOAT: {
         return 
std::make_unique<OlapColumnDataConvertorSimple<vectorized::Float32>>();
diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV4.md 
b/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV4.md
index 07ab836e4eb..477371ecc2a 100644
--- a/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV4.md
+++ b/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV4.md
@@ -34,7 +34,6 @@ IPV4
 
 ### description
 
-IPV4
 IPv4 type, stored in the form of UInt32 in 4 bytes, used to represent IPv4 
addresses.
 The range of values is ['0.0.0.0', '255.255.255.255'].
 
diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV6.md 
b/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV6.md
index dcf46813240..f877bbc2d40 100644
--- a/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV6.md
+++ b/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV6.md
@@ -34,8 +34,7 @@ IPV6
 
 ### description
 
-IPV6
-IPv6 type, stored in Int128 format in 16 bytes, used to represent IPv6 
addresses.
+IPv6 type, stored in UInt128 format in 16 bytes, used to represent IPv6 
addresses.
 The range of values is ['::', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'].
 
 `Inputs that exceed the value range or have invalid format will return NULL`
diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV4.md 
b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV4.md
index 2766a20580e..f44eb195e0d 100644
--- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV4.md
+++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV4.md
@@ -34,7 +34,6 @@ IPV4
 
 ### description
 
-IPV4
 IPv4类型，以UInt32的形式存储在4个字节中，用于表示IPv4地址。
 取值范围是 ['0.0.0.0', '255.255.255.255']。
 
diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV6.md 
b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV6.md
index 678b404a672..591ad1eb125 100644
--- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV6.md
+++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV6.md
@@ -34,8 +34,7 @@ IPV6
 
 ### description
 
-IPV6
-IPv6类型，以Int128的形式存储在16个字节中，用于表示IPv6地址。
+IPv6类型，以UInt128的形式存储在16个字节中，用于表示IPv6地址。
 取值范围是 ['::', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff']。
 
 `超出取值范围或者格式非法的输入将返回NULL`
diff --git a/regression-test/data/datatype_p0/ip/test_ip_basic.out 
b/regression-test/data/datatype_p0/ip/test_ip_basic.out
new file mode 100644
index 00000000000..0262959d83e
--- /dev/null
+++ b/regression-test/data/datatype_p0/ip/test_ip_basic.out
@@ -0,0 +1,131 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql1 --
+\N
+0.0.0.0
+127.0.0.1
+127.0.0.1
+127.0.0.1
+192.168.0.1
+192.168.0.1
+255.255.255.255
+255.255.255.255
+255.255.255.255
+255.255.255.255
+
+-- !sql2 --
+\N
+::
+::1
+::1
+2001:1b70:a1:610::b102:2
+2001:1b70:a1:610::b102:2
+2001:1b70:a1:610::b102:2
+ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+
+-- !sql3 --
+255.255.255.255
+255.255.255.255
+255.255.255.255
+255.255.255.255
+192.168.0.1
+192.168.0.1
+127.0.0.1
+127.0.0.1
+127.0.0.1
+0.0.0.0
+\N
+
+-- !sql4 --
+ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+2001:1b70:a1:610::b102:2
+2001:1b70:a1:610::b102:2
+2001:1b70:a1:610::b102:2
+::1
+::1
+::
+\N
+
+-- !sql5 --
+0.0.0.0
+
+-- !sql6 --
+::
+
+-- !sql7 --
+255.255.255.255
+
+-- !sql8 --
+ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+
+-- !sql9 --
+\N     0
+0.0.0.0        1
+127.0.0.1      3
+192.168.0.1    2
+255.255.255.255        4
+
+-- !sql10 --
+\N     0
+::     1
+::1    2
+2001:1b70:a1:610::b102:2       3
+ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff        4
+
+-- !sql11 --
+0      \N      \N
+1      0.0.0.0 0.0.0.0
+3      127.0.0.1       127.0.0.1
+2      192.168.0.1     192.168.0.1
+4      255.255.255.255 255.255.255.255
+
+-- !sql12 --
+0      \N      \N
+1      ::      ::
+2      ::1     ::1
+3      2001:1b70:a1:610::b102:2        2001:1b70:a1:610::b102:2
+4      ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 
ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+
+-- !sql13 --
+1      0.0.0.0 ::      1       0.0.0.0 ::
+2      192.168.0.1     ::1     2       192.168.0.1     ::1
+2      192.168.0.1     ::1     2       192.168.0.1     ::1
+2      192.168.0.1     ::1     2       192.168.0.1     ::1
+2      192.168.0.1     ::1     2       192.168.0.1     ::1
+3      127.0.0.1       2001:1b70:a1:610::b102:2        3       127.0.0.1       
2001:1b70:a1:610::b102:2
+3      127.0.0.1       2001:1b70:a1:610::b102:2        3       127.0.0.1       
2001:1b70:a1:610::b102:2
+3      127.0.0.1       2001:1b70:a1:610::b102:2        3       127.0.0.1       
2001:1b70:a1:610::b102:2
+3      127.0.0.1       2001:1b70:a1:610::b102:2        3       127.0.0.1       
2001:1b70:a1:610::b102:2
+3      127.0.0.1       2001:1b70:a1:610::b102:2        3       127.0.0.1       
2001:1b70:a1:610::b102:2
+3      127.0.0.1       2001:1b70:a1:610::b102:2        3       127.0.0.1       
2001:1b70:a1:610::b102:2
+3      127.0.0.1       2001:1b70:a1:610::b102:2        3       127.0.0.1       
2001:1b70:a1:610::b102:2
+3      127.0.0.1       2001:1b70:a1:610::b102:2        3       127.0.0.1       
2001:1b70:a1:610::b102:2
+3      127.0.0.1       2001:1b70:a1:610::b102:2        3       127.0.0.1       
2001:1b70:a1:610::b102:2
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+4      255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4       
255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+
+-- !sql14 --
+0.0.0.0        1
+192.168.0.1    4
+127.0.0.1      9
+255.255.255.255        16
+
diff --git a/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy 
b/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy
new file mode 100644
index 00000000000..586f3d15774
--- /dev/null
+++ b/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy
@@ -0,0 +1,106 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_ip_basic") { // ORDER BY / GROUP BY / JOIN checks for ipv4 and ipv6 columns
+    sql """ DROP TABLE IF EXISTS t0 """
+    sql """ DROP TABLE IF EXISTS t1 """
+
+    sql """ SET enable_nereids_planner=true """
+    sql """ SET enable_fallback_to_original_planner=false """
+
+    sql """
+        CREATE TABLE `t0` (
+          `id` int,
+          `ip_v4` ipv4,
+          `ip_v6` ipv6
+        ) ENGINE=OLAP
+        DISTRIBUTED BY HASH(`id`) BUCKETS 4
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "enable_duplicate_without_keys_by_default" = "true"
+        );
+        """
+
+    sql """
+        CREATE TABLE `t1` (
+          `id` int,
+          `ip_v4` ipv4,
+          `ip_v6` ipv6
+        ) ENGINE=OLAP
+        DISTRIBUTED BY HASH(`id`) BUCKETS 4
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "enable_duplicate_without_keys_by_default" = "true"
+        );
+        """
+
+    sql """
+        insert into t0 values
+        (0, NULL, NULL),
+        (1, '0.0.0.0', '::'),
+        (2, '192.168.0.1', '::1'),
+        (2, '192.168.0.1', '::1'),
+        (3, '127.0.0.1', '2001:1b70:a1:610::b102:2'),
+        (3, '127.0.0.1', '2001:1b70:a1:610::b102:2'),
+        (3, '127.0.0.1', '2001:1b70:a1:610::b102:2'),
+        (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'),
+        (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'),
+        (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'),
+        (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff');
+        """
+
+    sql """
+        insert into t1 values
+        (0, NULL, NULL),
+        (1, '0.0.0.0', '::'),
+        (2, '192.168.0.1', '::1'),
+        (2, '192.168.0.1', '::1'),
+        (3, '127.0.0.1', '2001:1b70:a1:610::b102:2'),
+        (3, '127.0.0.1', '2001:1b70:a1:610::b102:2'),
+        (3, '127.0.0.1', '2001:1b70:a1:610::b102:2'),
+        (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'),
+        (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'),
+        (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'),
+        (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff');
+        """
+
+    // ORDER BY on each IP column: ascending, descending, and each again with LIMIT 1 OFFSET 1
+    qt_sql1 "select ip_v4 from t0 order by ip_v4"
+    qt_sql2 "select ip_v6 from t0 order by ip_v6"
+    qt_sql3 "select ip_v4 from t0 order by ip_v4 desc"
+    qt_sql4 "select ip_v6 from t0 order by ip_v6 desc"
+    qt_sql5 "select ip_v4 from t0 order by ip_v4 limit 1 offset 1"
+    qt_sql6 "select ip_v6 from t0 order by ip_v6 limit 1 offset 1"
+    qt_sql7 "select ip_v4 from t0 order by ip_v4 desc limit 1 offset 1"
+    qt_sql8 "select ip_v6 from t0 order by ip_v6 desc limit 1 offset 1"
+
+    // GROUP BY each IP column with count(); sql11/sql12 additionally take min() and max() per group
+    qt_sql9 "select ip_v4, count(ip_v4) as cnt from t0 group by ip_v4 order by ip_v4"
+    qt_sql10 "select ip_v6, count(ip_v6) as cnt from t0 group by ip_v6 order by ip_v6"
+    qt_sql11 "select count(ip_v4) as cnt, min(ip_v4), max(ip_v4) from t0 group by ip_v4 order by min(ip_v4)"
+    qt_sql12 "select count(ip_v6) as cnt, min(ip_v6), max(ip_v6) from t0 group by ip_v6 order by min(ip_v6)"
+
+    // Join t0 with t1 (both loaded with the same rows) on equality of both IP columns
+    qt_sql13 "select t0.id, t0.ip_v4, t0.ip_v6, t1.id, t1.ip_v4, t1.ip_v6 from t0 join t1 on t0.ip_v4=t1.ip_v4 and t0.ip_v6=t1.ip_v6 order by t0.id, t1.id"
+
+    // Same join, counted per t0.ip_v4; the expected counts (1, 4, 9, 16) are distinct, so ORDER BY cnt is stable
+    qt_sql14 "select t0.ip_v4, count(*) as cnt from t0 join t1 on t0.ip_v4=t1.ip_v4 and t0.ip_v6=t1.ip_v6 group by t0.ip_v4 order by cnt"
+
+    sql "DROP TABLE t0"
+    sql "DROP TABLE t1"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

(doris) 05/37: [fix](datatype) fix bugs for IPv4/v6 datatype and add some basic regression test cases (#30261)

Reply via email to