This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 221308f78ac5ad18b1a5da0f4b5af8e3fe77199e Author: yangshijie <sjyang2...@zju.edu.cn> AuthorDate: Tue Jan 30 17:29:04 2024 +0800 [fix](datatype) fix bugs for IPv4/v6 datatype and add some basic regression test cases (#30261) --- be/src/exec/olap_common.h | 6 + be/src/olap/types.h | 14 +-- be/src/runtime/primitive_type.h | 4 +- be/src/runtime/runtime_predicate.cpp | 8 ++ be/src/runtime/runtime_predicate.h | 10 ++ .../aggregate_function_min_max.cpp | 10 ++ be/src/vec/columns/column_vector.cpp | 3 +- be/src/vec/columns/column_vector.h | 21 ++++ be/src/vec/core/field.h | 20 +++- be/src/vec/core/types.h | 19 ++- be/src/vec/core/wide_integer_impl.h | 3 +- be/src/vec/data_types/convert_field_to_type.cpp | 1 + be/src/vec/data_types/data_type_number_base.cpp | 6 +- .../data_types/serde/data_type_number_serde.cpp | 9 +- .../vec/data_types/serde/data_type_number_serde.h | 2 + be/src/vec/functions/functions_comparison.h | 4 + be/src/vec/io/io_helper.h | 6 +- be/src/vec/olap/olap_data_convertor.cpp | 4 +- .../sql-manual/sql-reference/Data-Types/IPV4.md | 1 - .../sql-manual/sql-reference/Data-Types/IPV6.md | 3 +- .../sql-manual/sql-reference/Data-Types/IPV4.md | 1 - .../sql-manual/sql-reference/Data-Types/IPV6.md | 3 +- .../data/datatype_p0/ip/test_ip_basic.out | 131 +++++++++++++++++++++ .../suites/datatype_p0/ip/test_ip_basic.groovy | 106 +++++++++++++++++ 24 files changed, 363 insertions(+), 32 deletions(-) diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index cdca939c6ed..0ea771deb88 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -42,6 +42,8 @@ #include "runtime/type_limit.h" #include "vec/core/types.h" #include "vec/io/io_helper.h" +#include "vec/runtime/ipv4_value.h" +#include "vec/runtime/ipv6_value.h" #include "vec/runtime/vdatetime_value.h" namespace doris { @@ -68,6 +70,10 @@ std::string cast_to_string(T value, int scale) { std::stringstream ss; ss << buf; return ss.str(); + } else if constexpr (primitive_type == TYPE_IPV4) { + return IPv4Value::to_string(value); + } else if constexpr (primitive_type == TYPE_IPV6) { + return IPv6Value::to_string(value); } else { return boost::lexical_cast<std::string>(value); } diff --git a/be/src/olap/types.h b/be/src/olap/types.h index 0b2be2c38bb..74aa86fc49e 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -85,7 +85,7 @@ public: virtual void direct_copy(void* dest, const void* src) const = 0; - // Use only in zone map to cut data.StringParser::string_to_unsigned_int<uint32_t> + // Use only in zone map to cut data. virtual void direct_copy_may_cut(void* dest, const void* src) const = 0; virtual Status from_string(void* buf, const std::string& scan_key, const int precision = 0, @@ -790,8 +790,6 @@ struct BaseFieldTypeTraits : public CppTypeTraits<field_type> { static inline CppType get_cpp_type_value(const void* address) { if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT) { return get_int128_from_unalign(address); - } else if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_IPV6) { - return get_int128_from_unalign(address); } return *reinterpret_cast<const CppType*>(address); } @@ -1011,27 +1009,27 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_IPV6> : public BaseFieldTypeTraits<FieldType::OLAP_FIELD_TYPE_IPV6> { static Status from_string(void* buf, const std::string& scan_key, const int precision, const int scale) { - int128_t value; + uint128_t value; if (!IPv6Value::from_string(value, scan_key)) { return Status::Error<ErrorCode::INVALID_ARGUMENT>( "FieldTypeTraits<OLAP_FIELD_TYPE_IPV6>::from_string meet PARSE_FAILURE"); } - memcpy(buf, &value, sizeof(int128_t)); + memcpy(buf, &value, sizeof(uint128_t)); return Status::OK(); } static std::string to_string(const void* src) { - int128_t value = *reinterpret_cast<const int128_t*>(src); + uint128_t value = *reinterpret_cast<const uint128_t*>(src); IPv6Value ipv6_value(value); return ipv6_value.to_string(); } static void set_to_max(void* buf) { - *reinterpret_cast<int128_t*>(buf) = -1; // ::1 + *reinterpret_cast<int128_t*>(buf) = -1; // ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff } static void set_to_min(void* buf) { - *reinterpret_cast<int128_t*>(buf) = 0; // :: + *reinterpret_cast<uint128_t*>(buf) = 0; // :: } }; diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index 6040f73388b..d6a4354c9a6 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -243,13 +243,13 @@ struct PrimitiveTypeTraits<TYPE_LARGEINT> { template <> struct PrimitiveTypeTraits<TYPE_IPV4> { using CppType = vectorized::IPv4; - using StorageFieldType = uint32_t; + using StorageFieldType = CppType; using ColumnType = vectorized::ColumnIPv4; }; template <> struct PrimitiveTypeTraits<TYPE_IPV6> { using CppType = vectorized::IPv6; - using StorageFieldType = uint64_t; + using StorageFieldType = CppType; using ColumnType = vectorized::ColumnIPv6; }; template <> diff --git a/be/src/runtime/runtime_predicate.cpp b/be/src/runtime/runtime_predicate.cpp index 627e22364e5..5141b65978d 100644 --- a/be/src/runtime/runtime_predicate.cpp +++ b/be/src/runtime/runtime_predicate.cpp @@ -115,6 +115,14 @@ Status RuntimePredicate::init(const PrimitiveType type, const bool nulls_first) _get_value_fn = get_decimal256_value; break; } + case PrimitiveType::TYPE_IPV4: { + _get_value_fn = get_ipv4_value; + break; + } + case PrimitiveType::TYPE_IPV6: { + _get_value_fn = get_ipv6_value; + break; + } default: return Status::InvalidArgument("unsupported runtime predicate type {}", type); } diff --git a/be/src/runtime/runtime_predicate.h b/be/src/runtime/runtime_predicate.h index 80742fa66dc..297d90979ec 100644 --- a/be/src/runtime/runtime_predicate.h +++ b/be/src/runtime/runtime_predicate.h @@ -179,6 +179,16 @@ private: auto v = field.get<DecimalField<Decimal256>>(); return cast_to_string<TYPE_DECIMAL256, ValueType>(v.get_value(), v.get_scale()); } + + static std::string get_ipv4_value(const Field& field) { + using ValueType = typename PrimitiveTypeTraits<TYPE_IPV4>::CppType; + return cast_to_string<TYPE_IPV4, ValueType>(field.get<ValueType>(), 0); + } + + static std::string get_ipv6_value(const Field& field) { + using ValueType = typename PrimitiveTypeTraits<TYPE_IPV6>::CppType; + return cast_to_string<TYPE_IPV6, ValueType>(field.get<ValueType>(), 0); + } }; } // namespace vectorized diff --git a/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp b/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp index 27afac7789c..8aa8850a314 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp @@ -68,6 +68,16 @@ AggregateFunctionPtr create_aggregate_function_single_value(const String& name, AggregateFunctionsSingleValue<Data<SingleValueDataFixed<UInt64>>>>( argument_types, result_is_nullable); } + if (which.idx == TypeIndex::IPv4) { + return creator_without_type::create< + AggregateFunctionsSingleValue<Data<SingleValueDataFixed<IPv4>>>>( + argument_types, result_is_nullable); + } + if (which.idx == TypeIndex::IPv6) { + return creator_without_type::create< + AggregateFunctionsSingleValue<Data<SingleValueDataFixed<IPv6>>>>( + argument_types, result_is_nullable); + } return nullptr; } diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index 7329f898044..05dd3d2ddeb 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -577,7 +577,7 @@ void ColumnVector<T>::replace_column_null_data(const uint8_t* __restrict null_ma /// Explicit template instantiations - to avoid code bloat in headers. template class ColumnVector<UInt8>; template class ColumnVector<UInt16>; -template class ColumnVector<UInt32>; +template class ColumnVector<UInt32>; // IPv4 template class ColumnVector<UInt64>; template class ColumnVector<UInt128>; template class ColumnVector<Int8>; @@ -587,4 +587,5 @@ template class ColumnVector<Int64>; template class ColumnVector<Int128>; template class ColumnVector<Float32>; template class ColumnVector<Float64>; +template class ColumnVector<IPv6>; // IPv6 } // namespace doris::vectorized diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index 384a8daa1c7..c75d7db6054 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -67,6 +67,10 @@ namespace doris::vectorized { * Integer values are compared as usual. * Floating-point numbers are compared this way that NaNs always end up at the end * (if you don't do this, the sort would not work at all). + * Due to IPv4 being a Little-Endian storage, comparing UInt32 is equivalent to comparing IPv4. + * However, IPv6 is a Big-Endian storage, and comparing IPv6 is not equivalent to comparing uint128_t. + * So we should use std::memcmp to start comparing from low bytes to high bytes. + * (e.g. :: < ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff) */ template <typename T> struct CompareHelper { @@ -126,6 +130,23 @@ struct CompareHelper<Float32> : public FloatCompareHelper<Float32> {}; template <> struct CompareHelper<Float64> : public FloatCompareHelper<Float64> {}; +struct IPv6CompareHelper { + static bool less(IPv6 a, IPv6 b, int /*nan_direction_hint*/) { + return std::memcmp(&a, &b, sizeof(IPv6)) < 0; + } + + static bool greater(IPv6 a, IPv6 b, int /*nan_direction_hint*/) { + return std::memcmp(&a, &b, sizeof(IPv6)) > 0; + } + + static int compare(IPv6 a, IPv6 b, int /*nan_direction_hint*/) { + return std::memcmp(&a, &b, sizeof(IPv6)); + } +}; + +template <> +struct CompareHelper<IPv6> : public IPv6CompareHelper {}; + /** A template for columns that use a simple array to store. */ template <typename T> diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 7897f990ae8..6361df6e788 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -341,6 +341,7 @@ public: UInt128 = 4, Int128 = 5, FixedLengthObject = 6, + IPv6 = 7, /// Non-POD types. @@ -359,7 +360,7 @@ public: HyperLogLog = 28, QuantileState = 29, Int256 = 30, - Decimal256 = 31, + Decimal256 = 31 }; static const int MIN_NON_POD = 16; @@ -408,6 +409,8 @@ public: return "HyperLogLog"; case QuantileState: return "QuantileState"; + case IPv6: + return "IPv6"; default: LOG(FATAL) << "type not supported, type=" << Types::to_string(which); break; @@ -580,6 +583,8 @@ public: return get<Int64>() <=> rhs.get<Int64>(); case Types::Int128: return get<Int128>() <=> rhs.get<Int128>(); + case Types::IPv6: + return get<IPv6>() <=> rhs.get<IPv6>(); case Types::Float64: return get<Float64>() < rhs.get<Float64>() ? std::strong_ordering::less : get<Float64>() == rhs.get<Float64>() ? std::strong_ordering::equal @@ -622,6 +627,9 @@ public: case Types::Int128: f(field.template get<Int128>()); return; + case Types::IPv6: + f(field.template get<IPv6>()); + return; case Types::Float64: f(field.template get<Float64>()); return; @@ -675,7 +683,7 @@ public: private: std::aligned_union_t<DBMS_MIN_FIELD_SIZE - sizeof(Types::Which), Null, UInt64, UInt128, Int64, - Int128, Float64, String, JsonbField, Array, Tuple, Map, VariantMap, + Int128, IPv6, Float64, String, JsonbField, Array, Tuple, Map, VariantMap, DecimalField<Decimal32>, DecimalField<Decimal64>, DecimalField<Decimal128V2>, DecimalField<Decimal128V3>, DecimalField<Decimal256>, BitmapValue, HyperLogLog, QuantileState> @@ -835,6 +843,10 @@ struct Field::TypeToEnum<Float64> { static constexpr Types::Which value = Types::Float64; }; template <> +struct Field::TypeToEnum<IPv6> { + static constexpr Types::Which value = Types::IPv6; +}; +template <> struct Field::TypeToEnum<String> { static constexpr Types::Which value = Types::String; }; @@ -919,6 +931,10 @@ struct Field::EnumToType<Field::Types::Float64> { using Type = Float64; }; template <> +struct Field::EnumToType<Field::Types::IPv6> { + using Type = IPv6; +}; +template <> struct Field::EnumToType<Field::Types::String> { using Type = String; }; diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h index 82bc67a8fc5..8899b6ce017 100644 --- a/be/src/vec/core/types.h +++ b/be/src/vec/core/types.h @@ -296,10 +296,21 @@ struct TypeId<String> { /// Not a data type in database, defined just for convenience. using Strings = std::vector<String>; -using Int128 = __int128; +using IPv4 = uint32_t; +using IPv6 = uint128_t; + +template <> +inline constexpr bool IsNumber<IPv6> = true; +template <> +struct TypeName<IPv6> { + static const char* get() { return "IPv6"; } +}; +template <> +struct TypeId<IPv6> { + static constexpr const TypeIndex value = TypeIndex::IPv6; +}; -using IPv4 = UInt32; -using IPv6 = Int128; +using Int128 = __int128; template <> inline constexpr bool IsNumber<Int128> = true; @@ -514,6 +525,7 @@ struct Decimal { explicit(IsInt256) Decimal(Int32 value) noexcept : value(value) {} explicit(IsInt256) Decimal(Int64 value) noexcept : value(value) {} explicit(IsInt256) Decimal(Int128 value) noexcept : value(value) {} + explicit(IsInt256) Decimal(IPv6 value) noexcept : value(value) {} explicit(IsInt256) Decimal(wide::Int256 value) noexcept : value(value) {} explicit(IsInt256) Decimal(UInt64 value) noexcept : value(value) {} explicit(IsInt256) Decimal(UInt32 value) noexcept : value(value) {} @@ -658,6 +670,7 @@ struct Decimal128V3 : public Decimal<Int128> { DECLARE_NUMERIC_CTOR(wide::Int256) DECLARE_NUMERIC_CTOR(Int128) + DECLARE_NUMERIC_CTOR(IPv6) DECLARE_NUMERIC_CTOR(Int32) DECLARE_NUMERIC_CTOR(Int64) DECLARE_NUMERIC_CTOR(UInt32) diff --git a/be/src/vec/core/wide_integer_impl.h b/be/src/vec/core/wide_integer_impl.h index 20f0f9d4937..2eb9381bd37 100644 --- a/be/src/vec/core/wide_integer_impl.h +++ b/be/src/vec/core/wide_integer_impl.h @@ -257,7 +257,8 @@ struct integer<Bits, Signed>::_impl { template <typename Integral> constexpr static void wide_integer_from_builtin(integer<Bits, Signed>& self, Integral rhs) noexcept { - if constexpr (std::is_same_v<Integral, __int128>) { + if constexpr (std::is_same_v<Integral, __int128> || + std::is_same_v<Integral, unsigned __int128>) { self.items[little(0)] = rhs; self.items[little(1)] = rhs >> 64; if (rhs < 0) { diff --git a/be/src/vec/data_types/convert_field_to_type.cpp b/be/src/vec/data_types/convert_field_to_type.cpp index 0196d5ede76..3717abc74e1 100644 --- a/be/src/vec/data_types/convert_field_to_type.cpp +++ b/be/src/vec/data_types/convert_field_to_type.cpp @@ -117,6 +117,7 @@ public: void operator()(const Int128& x, JsonbWriter* writer) const { writer->writeInt128(int128_t(x)); } + void operator()(const IPv6& x, JsonbWriter* writer) const { writer->writeInt128(int128_t(x)); } void operator()(const Int64& x, JsonbWriter* writer) const { writer->writeInt64(x); } void operator()(const Float64& x, JsonbWriter* writer) const { writer->writeDouble(x); } void operator()(const String& x, JsonbWriter* writer) const { diff --git a/be/src/vec/data_types/data_type_number_base.cpp b/be/src/vec/data_types/data_type_number_base.cpp index 662b8e18734..29032915999 100644 --- a/be/src/vec/data_types/data_type_number_base.cpp +++ b/be/src/vec/data_types/data_type_number_base.cpp @@ -147,7 +147,8 @@ std::string DataTypeNumberBase<T>::to_string(const IColumn& column, size_t row_n ColumnPtr ptr = result.first; row_num = result.second; - if constexpr (std::is_same<T, __int128_t>::value || std::is_same<T, UInt128>::value) { + if constexpr (std::is_same<T, int128_t>::value || std::is_same<T, uint128_t>::value || + std::is_same<T, UInt128>::value) { return int128_to_string(assert_cast<const ColumnVector<T>&>(*ptr).get_element(row_num)); } else if constexpr (std::is_integral<T>::value) { return std::to_string(assert_cast<const ColumnVector<T>&>(*ptr).get_element(row_num)); @@ -264,7 +265,7 @@ bool DataTypeNumberBase<T>::is_value_represented_by_unsigned_integer() const { /// Explicit template instantiations - to avoid code bloat in headers. template class DataTypeNumberBase<UInt8>; template class DataTypeNumberBase<UInt16>; -template class DataTypeNumberBase<UInt32>; +template class DataTypeNumberBase<UInt32>; // IPv4 template class DataTypeNumberBase<UInt64>; template class DataTypeNumberBase<UInt128>; template class DataTypeNumberBase<Int8>; @@ -274,5 +275,6 @@ template class DataTypeNumberBase<Int64>; template class DataTypeNumberBase<Int128>; template class DataTypeNumberBase<Float32>; template class DataTypeNumberBase<Float64>; +template class DataTypeNumberBase<IPv6>; // IPv6 } // namespace doris::vectorized diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index e9dd7ac094a..5d34ea92ae7 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -64,8 +64,8 @@ using DORIS_NUMERIC_ARROW_BUILDER = arrow::UInt16Builder, Int16, arrow::Int16Builder, UInt32, arrow::UInt32Builder, Int32, arrow::Int32Builder, UInt64, arrow::UInt64Builder, Int64, arrow::Int64Builder, UInt128, arrow::FixedSizeBinaryBuilder, Int128, - arrow::FixedSizeBinaryBuilder, Float32, arrow::FloatBuilder, Float64, - arrow::DoubleBuilder, void, + arrow::FixedSizeBinaryBuilder, IPv6, arrow::FixedSizeBinaryBuilder, Float32, + arrow::FloatBuilder, Float64, arrow::DoubleBuilder, void, void // Add this line to represent the end of the TypeMap >; @@ -84,7 +84,7 @@ void DataTypeNumberSerDe<T>::write_column_to_arrow(const IColumn& column, const end - start, reinterpret_cast<const uint8_t*>(arrow_null_map_data)), column.get_name(), array_builder->type()->name()); - } else if constexpr (std::is_same_v<T, Int128>) { + } else if constexpr (std::is_same_v<T, Int128> || std::is_same_v<T, IPv6>) { auto& string_builder = assert_cast<arrow::StringBuilder&>(*array_builder); for (size_t i = start; i < end; ++i) { auto& data_value = col_data[i]; @@ -341,7 +341,7 @@ Status DataTypeNumberSerDe<T>::write_column_to_orc(const std::string& timezone, /// Explicit template instantiations - to avoid code bloat in headers. template class DataTypeNumberSerDe<UInt8>; template class DataTypeNumberSerDe<UInt16>; -template class DataTypeNumberSerDe<UInt32>; +template class DataTypeNumberSerDe<UInt32>; // IPv4 template class DataTypeNumberSerDe<UInt64>; template class DataTypeNumberSerDe<UInt128>; template class DataTypeNumberSerDe<Int8>; @@ -351,5 +351,6 @@ template class DataTypeNumberSerDe<Int64>; template class DataTypeNumberSerDe<Int128>; template class DataTypeNumberSerDe<Float32>; template class DataTypeNumberSerDe<Float64>; +template class DataTypeNumberSerDe<IPv6>; // IPv6 } // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index 806839c1e0f..cea3d451707 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -48,6 +48,8 @@ class Arena; // DataTypeTime => T:Float64 // DataTypeDate => T:Int64 // DataTypeDateTime => T:Int64 +// IPv4 => T:UInt32 +// IPv6 => T:uint128_t template <typename T> class DataTypeNumberSerDe : public DataTypeSerDe { static_assert(IsNumber<T>); diff --git a/be/src/vec/functions/functions_comparison.h b/be/src/vec/functions/functions_comparison.h index 6a0f3aa634d..09597336c25 100644 --- a/be/src/vec/functions/functions_comparison.h +++ b/be/src/vec/functions/functions_comparison.h @@ -343,6 +343,7 @@ private: execute_num_right_type<T0, Int32>(block, result, col_left, col_right_untyped) || execute_num_right_type<T0, Int64>(block, result, col_left, col_right_untyped) || execute_num_right_type<T0, Int128>(block, result, col_left, col_right_untyped) || + execute_num_right_type<T0, IPv6>(block, result, col_left, col_right_untyped) || execute_num_right_type<T0, Float32>(block, result, col_left, col_right_untyped) || execute_num_right_type<T0, Float64>(block, result, col_left, col_right_untyped)) return true; @@ -371,6 +372,8 @@ private: col_right_untyped) || execute_num_const_right_type<T0, Int128>(block, result, col_left_const, col_right_untyped) || + execute_num_const_right_type<T0, IPv6>(block, result, col_left_const, + col_right_untyped) || execute_num_const_right_type<T0, Float32>(block, result, col_left_const, col_right_untyped) || execute_num_const_right_type<T0, Float64>(block, result, col_left_const, @@ -587,6 +590,7 @@ public: col_right_untyped) || execute_num_left_type<Int128>(block, result, col_left_untyped, col_right_untyped) || + execute_num_left_type<IPv6>(block, result, col_left_untyped, col_right_untyped) || execute_num_left_type<Float32>(block, result, col_left_untyped, col_right_untyped) || execute_num_left_type<Float64>(block, result, col_left_untyped, diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h index 81425e7bc53..1f7a8ffdf18 100644 --- a/be/src/vec/io/io_helper.h +++ b/be/src/vec/io/io_helper.h @@ -45,7 +45,11 @@ static constexpr size_t DEFAULT_MAX_STRING_SIZE = 1073741824; // 1GB static constexpr size_t DEFAULT_MAX_JSON_SIZE = 1073741824; // 1GB static constexpr auto WRITE_HELPERS_MAX_INT_WIDTH = 40U; -inline std::string int128_to_string(__int128_t value) { +inline std::string int128_to_string(int128_t value) { + return fmt::format(FMT_COMPILE("{}"), value); +} + +inline std::string int128_to_string(uint128_t value) { return fmt::format(FMT_COMPILE("{}"), value); } diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index d2f1d431daa..3a130a638b8 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -158,10 +158,10 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co return std::make_unique<OlapColumnDataConvertorSimple<vectorized::Int128>>(); } case FieldType::OLAP_FIELD_TYPE_IPV4: { - return std::make_unique<OlapColumnDataConvertorSimple<vectorized::UInt32>>(); + return std::make_unique<OlapColumnDataConvertorSimple<vectorized::IPv4>>(); } case FieldType::OLAP_FIELD_TYPE_IPV6: { - return std::make_unique<OlapColumnDataConvertorSimple<vectorized::Int128>>(); + return std::make_unique<OlapColumnDataConvertorSimple<vectorized::IPv6>>(); } case FieldType::OLAP_FIELD_TYPE_FLOAT: { return std::make_unique<OlapColumnDataConvertorSimple<vectorized::Float32>>(); diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV4.md b/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV4.md index 07ab836e4eb..477371ecc2a 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV4.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV4.md @@ -34,7 +34,6 @@ IPV4 ### description -IPV4 IPv4 type, stored in the form of UInt32 in 4 bytes, used to represent IPv4 addresses. The range of values is ['0.0.0.0', '255.255.255.255']. diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV6.md b/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV6.md index dcf46813240..f877bbc2d40 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV6.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Types/IPV6.md @@ -34,8 +34,7 @@ IPV6 ### description -IPV6 -IPv6 type, stored in Int128 format in 16 bytes, used to represent IPv6 addresses. +IPv6 type, stored in UInt128 format in 16 bytes, used to represent IPv6 addresses. The range of values is ['::', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff']. `Inputs that exceed the value range or have invalid format will return NULL` diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV4.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV4.md index 2766a20580e..f44eb195e0d 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV4.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV4.md @@ -34,7 +34,6 @@ IPV4 ### description -IPV4 IPv4类型,以UInt32的形式存储在4个字节中,用于表示IPv4地址。 取值范围是 ['0.0.0.0', '255.255.255.255']。 diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV6.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV6.md index 678b404a672..591ad1eb125 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV6.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/IPV6.md @@ -34,8 +34,7 @@ IPV6 ### description -IPV6 -IPv6类型,以Int128的形式存储在16个字节中,用于表示IPv6地址。 +IPv6类型,以UInt128的形式存储在16个字节中,用于表示IPv6地址。 取值范围是 ['::', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff']。 `超出取值范围或者格式非法的输入将返回NULL` diff --git a/regression-test/data/datatype_p0/ip/test_ip_basic.out b/regression-test/data/datatype_p0/ip/test_ip_basic.out new file mode 100644 index 00000000000..0262959d83e --- /dev/null +++ b/regression-test/data/datatype_p0/ip/test_ip_basic.out @@ -0,0 +1,131 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql1 -- +\N +0.0.0.0 +127.0.0.1 +127.0.0.1 +127.0.0.1 +192.168.0.1 +192.168.0.1 +255.255.255.255 +255.255.255.255 +255.255.255.255 +255.255.255.255 + +-- !sql2 -- +\N +:: +::1 +::1 +2001:1b70:a1:610::b102:2 +2001:1b70:a1:610::b102:2 +2001:1b70:a1:610::b102:2 +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff + +-- !sql3 -- +255.255.255.255 +255.255.255.255 +255.255.255.255 +255.255.255.255 +192.168.0.1 +192.168.0.1 +127.0.0.1 +127.0.0.1 +127.0.0.1 +0.0.0.0 +\N + +-- !sql4 -- +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +2001:1b70:a1:610::b102:2 +2001:1b70:a1:610::b102:2 +2001:1b70:a1:610::b102:2 +::1 +::1 +:: +\N + +-- !sql5 -- +0.0.0.0 + +-- !sql6 -- +:: + +-- !sql7 -- +255.255.255.255 + +-- !sql8 -- +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff + +-- !sql9 -- +\N 0 +0.0.0.0 1 +127.0.0.1 3 +192.168.0.1 2 +255.255.255.255 4 + +-- !sql10 -- +\N 0 +:: 1 +::1 2 +2001:1b70:a1:610::b102:2 3 +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 + +-- !sql11 -- +0 \N \N +1 0.0.0.0 0.0.0.0 +3 127.0.0.1 127.0.0.1 +2 192.168.0.1 192.168.0.1 +4 255.255.255.255 255.255.255.255 + +-- !sql12 -- +0 \N \N +1 :: :: +2 ::1 ::1 +3 2001:1b70:a1:610::b102:2 2001:1b70:a1:610::b102:2 +4 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff + +-- !sql13 -- +1 0.0.0.0 :: 1 0.0.0.0 :: +2 192.168.0.1 ::1 2 192.168.0.1 ::1 +2 192.168.0.1 ::1 2 192.168.0.1 ::1 +2 192.168.0.1 ::1 2 192.168.0.1 ::1 +2 192.168.0.1 ::1 2 192.168.0.1 ::1 +3 127.0.0.1 2001:1b70:a1:610::b102:2 3 127.0.0.1 2001:1b70:a1:610::b102:2 +3 127.0.0.1 2001:1b70:a1:610::b102:2 3 127.0.0.1 2001:1b70:a1:610::b102:2 +3 127.0.0.1 2001:1b70:a1:610::b102:2 3 127.0.0.1 2001:1b70:a1:610::b102:2 +3 127.0.0.1 2001:1b70:a1:610::b102:2 3 127.0.0.1 2001:1b70:a1:610::b102:2 +3 127.0.0.1 2001:1b70:a1:610::b102:2 3 127.0.0.1 2001:1b70:a1:610::b102:2 +3 127.0.0.1 2001:1b70:a1:610::b102:2 3 127.0.0.1 2001:1b70:a1:610::b102:2 +3 127.0.0.1 2001:1b70:a1:610::b102:2 3 127.0.0.1 2001:1b70:a1:610::b102:2 +3 127.0.0.1 2001:1b70:a1:610::b102:2 3 127.0.0.1 2001:1b70:a1:610::b102:2 +3 127.0.0.1 2001:1b70:a1:610::b102:2 3 127.0.0.1 2001:1b70:a1:610::b102:2 +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 255.255.255.255 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff + +-- !sql14 -- +0.0.0.0 1 +192.168.0.1 4 +127.0.0.1 9 +255.255.255.255 16 + diff --git a/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy b/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy new file mode 100644 index 00000000000..586f3d15774 --- /dev/null +++ b/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy @@ -0,0 +1,106 @@ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_ip_basic") { + sql """ DROP TABLE IF EXISTS t0 """ + sql """ DROP TABLE IF EXISTS t1 """ + + sql """ SET enable_nereids_planner=true """ + sql """ SET enable_fallback_to_original_planner=false """ + + sql """ + CREATE TABLE `t0` ( + `id` int, + `ip_v4` ipv4, + `ip_v6` ipv6 + ) ENGINE=OLAP + DISTRIBUTED BY HASH(`id`) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_duplicate_without_keys_by_default" = "true" + ); + """ + + sql """ + CREATE TABLE `t1` ( + `id` int, + `ip_v4` ipv4, + `ip_v6` ipv6 + ) ENGINE=OLAP + DISTRIBUTED BY HASH(`id`) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_duplicate_without_keys_by_default" = "true" + ); + """ + + sql """ + insert into t0 values + (0, NULL, NULL), + (1, '0.0.0.0', '::'), + (2, '192.168.0.1', '::1'), + (2, '192.168.0.1', '::1'), + (3, '127.0.0.1', '2001:1b70:a1:610::b102:2'), + (3, '127.0.0.1', '2001:1b70:a1:610::b102:2'), + (3, '127.0.0.1', '2001:1b70:a1:610::b102:2'), + (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'), + (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'), + (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'), + (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'); + """ + + sql """ + insert into t1 values + (0, NULL, NULL), + (1, '0.0.0.0', '::'), + (2, '192.168.0.1', '::1'), + (2, '192.168.0.1', '::1'), + (3, '127.0.0.1', '2001:1b70:a1:610::b102:2'), + (3, '127.0.0.1', '2001:1b70:a1:610::b102:2'), + (3, '127.0.0.1', '2001:1b70:a1:610::b102:2'), + (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'), + (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'), + (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'), + (4, '255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'); + """ + + // order by + qt_sql1 "select ip_v4 from t0 order by ip_v4" + qt_sql2 "select ip_v6 from t0 order by ip_v6" + qt_sql3 "select ip_v4 from t0 order by ip_v4 desc" + qt_sql4 "select ip_v6 from t0 order by ip_v6 desc" + qt_sql5 "select ip_v4 from t0 order by ip_v4 limit 1 offset 1" + qt_sql6 "select ip_v6 from t0 order by ip_v6 limit 1 offset 1" + qt_sql7 "select ip_v4 from t0 order by ip_v4 desc limit 1 offset 1" + qt_sql8 "select ip_v6 from t0 order by ip_v6 desc limit 1 offset 1" + + // group by and agg + qt_sql9 "select ip_v4, count(ip_v4) as cnt from t0 group by ip_v4 order by ip_v4" + qt_sql10 "select ip_v6, count(ip_v6) as cnt from t0 group by ip_v6 order by ip_v6" + qt_sql11 "select count(ip_v4) as cnt, min(ip_v4), max(ip_v4) from t0 group by ip_v4 order by min(ip_v4)" + qt_sql12 "select count(ip_v6) as cnt, min(ip_v6), max(ip_v6) from t0 group by ip_v6 order by min(ip_v6)" + + // join + qt_sql13 "select t0.id, t0.ip_v4, t0.ip_v6, t1.id, t1.ip_v4, t1.ip_v6 from t0 join t1 on t0.ip_v4=t1.ip_v4 and t0.ip_v6=t1.ip_v6 order by t0.id, t1.id" + + // join and group by + qt_sql14 "select t0.ip_v4, count(*) as cnt from t0 join t1 on t0.ip_v4=t1.ip_v4 and t0.ip_v6=t1.ip_v6 group by t0.ip_v4 order by cnt" + + sql "DROP TABLE t0" + sql "DROP TABLE t1" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org