This is an automated email from the ASF dual-hosted git repository.

starocean999 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 0c9c32c52d7 [Feature](datatype) update be ut codes and fix bugs for IPv4/v6 (#28670)
0c9c32c52d7 is described below

commit 0c9c32c52d7605f1bd23b9f24eadecd809813714
Author: yangshijie <sjyang2...@zju.edu.cn>
AuthorDate: Wed Dec 20 14:38:46 2023 +0800

    [Feature](datatype) update be ut codes and fix bugs for IPv4/v6 (#28670)
---
 be/src/olap/rowset/segment_v2/encoding_info.cpp |  2 +
 be/src/olap/types.h                             |  4 +-
 be/src/vec/data_types/data_type_ipv4.cpp        | 43 ++++----------------
 be/src/vec/data_types/data_type_ipv4.h          |  3 --
 be/src/vec/data_types/data_type_ipv6.cpp        | 23 ++++-------
 be/src/vec/data_types/data_type_ipv6.h          |  5 +--
 be/test/vec/data_types/from_string_test.cpp     | 53 +++++++++++++++++++++++++
 7 files changed, 74 insertions(+), 59 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp 
b/be/src/olap/rowset/segment_v2/encoding_info.cpp
index ecf127e27a1..f10aba5cd3b 100644
--- a/be/src/olap/rowset/segment_v2/encoding_info.cpp
+++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp
@@ -330,6 +330,8 @@ EncodingInfoResolver::EncodingInfoResolver() {
     _add_map<FieldType::OLAP_FIELD_TYPE_DECIMAL256, BIT_SHUFFLE, true>();
 
     _add_map<FieldType::OLAP_FIELD_TYPE_IPV4, BIT_SHUFFLE>();
+    _add_map<FieldType::OLAP_FIELD_TYPE_IPV4, PLAIN_ENCODING>();
+    _add_map<FieldType::OLAP_FIELD_TYPE_IPV4, BIT_SHUFFLE, true>();
 
     _add_map<FieldType::OLAP_FIELD_TYPE_IPV6, BIT_SHUFFLE>();
     _add_map<FieldType::OLAP_FIELD_TYPE_IPV6, PLAIN_ENCODING>();
diff --git a/be/src/olap/types.h b/be/src/olap/types.h
index 0701aca675a..0b2be2c38bb 100644
--- a/be/src/olap/types.h
+++ b/be/src/olap/types.h
@@ -791,7 +791,7 @@ struct BaseFieldTypeTraits : public 
CppTypeTraits<field_type> {
         if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT) {
             return get_int128_from_unalign(address);
         } else if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_IPV6) {
-            return get_uint128_from_unalign(address);
+            return get_int128_from_unalign(address);
         }
         return *reinterpret_cast<const CppType*>(address);
     }
@@ -1016,7 +1016,7 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_IPV6>
             return Status::Error<ErrorCode::INVALID_ARGUMENT>(
                     "FieldTypeTraits<OLAP_FIELD_TYPE_IPV6>::from_string meet 
PARSE_FAILURE");
         }
-        *reinterpret_cast<int128_t*>(buf) = value;
+        memcpy(buf, &value, sizeof(int128_t));
         return Status::OK();
     }
 
diff --git a/be/src/vec/data_types/data_type_ipv4.cpp 
b/be/src/vec/data_types/data_type_ipv4.cpp
index 90a88aa6fc7..963a1adf82e 100644
--- a/be/src/vec/data_types/data_type_ipv4.cpp
+++ b/be/src/vec/data_types/data_type_ipv4.cpp
@@ -37,8 +37,9 @@ std::string DataTypeIPv4::to_string(const IColumn& column, 
size_t row_num) const
     auto result = check_column_const_set_readability(column, row_num);
     ColumnPtr ptr = result.first;
     row_num = result.second;
-    IPv4 value = assert_cast<const ColumnIPv4&>(*ptr).get_element(row_num);
-    return convert_ipv4_to_string(value);
+    IPv4 ipv4_val = assert_cast<const ColumnIPv4&>(*ptr).get_element(row_num);
+    auto value = IPv4Value(ipv4_val);
+    return value.to_string();
 }
 
 void DataTypeIPv4::to_string(const IColumn& column, size_t row_num, 
BufferWritable& ostr) const {
@@ -48,43 +49,15 @@ void DataTypeIPv4::to_string(const IColumn& column, size_t 
row_num, BufferWritab
 
 Status DataTypeIPv4::from_string(ReadBuffer& rb, IColumn* column) const {
     auto* column_data = static_cast<ColumnIPv4*>(column);
-    StringParser::ParseResult result;
-    IPv4 val = StringParser::string_to_unsigned_int<IPv4>(rb.position(), 
rb.count(), &result);
+    IPv4 val = 0;
+    if (!read_ipv4_text_impl<IPv4>(val, rb)) {
+        return Status::InvalidArgument("parse ipv4 fail, string: '{}'",
+                                       std::string(rb.position(), 
rb.count()).c_str());
+    }
     column_data->insert_value(val);
     return Status::OK();
 }
 
-std::string DataTypeIPv4::convert_ipv4_to_string(IPv4 ipv4) {
-    std::stringstream ss;
-    ss << ((ipv4 >> 24) & 0xFF) << '.' << ((ipv4 >> 16) & 0xFF) << '.' << 
((ipv4 >> 8) & 0xFF)
-       << '.' << (ipv4 & 0xFF);
-    return ss.str();
-}
-
-bool DataTypeIPv4::convert_string_to_ipv4(IPv4& x, std::string ipv4) {
-    const static int IPV4_PARTS_NUM = 4;
-    IPv4 parts[IPV4_PARTS_NUM];
-    int part_index = 0;
-    std::stringstream ss(ipv4);
-    std::string part;
-    StringParser::ParseResult result;
-
-    while (std::getline(ss, part, '.')) {
-        IPv4 val = StringParser::string_to_unsigned_int<IPv4>(part.data(), 
part.size(), &result);
-        if (UNLIKELY(result != StringParser::PARSE_SUCCESS) || val > 255) {
-            return false;
-        }
-        parts[part_index++] = val;
-    }
-
-    if (part_index != 4) {
-        return false;
-    }
-
-    x = (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3];
-    return true;
-}
-
 MutableColumnPtr DataTypeIPv4::create_column() const {
     return ColumnIPv4::create();
 }
diff --git a/be/src/vec/data_types/data_type_ipv4.h 
b/be/src/vec/data_types/data_type_ipv4.h
index d2bd3e487c9..89ac8e18447 100644
--- a/be/src/vec/data_types/data_type_ipv4.h
+++ b/be/src/vec/data_types/data_type_ipv4.h
@@ -57,9 +57,6 @@ public:
     void to_string(const IColumn& column, size_t row_num, BufferWritable& 
ostr) const override;
     Status from_string(ReadBuffer& rb, IColumn* column) const override;
 
-    static std::string convert_ipv4_to_string(IPv4 ipv4);
-    static bool convert_string_to_ipv4(IPv4& x, std::string ipv4);
-
     Field get_field(const TExprNode& node) const override { return 
(IPv4)node.ipv4_literal.value; }
 
     MutableColumnPtr create_column() const override;
diff --git a/be/src/vec/data_types/data_type_ipv6.cpp 
b/be/src/vec/data_types/data_type_ipv6.cpp
index d54a0f48464..78b8e8e07d7 100755
--- a/be/src/vec/data_types/data_type_ipv6.cpp
+++ b/be/src/vec/data_types/data_type_ipv6.cpp
@@ -37,8 +37,9 @@ std::string DataTypeIPv6::to_string(const IColumn& column, 
size_t row_num) const
     auto result = check_column_const_set_readability(column, row_num);
     ColumnPtr ptr = result.first;
     row_num = result.second;
-    IPv6 value = assert_cast<const ColumnIPv6&>(*ptr).get_element(row_num);
-    return convert_ipv6_to_string(value);
+    IPv6 ipv6_val = assert_cast<const ColumnIPv6&>(*ptr).get_element(row_num);
+    auto value = IPv6Value(ipv6_val);
+    return value.to_string();
 }
 
 void DataTypeIPv6::to_string(const IColumn& column, size_t row_num, 
BufferWritable& ostr) const {
@@ -48,23 +49,15 @@ void DataTypeIPv6::to_string(const IColumn& column, size_t 
row_num, BufferWritab
 
 Status DataTypeIPv6::from_string(ReadBuffer& rb, IColumn* column) const {
     auto* column_data = static_cast<ColumnIPv6*>(column);
-    IPv6 value;
-    if (!convert_string_to_ipv6(value, rb.to_string())) {
-        throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
-                               "Invalid value: {} for type IPv6", 
rb.to_string());
+    IPv6 val = 0;
+    if (!read_ipv6_text_impl<IPv6>(val, rb)) {
+        return Status::InvalidArgument("parse ipv6 fail, string: '{}'",
+                                       std::string(rb.position(), 
rb.count()).c_str());
     }
-    column_data->insert_value(value);
+    column_data->insert_value(val);
     return Status::OK();
 }
 
-std::string DataTypeIPv6::convert_ipv6_to_string(IPv6 ipv6) {
-    return IPv6Value::to_string(ipv6);
-}
-
-bool DataTypeIPv6::convert_string_to_ipv6(IPv6& x, std::string ipv6) {
-    return IPv6Value::from_string(x, ipv6);
-}
-
 MutableColumnPtr DataTypeIPv6::create_column() const {
     return ColumnIPv6::create();
 }
diff --git a/be/src/vec/data_types/data_type_ipv6.h 
b/be/src/vec/data_types/data_type_ipv6.h
index f849dab98ac..87236c5592f 100755
--- a/be/src/vec/data_types/data_type_ipv6.h
+++ b/be/src/vec/data_types/data_type_ipv6.h
@@ -56,12 +56,9 @@ public:
     void to_string(const IColumn& column, size_t row_num, BufferWritable& 
ostr) const override;
     Status from_string(ReadBuffer& rb, IColumn* column) const override;
 
-    static std::string convert_ipv6_to_string(IPv6 ipv6);
-    static bool convert_string_to_ipv6(IPv6& x, std::string ipv6);
-
     Field get_field(const TExprNode& node) const override {
         IPv6 value;
-        if (!convert_string_to_ipv6(value, node.ipv6_literal.value)) {
+        if (!IPv6Value::from_string(value, node.ipv6_literal.value)) {
             throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
                                    "Invalid value: {} for type IPv6", 
node.ipv6_literal.value);
         }
diff --git a/be/test/vec/data_types/from_string_test.cpp 
b/be/test/vec/data_types/from_string_test.cpp
index bbfb7da92a4..05015fcb9ca 100644
--- a/be/test/vec/data_types/from_string_test.cpp
+++ b/be/test/vec/data_types/from_string_test.cpp
@@ -283,6 +283,59 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
         }
     }
 
+    // ipv4 and ipv6 type
+    {
+        typedef std::pair<FieldType, string> FieldType_RandStr;
+        std::vector<FieldType_RandStr> ip_scala_field_types = {
+                FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, "0.0.0.0"), 
        // min case
+                FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, 
"127.0.0.1"),       // rand case
+                FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, 
"255.255.255.255"), // max case
+                FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6, "::"),      
        // min case
+                FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6,
+                                  "2405:9800:9800:66::2"), // rand case
+                FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6,
+                                  "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"), 
// max case
+        };
+        std::vector<FieldType_RandStr> error_scala_field_types = {
+                FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, 
"255.255.255.256"), // error case
+                FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, 
"255.255.255."),    // error case
+                FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6,
+                                  "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffg"), 
// error case
+                FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6,
+                                  "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffff"), 
// error case
+        };
+        for (auto pair : ip_scala_field_types) {
+            auto type = pair.first;
+            DataTypePtr data_type_ptr = 
DataTypeFactory::instance().create_data_type(type, 0, 0);
+            std::cout << "this type is " << data_type_ptr->get_name() << ": "
+                      << fmt::format("{}", type) << std::endl;
+            std::unique_ptr<WrapperField> 
rand_wf(WrapperField::create_by_type(type));
+            Status st = rand_wf->from_string(pair.second, 0, 0);
+            string rand_ip = rand_wf->to_string();
+            ReadBuffer rand_rb(rand_ip.data(), rand_ip.size());
+            auto col = data_type_ptr->create_column();
+            st = data_type_ptr->from_string(rand_rb, col);
+            EXPECT_EQ(st.ok(), true);
+            string rand_s_d = data_type_ptr->to_string(*col, 0);
+            rtrim(rand_ip);
+            std::cout << "rand(" << rand_ip << ") with data_type_str:" << 
rand_s_d << std::endl;
+            EXPECT_EQ(rand_ip, rand_s_d);
+        }
+        for (auto pair : error_scala_field_types) {
+            auto type = pair.first;
+            DataTypePtr data_type_ptr = 
DataTypeFactory::instance().create_data_type(type, 0, 0);
+            std::cout << "this type is " << data_type_ptr->get_name() << ": "
+                      << fmt::format("{}", type) << std::endl;
+            std::unique_ptr<WrapperField> 
rand_wf(WrapperField::create_by_type(type));
+            Status st = rand_wf->from_string(pair.second, 0, 0);
+            EXPECT_EQ(st.ok(), false);
+            ReadBuffer rand_rb(pair.second.data(), pair.second.size());
+            auto col = data_type_ptr->create_column();
+            st = data_type_ptr->from_string(rand_rb, col);
+            EXPECT_EQ(st.ok(), false);
+        }
+    }
+
     // null data type
     {
         DataTypePtr data_type_ptr = 
DataTypeFactory::instance().create_data_type(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to