This is an automated email from the ASF dual-hosted git repository.

starocean999 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 0c9c32c52d7 [Feature](datatype) update be ut codes and fix bugs for IPv4/v6 (#28670) 0c9c32c52d7 is described below commit 0c9c32c52d7605f1bd23b9f24eadecd809813714 Author: yangshijie <sjyang2...@zju.edu.cn> AuthorDate: Wed Dec 20 14:38:46 2023 +0800 [Feature](datatype) update be ut codes and fix bugs for IPv4/v6 (#28670) --- be/src/olap/rowset/segment_v2/encoding_info.cpp | 2 + be/src/olap/types.h | 4 +- be/src/vec/data_types/data_type_ipv4.cpp | 43 ++++---------------- be/src/vec/data_types/data_type_ipv4.h | 3 -- be/src/vec/data_types/data_type_ipv6.cpp | 23 ++++------- be/src/vec/data_types/data_type_ipv6.h | 5 +-- be/test/vec/data_types/from_string_test.cpp | 53 +++++++++++++++++++++++++ 7 files changed, 74 insertions(+), 59 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp index ecf127e27a1..f10aba5cd3b 100644 --- a/be/src/olap/rowset/segment_v2/encoding_info.cpp +++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp @@ -330,6 +330,8 @@ EncodingInfoResolver::EncodingInfoResolver() { _add_map<FieldType::OLAP_FIELD_TYPE_DECIMAL256, BIT_SHUFFLE, true>(); _add_map<FieldType::OLAP_FIELD_TYPE_IPV4, BIT_SHUFFLE>(); + _add_map<FieldType::OLAP_FIELD_TYPE_IPV4, PLAIN_ENCODING>(); + _add_map<FieldType::OLAP_FIELD_TYPE_IPV4, BIT_SHUFFLE, true>(); _add_map<FieldType::OLAP_FIELD_TYPE_IPV6, BIT_SHUFFLE>(); _add_map<FieldType::OLAP_FIELD_TYPE_IPV6, PLAIN_ENCODING>(); diff --git a/be/src/olap/types.h b/be/src/olap/types.h index 0701aca675a..0b2be2c38bb 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -791,7 +791,7 @@ struct BaseFieldTypeTraits : public CppTypeTraits<field_type> { if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT) { return get_int128_from_unalign(address); } else if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_IPV6) { - return get_uint128_from_unalign(address); + return 
get_int128_from_unalign(address); } return *reinterpret_cast<const CppType*>(address); } @@ -1016,7 +1016,7 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_IPV6> return Status::Error<ErrorCode::INVALID_ARGUMENT>( "FieldTypeTraits<OLAP_FIELD_TYPE_IPV6>::from_string meet PARSE_FAILURE"); } - *reinterpret_cast<int128_t*>(buf) = value; + memcpy(buf, &value, sizeof(int128_t)); return Status::OK(); } diff --git a/be/src/vec/data_types/data_type_ipv4.cpp b/be/src/vec/data_types/data_type_ipv4.cpp index 90a88aa6fc7..963a1adf82e 100644 --- a/be/src/vec/data_types/data_type_ipv4.cpp +++ b/be/src/vec/data_types/data_type_ipv4.cpp @@ -37,8 +37,9 @@ std::string DataTypeIPv4::to_string(const IColumn& column, size_t row_num) const auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; - IPv4 value = assert_cast<const ColumnIPv4&>(*ptr).get_element(row_num); - return convert_ipv4_to_string(value); + IPv4 ipv4_val = assert_cast<const ColumnIPv4&>(*ptr).get_element(row_num); + auto value = IPv4Value(ipv4_val); + return value.to_string(); } void DataTypeIPv4::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const { @@ -48,43 +49,15 @@ void DataTypeIPv4::to_string(const IColumn& column, size_t row_num, BufferWritab Status DataTypeIPv4::from_string(ReadBuffer& rb, IColumn* column) const { auto* column_data = static_cast<ColumnIPv4*>(column); - StringParser::ParseResult result; - IPv4 val = StringParser::string_to_unsigned_int<IPv4>(rb.position(), rb.count(), &result); + IPv4 val = 0; + if (!read_ipv4_text_impl<IPv4>(val, rb)) { + return Status::InvalidArgument("parse ipv4 fail, string: '{}'", + std::string(rb.position(), rb.count()).c_str()); + } column_data->insert_value(val); return Status::OK(); } -std::string DataTypeIPv4::convert_ipv4_to_string(IPv4 ipv4) { - std::stringstream ss; - ss << ((ipv4 >> 24) & 0xFF) << '.' << ((ipv4 >> 16) & 0xFF) << '.' << ((ipv4 >> 8) & 0xFF) - << '.' 
<< (ipv4 & 0xFF); - return ss.str(); -} - -bool DataTypeIPv4::convert_string_to_ipv4(IPv4& x, std::string ipv4) { - const static int IPV4_PARTS_NUM = 4; - IPv4 parts[IPV4_PARTS_NUM]; - int part_index = 0; - std::stringstream ss(ipv4); - std::string part; - StringParser::ParseResult result; - - while (std::getline(ss, part, '.')) { - IPv4 val = StringParser::string_to_unsigned_int<IPv4>(part.data(), part.size(), &result); - if (UNLIKELY(result != StringParser::PARSE_SUCCESS) || val > 255) { - return false; - } - parts[part_index++] = val; - } - - if (part_index != 4) { - return false; - } - - x = (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]; - return true; -} - MutableColumnPtr DataTypeIPv4::create_column() const { return ColumnIPv4::create(); } diff --git a/be/src/vec/data_types/data_type_ipv4.h b/be/src/vec/data_types/data_type_ipv4.h index d2bd3e487c9..89ac8e18447 100644 --- a/be/src/vec/data_types/data_type_ipv4.h +++ b/be/src/vec/data_types/data_type_ipv4.h @@ -57,9 +57,6 @@ public: void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; Status from_string(ReadBuffer& rb, IColumn* column) const override; - static std::string convert_ipv4_to_string(IPv4 ipv4); - static bool convert_string_to_ipv4(IPv4& x, std::string ipv4); - Field get_field(const TExprNode& node) const override { return (IPv4)node.ipv4_literal.value; } MutableColumnPtr create_column() const override; diff --git a/be/src/vec/data_types/data_type_ipv6.cpp b/be/src/vec/data_types/data_type_ipv6.cpp index d54a0f48464..78b8e8e07d7 100755 --- a/be/src/vec/data_types/data_type_ipv6.cpp +++ b/be/src/vec/data_types/data_type_ipv6.cpp @@ -37,8 +37,9 @@ std::string DataTypeIPv6::to_string(const IColumn& column, size_t row_num) const auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; - IPv6 value = assert_cast<const ColumnIPv6&>(*ptr).get_element(row_num); - return 
convert_ipv6_to_string(value); + IPv6 ipv6_val = assert_cast<const ColumnIPv6&>(*ptr).get_element(row_num); + auto value = IPv6Value(ipv6_val); + return value.to_string(); } void DataTypeIPv6::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const { @@ -48,23 +49,15 @@ void DataTypeIPv6::to_string(const IColumn& column, size_t row_num, BufferWritab Status DataTypeIPv6::from_string(ReadBuffer& rb, IColumn* column) const { auto* column_data = static_cast<ColumnIPv6*>(column); - IPv6 value; - if (!convert_string_to_ipv6(value, rb.to_string())) { - throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, - "Invalid value: {} for type IPv6", rb.to_string()); + IPv6 val = 0; + if (!read_ipv6_text_impl<IPv6>(val, rb)) { + return Status::InvalidArgument("parse ipv6 fail, string: '{}'", + std::string(rb.position(), rb.count()).c_str()); } - column_data->insert_value(value); + column_data->insert_value(val); return Status::OK(); } -std::string DataTypeIPv6::convert_ipv6_to_string(IPv6 ipv6) { - return IPv6Value::to_string(ipv6); -} - -bool DataTypeIPv6::convert_string_to_ipv6(IPv6& x, std::string ipv6) { - return IPv6Value::from_string(x, ipv6); -} - MutableColumnPtr DataTypeIPv6::create_column() const { return ColumnIPv6::create(); } diff --git a/be/src/vec/data_types/data_type_ipv6.h b/be/src/vec/data_types/data_type_ipv6.h index f849dab98ac..87236c5592f 100755 --- a/be/src/vec/data_types/data_type_ipv6.h +++ b/be/src/vec/data_types/data_type_ipv6.h @@ -56,12 +56,9 @@ public: void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; Status from_string(ReadBuffer& rb, IColumn* column) const override; - static std::string convert_ipv6_to_string(IPv6 ipv6); - static bool convert_string_to_ipv6(IPv6& x, std::string ipv6); - Field get_field(const TExprNode& node) const override { IPv6 value; - if (!convert_string_to_ipv6(value, node.ipv6_literal.value)) { + if (!IPv6Value::from_string(value, node.ipv6_literal.value)) { 
throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, "Invalid value: {} for type IPv6", node.ipv6_literal.value); } diff --git a/be/test/vec/data_types/from_string_test.cpp b/be/test/vec/data_types/from_string_test.cpp index bbfb7da92a4..05015fcb9ca 100644 --- a/be/test/vec/data_types/from_string_test.cpp +++ b/be/test/vec/data_types/from_string_test.cpp @@ -283,6 +283,59 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) { } } + // ipv4 and ipv6 type + { + typedef std::pair<FieldType, string> FieldType_RandStr; + std::vector<FieldType_RandStr> ip_scala_field_types = { + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, "0.0.0.0"), // min case + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, "127.0.0.1"), // rand case + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, "255.255.255.255"), // max case + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6, "::"), // min case + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6, + "2405:9800:9800:66::2"), // rand case + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6, + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"), // max case + }; + std::vector<FieldType_RandStr> error_scala_field_types = { + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, "255.255.255.256"), // error case + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, "255.255.255."), // error case + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6, + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffg"), // error case + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6, + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffff"), // error case + }; + for (auto pair : ip_scala_field_types) { + auto type = pair.first; + DataTypePtr data_type_ptr = DataTypeFactory::instance().create_data_type(type, 0, 0); + std::cout << "this type is " << data_type_ptr->get_name() << ": " + << fmt::format("{}", type) << std::endl; + std::unique_ptr<WrapperField> rand_wf(WrapperField::create_by_type(type)); + Status st = rand_wf->from_string(pair.second, 0, 0); + string rand_ip = 
rand_wf->to_string(); + ReadBuffer rand_rb(rand_ip.data(), rand_ip.size()); + auto col = data_type_ptr->create_column(); + st = data_type_ptr->from_string(rand_rb, col); + EXPECT_EQ(st.ok(), true); + string rand_s_d = data_type_ptr->to_string(*col, 0); + rtrim(rand_ip); + std::cout << "rand(" << rand_ip << ") with data_type_str:" << rand_s_d << std::endl; + EXPECT_EQ(rand_ip, rand_s_d); + } + for (auto pair : error_scala_field_types) { + auto type = pair.first; + DataTypePtr data_type_ptr = DataTypeFactory::instance().create_data_type(type, 0, 0); + std::cout << "this type is " << data_type_ptr->get_name() << ": " + << fmt::format("{}", type) << std::endl; + std::unique_ptr<WrapperField> rand_wf(WrapperField::create_by_type(type)); + Status st = rand_wf->from_string(pair.second, 0, 0); + EXPECT_EQ(st.ok(), false); + ReadBuffer rand_rb(pair.second.data(), pair.second.size()); + auto col = data_type_ptr->create_column(); + st = data_type_ptr->from_string(rand_rb, col); + EXPECT_EQ(st.ok(), false); + } + } + // null data type { DataTypePtr data_type_ptr = DataTypeFactory::instance().create_data_type( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org