This is an automated email from the ASF dual-hosted git repository. zhangstar333 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new c2a23e13f65 [BE](ut) add datatype serde be ut test case (#48431) c2a23e13f65 is described below commit c2a23e13f65196968a1fd94eb24902d271e2122e Author: zhangstar333 <zhangs...@selectdb.com> AuthorDate: Mon Mar 3 15:35:26 2025 +0800 [BE](ut) add datatype serde be ut test case (#48431) ### What problem does this PR solve? Problem Summary: add some serde UT test cases for agg_state/bitmap/hll/quantilestate/fixed_length_object --- .../vec/data_types/serde/data_type_hll_serde.cpp | 2 +- .../serde/data_type_quantilestate_serde.h | 6 +- .../serde/data_type_serde_agg_state_test.cpp | 439 +++++++++++++++++++++ .../serde/data_type_serde_bitmap_test.cpp | 189 +++++++++ .../data_type_serde_fixed_length_object_test.cpp | 227 +++++++++++ .../data_types/serde/data_type_serde_hll_test.cpp | 199 ++++++++++ .../serde/data_type_serde_quantile_state_test.cpp | 179 +++++++++ 7 files changed, 1238 insertions(+), 3 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.cpp b/be/src/vec/data_types/serde/data_type_hll_serde.cpp index d7c9e7285cd..09b65597dbc 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_hll_serde.cpp @@ -121,7 +121,7 @@ void DataTypeHLLSerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWrite void DataTypeHLLSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const { auto& col = reinterpret_cast<ColumnHLL&>(column); auto blob = static_cast<const JsonbBlobVal*>(arg); - HyperLogLog hyper_log_log(Slice(blob->getBlob())); + HyperLogLog hyper_log_log(Slice(blob->getBlob(), blob->getBlobLen())); col.insert_value(hyper_log_log); } diff --git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h index b912257fb72..8920b822d5f 100644 --- a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h +++ 
b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h @@ -98,7 +98,8 @@ public: auto& col = reinterpret_cast<const ColumnQuantileState&>(column); auto& val = const_cast<QuantileState&>(col.get_element(row_num)); size_t actual_size = val.get_serialized_size(); - auto ptr = mem_pool->alloc(actual_size); + auto* ptr = mem_pool->alloc(actual_size); + val.serialize((uint8_t*)ptr); result.writeKey(cast_set<JsonbKeyValue::keyid_type>(col_id)); result.writeStartBinary(); result.writeBinary(reinterpret_cast<const char*>(ptr), actual_size); @@ -109,9 +110,10 @@ public: auto& col = reinterpret_cast<ColumnQuantileState&>(column); auto blob = static_cast<const JsonbBlobVal*>(arg); QuantileState val; - val.deserialize(Slice(blob->getBlob())); + val.deserialize(Slice(blob->getBlob(), blob->getBlobLen())); col.insert_value(val); } + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override { diff --git a/be/test/vec/data_types/serde/data_type_serde_agg_state_test.cpp b/be/test/vec/data_types/serde/data_type_serde_agg_state_test.cpp new file mode 100644 index 00000000000..12d2be8ab02 --- /dev/null +++ b/be/test/vec/data_types/serde/data_type_serde_agg_state_test.cpp @@ -0,0 +1,439 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <arrow/array/builder_base.h> +#include <gtest/gtest.h> + +#include "util/slice.h" +#include "vec/data_types/data_type_agg_state.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/serde/data_type_string_serde.h" + +namespace doris::vectorized { +class AggStateSerdeTest : public ::testing::Test { +public: + void SetUp() override {} + DataTypePtr sub_type = std::make_shared<DataTypeInt32>(); + DataTypes sub_types = {sub_type}; + // DataTypeAggState---> column_fixed_length_object ----> DataTypeStringSerDeBase<ColumnFixedLengthObject> + DataTypePtr datatype_agg_state_count = std::make_shared<DataTypeAggState>( + sub_types, false, "count", BeExecVersionManager::get_newest_version()); + DataTypeSerDeSPtr datatype_agg_state_serde_count = datatype_agg_state_count->get_serde(); + + // DataTypeAggState---> column_string ----> DataTypeStringSerDeBase<ColumnString> + DataTypePtr datatype_agg_state_hll_union = std::make_shared<DataTypeAggState>( + sub_types, false, "hll_union", BeExecVersionManager::get_newest_version()); + DataTypeSerDeSPtr datatype_agg_state_serde_hll_union = + datatype_agg_state_hll_union->get_serde(); +}; + +TEST_F(AggStateSerdeTest, writeColumnToMysql) { + auto column_fixed_length = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed_length->insert_default(); + ASSERT_EQ(column_fixed_length->size(), 1); + MysqlRowBuffer<false> mysql_rb; + DataTypeSerDe::FormatOptions options; + options.nested_string_wrapper = "\""; + options.wrapper_len = 1; + options.map_key_delim = ':'; + 
options.null_format = "null"; + options.null_len = 4; + datatype_agg_state_serde_count->set_return_object_as_string(true); + auto st = datatype_agg_state_serde_count->write_column_to_mysql(*column_fixed_length, mysql_rb, + 0, false, options); + EXPECT_TRUE(st.ok()); + ASSERT_EQ(mysql_rb.length(), 9); + + column_fixed_length->resize(2); + *((int64_t*)&(column_fixed_length->get_data()[column_fixed_length->item_size()])) = 22; + datatype_agg_state_serde_count->set_return_object_as_string(true); + st = datatype_agg_state_serde_count->write_column_to_mysql(*column_fixed_length, mysql_rb, 1, + false, options); + EXPECT_TRUE(st.ok()); + ASSERT_EQ(mysql_rb.length(), 18); + std::cout << "test write_column_to_mysql success" << std::endl; +} + +TEST_F(AggStateSerdeTest, writeColumnToMysql2) { + auto column_string = ColumnString::create(); + column_string->insert_default(); + ASSERT_EQ(column_string->size(), 1); + MysqlRowBuffer<false> mysql_rb; + DataTypeSerDe::FormatOptions options; + options.nested_string_wrapper = "\""; + options.wrapper_len = 1; + options.map_key_delim = ':'; + options.null_format = "null"; + options.null_len = 4; + datatype_agg_state_serde_hll_union->set_return_object_as_string(true); + auto st = datatype_agg_state_serde_hll_union->write_column_to_mysql(*column_string, mysql_rb, 0, + false, options); + EXPECT_TRUE(st.ok()); + ASSERT_EQ(mysql_rb.length(), 1); + + std::string str = "123"; + column_string->insert_data(str.c_str(), str.size()); + datatype_agg_state_serde_hll_union->set_return_object_as_string(true); + st = datatype_agg_state_serde_hll_union->write_column_to_mysql(*column_string, mysql_rb, 1, + false, options); + EXPECT_TRUE(st.ok()); + ASSERT_EQ(mysql_rb.length(), 5); + std::cout << "test write_column_to_mysql2 success" << std::endl; +} + +TEST_F(AggStateSerdeTest, writeOneCellToJsonb) { + auto column_fixed_length = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed_length->resize(1); + 
*((int64_t*)column_fixed_length->get_data().data()) = 123; + ASSERT_EQ(column_fixed_length->size(), 1); + JsonbWriterT<JsonbOutStream> jsonb_writer; + Arena pool; + jsonb_writer.writeStartObject(); + datatype_agg_state_serde_count->write_one_cell_to_jsonb(*column_fixed_length, jsonb_writer, + &pool, 0, 0); + jsonb_writer.writeEndObject(); + + auto jsonb_column = ColumnString::create(); + jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(), + jsonb_writer.getOutput()->getSize()); + StringRef jsonb_data = jsonb_column->get_data_at(0); + auto* pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size); + JsonbDocument& doc = *pdoc; + for (auto it = doc->begin(); it != doc->end(); ++it) { + datatype_agg_state_serde_count->read_one_cell_from_jsonb(*column_fixed_length, it->value()); + } + EXPECT_TRUE(column_fixed_length->size() == 2); + EXPECT_EQ(*reinterpret_cast<const int64_t*>(column_fixed_length->get_data_at(1).data), 123); + std::cout << "test write/read_one_cell_to_jsonb success" << std::endl; +} + +TEST_F(AggStateSerdeTest, writeOneCellToJsonb2) { + auto column_string = ColumnString::create(); + std::string str = "123"; + column_string->insert_data(str.c_str(), str.size()); + ASSERT_EQ(column_string->size(), 1); + JsonbWriterT<JsonbOutStream> jsonb_writer; + Arena pool; + jsonb_writer.writeStartObject(); + datatype_agg_state_serde_hll_union->write_one_cell_to_jsonb(*column_string, jsonb_writer, &pool, + 0, 0); + jsonb_writer.writeEndObject(); + + auto jsonb_column = ColumnString::create(); + jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(), + jsonb_writer.getOutput()->getSize()); + StringRef jsonb_data = jsonb_column->get_data_at(0); + auto* pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size); + JsonbDocument& doc = *pdoc; + for (auto it = doc->begin(); it != doc->end(); ++it) { + datatype_agg_state_serde_hll_union->read_one_cell_from_jsonb(*column_string, it->value()); + } + 
EXPECT_TRUE(column_string->size() == 2); + EXPECT_EQ(column_string->get_data_at(1).to_string(), "123"); + std::cout << "test write/read_one_cell_to_jsonb2 success" << std::endl; +} + +TEST_F(AggStateSerdeTest, writeColumnToPb) { + auto column_fixed_length = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed_length->resize(2); + *((int64_t*)column_fixed_length->get_data().data()) = 11; + *((int64_t*)&(column_fixed_length->get_data()[column_fixed_length->item_size()])) = 22; + ASSERT_EQ(column_fixed_length->size(), 2); + PValues pv = PValues(); + Status st = datatype_agg_state_serde_count->write_column_to_pb(*column_fixed_length, pv, 0, + column_fixed_length->size()); + EXPECT_TRUE(st.ok()); + + auto except_column = ColumnFixedLengthObject::create(sizeof(int64_t)); + st = datatype_agg_state_serde_count->read_column_from_pb(*except_column, pv); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check pb value from expected column + PValues as_pv = PValues(); + st = datatype_agg_state_serde_count->write_column_to_pb(*except_column, as_pv, 0, + except_column->size()); + EXPECT_TRUE(st.ok()) << st.to_string(); + EXPECT_EQ(pv.bytes_value_size(), as_pv.bytes_value_size()); + // check column value + for (size_t j = 0; j < column_fixed_length->size(); ++j) { + ASSERT_EQ(column_fixed_length->operator[](j), except_column->operator[](j)); + } + std::cout << "test write/read_column_to_pb" << std::endl; +} + +TEST_F(AggStateSerdeTest, writeColumnToPb2) { + auto column_string = ColumnString::create(); + std::string str = "11"; + std::string str2 = "22"; + column_string->insert_data(str.c_str(), str.size()); + column_string->insert_data(str2.c_str(), str2.size()); + ASSERT_EQ(column_string->size(), 2); + PValues pv = PValues(); + Status st = datatype_agg_state_serde_hll_union->write_column_to_pb(*column_string, pv, 0, + column_string->size()); + EXPECT_TRUE(st.ok()); + + auto except_column = ColumnString::create(); + st = 
datatype_agg_state_serde_hll_union->read_column_from_pb(*except_column, pv); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check pb value from expected column + PValues as_pv = PValues(); + st = datatype_agg_state_serde_hll_union->write_column_to_pb(*except_column, as_pv, 0, + except_column->size()); + EXPECT_TRUE(st.ok()) << st.to_string(); + EXPECT_EQ(pv.string_value_size(), as_pv.string_value_size()); + // check column value + for (size_t j = 0; j < column_string->size(); ++j) { + ASSERT_EQ(column_string->operator[](j), except_column->operator[](j)); + } + std::cout << "test write/read_column_to_pb2 success" << std::endl; +} + +TEST_F(AggStateSerdeTest, serializeOneCellToJson) { + auto column_fixed_length = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed_length->resize(2); + *((int64_t*)column_fixed_length->get_data().data()) = 11; + *((int64_t*)&(column_fixed_length->get_data()[column_fixed_length->item_size()])) = 22; + ASSERT_EQ(column_fixed_length->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = datatype_agg_state_serde_count->serialize_one_cell_to_json( + *column_fixed_length, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + st = datatype_agg_state_serde_count->serialize_one_cell_to_json(*column_fixed_length, 1, + buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(*reinterpret_cast<const int64_t*>(ser_col->get_data_at(0).data), 11); + EXPECT_EQ(*reinterpret_cast<const int64_t*>(ser_col->get_data_at(1).data), 22); + + auto except_column = ColumnFixedLengthObject::create(sizeof(int64_t)); + int64_t value = 11; + Slice slice_value((const char*)&value, sizeof(int64_t)); + st = datatype_agg_state_serde_count->deserialize_one_cell_from_json(*except_column, slice_value, + formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + + 
int64_t value2 = 22; + Slice slice_value2((const char*)&value2, sizeof(int64_t)); + st = datatype_agg_state_serde_count->deserialize_one_cell_from_json( + *except_column, slice_value2, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check column value + for (size_t j = 0; j < column_fixed_length->size(); ++j) { + ASSERT_EQ(column_fixed_length->operator[](j), except_column->operator[](j)) << j; + } + std::cout << "test serialize/deserialize_one_cell_from_json" << std::endl; +} + +TEST_F(AggStateSerdeTest, serializeOneCellToJson2) { + auto column_string = ColumnString::create(); + std::string str = "11"; + std::string str2 = "22"; + column_string->insert_data(str.c_str(), str.size()); + column_string->insert_data(str2.c_str(), str2.size()); + ASSERT_EQ(column_string->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = datatype_agg_state_serde_hll_union->serialize_one_cell_to_json( + *column_string, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + st = datatype_agg_state_serde_hll_union->serialize_one_cell_to_json( + *column_string, 1, buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ((ser_col->get_data_at(0).to_string()), "11"); + EXPECT_EQ((ser_col->get_data_at(1).to_string()), "22"); + + auto except_column = ColumnString::create(); + Slice slice_value(ser_col->get_data_at(0).to_slice()); + st = datatype_agg_state_serde_hll_union->deserialize_one_cell_from_json( + *except_column, slice_value, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + + Slice slice_value2(ser_col->get_data_at(1).to_slice()); + st = datatype_agg_state_serde_hll_union->deserialize_one_cell_from_json( + *except_column, slice_value2, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check column value + for (size_t j = 0; j < column_string->size(); ++j) { 
+ ASSERT_EQ(column_string->get_data_at(j).to_string(), + except_column->get_data_at(j).to_string()) + << j; + } + std::cout << "test serialize/deserialize_one_cell_from_json2 success" << std::endl; +} + +TEST_F(AggStateSerdeTest, serializeColumnToJson) { + auto column_fixed_length = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed_length->resize(2); + *((int64_t*)column_fixed_length->get_data().data()) = 11; + *((int64_t*)&(column_fixed_length->get_data()[column_fixed_length->item_size()])) = 22; + ASSERT_EQ(column_fixed_length->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = datatype_agg_state_serde_count->serialize_column_to_json( + *column_fixed_length, 0, 2, buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(*reinterpret_cast<const int64_t*>(ser_col->get_data_at(0).data), 11); + + std::vector<Slice> slices_vec; + uint64_t num_deserialized = 0; + auto except_column = ColumnFixedLengthObject::create(sizeof(int64_t)); + int64_t value = 11; + Slice slice1((const char*)&value, sizeof(int64_t)); + slices_vec.emplace_back(slice1); + + int64_t value2 = 22; + Slice slice2((const char*)&value2, sizeof(int64_t)); + slices_vec.emplace_back(slice2); + st = datatype_agg_state_serde_count->deserialize_column_from_json_vector( + *except_column, slices_vec, &num_deserialized, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check column value + for (size_t j = 0; j < column_fixed_length->size(); ++j) { + ASSERT_EQ(column_fixed_length->operator[](j), except_column->operator[](j)) << j; + } + std::cout << "test serialize/deserialize_column_from_json_vector" << std::endl; +} + +TEST_F(AggStateSerdeTest, serializeColumnToJson2) { + auto column_string = ColumnString::create(); + std::string str = "11"; + std::string str2 = "22"; + 
column_string->insert_data(str.c_str(), str.size()); + column_string->insert_data(str2.c_str(), str2.size()); + std::cout << "asd " << column_string->get_data_at(0).to_string() << std::endl; + ASSERT_EQ(column_string->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = datatype_agg_state_serde_hll_union->serialize_column_to_json( + *column_string, 0, 1, buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + st = datatype_agg_state_serde_hll_union->serialize_column_to_json(*column_string, 1, 2, + buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ((ser_col->get_data_at(0).to_string()), "11"); + EXPECT_EQ((ser_col->get_data_at(1).to_string()), "22"); + + std::vector<Slice> slices_vec; + uint64_t num_deserialized = 0; + auto except_column = ColumnString::create(); + slices_vec.emplace_back(ser_col->get_data_at(0).to_slice()); + slices_vec.emplace_back(ser_col->get_data_at(1).to_slice()); + st = datatype_agg_state_serde_hll_union->deserialize_column_from_json_vector( + *except_column, slices_vec, &num_deserialized, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + ASSERT_EQ(except_column->size(), 2); + // check column value + for (size_t j = 0; j < column_string->size(); ++j) { + ASSERT_EQ(column_string->get_data_at(j).to_string(), + except_column->get_data_at(j).to_string()) + << j; + } + std::cout << "test serialize/deserialize_column_from_json_vector2 success" << std::endl; +} + +TEST_F(AggStateSerdeTest, serializeOneCellToHiveText) { + auto column_fixed_length = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed_length->resize(2); + *((int64_t*)column_fixed_length->get_data().data()) = 11; + *((int64_t*)&(column_fixed_length->get_data()[column_fixed_length->item_size()])) = 22; + ASSERT_EQ(column_fixed_length->size(), 2); + 
DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = datatype_agg_state_serde_count->serialize_one_cell_to_hive_text( + *column_fixed_length, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + st = datatype_agg_state_serde_count->serialize_one_cell_to_hive_text( + *column_fixed_length, 1, buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(*reinterpret_cast<const int64_t*>(ser_col->get_data_at(0).data), 11); + EXPECT_EQ(*reinterpret_cast<const int64_t*>(ser_col->get_data_at(1).data), 22); + + std::vector<Slice> slices_vec; + auto except_column = ColumnFixedLengthObject::create(sizeof(int64_t)); + int64_t value = 11; + Slice slice1((const char*)&value, sizeof(int64_t)); + st = datatype_agg_state_serde_count->deserialize_one_cell_from_hive_text(*except_column, slice1, + formatOptions); + + int64_t value2 = 22; + Slice slice2((const char*)&value2, sizeof(int64_t)); + st = datatype_agg_state_serde_count->deserialize_one_cell_from_hive_text(*except_column, slice2, + formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check column value + for (size_t j = 0; j < column_fixed_length->size(); ++j) { + ASSERT_EQ(column_fixed_length->operator[](j), except_column->operator[](j)) << j; + } + std::cout << "test serialize/deserialize_one_cell_from_hive_text" << std::endl; +} + +TEST_F(AggStateSerdeTest, serializeOneCellToHiveText2) { + auto column_string = ColumnString::create(); + std::string str = "11"; + std::string str2 = "22"; + column_string->insert_data(str.c_str(), str.size()); + column_string->insert_data(str2.c_str(), str2.size()); + ASSERT_EQ(column_string->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + 
auto st = datatype_agg_state_serde_hll_union->serialize_one_cell_to_hive_text( + *column_string, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + st = datatype_agg_state_serde_hll_union->serialize_one_cell_to_hive_text( + *column_string, 1, buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ((ser_col->get_data_at(0).to_string()), "11"); + EXPECT_EQ((ser_col->get_data_at(1).to_string()), "22"); + + std::vector<Slice> slices_vec; + auto except_column = ColumnString::create(); + Slice slice1(ser_col->get_data_at(0).to_slice()); + st = datatype_agg_state_serde_hll_union->deserialize_one_cell_from_hive_text( + *except_column, slice1, formatOptions); + + Slice slice2(ser_col->get_data_at(1).to_slice()); + st = datatype_agg_state_serde_hll_union->deserialize_one_cell_from_hive_text( + *except_column, slice2, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check column value + for (size_t j = 0; j < column_string->size(); ++j) { + ASSERT_EQ(column_string->operator[](j), except_column->operator[](j)) << j; + } + std::cout << "test serialize/deserialize_one_cell_from_hive_text2 success" << std::endl; +} +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/data_types/serde/data_type_serde_bitmap_test.cpp b/be/test/vec/data_types/serde/data_type_serde_bitmap_test.cpp new file mode 100644 index 00000000000..4e428cb53cc --- /dev/null +++ b/be/test/vec/data_types/serde/data_type_serde_bitmap_test.cpp @@ -0,0 +1,189 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <arrow/array/builder_base.h> +#include <gtest/gtest.h> + +#include "util/slice.h" +#include "vec/columns/column_complex.h" +#include "vec/data_types/serde/data_type_bitmap_serde.h" + +namespace doris::vectorized { + +TEST(BitmapSerdeTest, writeColumnToMysql) { + auto bitmap_serde = std::make_shared<vectorized::DataTypeBitMapSerDe>(1); + auto column_bitmap = ColumnBitmap::create(); + column_bitmap->insert_value(BitmapValue::empty_bitmap()); + ASSERT_EQ(column_bitmap->size(), 1); + MysqlRowBuffer<false> mysql_rb; + DataTypeSerDe::FormatOptions options; + options.nested_string_wrapper = "\""; + options.wrapper_len = 1; + options.map_key_delim = ':'; + options.null_format = "null"; + options.null_len = 4; + bitmap_serde->set_return_object_as_string(true); + auto st = bitmap_serde->write_column_to_mysql(*column_bitmap, mysql_rb, 0, false, options); + EXPECT_TRUE(st.ok()); + ASSERT_EQ(mysql_rb.length(), 2); + + column_bitmap->insert_value(BitmapValue(123)); + bitmap_serde->set_return_object_as_string(true); + st = bitmap_serde->write_column_to_mysql(*column_bitmap, mysql_rb, 1, false, options); + EXPECT_TRUE(st.ok()); + ASSERT_EQ(mysql_rb.length(), 8); + std::cout << "test write_column_to_mysql success" << std::endl; +} + +TEST(BitmapSerdeTest, writeOneCellToJsonb) { + auto bitmap_serde = std::make_shared<vectorized::DataTypeBitMapSerDe>(1); + auto column_bitmap = ColumnBitmap::create(); + column_bitmap->insert_value(BitmapValue(123)); + ASSERT_EQ(column_bitmap->size(), 1); + JsonbWriterT<JsonbOutStream> jsonb_writer; + Arena pool; + 
jsonb_writer.writeStartObject(); + bitmap_serde->write_one_cell_to_jsonb(*column_bitmap, jsonb_writer, &pool, 0, 0); + jsonb_writer.writeEndObject(); + + auto jsonb_column = ColumnString::create(); + jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(), + jsonb_writer.getOutput()->getSize()); + StringRef jsonb_data = jsonb_column->get_data_at(0); + auto* pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size); + JsonbDocument& doc = *pdoc; + for (auto it = doc->begin(); it != doc->end(); ++it) { + bitmap_serde->read_one_cell_from_jsonb(*column_bitmap, it->value()); + } + EXPECT_TRUE(column_bitmap->size() == 2); + BitmapValue data = column_bitmap->get_element(1); + EXPECT_EQ(data.to_string(), "123"); + std::cout << "test write/read_one_cell_to_jsonb success" << std::endl; +} + +TEST(BitmapSerdeTest, writeColumnToPb) { + auto bitmap_serde = std::make_shared<vectorized::DataTypeBitMapSerDe>(1); + auto column_bitmap = ColumnBitmap::create(); + column_bitmap->insert_value(BitmapValue::empty_bitmap()); + column_bitmap->insert_value(BitmapValue(123)); + ASSERT_EQ(column_bitmap->size(), 2); + PValues pv = PValues(); + Status st = bitmap_serde->write_column_to_pb(*column_bitmap, pv, 0, column_bitmap->size()); + EXPECT_TRUE(st.ok()); + + auto except_column = ColumnBitmap::create(); + st = bitmap_serde->read_column_from_pb(*except_column, pv); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check pb value from expected column + PValues as_pv = PValues(); + st = bitmap_serde->write_column_to_pb(*except_column, as_pv, 0, except_column->size()); + EXPECT_TRUE(st.ok()) << st.to_string(); + EXPECT_EQ(pv.bytes_value_size(), as_pv.bytes_value_size()); + // check column value + for (size_t j = 0; j < column_bitmap->size(); ++j) { + EXPECT_EQ(column_bitmap->get_element(j).to_string(), + except_column->get_element(j).to_string()); + } + std::cout << "test write/read_column_to_pb" << std::endl; +} + +TEST(BitmapSerdeTest, serializeOneCellToJson) { + auto 
bitmap_serde = std::make_shared<vectorized::DataTypeBitMapSerDe>(1); + auto column_bitmap = ColumnBitmap::create(); + column_bitmap->insert_value(BitmapValue::empty_bitmap()); + column_bitmap->insert_value(BitmapValue(123)); + ASSERT_EQ(column_bitmap->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = bitmap_serde->serialize_one_cell_to_json(*column_bitmap, 0, buffer_writer, + formatOptions); + buffer_writer.commit(); + st = bitmap_serde->serialize_one_cell_to_json(*column_bitmap, 1, buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(ser_col->get_data_at(0).to_string(), "\\N") << st.to_string(); + EXPECT_EQ(ser_col->get_data_at(1).to_string(), "\\N") << st.to_string(); + + auto except_column = ColumnBitmap::create(); + std::string memory_buffer; + auto bytesize = column_bitmap->get_element(0).getSizeInBytes(); + memory_buffer.resize(bytesize); + column_bitmap->get_element(0).write_to(const_cast<char*>(memory_buffer.data())); + Slice slice_value(memory_buffer.data(), memory_buffer.size()); + st = bitmap_serde->deserialize_one_cell_from_json(*except_column, slice_value, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + + memory_buffer.clear(); + bytesize = column_bitmap->get_element(1).getSizeInBytes(); + memory_buffer.resize(bytesize); + column_bitmap->get_element(1).write_to(const_cast<char*>(memory_buffer.data())); + slice_value = Slice(memory_buffer.data(), memory_buffer.size()); + st = bitmap_serde->deserialize_one_cell_from_json(*except_column, slice_value, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check column value + for (size_t j = 0; j < column_bitmap->size(); ++j) { + EXPECT_EQ(column_bitmap->get_element(j).to_string(), + except_column->get_element(j).to_string()); + } + std::cout << "test serialize/deserialize_one_cell_from_json" << 
std::endl; +} + +TEST(BitmapSerdeTest, serializeColumnToJson) { + auto bitmap_serde = std::make_shared<vectorized::DataTypeBitMapSerDe>(1); + auto column_bitmap = ColumnBitmap::create(); + column_bitmap->insert_value(BitmapValue::empty_bitmap()); + column_bitmap->insert_value(BitmapValue(123)); + ASSERT_EQ(column_bitmap->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = bitmap_serde->serialize_column_to_json(*column_bitmap, 0, 2, buffer_writer, + formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(ser_col->get_data_at(0).to_string(), "\\N,\\N") << st.to_string(); + + std::vector<Slice> slices_vec; + uint64_t num_deserialized = 0; + auto except_column = ColumnBitmap::create(); + std::string memory_buffer; + auto bytesize = column_bitmap->get_element(0).getSizeInBytes(); + memory_buffer.resize(bytesize); + column_bitmap->get_element(0).write_to(const_cast<char*>(memory_buffer.data())); + slices_vec.emplace_back(memory_buffer.data(), memory_buffer.size()); + + std::string memory_buffer2; + memory_buffer2.clear(); + bytesize = column_bitmap->get_element(1).getSizeInBytes(); + memory_buffer2.resize(bytesize); + column_bitmap->get_element(1).write_to(const_cast<char*>(memory_buffer2.data())); + slices_vec.emplace_back(memory_buffer2.data(), memory_buffer2.size()); + st = bitmap_serde->deserialize_column_from_json_vector(*except_column, slices_vec, + &num_deserialized, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check column value + for (size_t j = 0; j < column_bitmap->size(); ++j) { + EXPECT_EQ(column_bitmap->get_element(j).to_string(), + except_column->get_element(j).to_string()) + << j; + } + std::cout << "test serialize/deserialize_column_from_json_vector" << std::endl; +} +} // namespace doris::vectorized \ No newline at end of file diff --git 
a/be/test/vec/data_types/serde/data_type_serde_fixed_length_object_test.cpp b/be/test/vec/data_types/serde/data_type_serde_fixed_length_object_test.cpp new file mode 100644 index 00000000000..74d3c1d19e4 --- /dev/null +++ b/be/test/vec/data_types/serde/data_type_serde_fixed_length_object_test.cpp @@ -0,0 +1,227 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include <arrow/array/builder_base.h> +#include <gtest/gtest.h> + +#include <string> + +#include "util/slice.h" +#include "vec/columns/column_fixed_length_object.h" +#include "vec/data_types/serde/data_type_string_serde.h" + +namespace doris::vectorized { + +TEST(FixedLengthObjectSerdeTest, writeColumnToMysql) { + auto fixed_length_serde = std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(1); + auto column_fixed_length = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed_length->insert_default(); + ASSERT_EQ(column_fixed_length->size(), 1); + MysqlRowBuffer<false> mysql_rb; + DataTypeSerDe::FormatOptions options; + options.nested_string_wrapper = "\""; + options.wrapper_len = 1; + options.map_key_delim = ':'; + options.null_format = "null"; + options.null_len = 4; + fixed_length_serde->set_return_object_as_string(true); + auto st = fixed_length_serde->write_column_to_mysql(*column_fixed_length, mysql_rb, 0, false, + options); + EXPECT_TRUE(st.ok()); + ASSERT_EQ(mysql_rb.length(), 9); + + column_fixed_length->resize(2); + *((int64_t*)&(column_fixed_length->get_data()[column_fixed_length->item_size()])) = 22; + fixed_length_serde->set_return_object_as_string(true); + st = fixed_length_serde->write_column_to_mysql(*column_fixed_length, mysql_rb, 1, false, + options); + EXPECT_TRUE(st.ok()); + ASSERT_EQ(mysql_rb.length(), 18); + std::cout << "test write_column_to_mysql success" << std::endl; +} + +TEST(FixedLengthObjectSerdeTest, writeOneCellToJsonb) { + auto fixed_length_serde = std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(1); + auto column_fixed_length = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed_length->resize(1); + *((int64_t*)column_fixed_length->get_data().data()) = 123; + ASSERT_EQ(column_fixed_length->size(), 1); + JsonbWriterT<JsonbOutStream> jsonb_writer; + Arena pool; + jsonb_writer.writeStartObject(); + fixed_length_serde->write_one_cell_to_jsonb(*column_fixed_length, jsonb_writer, &pool, 0, 
0); + jsonb_writer.writeEndObject(); + + auto jsonb_column = ColumnString::create(); + jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(), + jsonb_writer.getOutput()->getSize()); + StringRef jsonb_data = jsonb_column->get_data_at(0); + auto* pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size); + JsonbDocument& doc = *pdoc; + for (auto it = doc->begin(); it != doc->end(); ++it) { + fixed_length_serde->read_one_cell_from_jsonb(*column_fixed_length, it->value()); + } + EXPECT_TRUE(column_fixed_length->size() == 2); + EXPECT_EQ(*reinterpret_cast<const int64_t*>(column_fixed_length->get_data_at(1).data), 123); + std::cout << "test write/read_one_cell_to_jsonb success" << std::endl; +} + +TEST(FixedLengthObjectSerdeTest, writeColumnToPb) { + auto fixed_length_serde = std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(1); + auto column_fixed_length = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed_length->resize(2); + *((int64_t*)column_fixed_length->get_data().data()) = 11; + *((int64_t*)&(column_fixed_length->get_data()[column_fixed_length->item_size()])) = 22; + ASSERT_EQ(column_fixed_length->size(), 2); + PValues pv = PValues(); + Status st = fixed_length_serde->write_column_to_pb(*column_fixed_length, pv, 0, + column_fixed_length->size()); + EXPECT_TRUE(st.ok()); + + auto except_column = ColumnFixedLengthObject::create(sizeof(int64_t)); + st = fixed_length_serde->read_column_from_pb(*except_column, pv); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check pb value from expected column + PValues as_pv = PValues(); + st = fixed_length_serde->write_column_to_pb(*except_column, as_pv, 0, except_column->size()); + EXPECT_TRUE(st.ok()) << st.to_string(); + EXPECT_EQ(pv.bytes_value_size(), as_pv.bytes_value_size()); + // check column value + for (size_t j = 0; j < column_fixed_length->size(); ++j) { + ASSERT_EQ(column_fixed_length->operator[](j), except_column->operator[](j)); + } + std::cout << "test 
write/read_column_to_pb" << std::endl; +} + +TEST(FixedLengthObjectSerdeTest, serializeOneCellToJson) { + auto fixed_length_serde = std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(1); + auto column_fixed_length = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed_length->resize(2); + *((int64_t*)column_fixed_length->get_data().data()) = 11; + *((int64_t*)&(column_fixed_length->get_data()[column_fixed_length->item_size()])) = 22; + ASSERT_EQ(column_fixed_length->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = fixed_length_serde->serialize_one_cell_to_json(*column_fixed_length, 0, buffer_writer, + formatOptions); + buffer_writer.commit(); + st = fixed_length_serde->serialize_one_cell_to_json(*column_fixed_length, 1, buffer_writer, + formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(*reinterpret_cast<const int64_t*>(ser_col->get_data_at(0).data), 11); + EXPECT_EQ(*reinterpret_cast<const int64_t*>(ser_col->get_data_at(1).data), 22); + + auto except_column = ColumnFixedLengthObject::create(sizeof(int64_t)); + int64_t value = 11; + Slice slice_value((const char*)&value, sizeof(int64_t)); + st = fixed_length_serde->deserialize_one_cell_from_json(*except_column, slice_value, + formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + + int64_t value2 = 22; + Slice slice_value2((const char*)&value2, sizeof(int64_t)); + st = fixed_length_serde->deserialize_one_cell_from_json(*except_column, slice_value2, + formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check column value + for (size_t j = 0; j < column_fixed_length->size(); ++j) { + ASSERT_EQ(column_fixed_length->operator[](j), except_column->operator[](j)) << j; + } + std::cout << "test serialize/deserialize_one_cell_from_json" << std::endl; +} + +TEST(FixedLengthObjectSerdeTest, 
serializeColumnToJson) { + auto fixed_length_serde = std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(1); + auto column_fixed_length = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed_length->resize(2); + *((int64_t*)column_fixed_length->get_data().data()) = 11; + *((int64_t*)&(column_fixed_length->get_data()[column_fixed_length->item_size()])) = 22; + ASSERT_EQ(column_fixed_length->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = fixed_length_serde->serialize_column_to_json(*column_fixed_length, 0, 2, + buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(*reinterpret_cast<const int64_t*>(ser_col->get_data_at(0).data), 11); + + std::vector<Slice> slices_vec; + uint64_t num_deserialized = 0; + auto except_column = ColumnFixedLengthObject::create(sizeof(int64_t)); + int64_t value = 11; + Slice slice1((const char*)&value, sizeof(int64_t)); + slices_vec.emplace_back(slice1); + + int64_t value2 = 22; + Slice slice2((const char*)&value2, sizeof(int64_t)); + slices_vec.emplace_back(slice2); + st = fixed_length_serde->deserialize_column_from_json_vector(*except_column, slices_vec, + &num_deserialized, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check column value + for (size_t j = 0; j < column_fixed_length->size(); ++j) { + ASSERT_EQ(column_fixed_length->operator[](j), except_column->operator[](j)) << j; + } + std::cout << "test serialize/deserialize_column_from_json_vector" << std::endl; +} + +TEST(FixedLengthObjectSerdeTest, serializeOneCellToHiveText) { + auto fixed_length_serde = std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(1); + auto column_fixed_length = ColumnFixedLengthObject::create(sizeof(int64_t)); + column_fixed_length->resize(2); + *((int64_t*)column_fixed_length->get_data().data()) = 11; + 
*((int64_t*)&(column_fixed_length->get_data()[column_fixed_length->item_size()])) = 22; + ASSERT_EQ(column_fixed_length->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = fixed_length_serde->serialize_one_cell_to_hive_text(*column_fixed_length, 0, + buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + st = fixed_length_serde->serialize_one_cell_to_hive_text(*column_fixed_length, 1, buffer_writer, + formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(*reinterpret_cast<const int64_t*>(ser_col->get_data_at(0).data), 11); + EXPECT_EQ(*reinterpret_cast<const int64_t*>(ser_col->get_data_at(1).data), 22); + + std::vector<Slice> slices_vec; + auto except_column = ColumnFixedLengthObject::create(sizeof(int64_t)); + int64_t value = 11; + Slice slice1((const char*)&value, sizeof(int64_t)); + st = fixed_length_serde->deserialize_one_cell_from_hive_text(*except_column, slice1, + formatOptions); + + int64_t value2 = 22; + Slice slice2((const char*)&value2, sizeof(int64_t)); + st = fixed_length_serde->deserialize_one_cell_from_hive_text(*except_column, slice2, + formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check column value + for (size_t j = 0; j < column_fixed_length->size(); ++j) { + ASSERT_EQ(column_fixed_length->operator[](j), except_column->operator[](j)) << j; + } + std::cout << "test serialize/deserialize_one_cell_from_hive_text" << std::endl; +} +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/data_types/serde/data_type_serde_hll_test.cpp b/be/test/vec/data_types/serde/data_type_serde_hll_test.cpp new file mode 100644 index 00000000000..d496d22abc2 --- /dev/null +++ b/be/test/vec/data_types/serde/data_type_serde_hll_test.cpp @@ -0,0 +1,199 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// 
or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <arrow/array/builder_base.h> +#include <gtest/gtest.h> + +#include "olap/hll.h" +#include "util/slice.h" +#include "vec/columns/column_complex.h" +#include "vec/data_types/serde/data_type_hll_serde.h" + +namespace doris::vectorized { + +TEST(HLLSerdeTest, writeColumnToMysql) { + auto hll_serde = std::make_shared<vectorized::DataTypeHLLSerDe>(1); + auto column_hll = ColumnHLL::create(); + column_hll->insert_value(HyperLogLog::empty()); + ASSERT_EQ(column_hll->size(), 1); + MysqlRowBuffer<false> mysql_rb; + DataTypeSerDe::FormatOptions options; + options.nested_string_wrapper = "\""; + options.wrapper_len = 1; + options.map_key_delim = ':'; + options.null_format = "null"; + options.null_len = 4; + hll_serde->set_return_object_as_string(true); + auto st = hll_serde->write_column_to_mysql(*column_hll, mysql_rb, 0, false, options); + EXPECT_TRUE(st.ok()); + ASSERT_EQ(mysql_rb.length(), 2); + + HyperLogLog hll; + hll.update(123); + column_hll->insert_value(hll); + hll_serde->set_return_object_as_string(true); + st = hll_serde->write_column_to_mysql(*column_hll, mysql_rb, 1, false, options); + EXPECT_TRUE(st.ok()); + ASSERT_EQ(mysql_rb.length(), 13); + std::cout << "test write_column_to_mysql success" << std::endl; +} + 
+TEST(HLLSerdeTest, writeOneCellToJsonb) { + auto hll_serde = std::make_shared<vectorized::DataTypeHLLSerDe>(1); + auto column_hll = ColumnHLL::create(); + HyperLogLog hll; + hll.update(123); + column_hll->insert_value(hll); + ASSERT_EQ(column_hll->size(), 1); + JsonbWriterT<JsonbOutStream> jsonb_writer; + Arena pool; + jsonb_writer.writeStartObject(); + hll_serde->write_one_cell_to_jsonb(*column_hll, jsonb_writer, &pool, 0, 0); + jsonb_writer.writeEndObject(); + + auto jsonb_column = ColumnString::create(); + jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(), + jsonb_writer.getOutput()->getSize()); + StringRef jsonb_data = jsonb_column->get_data_at(0); + auto* pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size); + JsonbDocument& doc = *pdoc; + for (auto it = doc->begin(); it != doc->end(); ++it) { + hll_serde->read_one_cell_from_jsonb(*column_hll, it->value()); + } + EXPECT_TRUE(column_hll->size() == 2); + HyperLogLog data = column_hll->get_element(1); + EXPECT_EQ(data.to_string(), "hash set size: 1\ncardinality:\t1\ntype:\t1"); +} + +TEST(HLLSerdeTest, writeColumnToPb) { + auto hll_serde = std::make_shared<vectorized::DataTypeHLLSerDe>(1); + auto column_hll = ColumnHLL::create(); + column_hll->insert_value(HyperLogLog::empty()); + HyperLogLog hll; + hll.update(123); + column_hll->insert_value(hll); + ASSERT_EQ(column_hll->size(), 2); + PValues pv = PValues(); + Status st = hll_serde->write_column_to_pb(*column_hll, pv, 0, column_hll->size()); + EXPECT_TRUE(st.ok()); + + auto except_column = ColumnHLL::create(); + st = hll_serde->read_column_from_pb(*except_column, pv); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check pb value from expected column + PValues as_pv = PValues(); + st = hll_serde->write_column_to_pb(*except_column, as_pv, 0, except_column->size()); + EXPECT_TRUE(st.ok()) << st.to_string(); + EXPECT_EQ(pv.bytes_value_size(), as_pv.bytes_value_size()); + // check column value + for (size_t j = 0; j < 
column_hll->size(); ++j) { + EXPECT_EQ(column_hll->get_element(j).to_string(), + except_column->get_element(j).to_string()); + } + std::cout << "test write/read_column_to_pb" << std::endl; +} + +TEST(HLLSerdeTest, serializeOneCellToJson) { + auto hll_serde = std::make_shared<vectorized::DataTypeHLLSerDe>(1); + auto column_hll = ColumnHLL::create(); + column_hll->insert_value(HyperLogLog::empty()); + HyperLogLog hll; + hll.update(123); + column_hll->insert_value(hll); + ASSERT_EQ(column_hll->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = hll_serde->serialize_one_cell_to_json(*column_hll, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + st = hll_serde->serialize_one_cell_to_json(*column_hll, 1, buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(ser_col->get_data_at(0).to_string(), "\\N") << st.to_string(); + EXPECT_EQ(ser_col->get_data_at(1).to_string(), "\\N") << st.to_string(); + + auto except_column = ColumnHLL::create(); + std::string memory_buffer; + auto bytesize = column_hll->get_element(0).max_serialized_size(); + memory_buffer.resize(bytesize); + size_t actual_size = column_hll->get_element(0).serialize((uint8_t*)(memory_buffer.data())); + memory_buffer.resize(actual_size); + Slice slice_value(memory_buffer.data(), memory_buffer.size()); + st = hll_serde->deserialize_one_cell_from_json(*except_column, slice_value, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + + memory_buffer.clear(); + bytesize = column_hll->get_element(1).max_serialized_size(); + memory_buffer.resize(bytesize); + actual_size = column_hll->get_element(1).serialize((uint8_t*)(memory_buffer.data())); + memory_buffer.resize(actual_size); + slice_value = Slice(memory_buffer.data(), memory_buffer.size()); + st = hll_serde->deserialize_one_cell_from_json(*except_column, 
slice_value, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check column value + for (size_t j = 0; j < column_hll->size(); ++j) { + EXPECT_EQ(column_hll->get_element(j).to_string(), + except_column->get_element(j).to_string()); + } + std::cout << "test serialize/deserialize_one_cell_from_json" << std::endl; +} + +TEST(HLLSerdeTest, serializeColumnToJson) { + auto hll_serde = std::make_shared<vectorized::DataTypeHLLSerDe>(1); + auto column_hll = ColumnHLL::create(); + column_hll->insert_value(HyperLogLog::empty()); + HyperLogLog hll; + hll.update(123); + column_hll->insert_value(hll); + ASSERT_EQ(column_hll->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = hll_serde->serialize_column_to_json(*column_hll, 0, 2, buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(ser_col->get_data_at(0).to_string(), "\\N,\\N") << st.to_string(); + + std::vector<Slice> slices_vec; + uint64_t num_deserialized = 0; + auto except_column = ColumnHLL::create(); + std::string memory_buffer; + auto bytesize = column_hll->get_element(0).max_serialized_size(); + memory_buffer.resize(bytesize); + size_t actual_size = column_hll->get_element(0).serialize((uint8_t*)(memory_buffer.data())); + memory_buffer.resize(actual_size); + slices_vec.emplace_back(memory_buffer.data(), memory_buffer.size()); + + std::string memory_buffer2; + bytesize = column_hll->get_element(1).max_serialized_size(); + memory_buffer2.resize(bytesize); + actual_size = column_hll->get_element(1).serialize((uint8_t*)(memory_buffer2.data())); + memory_buffer2.resize(actual_size); + slices_vec.emplace_back(memory_buffer2.data(), memory_buffer2.size()); + st = hll_serde->deserialize_column_from_json_vector(*except_column, slices_vec, + &num_deserialized, formatOptions); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check 
column value + for (size_t j = 0; j < column_hll->size(); ++j) { + EXPECT_EQ(column_hll->get_element(j).to_string(), + except_column->get_element(j).to_string()); + } + std::cout << "test serialize/deserialize_column_from_json_vector" << std::endl; +} +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/data_types/serde/data_type_serde_quantile_state_test.cpp b/be/test/vec/data_types/serde/data_type_serde_quantile_state_test.cpp new file mode 100644 index 00000000000..d280dca0cde --- /dev/null +++ b/be/test/vec/data_types/serde/data_type_serde_quantile_state_test.cpp @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include <arrow/array/builder_base.h> +#include <gtest/gtest.h> + +#include "util/slice.h" +#include "vec/columns/column_complex.h" +#include "vec/data_types/serde/data_type_quantilestate_serde.h" + +namespace doris::vectorized { + +TEST(QuantileStateSerdeTest, writeColumnToMysql) { + auto quantile_state_serde = std::make_shared<vectorized::DataTypeQuantileStateSerDe>(1); + auto column_quantile_state = ColumnQuantileState::create(); + column_quantile_state->insert_value(QuantileState()); + ASSERT_EQ(column_quantile_state->size(), 1); + MysqlRowBuffer<false> mysql_rb; + DataTypeSerDe::FormatOptions options; + options.nested_string_wrapper = "\""; + options.wrapper_len = 1; + options.map_key_delim = ':'; + options.null_format = "null"; + options.null_len = 4; + quantile_state_serde->set_return_object_as_string(true); + auto st = quantile_state_serde->write_column_to_mysql(*column_quantile_state, mysql_rb, 0, + false, options); + EXPECT_TRUE(st.ok()); + ASSERT_EQ(mysql_rb.length(), 6); + + QuantileState quantile_state; + quantile_state.add_value(123); + column_quantile_state->insert_value(quantile_state); + quantile_state_serde->set_return_object_as_string(true); + st = quantile_state_serde->write_column_to_mysql(*column_quantile_state, mysql_rb, 1, false, + options); + EXPECT_TRUE(st.ok()); + ASSERT_EQ(mysql_rb.length(), 20); + std::cout << "test write_column_to_mysql success" << std::endl; +} + +TEST(QuantileStateSerdeTest, writeOneCellToJsonb) { + auto quantile_state_serde = std::make_shared<vectorized::DataTypeQuantileStateSerDe>(1); + auto column_quantile_state = ColumnQuantileState::create(); + QuantileState quantile_state; + quantile_state.add_value(123); + column_quantile_state->insert_value(quantile_state); + ASSERT_EQ(column_quantile_state->size(), 1); + JsonbWriterT<JsonbOutStream> jsonb_writer; + Arena pool; + jsonb_writer.writeStartObject(); + quantile_state_serde->write_one_cell_to_jsonb(*column_quantile_state, jsonb_writer, &pool, 0, + 0); + 
jsonb_writer.writeEndObject(); + + auto jsonb_column = ColumnString::create(); + jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(), + jsonb_writer.getOutput()->getSize()); + StringRef jsonb_data = jsonb_column->get_data_at(0); + auto* pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size); + JsonbDocument& doc = *pdoc; + for (auto it = doc->begin(); it != doc->end(); ++it) { + quantile_state_serde->read_one_cell_from_jsonb(*column_quantile_state, it->value()); + } + EXPECT_TRUE(column_quantile_state->size() == 2); + QuantileState data = column_quantile_state->get_element(1); + EXPECT_EQ(data.get_value_by_percentile(1), 123); + std::cout << "test write/read_one_cell_to_jsonb success" << std::endl; +} + +TEST(QuantileStateSerdeTest, writeColumnToPb) { + auto quantile_state_serde = std::make_shared<vectorized::DataTypeQuantileStateSerDe>(1); + auto column_quantile_state = ColumnQuantileState::create(); + column_quantile_state->insert_value(QuantileState()); + QuantileState quantile_state; + quantile_state.add_value(123); + column_quantile_state->insert_value(quantile_state); + ASSERT_EQ(column_quantile_state->size(), 2); + PValues pv = PValues(); + Status st = quantile_state_serde->write_column_to_pb(*column_quantile_state, pv, 0, + column_quantile_state->size()); + EXPECT_TRUE(st.ok()); + + auto except_column = ColumnQuantileState::create(); + st = quantile_state_serde->read_column_from_pb(*except_column, pv); + EXPECT_TRUE(st.ok()) << st.to_string(); + // check pb value from expected column + PValues as_pv = PValues(); + st = quantile_state_serde->write_column_to_pb(*except_column, as_pv, 0, except_column->size()); + EXPECT_TRUE(st.ok()) << st.to_string(); + EXPECT_EQ(pv.bytes_value_size(), as_pv.bytes_value_size()); + // check column value + for (size_t j = 0; j < column_quantile_state->size(); ++j) { + EXPECT_EQ(column_quantile_state->get_data_at(j), except_column->get_data_at(j)); + } + std::cout << "test write/read_column_to_pb" << 
std::endl; +} + +TEST(QuantileStateSerdeTest, serializeOneCellToJson) { + auto quantile_state_serde = std::make_shared<vectorized::DataTypeQuantileStateSerDe>(1); + auto column_quantile_state = ColumnQuantileState::create(); + column_quantile_state->insert_value(QuantileState()); + QuantileState quantile_state; + quantile_state.add_value(123); + column_quantile_state->insert_value(quantile_state); + ASSERT_EQ(column_quantile_state->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = quantile_state_serde->serialize_one_cell_to_json(*column_quantile_state, 0, + buffer_writer, formatOptions); + buffer_writer.commit(); + st = quantile_state_serde->serialize_one_cell_to_json(*column_quantile_state, 1, buffer_writer, + formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(ser_col->get_data_at(0).to_string(), "\\N") << st.to_string(); + EXPECT_EQ(ser_col->get_data_at(1).to_string(), "\\N") << st.to_string(); + std::cout << "test serialize_one_cell_to_json success" << std::endl; +} + +TEST(QuantileStateSerdeTest, serializeColumnToJson) { + auto quantile_state_serde = std::make_shared<vectorized::DataTypeQuantileStateSerDe>(1); + auto column_quantile_state = ColumnQuantileState::create(); + column_quantile_state->insert_value(QuantileState()); + QuantileState quantile_state; + quantile_state.add_value(123); + column_quantile_state->insert_value(quantile_state); + ASSERT_EQ(column_quantile_state->size(), 2); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto st = quantile_state_serde->serialize_column_to_json(*column_quantile_state, 0, 2, + buffer_writer, formatOptions); + buffer_writer.commit(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(ser_col->get_data_at(0).to_string(), 
"\\N,\\N") << st.to_string(); + + std::vector<Slice> slices_vec; + auto except_column = ColumnQuantileState::create(); + std::string memory_buffer; + auto bytesize = column_quantile_state->get_element(0).get_serialized_size(); + memory_buffer.resize(bytesize); + column_quantile_state->get_element(0).serialize((uint8_t*)(memory_buffer.data())); + slices_vec.emplace_back(memory_buffer.data(), memory_buffer.size()); + QuantileState quantile_state_res_0; + auto res = quantile_state_res_0.deserialize(slices_vec[0]); + EXPECT_TRUE(res); + + std::string memory_buffer2; + memory_buffer2.clear(); + bytesize = column_quantile_state->get_element(1).get_serialized_size(); + memory_buffer2.resize(bytesize); + column_quantile_state->get_element(1).serialize((uint8_t*)(memory_buffer2.data())); + slices_vec.emplace_back(memory_buffer2.data(), memory_buffer2.size()); + QuantileState quantile_state_res_1; + res = quantile_state_res_1.deserialize(slices_vec[1]); + EXPECT_TRUE(res); + EXPECT_EQ(quantile_state.get_value_by_percentile(1), + quantile_state_res_1.get_value_by_percentile(1)); + std::cout << "test serialize/deserialize_column_from_json_vector" << std::endl; +} +} // namespace doris::vectorized \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org