This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new e5a2b0eea81 Revert "[cherry-pick](jsonb) add a check for jsonb value to avoid invalid jsonb value write into segment file " (#49058) e5a2b0eea81 is described below commit e5a2b0eea8116c25258d66570dd605705f125009 Author: amory <wangqian...@selectdb.com> AuthorDate: Fri Mar 14 17:41:06 2025 +0800 Revert "[cherry-pick](jsonb) add a check for jsonb value to avoid invalid jsonb value write into segment file " (#49058) Reverts apache/doris#48729. Temporarily revert this PR because PartialUpdateInfo::_generate_default_values_for_missing_cids uses an empty string, which makes this check fail. --- be/src/util/jsonb_document.h | 4 +- be/src/util/jsonb_utils.h | 2 +- be/src/util/jsonb_writer.h | 3 +- .../exprs/table_function/vexplode_json_array.cpp | 2 +- .../exprs/table_function/vexplode_json_object.cpp | 4 +- be/src/vec/functions/function_cast.h | 4 +- be/src/vec/functions/function_jsonb.cpp | 17 +- be/src/vec/jsonb/serialize.cpp | 4 +- be/src/vec/olap/olap_data_convertor.cpp | 23 +- be/src/vec/olap/olap_data_convertor.h | 4 +- .../vec/data_types/serde/data_type_serde_test.cpp | 4 +- be/test/vec/olap/jsonb_value_test.cpp | 242 --------------------- 12 files changed, 26 insertions(+), 287 deletions(-) diff --git a/be/src/util/jsonb_document.h b/be/src/util/jsonb_document.h index a8c59b93544..2a9cf8a8191 100644 --- a/be/src/util/jsonb_document.h +++ b/be/src/util/jsonb_document.h @@ -177,7 +177,7 @@ public: static JsonbDocument* makeDocument(char* pb, uint32_t size, const JsonbValue* rval); // create an JsonbDocument object from JSONB packed bytes - static JsonbDocument* checkAndCreateDocument(const char* pb, size_t size); + static JsonbDocument* createDocument(const char* pb, uint32_t size); // create an JsonbValue from JSONB packed bytes static JsonbValue* createValue(const char* pb, uint32_t size); @@ -1138,7 +1138,7 @@ inline JsonbDocument* JsonbDocument::makeDocument(char* pb, uint32_t size, const 
return doc; } -inline JsonbDocument* JsonbDocument::checkAndCreateDocument(const char* pb, size_t size) { +inline JsonbDocument* JsonbDocument::createDocument(const char* pb, uint32_t size) { if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { return nullptr; } diff --git a/be/src/util/jsonb_utils.h b/be/src/util/jsonb_utils.h index c32588e2610..7dba0dca3af 100644 --- a/be/src/util/jsonb_utils.h +++ b/be/src/util/jsonb_utils.h @@ -40,7 +40,7 @@ public: // get json string const std::string to_json_string(const char* data, size_t size) { - JsonbDocument* pdoc = doris::JsonbDocument::checkAndCreateDocument(data, size); + JsonbDocument* pdoc = doris::JsonbDocument::createDocument(data, size); if (!pdoc) { LOG(FATAL) << "invalid json binary value: " << std::string_view(data, size); } diff --git a/be/src/util/jsonb_writer.h b/be/src/util/jsonb_writer.h index f92d8a4096b..61bd28bb783 100644 --- a/be/src/util/jsonb_writer.h +++ b/be/src/util/jsonb_writer.h @@ -479,8 +479,7 @@ public: OS_TYPE* getOutput() { return os_; } JsonbDocument* getDocument() { - return JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(), - getOutput()->getSize()); + return JsonbDocument::createDocument(getOutput()->getBuffer(), getOutput()->getSize()); } JsonbValue* getValue() { diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp b/be/src/vec/exprs/table_function/vexplode_json_array.cpp index 7594d9a5cc6..3c22ef4e078 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp +++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp @@ -63,7 +63,7 @@ void VExplodeJsonArrayTableFunction<DataImpl>::process_row(size_t row_idx) { StringRef text = _text_column->get_data_at(row_idx); if (text.data != nullptr) { if (WhichDataType(_text_datatype).is_json()) { - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(text.data, text.size); + JsonbDocument* doc = JsonbDocument::createDocument(text.data, text.size); if (doc && doc->getValue() && 
doc->getValue()->isArray()) { auto* a = (ArrayVal*)doc->getValue(); if (a->numElem() > 0) { diff --git a/be/src/vec/exprs/table_function/vexplode_json_object.cpp b/be/src/vec/exprs/table_function/vexplode_json_object.cpp index 38a00d60b19..1981f48f62c 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_object.cpp +++ b/be/src/vec/exprs/table_function/vexplode_json_object.cpp @@ -54,8 +54,8 @@ void VExplodeJsonObjectTableFunction::process_row(size_t row_idx) { StringRef text = _json_object_column->get_data_at(row_idx); if (text.data != nullptr) { - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(text.data, text.size); - if (!doc || !doc->getValue()) [[unlikely]] { + JsonbDocument* doc = JsonbDocument::createDocument(text.data, text.size); + if (UNLIKELY(!doc || !doc->getValue())) { // error jsonb, put null into output, cur_size = 0 , we will insert_default return; } diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index e4396c51c22..cf3ea7b0791 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -714,7 +714,7 @@ struct ConvertImplGenericFromJsonb { const bool is_dst_string = is_string_or_fixed_string(data_type_to); for (size_t i = 0; i < size; ++i) { const auto& val = col_from_string->get_data_at(i); - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(val.data, val.size); + JsonbDocument* doc = JsonbDocument::createDocument(val.data, val.size); if (UNLIKELY(!doc || !doc->getValue())) { (*vec_null_map_to)[i] = 1; col_to->insert_default(); @@ -862,7 +862,7 @@ struct ConvertImplFromJsonb { } // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(val.data, val.size); + JsonbDocument* doc = JsonbDocument::createDocument(val.data, val.size); if (UNLIKELY(!doc || !doc->getValue())) { null_map[i] = 1; res[i] = 0; diff --git a/be/src/vec/functions/function_jsonb.cpp 
b/be/src/vec/functions/function_jsonb.cpp index 0b0c5d8c45b..c262a6ce1ad 100644 --- a/be/src/vec/functions/function_jsonb.cpp +++ b/be/src/vec/functions/function_jsonb.cpp @@ -557,7 +557,7 @@ private: continue; } const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, l_size); + JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size); if (UNLIKELY(!doc || !doc->getValue())) { dst_arr.clear(); return Status::InvalidArgument("jsonb data is invalid"); @@ -665,7 +665,7 @@ private: static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const char* l_raw_str, int l_str_size, JsonbPath& path) { // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size); + JsonbDocument* doc = JsonbDocument::createDocument(l_raw_str, l_str_size); if (UNLIKELY(!doc || !doc->getValue())) { return; } @@ -760,7 +760,7 @@ private: } // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, l_size); + JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size); if (UNLIKELY(!doc || !doc->getValue())) { StringOP::push_null_string(i, res_data, res_offsets, null_map); return; @@ -886,7 +886,7 @@ public: writer->writeStartArray(); // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, l_size); + JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size); for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { if (UNLIKELY(!doc || !doc->getValue())) { @@ -1027,7 +1027,7 @@ private: } // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size); + JsonbDocument* doc = 
JsonbDocument::createDocument(l_raw_str, l_str_size); if (UNLIKELY(!doc || !doc->getValue())) { null_map[i] = 1; res[i] = 0; @@ -1406,8 +1406,7 @@ struct JsonbLengthUtil { } auto jsonb_value = jsonb_data_column->get_data_at(i); // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = - JsonbDocument::checkAndCreateDocument(jsonb_value.data, jsonb_value.size); + JsonbDocument* doc = JsonbDocument::createDocument(jsonb_value.data, jsonb_value.size); JsonbValue* value = doc->getValue()->findValue(path, nullptr); if (UNLIKELY(!value)) { null_map->get_data()[i] = 1; @@ -1542,9 +1541,9 @@ struct JsonbContainsUtil { } // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory JsonbDocument* doc1 = - JsonbDocument::checkAndCreateDocument(jsonb_value1.data, jsonb_value1.size); + JsonbDocument::createDocument(jsonb_value1.data, jsonb_value1.size); JsonbDocument* doc2 = - JsonbDocument::checkAndCreateDocument(jsonb_value2.data, jsonb_value2.size); + JsonbDocument::createDocument(jsonb_value2.data, jsonb_value2.size); JsonbValue* value1 = doc1->getValue()->findValue(path, nullptr); JsonbValue* value2 = doc2->getValue(); diff --git a/be/src/vec/jsonb/serialize.cpp b/be/src/vec/jsonb/serialize.cpp index 1c7d0cad207..0dd0d342918 100644 --- a/be/src/vec/jsonb/serialize.cpp +++ b/be/src/vec/jsonb/serialize.cpp @@ -86,7 +86,7 @@ void JsonbSerializeUtil::jsonb_to_block(const DataTypeSerDeSPtrs& serdes, const const std::unordered_map<uint32_t, uint32_t>& col_id_to_idx, Block& dst, const std::vector<std::string>& default_values) { - auto pdoc = JsonbDocument::checkAndCreateDocument(data, size); + auto pdoc = JsonbDocument::createDocument(data, size); JsonbDocument& doc = *pdoc; size_t num_rows = dst.rows(); size_t filled_columns = 0; @@ -120,4 +120,4 @@ void JsonbSerializeUtil::jsonb_to_block(const DataTypeSerDeSPtrs& serdes, const } } -} // namespace doris::vectorized +} // namespace doris::vectorized \ No 
newline at end of file diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index 20818f6b96a..5bb00832cdf 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -139,7 +139,7 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co return std::make_unique<OlapColumnDataConvertorDecimalV3<Decimal256>>(); } case FieldType::OLAP_FIELD_TYPE_JSONB: { - return std::make_unique<OlapColumnDataConvertorVarChar>(true, true); + return std::make_unique<OlapColumnDataConvertorVarChar>(true); } case FieldType::OLAP_FIELD_TYPE_BOOL: { return std::make_unique<OlapColumnDataConvertorSimple<vectorized::UInt8>>(); @@ -204,10 +204,7 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co void OlapBlockDataConvertor::set_source_content(const vectorized::Block* block, size_t row_pos, size_t num_rows) { DCHECK(block && num_rows > 0 && row_pos + num_rows <= block->rows() && - block->columns() == _convertors.size()) - << "block=" << block->dump_structure() << ", block rows=" << block->rows() - << ", row_pos=" << row_pos << ", num_rows=" << num_rows - << ", convertors.size=" << _convertors.size(); + block->columns() == _convertors.size()); size_t cid = 0; for (const auto& typed_column : *block) { if (typed_column.column->size() != block->rows()) { @@ -604,8 +601,8 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorChar::convert_to_olap() { // class OlapBlockDataConvertor::OlapColumnDataConvertorVarChar OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::OlapColumnDataConvertorVarChar( - bool check_length, bool is_jsonb) - : _check_length(check_length), _is_jsonb(is_jsonb) {} + bool check_length) + : _check_length(check_length) {} void OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::set_source_column( const ColumnWithTypeAndName& typed_column, size_t row_pos, size_t num_rows) { @@ -649,12 +646,6 @@ Status 
OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap( "Not support string len over than " "`string_type_length_soft_limit_bytes` in vec engine."); } - // Make sure that the json binary data written in is the correct jsonb value. - if (_is_jsonb && - !doris::JsonbDocument::checkAndCreateDocument(slice->data, slice->size)) { - return Status::InvalidArgument("invalid json binary value: {}", - std::string_view(slice->data, slice->size)); - } } else { // TODO: this may not be necessary, check and remove later slice->data = nullptr; @@ -676,12 +667,6 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap( "Not support string len over than `string_type_length_soft_limit_bytes`" " in vec engine."); } - // Make sure that the json binary data written in is the correct jsonb value. - if (_is_jsonb && - !doris::JsonbDocument::checkAndCreateDocument(slice->data, slice->size)) { - return Status::InvalidArgument("invalid json binary value: {}", - std::string_view(slice->data, slice->size)); - } string_offset = *offset_cur; ++slice; ++offset_cur; diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index 66a91016b39..764a7a4a7c3 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ b/be/src/vec/olap/olap_data_convertor.h @@ -197,7 +197,7 @@ private: class OlapColumnDataConvertorVarChar : public OlapColumnDataConvertorBase { public: - OlapColumnDataConvertorVarChar(bool check_length, bool is_jsonb = false); + OlapColumnDataConvertorVarChar(bool check_length); ~OlapColumnDataConvertorVarChar() override = default; void set_source_column(const ColumnWithTypeAndName& typed_column, size_t row_pos, @@ -209,8 +209,6 @@ private: private: bool _check_length; - bool _is_jsonb = - false; // Make sure that the json binary data written in is the correct jsonb value. 
PaddedPODArray<Slice> _slice; }; diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp b/be/test/vec/data_types/serde/data_type_serde_test.cpp index 3c9498f1d6d..82674b0aa44 100644 --- a/be/test/vec/data_types/serde/data_type_serde_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp @@ -240,7 +240,7 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) { jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(), jsonb_writer.getOutput()->getSize()); StringRef jsonb_data = jsonb_column->get_data_at(0); - auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size); + auto pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size); JsonbDocument& doc = *pdoc; for (auto it = doc->begin(); it != doc->end(); ++it) { serde->read_one_cell_from_jsonb(*vec, it->value()); @@ -270,7 +270,7 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) { jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(), jsonb_writer.getOutput()->getSize()); StringRef jsonb_data = jsonb_column->get_data_at(0); - auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size); + auto pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size); JsonbDocument& doc = *pdoc; for (auto it = doc->begin(); it != doc->end(); ++it) { serde->read_one_cell_from_jsonb(*vec, it->value()); diff --git a/be/test/vec/olap/jsonb_value_test.cpp b/be/test/vec/olap/jsonb_value_test.cpp deleted file mode 100644 index 3111163c0be..00000000000 --- a/be/test/vec/olap/jsonb_value_test.cpp +++ /dev/null @@ -1,242 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include <gtest/gtest-message.h> -#include <gtest/gtest-test-part.h> - -#include <string> - -#include "gtest/gtest_pred_impl.h" -#include "vec/columns/column_string.h" -#include "vec/common/string_ref.h" -#include "vec/core/columns_with_type_and_name.h" -#include "vec/data_types/serde/data_type_serde.h" -#include "vec/olap/olap_data_convertor.h" - -namespace doris::vectorized { - -TEST(JsonbValueConvertorTest, JsonbValueValid) { - // 1. create jsonb column with serde - auto input = ColumnString::create(); - auto dataTypeJsonb = std::make_shared<DataTypeJsonb>(); - auto serde = dataTypeJsonb->get_serde(); - vectorized::DataTypeSerDe::FormatOptions options; - - // Test case 1 - std::string str1 = "{\"key1\": \"value1\"}"; - Slice slice1 = Slice(str1.data(), str1.length()); - auto st1 = serde->deserialize_one_cell_from_json(*input, slice1, options); - ASSERT_TRUE(st1.ok()); - ASSERT_EQ(input->size(), 1); - - // Test case 2 - std::string str2 = "{\"key2\": 12345}"; - Slice slice2 = Slice(str2.data(), str2.length()); - auto st2 = serde->deserialize_one_cell_from_json(*input, slice2, options); - ASSERT_TRUE(st2.ok()); - ASSERT_EQ(input->size(), 2); - - // Test case 3 - std::string str3 = "{\"key3\": true}"; - Slice slice3 = Slice(str3.data(), str3.length()); - auto st3 = serde->deserialize_one_cell_from_json(*input, slice3, options); - ASSERT_TRUE(st3.ok()); - ASSERT_EQ(input->size(), 3); - - // Test case 4 - std::string str4 = "{\"key4\": [1, 2, 3]}"; - Slice slice4 = Slice(str4.data(), str4.length()); - auto st4 = 
serde->deserialize_one_cell_from_json(*input, slice4, options); - ASSERT_TRUE(st4.ok()); - ASSERT_EQ(input->size(), 4); - - // Test case 5 - std::string str5 = "{\"key5\": {\"subkey\": \"subvalue\"}}"; - Slice slice5 = Slice(str5.data(), str5.length()); - auto st5 = serde->deserialize_one_cell_from_json(*input, slice5, options); - ASSERT_TRUE(st5.ok()); - ASSERT_EQ(input->size(), 5); - - // 2. put column into block - vectorized::ColumnWithTypeAndName argument(input->assume_mutable(), dataTypeJsonb, - "jsonb_column"); - Block block; - block.insert(argument); - - // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert column data to segment file data - auto _olap_data_convertor = std::make_unique<OlapBlockDataConvertor>(); - TabletColumn jsonb_column = TabletColumn(); - jsonb_column.set_type(FieldType::OLAP_FIELD_TYPE_JSONB); - _olap_data_convertor->add_column_data_convertor(jsonb_column); - _olap_data_convertor->set_source_content(&block, 0, 5); - auto [status, column] = _olap_data_convertor->convert_column_data(0); - ASSERT_TRUE(status.ok()); - ASSERT_NE(column, nullptr); - - // test with null map - auto nullable_col = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()); - auto nullable_dataTypeJsonb = make_nullable(std::make_shared<DataTypeJsonb>()); - auto serde1 = nullable_dataTypeJsonb->get_serde(); - - auto st = serde1->deserialize_one_cell_from_json(*nullable_col, slice1, options); - ASSERT_TRUE(st.ok()); - ASSERT_EQ(1, nullable_col->size()); - - // insert null - nullable_col->insert_default(); - ASSERT_EQ(2, nullable_col->size()); - - st = serde1->deserialize_one_cell_from_json(*nullable_col, slice2, options); - ASSERT_TRUE(st.ok()); - ASSERT_EQ(3, nullable_col->size()); - - // deserialize null - Slice slice_null = "NULL"; - st = serde1->deserialize_one_cell_from_json(*nullable_col, slice_null, options); - ASSERT_TRUE(st.ok()); - ASSERT_EQ(4, nullable_col->size()); - - st = serde1->deserialize_one_cell_from_json(*nullable_col, 
slice3, options); - ASSERT_TRUE(st.ok()); - ASSERT_EQ(5, nullable_col->size()); - - // 2. put column into block - vectorized::ColumnWithTypeAndName argument1(nullable_col->assume_mutable(), - nullable_dataTypeJsonb, "jsonb_column_null"); - block.clear(); - block.insert(argument1); - - // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert column data to segment file data - _olap_data_convertor->reset(); - _olap_data_convertor->add_column_data_convertor(jsonb_column); - _olap_data_convertor->set_source_content(&block, 0, 5); - auto [status1, column1] = _olap_data_convertor->convert_column_data(0); - ASSERT_TRUE(status1.ok()) << status1.to_string(); - ASSERT_NE(column1, nullptr); -} - -TEST(JsonbValueConvertorTest, JsonbValueInvalid) { - // 1. create jsonb column with serde - auto input = ColumnString::create(); - auto dataTypeJsonb = std::make_shared<DataTypeJsonb>(); - auto serde = dataTypeJsonb->get_serde(); - vectorized::DataTypeSerDe::FormatOptions options; - - // Test case 1 - std::string str1 = "{\"key1\": \"value1\"}"; - Slice slice1 = Slice(str1.data(), str1.length()); - auto st1 = serde->deserialize_one_cell_from_json(*input, slice1, options); - ASSERT_TRUE(st1.ok()); - ASSERT_EQ(input->size(), 1); - - // Test case 2 - std::string str2 = "{\"key2\": 12345}"; - Slice slice2 = Slice(str2.data(), str2.length()); - auto st2 = serde->deserialize_one_cell_from_json(*input, slice2, options); - ASSERT_TRUE(st2.ok()); - ASSERT_EQ(input->size(), 2); - - // Test case 3 - std::string str3 = "{\"key3\": true}"; - Slice slice3 = Slice(str3.data(), str3.length()); - auto st3 = serde->deserialize_one_cell_from_json(*input, slice3, options); - ASSERT_TRUE(st3.ok()); - ASSERT_EQ(input->size(), 3); - - // Test case 4 - std::string str4 = "{\"key4\": [1, 2, 3]}"; - Slice slice4 = Slice(str4.data(), str4.length()); - auto st4 = serde->deserialize_one_cell_from_json(*input, slice4, options); - ASSERT_TRUE(st4.ok()); - ASSERT_EQ(input->size(), 4); - // invalid jsonb 
data - auto& data = input->get_chars(); - data.emplace_back('s'); - - // Test case 5 - std::string str5 = "{\"key5\": {\"subkey\": \"subvalue\"}}"; - Slice slice5 = Slice(str5.data(), str5.length()); - auto st5 = serde->deserialize_one_cell_from_json(*input, slice5, options); - ASSERT_TRUE(st5.ok()); - ASSERT_EQ(input->size(), 5); - - // 2. put column into block - vectorized::ColumnWithTypeAndName argument(input->assume_mutable(), dataTypeJsonb, - "jsonb_column"); - Block block; - block.insert(argument); - - // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert column data to segment file data - auto _olap_data_convertor = std::make_unique<OlapBlockDataConvertor>(); - TabletColumn jsonb_column = TabletColumn(); - jsonb_column.set_type(FieldType::OLAP_FIELD_TYPE_JSONB); - _olap_data_convertor->add_column_data_convertor(jsonb_column); - _olap_data_convertor->set_source_content(&block, 0, 5); - auto [status, column] = _olap_data_convertor->convert_column_data(0); - // invalid will make error - ASSERT_FALSE(status.ok()); - ASSERT_TRUE(status.to_string().find("invalid json binary value") != std::string::npos); - ASSERT_NE(column, nullptr); - - // test with null map - auto nullable_col = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()); - auto nullable_dataTypeJsonb = make_nullable(std::make_shared<DataTypeJsonb>()); - auto serde1 = nullable_dataTypeJsonb->get_serde(); - - auto st = serde1->deserialize_one_cell_from_json(*nullable_col, slice1, options); - ASSERT_TRUE(st.ok()); - ASSERT_EQ(1, nullable_col->size()); - - // insert null - nullable_col->insert_default(); - ASSERT_EQ(2, nullable_col->size()); - - st = serde1->deserialize_one_cell_from_json(*nullable_col, slice2, options); - ASSERT_TRUE(st.ok()); - ASSERT_EQ(3, nullable_col->size()); - // invalid jsonb data - auto string_data = assert_cast<ColumnString*>(nullable_col->get_nested_column_ptr().get()); - auto& dat = string_data->get_chars(); - dat.emplace_back('s'); - - // 
deserialize null - Slice slice_null = "NULL"; - st = serde1->deserialize_one_cell_from_json(*nullable_col, slice_null, options); - ASSERT_TRUE(st.ok()); - ASSERT_EQ(4, nullable_col->size()); - - st = serde1->deserialize_one_cell_from_json(*nullable_col, slice3, options); - ASSERT_TRUE(st.ok()); - ASSERT_EQ(5, nullable_col->size()); - - // 2. put column into block - vectorized::ColumnWithTypeAndName argument1(nullable_col->assume_mutable(), - nullable_dataTypeJsonb, "jsonb_column_null"); - block.clear(); - block.insert(argument1); - - // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert column data to segment file data - _olap_data_convertor->reset(); - _olap_data_convertor->add_column_data_convertor(jsonb_column); - _olap_data_convertor->set_source_content(&block, 0, 5); - auto [status1, column1] = _olap_data_convertor->convert_column_data(0); - ASSERT_FALSE(status.ok()); - ASSERT_TRUE(status.to_string().find("invalid json binary value") != std::string::npos); - ASSERT_NE(column, nullptr); -} - -} // namespace doris::vectorized --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org