This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new e5a2b0eea81 Revert "[cherry-pick](jsonb) add a check for jsonb value 
to avoid  invalid jsonb value write into segment file " (#49058)
e5a2b0eea81 is described below

commit e5a2b0eea8116c25258d66570dd605705f125009
Author: amory <wangqian...@selectdb.com>
AuthorDate: Fri Mar 14 17:41:06 2025 +0800

    Revert "[cherry-pick](jsonb) add a check for jsonb value to avoid  invalid 
jsonb value write into segment file " (#49058)
    
    Reverts apache/doris#48729
    Temporarily revert this PR because
    PartialUpdateInfo::_generate_default_values_for_missing_cids uses an empty
    string, which makes this check fail.
---
 be/src/util/jsonb_document.h                       |   4 +-
 be/src/util/jsonb_utils.h                          |   2 +-
 be/src/util/jsonb_writer.h                         |   3 +-
 .../exprs/table_function/vexplode_json_array.cpp   |   2 +-
 .../exprs/table_function/vexplode_json_object.cpp  |   4 +-
 be/src/vec/functions/function_cast.h               |   4 +-
 be/src/vec/functions/function_jsonb.cpp            |  17 +-
 be/src/vec/jsonb/serialize.cpp                     |   4 +-
 be/src/vec/olap/olap_data_convertor.cpp            |  23 +-
 be/src/vec/olap/olap_data_convertor.h              |   4 +-
 .../vec/data_types/serde/data_type_serde_test.cpp  |   4 +-
 be/test/vec/olap/jsonb_value_test.cpp              | 242 ---------------------
 12 files changed, 26 insertions(+), 287 deletions(-)

diff --git a/be/src/util/jsonb_document.h b/be/src/util/jsonb_document.h
index a8c59b93544..2a9cf8a8191 100644
--- a/be/src/util/jsonb_document.h
+++ b/be/src/util/jsonb_document.h
@@ -177,7 +177,7 @@ public:
     static JsonbDocument* makeDocument(char* pb, uint32_t size, const 
JsonbValue* rval);
 
     // create an JsonbDocument object from JSONB packed bytes
-    static JsonbDocument* checkAndCreateDocument(const char* pb, size_t size);
+    static JsonbDocument* createDocument(const char* pb, uint32_t size);
 
     // create an JsonbValue from JSONB packed bytes
     static JsonbValue* createValue(const char* pb, uint32_t size);
@@ -1138,7 +1138,7 @@ inline JsonbDocument* JsonbDocument::makeDocument(char* 
pb, uint32_t size, const
     return doc;
 }
 
-inline JsonbDocument* JsonbDocument::checkAndCreateDocument(const char* pb, 
size_t size) {
+inline JsonbDocument* JsonbDocument::createDocument(const char* pb, uint32_t 
size) {
     if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
         return nullptr;
     }
diff --git a/be/src/util/jsonb_utils.h b/be/src/util/jsonb_utils.h
index c32588e2610..7dba0dca3af 100644
--- a/be/src/util/jsonb_utils.h
+++ b/be/src/util/jsonb_utils.h
@@ -40,7 +40,7 @@ public:
 
     // get json string
     const std::string to_json_string(const char* data, size_t size) {
-        JsonbDocument* pdoc = 
doris::JsonbDocument::checkAndCreateDocument(data, size);
+        JsonbDocument* pdoc = doris::JsonbDocument::createDocument(data, size);
         if (!pdoc) {
             LOG(FATAL) << "invalid json binary value: " << 
std::string_view(data, size);
         }
diff --git a/be/src/util/jsonb_writer.h b/be/src/util/jsonb_writer.h
index f92d8a4096b..61bd28bb783 100644
--- a/be/src/util/jsonb_writer.h
+++ b/be/src/util/jsonb_writer.h
@@ -479,8 +479,7 @@ public:
 
     OS_TYPE* getOutput() { return os_; }
     JsonbDocument* getDocument() {
-        return JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(),
-                                                     getOutput()->getSize());
+        return JsonbDocument::createDocument(getOutput()->getBuffer(), 
getOutput()->getSize());
     }
 
     JsonbValue* getValue() {
diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp 
b/be/src/vec/exprs/table_function/vexplode_json_array.cpp
index 7594d9a5cc6..3c22ef4e078 100644
--- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp
@@ -63,7 +63,7 @@ void 
VExplodeJsonArrayTableFunction<DataImpl>::process_row(size_t row_idx) {
     StringRef text = _text_column->get_data_at(row_idx);
     if (text.data != nullptr) {
         if (WhichDataType(_text_datatype).is_json()) {
-            JsonbDocument* doc = 
JsonbDocument::checkAndCreateDocument(text.data, text.size);
+            JsonbDocument* doc = JsonbDocument::createDocument(text.data, 
text.size);
             if (doc && doc->getValue() && doc->getValue()->isArray()) {
                 auto* a = (ArrayVal*)doc->getValue();
                 if (a->numElem() > 0) {
diff --git a/be/src/vec/exprs/table_function/vexplode_json_object.cpp 
b/be/src/vec/exprs/table_function/vexplode_json_object.cpp
index 38a00d60b19..1981f48f62c 100644
--- a/be/src/vec/exprs/table_function/vexplode_json_object.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_json_object.cpp
@@ -54,8 +54,8 @@ void VExplodeJsonObjectTableFunction::process_row(size_t 
row_idx) {
 
     StringRef text = _json_object_column->get_data_at(row_idx);
     if (text.data != nullptr) {
-        JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(text.data, 
text.size);
-        if (!doc || !doc->getValue()) [[unlikely]] {
+        JsonbDocument* doc = JsonbDocument::createDocument(text.data, 
text.size);
+        if (UNLIKELY(!doc || !doc->getValue())) {
             // error jsonb, put null into output, cur_size = 0 , we will 
insert_default
             return;
         }
diff --git a/be/src/vec/functions/function_cast.h 
b/be/src/vec/functions/function_cast.h
index e4396c51c22..cf3ea7b0791 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -714,7 +714,7 @@ struct ConvertImplGenericFromJsonb {
             const bool is_dst_string = is_string_or_fixed_string(data_type_to);
             for (size_t i = 0; i < size; ++i) {
                 const auto& val = col_from_string->get_data_at(i);
-                JsonbDocument* doc = 
JsonbDocument::checkAndCreateDocument(val.data, val.size);
+                JsonbDocument* doc = JsonbDocument::createDocument(val.data, 
val.size);
                 if (UNLIKELY(!doc || !doc->getValue())) {
                     (*vec_null_map_to)[i] = 1;
                     col_to->insert_default();
@@ -862,7 +862,7 @@ struct ConvertImplFromJsonb {
                 }
 
                 // doc is NOT necessary to be deleted since JsonbDocument will 
not allocate memory
-                JsonbDocument* doc = 
JsonbDocument::checkAndCreateDocument(val.data, val.size);
+                JsonbDocument* doc = JsonbDocument::createDocument(val.data, 
val.size);
                 if (UNLIKELY(!doc || !doc->getValue())) {
                     null_map[i] = 1;
                     res[i] = 0;
diff --git a/be/src/vec/functions/function_jsonb.cpp 
b/be/src/vec/functions/function_jsonb.cpp
index 0b0c5d8c45b..c262a6ce1ad 100644
--- a/be/src/vec/functions/function_jsonb.cpp
+++ b/be/src/vec/functions/function_jsonb.cpp
@@ -557,7 +557,7 @@ private:
                 continue;
             }
             const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
-            JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, 
l_size);
+            JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
             if (UNLIKELY(!doc || !doc->getValue())) {
                 dst_arr.clear();
                 return Status::InvalidArgument("jsonb data is invalid");
@@ -665,7 +665,7 @@ private:
     static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const 
char* l_raw_str,
                                               int l_str_size, JsonbPath& path) 
{
         // doc is NOT necessary to be deleted since JsonbDocument will not 
allocate memory
-        JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str, 
l_str_size);
+        JsonbDocument* doc = JsonbDocument::createDocument(l_raw_str, 
l_str_size);
         if (UNLIKELY(!doc || !doc->getValue())) {
             return;
         }
@@ -760,7 +760,7 @@ private:
         }
 
         // doc is NOT necessary to be deleted since JsonbDocument will not 
allocate memory
-        JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, 
l_size);
+        JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
         if (UNLIKELY(!doc || !doc->getValue())) {
             StringOP::push_null_string(i, res_data, res_offsets, null_map);
             return;
@@ -886,7 +886,7 @@ public:
                 writer->writeStartArray();
 
                 // doc is NOT necessary to be deleted since JsonbDocument will 
not allocate memory
-                JsonbDocument* doc = 
JsonbDocument::checkAndCreateDocument(l_raw, l_size);
+                JsonbDocument* doc = JsonbDocument::createDocument(l_raw, 
l_size);
 
                 for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
                     if (UNLIKELY(!doc || !doc->getValue())) {
@@ -1027,7 +1027,7 @@ private:
         }
 
         // doc is NOT necessary to be deleted since JsonbDocument will not 
allocate memory
-        JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str, 
l_str_size);
+        JsonbDocument* doc = JsonbDocument::createDocument(l_raw_str, 
l_str_size);
         if (UNLIKELY(!doc || !doc->getValue())) {
             null_map[i] = 1;
             res[i] = 0;
@@ -1406,8 +1406,7 @@ struct JsonbLengthUtil {
             }
             auto jsonb_value = jsonb_data_column->get_data_at(i);
             // doc is NOT necessary to be deleted since JsonbDocument will not 
allocate memory
-            JsonbDocument* doc =
-                    JsonbDocument::checkAndCreateDocument(jsonb_value.data, 
jsonb_value.size);
+            JsonbDocument* doc = 
JsonbDocument::createDocument(jsonb_value.data, jsonb_value.size);
             JsonbValue* value = doc->getValue()->findValue(path, nullptr);
             if (UNLIKELY(!value)) {
                 null_map->get_data()[i] = 1;
@@ -1542,9 +1541,9 @@ struct JsonbContainsUtil {
             }
             // doc is NOT necessary to be deleted since JsonbDocument will not 
allocate memory
             JsonbDocument* doc1 =
-                    JsonbDocument::checkAndCreateDocument(jsonb_value1.data, 
jsonb_value1.size);
+                    JsonbDocument::createDocument(jsonb_value1.data, 
jsonb_value1.size);
             JsonbDocument* doc2 =
-                    JsonbDocument::checkAndCreateDocument(jsonb_value2.data, 
jsonb_value2.size);
+                    JsonbDocument::createDocument(jsonb_value2.data, 
jsonb_value2.size);
 
             JsonbValue* value1 = doc1->getValue()->findValue(path, nullptr);
             JsonbValue* value2 = doc2->getValue();
diff --git a/be/src/vec/jsonb/serialize.cpp b/be/src/vec/jsonb/serialize.cpp
index 1c7d0cad207..0dd0d342918 100644
--- a/be/src/vec/jsonb/serialize.cpp
+++ b/be/src/vec/jsonb/serialize.cpp
@@ -86,7 +86,7 @@ void JsonbSerializeUtil::jsonb_to_block(const 
DataTypeSerDeSPtrs& serdes, const
                                         const std::unordered_map<uint32_t, 
uint32_t>& col_id_to_idx,
                                         Block& dst,
                                         const std::vector<std::string>& 
default_values) {
-    auto pdoc = JsonbDocument::checkAndCreateDocument(data, size);
+    auto pdoc = JsonbDocument::createDocument(data, size);
     JsonbDocument& doc = *pdoc;
     size_t num_rows = dst.rows();
     size_t filled_columns = 0;
@@ -120,4 +120,4 @@ void JsonbSerializeUtil::jsonb_to_block(const 
DataTypeSerDeSPtrs& serdes, const
     }
 }
 
-} // namespace doris::vectorized
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/olap/olap_data_convertor.cpp 
b/be/src/vec/olap/olap_data_convertor.cpp
index 20818f6b96a..5bb00832cdf 100644
--- a/be/src/vec/olap/olap_data_convertor.cpp
+++ b/be/src/vec/olap/olap_data_convertor.cpp
@@ -139,7 +139,7 @@ 
OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co
         return 
std::make_unique<OlapColumnDataConvertorDecimalV3<Decimal256>>();
     }
     case FieldType::OLAP_FIELD_TYPE_JSONB: {
-        return std::make_unique<OlapColumnDataConvertorVarChar>(true, true);
+        return std::make_unique<OlapColumnDataConvertorVarChar>(true);
     }
     case FieldType::OLAP_FIELD_TYPE_BOOL: {
         return 
std::make_unique<OlapColumnDataConvertorSimple<vectorized::UInt8>>();
@@ -204,10 +204,7 @@ 
OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co
 void OlapBlockDataConvertor::set_source_content(const vectorized::Block* 
block, size_t row_pos,
                                                 size_t num_rows) {
     DCHECK(block && num_rows > 0 && row_pos + num_rows <= block->rows() &&
-           block->columns() == _convertors.size())
-            << "block=" << block->dump_structure() << ", block rows=" << 
block->rows()
-            << ", row_pos=" << row_pos << ", num_rows=" << num_rows
-            << ", convertors.size=" << _convertors.size();
+           block->columns() == _convertors.size());
     size_t cid = 0;
     for (const auto& typed_column : *block) {
         if (typed_column.column->size() != block->rows()) {
@@ -604,8 +601,8 @@ Status 
OlapBlockDataConvertor::OlapColumnDataConvertorChar::convert_to_olap() {
 
 // class OlapBlockDataConvertor::OlapColumnDataConvertorVarChar
 
OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::OlapColumnDataConvertorVarChar(
-        bool check_length, bool is_jsonb)
-        : _check_length(check_length), _is_jsonb(is_jsonb) {}
+        bool check_length)
+        : _check_length(check_length) {}
 
 void OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::set_source_column(
         const ColumnWithTypeAndName& typed_column, size_t row_pos, size_t 
num_rows) {
@@ -649,12 +646,6 @@ Status 
OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
                             "Not support string len over than "
                             "`string_type_length_soft_limit_bytes` in vec 
engine.");
                 }
-                // Make sure that the json binary data written in is the 
correct jsonb value.
-                if (_is_jsonb &&
-                    !doris::JsonbDocument::checkAndCreateDocument(slice->data, 
slice->size)) {
-                    return Status::InvalidArgument("invalid json binary value: 
{}",
-                                                   
std::string_view(slice->data, slice->size));
-                }
             } else {
                 // TODO: this may not be necessary, check and remove later
                 slice->data = nullptr;
@@ -676,12 +667,6 @@ Status 
OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
                         "Not support string len over than 
`string_type_length_soft_limit_bytes`"
                         " in vec engine.");
             }
-            // Make sure that the json binary data written in is the correct 
jsonb value.
-            if (_is_jsonb &&
-                !doris::JsonbDocument::checkAndCreateDocument(slice->data, 
slice->size)) {
-                return Status::InvalidArgument("invalid json binary value: {}",
-                                               std::string_view(slice->data, 
slice->size));
-            }
             string_offset = *offset_cur;
             ++slice;
             ++offset_cur;
diff --git a/be/src/vec/olap/olap_data_convertor.h 
b/be/src/vec/olap/olap_data_convertor.h
index 66a91016b39..764a7a4a7c3 100644
--- a/be/src/vec/olap/olap_data_convertor.h
+++ b/be/src/vec/olap/olap_data_convertor.h
@@ -197,7 +197,7 @@ private:
 
     class OlapColumnDataConvertorVarChar : public OlapColumnDataConvertorBase {
     public:
-        OlapColumnDataConvertorVarChar(bool check_length, bool is_jsonb = 
false);
+        OlapColumnDataConvertorVarChar(bool check_length);
         ~OlapColumnDataConvertorVarChar() override = default;
 
         void set_source_column(const ColumnWithTypeAndName& typed_column, 
size_t row_pos,
@@ -209,8 +209,6 @@ private:
 
     private:
         bool _check_length;
-        bool _is_jsonb =
-                false; // Make sure that the json binary data written in is 
the correct jsonb value.
         PaddedPODArray<Slice> _slice;
     };
 
diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp 
b/be/test/vec/data_types/serde/data_type_serde_test.cpp
index 3c9498f1d6d..82674b0aa44 100644
--- a/be/test/vec/data_types/serde/data_type_serde_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp
@@ -240,7 +240,7 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
         jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
                                   jsonb_writer.getOutput()->getSize());
         StringRef jsonb_data = jsonb_column->get_data_at(0);
-        auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, 
jsonb_data.size);
+        auto pdoc = JsonbDocument::createDocument(jsonb_data.data, 
jsonb_data.size);
         JsonbDocument& doc = *pdoc;
         for (auto it = doc->begin(); it != doc->end(); ++it) {
             serde->read_one_cell_from_jsonb(*vec, it->value());
@@ -270,7 +270,7 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
         jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
                                   jsonb_writer.getOutput()->getSize());
         StringRef jsonb_data = jsonb_column->get_data_at(0);
-        auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, 
jsonb_data.size);
+        auto pdoc = JsonbDocument::createDocument(jsonb_data.data, 
jsonb_data.size);
         JsonbDocument& doc = *pdoc;
         for (auto it = doc->begin(); it != doc->end(); ++it) {
             serde->read_one_cell_from_jsonb(*vec, it->value());
diff --git a/be/test/vec/olap/jsonb_value_test.cpp 
b/be/test/vec/olap/jsonb_value_test.cpp
deleted file mode 100644
index 3111163c0be..00000000000
--- a/be/test/vec/olap/jsonb_value_test.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest-message.h>
-#include <gtest/gtest-test-part.h>
-
-#include <string>
-
-#include "gtest/gtest_pred_impl.h"
-#include "vec/columns/column_string.h"
-#include "vec/common/string_ref.h"
-#include "vec/core/columns_with_type_and_name.h"
-#include "vec/data_types/serde/data_type_serde.h"
-#include "vec/olap/olap_data_convertor.h"
-
-namespace doris::vectorized {
-
-TEST(JsonbValueConvertorTest, JsonbValueValid) {
-    // 1. create jsonb column with serde
-    auto input = ColumnString::create();
-    auto dataTypeJsonb = std::make_shared<DataTypeJsonb>();
-    auto serde = dataTypeJsonb->get_serde();
-    vectorized::DataTypeSerDe::FormatOptions options;
-
-    // Test case 1
-    std::string str1 = "{\"key1\": \"value1\"}";
-    Slice slice1 = Slice(str1.data(), str1.length());
-    auto st1 = serde->deserialize_one_cell_from_json(*input, slice1, options);
-    ASSERT_TRUE(st1.ok());
-    ASSERT_EQ(input->size(), 1);
-
-    // Test case 2
-    std::string str2 = "{\"key2\": 12345}";
-    Slice slice2 = Slice(str2.data(), str2.length());
-    auto st2 = serde->deserialize_one_cell_from_json(*input, slice2, options);
-    ASSERT_TRUE(st2.ok());
-    ASSERT_EQ(input->size(), 2);
-
-    // Test case 3
-    std::string str3 = "{\"key3\": true}";
-    Slice slice3 = Slice(str3.data(), str3.length());
-    auto st3 = serde->deserialize_one_cell_from_json(*input, slice3, options);
-    ASSERT_TRUE(st3.ok());
-    ASSERT_EQ(input->size(), 3);
-
-    // Test case 4
-    std::string str4 = "{\"key4\": [1, 2, 3]}";
-    Slice slice4 = Slice(str4.data(), str4.length());
-    auto st4 = serde->deserialize_one_cell_from_json(*input, slice4, options);
-    ASSERT_TRUE(st4.ok());
-    ASSERT_EQ(input->size(), 4);
-
-    // Test case 5
-    std::string str5 = "{\"key5\": {\"subkey\": \"subvalue\"}}";
-    Slice slice5 = Slice(str5.data(), str5.length());
-    auto st5 = serde->deserialize_one_cell_from_json(*input, slice5, options);
-    ASSERT_TRUE(st5.ok());
-    ASSERT_EQ(input->size(), 5);
-
-    // 2. put column into block
-    vectorized::ColumnWithTypeAndName argument(input->assume_mutable(), 
dataTypeJsonb,
-                                               "jsonb_column");
-    Block block;
-    block.insert(argument);
-
-    // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert 
column data to segment file data
-    auto _olap_data_convertor = std::make_unique<OlapBlockDataConvertor>();
-    TabletColumn jsonb_column = TabletColumn();
-    jsonb_column.set_type(FieldType::OLAP_FIELD_TYPE_JSONB);
-    _olap_data_convertor->add_column_data_convertor(jsonb_column);
-    _olap_data_convertor->set_source_content(&block, 0, 5);
-    auto [status, column] = _olap_data_convertor->convert_column_data(0);
-    ASSERT_TRUE(status.ok());
-    ASSERT_NE(column, nullptr);
-
-    // test with null map
-    auto nullable_col = ColumnNullable::create(ColumnString::create(), 
ColumnUInt8::create());
-    auto nullable_dataTypeJsonb = 
make_nullable(std::make_shared<DataTypeJsonb>());
-    auto serde1 = nullable_dataTypeJsonb->get_serde();
-
-    auto st = serde1->deserialize_one_cell_from_json(*nullable_col, slice1, 
options);
-    ASSERT_TRUE(st.ok());
-    ASSERT_EQ(1, nullable_col->size());
-
-    // insert null
-    nullable_col->insert_default();
-    ASSERT_EQ(2, nullable_col->size());
-
-    st = serde1->deserialize_one_cell_from_json(*nullable_col, slice2, 
options);
-    ASSERT_TRUE(st.ok());
-    ASSERT_EQ(3, nullable_col->size());
-
-    // deserialize null
-    Slice slice_null = "NULL";
-    st = serde1->deserialize_one_cell_from_json(*nullable_col, slice_null, 
options);
-    ASSERT_TRUE(st.ok());
-    ASSERT_EQ(4, nullable_col->size());
-
-    st = serde1->deserialize_one_cell_from_json(*nullable_col, slice3, 
options);
-    ASSERT_TRUE(st.ok());
-    ASSERT_EQ(5, nullable_col->size());
-
-    // 2. put column into block
-    vectorized::ColumnWithTypeAndName argument1(nullable_col->assume_mutable(),
-                                                nullable_dataTypeJsonb, 
"jsonb_column_null");
-    block.clear();
-    block.insert(argument1);
-
-    // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert 
column data to segment file data
-    _olap_data_convertor->reset();
-    _olap_data_convertor->add_column_data_convertor(jsonb_column);
-    _olap_data_convertor->set_source_content(&block, 0, 5);
-    auto [status1, column1] = _olap_data_convertor->convert_column_data(0);
-    ASSERT_TRUE(status1.ok()) << status1.to_string();
-    ASSERT_NE(column1, nullptr);
-}
-
-TEST(JsonbValueConvertorTest, JsonbValueInvalid) {
-    // 1. create jsonb column with serde
-    auto input = ColumnString::create();
-    auto dataTypeJsonb = std::make_shared<DataTypeJsonb>();
-    auto serde = dataTypeJsonb->get_serde();
-    vectorized::DataTypeSerDe::FormatOptions options;
-
-    // Test case 1
-    std::string str1 = "{\"key1\": \"value1\"}";
-    Slice slice1 = Slice(str1.data(), str1.length());
-    auto st1 = serde->deserialize_one_cell_from_json(*input, slice1, options);
-    ASSERT_TRUE(st1.ok());
-    ASSERT_EQ(input->size(), 1);
-
-    // Test case 2
-    std::string str2 = "{\"key2\": 12345}";
-    Slice slice2 = Slice(str2.data(), str2.length());
-    auto st2 = serde->deserialize_one_cell_from_json(*input, slice2, options);
-    ASSERT_TRUE(st2.ok());
-    ASSERT_EQ(input->size(), 2);
-
-    // Test case 3
-    std::string str3 = "{\"key3\": true}";
-    Slice slice3 = Slice(str3.data(), str3.length());
-    auto st3 = serde->deserialize_one_cell_from_json(*input, slice3, options);
-    ASSERT_TRUE(st3.ok());
-    ASSERT_EQ(input->size(), 3);
-
-    // Test case 4
-    std::string str4 = "{\"key4\": [1, 2, 3]}";
-    Slice slice4 = Slice(str4.data(), str4.length());
-    auto st4 = serde->deserialize_one_cell_from_json(*input, slice4, options);
-    ASSERT_TRUE(st4.ok());
-    ASSERT_EQ(input->size(), 4);
-    // invalid jsonb data
-    auto& data = input->get_chars();
-    data.emplace_back('s');
-
-    // Test case 5
-    std::string str5 = "{\"key5\": {\"subkey\": \"subvalue\"}}";
-    Slice slice5 = Slice(str5.data(), str5.length());
-    auto st5 = serde->deserialize_one_cell_from_json(*input, slice5, options);
-    ASSERT_TRUE(st5.ok());
-    ASSERT_EQ(input->size(), 5);
-
-    // 2. put column into block
-    vectorized::ColumnWithTypeAndName argument(input->assume_mutable(), 
dataTypeJsonb,
-                                               "jsonb_column");
-    Block block;
-    block.insert(argument);
-
-    // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert 
column data to segment file data
-    auto _olap_data_convertor = std::make_unique<OlapBlockDataConvertor>();
-    TabletColumn jsonb_column = TabletColumn();
-    jsonb_column.set_type(FieldType::OLAP_FIELD_TYPE_JSONB);
-    _olap_data_convertor->add_column_data_convertor(jsonb_column);
-    _olap_data_convertor->set_source_content(&block, 0, 5);
-    auto [status, column] = _olap_data_convertor->convert_column_data(0);
-    // invalid will make error
-    ASSERT_FALSE(status.ok());
-    ASSERT_TRUE(status.to_string().find("invalid json binary value") != 
std::string::npos);
-    ASSERT_NE(column, nullptr);
-
-    // test with null map
-    auto nullable_col = ColumnNullable::create(ColumnString::create(), 
ColumnUInt8::create());
-    auto nullable_dataTypeJsonb = 
make_nullable(std::make_shared<DataTypeJsonb>());
-    auto serde1 = nullable_dataTypeJsonb->get_serde();
-
-    auto st = serde1->deserialize_one_cell_from_json(*nullable_col, slice1, 
options);
-    ASSERT_TRUE(st.ok());
-    ASSERT_EQ(1, nullable_col->size());
-
-    // insert null
-    nullable_col->insert_default();
-    ASSERT_EQ(2, nullable_col->size());
-
-    st = serde1->deserialize_one_cell_from_json(*nullable_col, slice2, 
options);
-    ASSERT_TRUE(st.ok());
-    ASSERT_EQ(3, nullable_col->size());
-    // invalid jsonb data
-    auto string_data = 
assert_cast<ColumnString*>(nullable_col->get_nested_column_ptr().get());
-    auto& dat = string_data->get_chars();
-    dat.emplace_back('s');
-
-    // deserialize null
-    Slice slice_null = "NULL";
-    st = serde1->deserialize_one_cell_from_json(*nullable_col, slice_null, 
options);
-    ASSERT_TRUE(st.ok());
-    ASSERT_EQ(4, nullable_col->size());
-
-    st = serde1->deserialize_one_cell_from_json(*nullable_col, slice3, 
options);
-    ASSERT_TRUE(st.ok());
-    ASSERT_EQ(5, nullable_col->size());
-
-    // 2. put column into block
-    vectorized::ColumnWithTypeAndName argument1(nullable_col->assume_mutable(),
-                                                nullable_dataTypeJsonb, 
"jsonb_column_null");
-    block.clear();
-    block.insert(argument1);
-
-    // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert 
column data to segment file data
-    _olap_data_convertor->reset();
-    _olap_data_convertor->add_column_data_convertor(jsonb_column);
-    _olap_data_convertor->set_source_content(&block, 0, 5);
-    auto [status1, column1] = _olap_data_convertor->convert_column_data(0);
-    ASSERT_FALSE(status.ok());
-    ASSERT_TRUE(status.to_string().find("invalid json binary value") != 
std::string::npos);
-    ASSERT_NE(column, nullptr);
-}
-
-} // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to