This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new b62273acb45 [refactor](jsonb) Make the checkAndCreateDocument function return a Status (#51673)
b62273acb45 is described below

commit b62273acb4504f1f76723e39161500ea66e3d594
Author: Jerry Hu <hushengg...@selectdb.com>
AuthorDate: Tue Jun 17 09:40:57 2025 +0800

    [refactor](jsonb) Make the checkAndCreateDocument function return a Status (#51673)
---
 be/src/util/jsonb_document.h                       | 250 ++++++++++-----------
 be/src/util/jsonb_utils.h                          |  24 +-
 be/src/util/jsonb_writer.h                         |  11 +-
 .../exprs/table_function/vexplode_json_array.cpp   |   5 +-
 .../exprs/table_function/vexplode_json_object.cpp  |   5 +-
 be/src/vec/functions/function_cast.h               |  12 +-
 be/src/vec/functions/function_jsonb.cpp            |  40 ++--
 be/src/vec/jsonb/serialize.cpp                     |   5 +-
 be/src/vec/olap/olap_data_convertor.cpp            |  16 +-
 .../vec/data_types/common_data_type_serder_test.h  |   5 +-
 .../vec/data_types/serde/data_type_serde_test.cpp  |   8 +-
 be/test/vec/olap/jsonb_value_test.cpp              |   6 +-
 12 files changed, 198 insertions(+), 189 deletions(-)

diff --git a/be/src/util/jsonb_document.h b/be/src/util/jsonb_document.h
index fb87114f886..7f50c4012ba 100644
--- a/be/src/util/jsonb_document.h
+++ b/be/src/util/jsonb_document.h
@@ -66,10 +66,6 @@
 #ifndef JSONB_JSONBDOCUMENT_H
 #define JSONB_JSONBDOCUMENT_H
 
-#include <assert.h>
-#include <stdint.h>
-#include <stdlib.h>
-
 #include <algorithm>
 #include <cctype>
 #include <charconv>
@@ -78,6 +74,7 @@
 #include <type_traits>
 
 #include "common/compiler_util.h" // IWYU pragma: keep
+#include "common/status.h"
 
 // #include "util/string_parser.hpp"
 
@@ -172,17 +169,14 @@ constexpr unsigned int ARRAY_CODE = 1;
  */
 class JsonbDocument {
 public:
-    // Prepare a document in the buffer
-    static JsonbDocument* makeDocument(char* pb, uint32_t size, JsonbType 
type);
-    static JsonbDocument* makeDocument(char* pb, uint32_t size, const 
JsonbValue* rval);
-
     // create an JsonbDocument object from JSONB packed bytes
-    static JsonbDocument* checkAndCreateDocument(const char* pb, size_t size);
+    [[nodiscard]] static Status checkAndCreateDocument(const char* pb, size_t 
size,
+                                                       JsonbDocument** doc);
 
     // create an JsonbValue from JSONB packed bytes
     static JsonbValue* createValue(const char* pb, uint32_t size);
 
-    uint8_t version() { return header_.ver_; }
+    uint8_t version() const { return header_.ver_; }
 
     JsonbValue* getValue() { return ((JsonbValue*)payload_); }
 
@@ -194,7 +188,6 @@ public:
 
     const ObjectVal* operator->() const { return ((const ObjectVal*)payload_); 
}
 
-public:
     bool operator==(const JsonbDocument& other) const {
         assert(false);
         return false;
@@ -409,11 +402,10 @@ private:
 template <class Iter_Type, class Cont_Type>
 class JsonbFwdIteratorT {
 public:
-    typedef Iter_Type iterator;
-    typedef typename std::iterator_traits<Iter_Type>::pointer pointer;
-    typedef typename std::iterator_traits<Iter_Type>::reference reference;
+    using iterator = Iter_Type;
+    using pointer = typename std::iterator_traits<Iter_Type>::pointer;
+    using reference = typename std::iterator_traits<Iter_Type>::reference;
 
-public:
     explicit JsonbFwdIteratorT() : current_(nullptr) {}
     explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}
 
@@ -452,10 +444,10 @@ private:
     iterator current_;
 };
 
-typedef int (*hDictInsert)(const char* key, unsigned len);
-typedef int (*hDictFind)(const char* key, unsigned len);
+using hDictInsert = int (*)(const char*, unsigned int);
+using hDictFind = int (*)(const char*, unsigned int);
 
-typedef std::underlying_type<JsonbType>::type JsonbTypeUnder;
+using JsonbTypeUnder = std::underlying_type_t<JsonbType>;
 
 /*
  * JsonbKeyValue class defines JSONB key type, as described below.
@@ -484,7 +476,9 @@ class JsonbKeyValue {
 public:
     // now we use sMaxKeyId to represent an empty key
     static const int sMaxKeyId = 65535;
-    typedef uint16_t keyid_type;
+    using keyid_type = uint16_t;
+
+    JsonbKeyValue() = delete;
 
     static const uint8_t sMaxKeyLen = 64;
 
@@ -512,8 +506,6 @@ private:
         keyid_type id_;
         char str_[1];
     } key_;
-
-    JsonbKeyValue();
 };
 
 /*
@@ -620,7 +612,7 @@ private:
     NumberValT();
 };
 
-typedef NumberValT<int8_t> JsonbInt8Val;
+using JsonbInt8Val = NumberValT<int8_t>;
 
 // override setVal for Int8Val
 template <>
@@ -633,7 +625,7 @@ inline bool JsonbInt8Val::setVal(int8_t value) {
     return true;
 }
 
-typedef NumberValT<int16_t> JsonbInt16Val;
+using JsonbInt16Val = NumberValT<int16_t>;
 
 // override setVal for Int16Val
 template <>
@@ -645,7 +637,7 @@ inline bool JsonbInt16Val::setVal(int16_t value) {
     num_ = value;
     return true;
 }
-typedef NumberValT<int32_t> JsonbInt32Val;
+using JsonbInt32Val = NumberValT<int32_t>;
 
 // override setVal for Int32Val
 template <>
@@ -658,7 +650,7 @@ inline bool JsonbInt32Val::setVal(int32_t value) {
     return true;
 }
 
-typedef NumberValT<int64_t> JsonbInt64Val;
+using JsonbInt64Val = NumberValT<int64_t>;
 
 // override setVal for Int64Val
 template <>
@@ -671,7 +663,7 @@ inline bool JsonbInt64Val::setVal(int64_t value) {
     return true;
 }
 
-typedef NumberValT<int128_t> JsonbInt128Val;
+using JsonbInt128Val = NumberValT<int128_t>;
 
 // override setVal for Int128Val
 template <>
@@ -684,7 +676,7 @@ inline bool JsonbInt128Val::setVal(int128_t value) {
     return true;
 }
 
-typedef NumberValT<double> JsonbDoubleVal;
+using JsonbDoubleVal = NumberValT<double>;
 
 // override setVal for DoubleVal
 template <>
@@ -697,7 +689,7 @@ inline bool JsonbDoubleVal::setVal(double value) {
     return true;
 }
 
-typedef NumberValT<float> JsonbFloatVal;
+using JsonbFloatVal = NumberValT<float>;
 
 // override setVal for DoubleVal
 template <>
@@ -733,18 +725,21 @@ public:
         switch (type_) {
         case JsonbType::T_Int8:
             if (val < std::numeric_limits<int8_t>::min() ||
-                val > std::numeric_limits<int8_t>::max())
+                val > std::numeric_limits<int8_t>::max()) {
                 return false;
+            }
             return ((JsonbInt8Val*)this)->setVal((int8_t)val);
         case JsonbType::T_Int16:
             if (val < std::numeric_limits<int16_t>::min() ||
-                val > std::numeric_limits<int16_t>::max())
+                val > std::numeric_limits<int16_t>::max()) {
                 return false;
+            }
             return ((JsonbInt16Val*)this)->setVal((int16_t)val);
         case JsonbType::T_Int32:
             if (val < std::numeric_limits<int32_t>::min() ||
-                val > std::numeric_limits<int32_t>::max())
+                val > std::numeric_limits<int32_t>::max()) {
                 return false;
+            }
             return ((JsonbInt32Val*)this)->setVal((int32_t)val);
         case JsonbType::T_Int64:
             return ((JsonbInt64Val*)this)->setVal((int64_t)val);
@@ -833,7 +828,9 @@ public:
   */
     size_t length() {
         // It's an empty string
-        if (0 == size_) return size_;
+        if (0 == size_) {
+            return size_;
+        }
         // The string stored takes all the spaces in payload_
         if (payload_[size_ - 1] != 0) {
             return size_;
@@ -847,14 +844,15 @@ public:
     // all other strings: -1
     int getBoolVal() {
         if (size_ == 4 && tolower(payload_[0]) == 't' && tolower(payload_[1]) 
== 'r' &&
-            tolower(payload_[2]) == 'u' && tolower(payload_[3]) == 'e')
+            tolower(payload_[2]) == 'u' && tolower(payload_[3]) == 'e') {
             return 1;
-        else if (size_ == 5 && tolower(payload_[0]) == 'f' && 
tolower(payload_[1]) == 'a' &&
-                 tolower(payload_[2]) == 'l' && tolower(payload_[3]) == 's' &&
-                 tolower(payload_[4]) == 'e')
+        } else if (size_ == 5 && tolower(payload_[0]) == 'f' && 
tolower(payload_[1]) == 'a' &&
+                   tolower(payload_[2]) == 'l' && tolower(payload_[3]) == 's' 
&&
+                   tolower(payload_[4]) == 'e') {
             return 0;
-        else
+        } else {
             return -1;
+        }
     }
 
 private:
@@ -889,13 +887,12 @@ protected:
  */
 class ObjectVal : public ContainerVal {
 public:
-    typedef JsonbKeyValue value_type;
-    typedef value_type* pointer;
-    typedef const value_type* const_pointer;
-    typedef JsonbFwdIteratorT<pointer, ObjectVal> iterator;
-    typedef JsonbFwdIteratorT<const_pointer, ObjectVal> const_iterator;
+    using value_type = JsonbKeyValue;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+    using iterator = JsonbFwdIteratorT<pointer, ObjectVal>;
+    using const_iterator = JsonbFwdIteratorT<const_pointer, ObjectVal>;
 
-public:
     const_iterator search(const char* key, hDictFind handler = nullptr) const {
         return const_cast<ObjectVal*>(this)->search(key, handler);
     }
@@ -913,7 +910,9 @@ public:
     }
 
     iterator search(const char* key, unsigned int klen, hDictFind handler = 
nullptr) {
-        if (!key || !klen) return end();
+        if (!key || !klen) {
+            return end();
+        }
 
         int key_id = -1;
         if (handler && (key_id = handler(key, klen)) >= 0) {
@@ -923,13 +922,15 @@ public:
     }
 
     iterator search(int key_id) {
-        if (key_id < 0 || key_id > JsonbKeyValue::sMaxKeyId) return end();
+        if (key_id < 0 || key_id > JsonbKeyValue::sMaxKeyId) {
+            return end();
+        }
 
         const char* pch = payload_;
         const char* fence = payload_ + size_;
 
         while (pch < fence) {
-            JsonbKeyValue* pkey = (JsonbKeyValue*)(pch);
+            auto* pkey = (JsonbKeyValue*)(pch);
             if (!pkey->klen() && key_id == pkey->getKeyId()) {
                 return iterator(pkey);
             }
@@ -947,7 +948,7 @@ public:
 
         unsigned int num = 0;
         while (pch < fence) {
-            JsonbKeyValue* pkey = (JsonbKeyValue*)(pch);
+            auto* pkey = (JsonbKeyValue*)(pch);
             ++num;
             pch += pkey->numPackedBytes();
         }
@@ -963,8 +964,10 @@ public:
 
         unsigned int num = 0;
         while (pch < fence) {
-            JsonbKeyValue* pkey = (JsonbKeyValue*)(pch);
-            if (num == i) return pkey;
+            auto* pkey = (JsonbKeyValue*)(pch);
+            if (num == i) {
+                return pkey;
+            }
             ++num;
             pch += pkey->numPackedBytes();
         }
@@ -985,21 +988,27 @@ public:
 
     // find the JSONB value by a key string (null terminated)
     JsonbValue* find(const char* key, hDictFind handler = nullptr) {
-        if (!key) return nullptr;
+        if (!key) {
+            return nullptr;
+        }
         return find(key, (unsigned int)strlen(key), handler);
     }
 
     // find the JSONB value by a key string (with length)
     JsonbValue* find(const char* key, unsigned int klen, hDictFind handler = 
nullptr) {
         iterator kv = search(key, klen, handler);
-        if (end() == kv) return nullptr;
+        if (end() == kv) {
+            return nullptr;
+        }
         return kv->value();
     }
 
     // find the JSONB value by a key dictionary ID
     JsonbValue* find(int key_id) {
         iterator kv = search(key_id);
-        if (end() == kv) return nullptr;
+        if (end() == kv) {
+            return nullptr;
+        }
         return kv->value();
     }
 
@@ -1017,7 +1026,7 @@ private:
         const char* fence = payload_ + size_;
 
         while (pch < fence) {
-            JsonbKeyValue* pkey = (JsonbKeyValue*)(pch);
+            auto* pkey = (JsonbKeyValue*)(pch);
             if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) 
== 0) {
                 return iterator(pkey);
             }
@@ -1038,15 +1047,27 @@ private:
  */
 class ArrayVal : public ContainerVal {
 public:
+    using value_type = JsonbValue;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+    using iterator = JsonbFwdIteratorT<pointer, ArrayVal>;
+    using const_iterator = JsonbFwdIteratorT<const_pointer, ArrayVal>;
+
     // get the JSONB value at index
     JsonbValue* get(int idx) const {
-        if (idx < 0) return nullptr;
+        if (idx < 0) {
+            return nullptr;
+        }
 
         const char* pch = payload_;
         const char* fence = payload_ + size_;
 
-        while (pch < fence && idx-- > 0) pch += 
((JsonbValue*)pch)->numPackedBytes();
-        if (idx > 0 || pch == fence) return nullptr;
+        while (pch < fence && idx-- > 0) {
+            pch += ((JsonbValue*)pch)->numPackedBytes();
+        }
+        if (idx > 0 || pch == fence) {
+            return nullptr;
+        }
 
         return (JsonbValue*)pch;
     }
@@ -1067,12 +1088,6 @@ public:
         return num;
     }
 
-    typedef JsonbValue value_type;
-    typedef value_type* pointer;
-    typedef const value_type* const_pointer;
-    typedef JsonbFwdIteratorT<pointer, ArrayVal> iterator;
-    typedef JsonbFwdIteratorT<const_pointer, ArrayVal> const_iterator;
-
     iterator begin() { return iterator((pointer)payload_); }
 
     const_iterator begin() const { return const_iterator((pointer)payload_); }
@@ -1085,76 +1100,29 @@ private:
     ArrayVal();
 };
 
-// Prepare an empty document
-// input: pb - buuffer/packed bytes for jsonb document
-//        size - size of the buffer
-//        type - value type in the document
-inline JsonbDocument* JsonbDocument::makeDocument(char* pb, uint32_t size, 
JsonbType type) {
+inline Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t 
size,
+                                                    JsonbDocument** doc) {
+    *doc = nullptr;
     if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
-        return nullptr;
+        return Status::InvalidArgument("Invalid JSONB document: too small 
size({}) or null pointer",
+                                       size);
     }
 
-    if (type < JsonbType::T_Null || type >= JsonbType::NUM_TYPES) {
-        return nullptr;
-    }
-    JsonbDocument* doc = (JsonbDocument*)pb;
-    // Write header
-    doc->header_.ver_ = JSONB_VER;
-    JsonbValue* value = doc->getValue();
-    // Write type
-    value->type_ = type;
-
-    // Set empty JsonbValue
-    if (type == JsonbType::T_Object || type == JsonbType::T_Array)
-        ((ContainerVal*)value)->size_ = 0;
-    if (type == JsonbType::T_String || type == JsonbType::T_Binary)
-        ((JsonbBlobVal*)value)->size_ = 0;
-    return doc;
-}
-
-// Prepare a document from an JsonbValue
-// input: pb - buuffer/packed bytes for jsonb document
-//        size - size of the buffer
-//        rval - jsonb value to be copied into the document
-inline JsonbDocument* JsonbDocument::makeDocument(char* pb, uint32_t size, 
const JsonbValue* rval) {
-    // checking if the buffer is big enough to store the value
-    if (!pb || !rval || size < sizeof(JsonbHeader) + rval->numPackedBytes()) {
-        return nullptr;
-    }
-
-    JsonbType type = rval->type();
-    if (type < JsonbType::T_Null || type >= JsonbType::NUM_TYPES) {
-        return nullptr;
+    auto* doc_ptr = (JsonbDocument*)pb;
+    if (doc_ptr->header_.ver_ != JSONB_VER) {
+        return Status::InvalidArgument("Invalid JSONB document: invalid 
version({})",
+                                       doc_ptr->header_.ver_);
     }
-    JsonbDocument* doc = (JsonbDocument*)pb;
-    // Write header
-    doc->header_.ver_ = JSONB_VER;
-    // get the starting byte of the value
-    JsonbValue* value = doc->getValue();
-    // binary copy of the rval
-    if (value != rval) // copy not necessary if values are the same
-        memmove(value, rval, rval->numPackedBytes());
 
-    return doc;
-}
-
-inline JsonbDocument* JsonbDocument::checkAndCreateDocument(const char* pb, 
size_t size) {
-    if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
-        return nullptr;
-    }
-
-    JsonbDocument* doc = (JsonbDocument*)pb;
-    if (doc->header_.ver_ != JSONB_VER) {
-        return nullptr;
-    }
-
-    JsonbValue* val = (JsonbValue*)doc->payload_;
+    auto* val = (JsonbValue*)doc_ptr->payload_;
     if (val->type() < JsonbType::T_Null || val->type() >= JsonbType::NUM_TYPES 
||
         size != sizeof(JsonbHeader) + val->numPackedBytes()) {
-        return nullptr;
+        return Status::InvalidArgument("Invalid JSONB document: invalid 
type({}) or size({})",
+                                       
static_cast<JsonbTypeUnder>(val->type()), size);
     }
 
-    return doc;
+    *doc = doc_ptr;
+    return Status::OK();
 }
 inline void JsonbDocument::setValue(const JsonbValue* value) {
     memcpy(payload_, value, value->numPackedBytes());
@@ -1165,12 +1133,12 @@ inline JsonbValue* JsonbDocument::createValue(const 
char* pb, uint32_t size) {
         return nullptr;
     }
 
-    JsonbDocument* doc = (JsonbDocument*)pb;
+    auto* doc = (JsonbDocument*)pb;
     if (doc->header_.ver_ != JSONB_VER) {
         return nullptr;
     }
 
-    JsonbValue* val = (JsonbValue*)doc->payload_;
+    auto* val = (JsonbValue*)doc->payload_;
     if (size != sizeof(JsonbHeader) + val->numPackedBytes()) {
         return nullptr;
     }
@@ -1184,7 +1152,7 @@ inline unsigned int JsonbDocument::numPackedBytes() const 
{
 
 inline unsigned int JsonbKeyValue::numPackedBytes() const {
     unsigned int ks = keyPackedBytes();
-    JsonbValue* val = (JsonbValue*)(((char*)this) + ks);
+    auto* val = (JsonbValue*)(((char*)this) + ks);
     return ks + val->numPackedBytes();
 }
 
@@ -1323,8 +1291,8 @@ inline bool JsonbValue::contains(JsonbValue* rhs) const {
     case JsonbType::T_String:
     case JsonbType::T_Binary: {
         if (rhs->isString()) {
-            auto str_value1 = (JsonbStringVal*)this;
-            auto str_value2 = (JsonbStringVal*)rhs;
+            auto* str_value1 = (JsonbStringVal*)this;
+            auto* str_value2 = (JsonbStringVal*)rhs;
             return str_value1->length() == str_value2->length() &&
                    std::memcmp(str_value1->getBlob(), str_value2->getBlob(),
                                str_value1->length()) == 0;
@@ -1335,7 +1303,9 @@ inline bool JsonbValue::contains(JsonbValue* rhs) const {
         int lhs_num = ((ArrayVal*)this)->numElem();
         if (rhs->isArray()) {
             int rhs_num = ((ArrayVal*)rhs)->numElem();
-            if (rhs_num > lhs_num) return false;
+            if (rhs_num > lhs_num) {
+                return false;
+            }
             int contains_num = 0;
             for (int i = 0; i < lhs_num; ++i) {
                 for (int j = 0; j < rhs_num; ++j) {
@@ -1356,13 +1326,14 @@ inline bool JsonbValue::contains(JsonbValue* rhs) const 
{
     }
     case JsonbType::T_Object: {
         if (rhs->isObject()) {
-            auto str_value1 = (ObjectVal*)this;
-            auto str_value2 = (ObjectVal*)rhs;
+            auto* str_value1 = (ObjectVal*)this;
+            auto* str_value2 = (ObjectVal*)rhs;
             for (int i = 0; i < str_value2->numElem(); ++i) {
                 JsonbKeyValue* key = str_value2->getJsonbKeyValue(i);
                 JsonbValue* value = str_value1->find(key->getKeyStr(), 
key->klen());
-                if (key != nullptr && value != nullptr && 
!value->contains(key->value()))
+                if (key != nullptr && value != nullptr && 
!value->contains(key->value())) {
                     return false;
+                }
             }
             return true;
         }
@@ -1411,7 +1382,9 @@ inline const char* JsonbValue::getValuePtr() const {
 
 inline bool JsonbPath::seek(const char* key_path, size_t kp_len) {
     //path invalid
-    if (!key_path || kp_len == 0) return false;
+    if (!key_path || kp_len == 0) {
+        return false;
+    }
     Stream stream(key_path, kp_len);
     stream.skip_whitespace();
     if (stream.exhausted() || stream.read() != SCOPE) {
@@ -1447,7 +1420,9 @@ inline JsonbValue* JsonbValue::findValue(JsonbPath& path, 
hDictFind handler) {
                                ->find(path.get_leg_from_leg_vector(i)->leg_ptr,
                                       
path.get_leg_from_leg_vector(i)->leg_len, handler);
 
-                if (!pval) return nullptr;
+                if (!pval) {
+                    return nullptr;
+                }
                 continue;
             } else {
                 return nullptr;
@@ -1470,8 +1445,9 @@ inline JsonbValue* JsonbValue::findValue(JsonbPath& path, 
hDictFind handler) {
 
             if (pval->type_ != JsonbType::T_Array ||
                 path.get_leg_from_leg_vector(i)->leg_ptr != nullptr ||
-                path.get_leg_from_leg_vector(i)->leg_len != 0)
+                path.get_leg_from_leg_vector(i)->leg_len != 0) {
                 return nullptr;
+            }
 
             if (path.get_leg_from_leg_vector(i)->array_index >= 0) {
                 pval = 
((ArrayVal*)pval)->get(path.get_leg_from_leg_vector(i)->array_index);
@@ -1481,7 +1457,9 @@ inline JsonbValue* JsonbValue::findValue(JsonbPath& path, 
hDictFind handler) {
                                      
path.get_leg_from_leg_vector(i)->array_index);
             }
 
-            if (!pval) return nullptr;
+            if (!pval) {
+                return nullptr;
+            }
             continue;
         }
         }
diff --git a/be/src/util/jsonb_utils.h b/be/src/util/jsonb_utils.h
index c32588e2610..a16d8ba69d2 100644
--- a/be/src/util/jsonb_utils.h
+++ b/be/src/util/jsonb_utils.h
@@ -21,8 +21,7 @@
 #ifndef JSONB_JSONBUTIL_H
 #define JSONB_JSONBUTIL_H
 
-#include <sstream>
-
+#include "common/exception.h"
 #include "jsonb_document.h"
 #include "jsonb_stream.h"
 #include "jsonb_writer.h"
@@ -39,15 +38,17 @@ public:
     JsonbToJson() : os_(buffer_, OUT_BUF_SIZE) {}
 
     // get json string
-    const std::string to_json_string(const char* data, size_t size) {
-        JsonbDocument* pdoc = 
doris::JsonbDocument::checkAndCreateDocument(data, size);
-        if (!pdoc) {
-            LOG(FATAL) << "invalid json binary value: " << 
std::string_view(data, size);
+
+    std::string to_json_string(const char* data, size_t size) {
+        JsonbDocument* pdoc;
+        auto st = doris::JsonbDocument::checkAndCreateDocument(data, size, 
&pdoc);
+        if (!st.ok()) {
+            throw Exception(st);
         }
         return to_json_string(pdoc->getValue());
     }
 
-    const std::string to_json_string(const JsonbValue* val) {
+    std::string to_json_string(const JsonbValue* val) {
         os_.clear();
         os_.seekp(0);
 
@@ -61,7 +62,7 @@ public:
         return json_string;
     }
 
-    static const std::string jsonb_to_json_string(const char* data, size_t 
size) {
+    static std::string jsonb_to_json_string(const char* data, size_t size) {
         JsonbToJson jsonb_to_json;
         return jsonb_to_json.to_json_string(data, size);
     }
@@ -141,9 +142,9 @@ private:
         }
         char char_buffer[16];
         for (const char* ptr = str; ptr != str + len && *ptr; ++ptr) {
-            if ((unsigned char)*ptr > 31 && *ptr != '\"' && *ptr != '\\')
+            if ((unsigned char)*ptr > 31 && *ptr != '\"' && *ptr != '\\') {
                 os_.put(*ptr);
-            else {
+            } else {
                 os_.put('\\');
                 unsigned char token;
                 switch (token = *ptr) {
@@ -235,7 +236,6 @@ private:
         os_.put(']');
     }
 
-private:
     JsonbOutStream os_;
     char buffer_[OUT_BUF_SIZE];
 };
@@ -294,7 +294,7 @@ private:
     }
     JsonbWriterT<OS_TYPE> writer_;
 };
-typedef JsonbValueCreaterT<JsonbOutStream> JsonbValueCreater;
+using JsonbValueCreater = JsonbValueCreaterT<JsonbOutStream>;
 } // namespace doris
 
 #endif // JSONB_JSONBUTIL_H
diff --git a/be/src/util/jsonb_writer.h b/be/src/util/jsonb_writer.h
index f92d8a4096b..b71b6ef6aa4 100644
--- a/be/src/util/jsonb_writer.h
+++ b/be/src/util/jsonb_writer.h
@@ -40,6 +40,7 @@
 #include <stack>
 #include <string>
 
+#include "common/exception.h"
 #include "jsonb_document.h"
 #include "jsonb_stream.h"
 
@@ -479,8 +480,14 @@ public:
 
     OS_TYPE* getOutput() { return os_; }
     JsonbDocument* getDocument() {
-        return JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(),
-                                                     getOutput()->getSize());
+        JsonbDocument* doc = nullptr;
+        auto st = 
JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(),
+                                                        
getOutput()->getSize(), &doc);
+        if (st.ok()) {
+            return doc;
+        } else {
+            throw doris::Exception(st);
+        }
     }
 
     JsonbValue* getValue() {
diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp 
b/be/src/vec/exprs/table_function/vexplode_json_array.cpp
index 7594d9a5cc6..0742d497c7b 100644
--- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp
@@ -63,8 +63,9 @@ void 
VExplodeJsonArrayTableFunction<DataImpl>::process_row(size_t row_idx) {
     StringRef text = _text_column->get_data_at(row_idx);
     if (text.data != nullptr) {
         if (WhichDataType(_text_datatype).is_json()) {
-            JsonbDocument* doc = 
JsonbDocument::checkAndCreateDocument(text.data, text.size);
-            if (doc && doc->getValue() && doc->getValue()->isArray()) {
+            JsonbDocument* doc = nullptr;
+            auto st = JsonbDocument::checkAndCreateDocument(text.data, 
text.size, &doc);
+            if (st.ok() && doc && doc->getValue() && 
doc->getValue()->isArray()) {
                 auto* a = (ArrayVal*)doc->getValue();
                 if (a->numElem() > 0) {
                     _cur_size = _parsed_data.set_output(*a, a->numElem());
diff --git a/be/src/vec/exprs/table_function/vexplode_json_object.cpp 
b/be/src/vec/exprs/table_function/vexplode_json_object.cpp
index 38a00d60b19..aa92d8238ae 100644
--- a/be/src/vec/exprs/table_function/vexplode_json_object.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_json_object.cpp
@@ -54,8 +54,9 @@ void VExplodeJsonObjectTableFunction::process_row(size_t 
row_idx) {
 
     StringRef text = _json_object_column->get_data_at(row_idx);
     if (text.data != nullptr) {
-        JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(text.data, 
text.size);
-        if (!doc || !doc->getValue()) [[unlikely]] {
+        JsonbDocument* doc = nullptr;
+        auto st = JsonbDocument::checkAndCreateDocument(text.data, text.size, 
&doc);
+        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
             // error jsonb, put null into output, cur_size = 0 , we will 
insert_default
             return;
         }
diff --git a/be/src/vec/functions/function_cast.h 
b/be/src/vec/functions/function_cast.h
index 251a711e64c..99f5f625971 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -718,8 +718,9 @@ struct ConvertImplGenericFromJsonb {
             const bool is_dst_string = is_string_or_fixed_string(data_type_to);
             for (size_t i = 0; i < size; ++i) {
                 const auto& val = col_from_string->get_data_at(i);
-                JsonbDocument* doc = 
JsonbDocument::checkAndCreateDocument(val.data, val.size);
-                if (UNLIKELY(!doc || !doc->getValue())) {
+                JsonbDocument* doc = nullptr;
+                auto st = JsonbDocument::checkAndCreateDocument(val.data, 
val.size, &doc);
+                if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
                     (*vec_null_map_to)[i] = 1;
                     col_to->insert_default();
                     continue;
@@ -762,7 +763,7 @@ struct ConvertImplGenericFromJsonb {
                     continue;
                 }
                 ReadBuffer read_buffer((char*)(input_str.data()), 
input_str.size());
-                Status st = data_type_to->from_string(read_buffer, col_to);
+                st = data_type_to->from_string(read_buffer, col_to);
                 // if parsing failed, will return null
                 (*vec_null_map_to)[i] = !st.ok();
                 if (!st.ok()) {
@@ -881,8 +882,9 @@ struct ConvertImplFromJsonb {
                 }
 
                 // doc is NOT necessary to be deleted since JsonbDocument will 
not allocate memory
-                JsonbDocument* doc = 
JsonbDocument::checkAndCreateDocument(val.data, val.size);
-                if (UNLIKELY(!doc || !doc->getValue())) {
+                JsonbDocument* doc = nullptr;
+                auto st = JsonbDocument::checkAndCreateDocument(val.data, 
val.size, &doc);
+                if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
                     null_map[i] = 1;
                     res[i] = 0;
                     continue;
diff --git a/be/src/vec/functions/function_jsonb.cpp 
b/be/src/vec/functions/function_jsonb.cpp
index 08d2c974db0..af4aeed9e5d 100644
--- a/be/src/vec/functions/function_jsonb.cpp
+++ b/be/src/vec/functions/function_jsonb.cpp
@@ -557,8 +557,9 @@ private:
                 continue;
             }
             const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
-            JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, 
l_size);
-            if (UNLIKELY(!doc || !doc->getValue())) {
+            JsonbDocument* doc = nullptr;
+            auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, 
&doc);
+            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
                 dst_arr.clear();
                 return Status::InvalidArgument("jsonb data is invalid");
             }
@@ -665,8 +666,9 @@ private:
     static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const 
char* l_raw_str,
                                               int l_str_size, JsonbPath& path) 
{
         // doc is NOT necessary to be deleted since JsonbDocument will not 
allocate memory
-        JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str, 
l_str_size);
-        if (UNLIKELY(!doc || !doc->getValue())) {
+        JsonbDocument* doc = nullptr;
+        auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, 
&doc);
+        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
             return;
         }
 
@@ -760,8 +762,9 @@ private:
         }
 
         // doc is NOT necessary to be deleted since JsonbDocument will not 
allocate memory
-        JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, 
l_size);
-        if (UNLIKELY(!doc || !doc->getValue())) {
+        JsonbDocument* doc = nullptr;
+        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
+        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
             StringOP::push_null_string(i, res_data, res_offsets, null_map);
             return;
         }
@@ -886,10 +889,11 @@ public:
                 writer->writeStartArray();
 
                 // doc is NOT necessary to be deleted since JsonbDocument will 
not allocate memory
-                JsonbDocument* doc = 
JsonbDocument::checkAndCreateDocument(l_raw, l_size);
+                JsonbDocument* doc = nullptr;
+                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, 
&doc);
 
                 for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
-                    if (UNLIKELY(!doc || !doc->getValue())) {
+                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
                         writer->writeNull();
                         continue;
                     }
@@ -1027,8 +1031,9 @@ private:
         }
 
         // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
-        JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size);
-        if (UNLIKELY(!doc || !doc->getValue())) {
+        JsonbDocument* doc = nullptr;
+        auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc);
+        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
             null_map[i] = 1;
             res[i] = 0;
             return;
@@ -1406,8 +1411,9 @@ struct JsonbLengthUtil {
             }
             auto jsonb_value = jsonb_data_column->get_data_at(i);
             // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
-            JsonbDocument* doc =
-                    JsonbDocument::checkAndCreateDocument(jsonb_value.data, jsonb_value.size);
+            JsonbDocument* doc = nullptr;
+            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value.data,
+                                                                  jsonb_value.size, &doc));
             JsonbValue* value = doc->getValue()->findValue(path, nullptr);
             if (UNLIKELY(!value)) {
                 null_map->get_data()[i] = 1;
@@ -1541,10 +1547,12 @@ struct JsonbContainsUtil {
                 continue;
             }
             // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
-            JsonbDocument* doc1 =
-                    JsonbDocument::checkAndCreateDocument(jsonb_value1.data, jsonb_value1.size);
-            JsonbDocument* doc2 =
-                    JsonbDocument::checkAndCreateDocument(jsonb_value2.data, jsonb_value2.size);
+            JsonbDocument* doc1 = nullptr;
+            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value1.data,
+                                                                  jsonb_value1.size, &doc1));
+            JsonbDocument* doc2 = nullptr;
+            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value2.data,
+                                                                  jsonb_value2.size, &doc2));
 
             JsonbValue* value1 = doc1->getValue()->findValue(path, nullptr);
             JsonbValue* value2 = doc2->getValue();
diff --git a/be/src/vec/jsonb/serialize.cpp b/be/src/vec/jsonb/serialize.cpp
index d75d332f40c..cd97e3bf5ca 100644
--- a/be/src/vec/jsonb/serialize.cpp
+++ b/be/src/vec/jsonb/serialize.cpp
@@ -24,6 +24,7 @@
 #include <unordered_set>
 #include <vector>
 
+#include "common/status.h"
 #include "olap/tablet_schema.h"
 #include "runtime/descriptors.h"
 #include "runtime/jsonb_value.h"
@@ -91,7 +92,9 @@ void JsonbSerializeUtil::jsonb_to_block(const DataTypeSerDeSPtrs& serdes, const
                                         const std::unordered_map<uint32_t, uint32_t>& col_id_to_idx,
                                         Block& dst, const std::vector<std::string>& default_values,
                                         const std::unordered_set<int>& include_cids) {
-    auto pdoc = JsonbDocument::checkAndCreateDocument(data, size);
+    JsonbDocument* pdoc = nullptr;
+    THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(data, size, &pdoc));
+
     JsonbDocument& doc = *pdoc;
     size_t num_rows = dst.rows();
     size_t filled_columns = 0;
diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp
index db441b671e9..79b0866bc77 100644
--- a/be/src/vec/olap/olap_data_convertor.cpp
+++ b/be/src/vec/olap/olap_data_convertor.cpp
@@ -668,10 +668,10 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
                             "`string_type_length_soft_limit_bytes` in vec engine.");
                 }
                 // Make sure that the json binary data written in is the correct jsonb value.
-                if (_is_jsonb &&
-                    !doris::JsonbDocument::checkAndCreateDocument(slice->data, slice->size)) {
-                    return Status::InvalidArgument("invalid json binary value: {}",
-                                                   std::string_view(slice->data, slice->size));
+                if (_is_jsonb) {
+                    JsonbDocument* doc = nullptr;
+                    RETURN_IF_ERROR(doris::JsonbDocument::checkAndCreateDocument(
+                            slice->data, slice->size, &doc));
                 }
             } else {
                 // TODO: this may not be necessary, check and remove later
@@ -695,10 +695,10 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
                         " in vec engine.");
             }
             // Make sure that the json binary data written in is the correct jsonb value.
-            if (_is_jsonb &&
-                !doris::JsonbDocument::checkAndCreateDocument(slice->data, slice->size)) {
-                return Status::InvalidArgument("invalid json binary value: {}",
-                                               std::string_view(slice->data, slice->size));
+            if (_is_jsonb) {
+                JsonbDocument* doc = nullptr;
+                RETURN_IF_ERROR(doris::JsonbDocument::checkAndCreateDocument(slice->data,
+                                                                             slice->size, &doc));
             }
             string_offset = *offset_cur;
             ++slice;
diff --git a/be/test/vec/data_types/common_data_type_serder_test.h b/be/test/vec/data_types/common_data_type_serder_test.h
index ef8d07323df..f8c3488e684 100644
--- a/be/test/vec/data_types/common_data_type_serder_test.h
+++ b/be/test/vec/data_types/common_data_type_serder_test.h
@@ -292,7 +292,10 @@ public:
         EXPECT_EQ(jsonb_column->size(), load_cols[0]->size());
         for (size_t r = 0; r < jsonb_column->size(); ++r) {
             StringRef jsonb_data = jsonb_column->get_data_at(r);
-            auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size);
+            JsonbDocument* pdoc = nullptr;
+            auto st =
+                    JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size, &pdoc);
+            ASSERT_TRUE(st.ok()) << "checkAndCreateDocument failed: " << st.to_string();
             JsonbDocument& doc = *pdoc;
             size_t cIdx = 0;
             for (auto it = doc->begin(); it != doc->end(); ++it) {
diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp b/be/test/vec/data_types/serde/data_type_serde_test.cpp
index 3c9498f1d6d..24d89953459 100644
--- a/be/test/vec/data_types/serde/data_type_serde_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp
@@ -240,7 +240,9 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
         jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
                                   jsonb_writer.getOutput()->getSize());
         StringRef jsonb_data = jsonb_column->get_data_at(0);
-        auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size);
+        JsonbDocument* pdoc = nullptr;
+        auto st = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size, &pdoc);
+        ASSERT_TRUE(st.ok()) << "checkAndCreateDocument failed: " << st.to_string();
         JsonbDocument& doc = *pdoc;
         for (auto it = doc->begin(); it != doc->end(); ++it) {
             serde->read_one_cell_from_jsonb(*vec, it->value());
@@ -270,7 +272,9 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
         jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
                                   jsonb_writer.getOutput()->getSize());
         StringRef jsonb_data = jsonb_column->get_data_at(0);
-        auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size);
+        JsonbDocument* pdoc = nullptr;
+        auto st = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size, &pdoc);
+        ASSERT_TRUE(st.ok()) << "checkAndCreateDocument failed: " << st.to_string();
         JsonbDocument& doc = *pdoc;
         for (auto it = doc->begin(); it != doc->end(); ++it) {
             serde->read_one_cell_from_jsonb(*vec, it->value());
diff --git a/be/test/vec/olap/jsonb_value_test.cpp b/be/test/vec/olap/jsonb_value_test.cpp
index 3111163c0be..d6b5db784e2 100644
--- a/be/test/vec/olap/jsonb_value_test.cpp
+++ b/be/test/vec/olap/jsonb_value_test.cpp
@@ -189,7 +189,8 @@ TEST(JsonbValueConvertorTest, JsonbValueInvalid) {
     auto [status, column] = _olap_data_convertor->convert_column_data(0);
     // invalid will make error
     ASSERT_FALSE(status.ok());
-    ASSERT_TRUE(status.to_string().find("invalid json binary value") != std::string::npos);
+    ASSERT_TRUE(status.to_string().find("Invalid JSONB document") != std::string::npos)
+            << status.to_string();
     ASSERT_NE(column, nullptr);
 
     // test with null map
@@ -235,7 +236,8 @@ TEST(JsonbValueConvertorTest, JsonbValueInvalid) {
     _olap_data_convertor->set_source_content(&block, 0, 5);
     auto [status1, column1] = _olap_data_convertor->convert_column_data(0);
     ASSERT_FALSE(status.ok());
-    ASSERT_TRUE(status.to_string().find("invalid json binary value") != std::string::npos);
+    ASSERT_TRUE(status.to_string().find("Invalid JSONB document") != std::string::npos)
+            << status.to_string();
     ASSERT_NE(column, nullptr);
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to