This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new b62273acb45 [refactor](jsonb) Make the checkAndCreateDocument function return a Status (#51673) b62273acb45 is described below commit b62273acb4504f1f76723e39161500ea66e3d594 Author: Jerry Hu <hushengg...@selectdb.com> AuthorDate: Tue Jun 17 09:40:57 2025 +0800 [refactor](jsonb) Make the checkAndCreateDocument function return a Status (#51673) --- be/src/util/jsonb_document.h | 250 ++++++++++----------- be/src/util/jsonb_utils.h | 24 +- be/src/util/jsonb_writer.h | 11 +- .../exprs/table_function/vexplode_json_array.cpp | 5 +- .../exprs/table_function/vexplode_json_object.cpp | 5 +- be/src/vec/functions/function_cast.h | 12 +- be/src/vec/functions/function_jsonb.cpp | 40 ++-- be/src/vec/jsonb/serialize.cpp | 5 +- be/src/vec/olap/olap_data_convertor.cpp | 16 +- .../vec/data_types/common_data_type_serder_test.h | 5 +- .../vec/data_types/serde/data_type_serde_test.cpp | 8 +- be/test/vec/olap/jsonb_value_test.cpp | 6 +- 12 files changed, 198 insertions(+), 189 deletions(-) diff --git a/be/src/util/jsonb_document.h b/be/src/util/jsonb_document.h index fb87114f886..7f50c4012ba 100644 --- a/be/src/util/jsonb_document.h +++ b/be/src/util/jsonb_document.h @@ -66,10 +66,6 @@ #ifndef JSONB_JSONBDOCUMENT_H #define JSONB_JSONBDOCUMENT_H -#include <assert.h> -#include <stdint.h> -#include <stdlib.h> - #include <algorithm> #include <cctype> #include <charconv> @@ -78,6 +74,7 @@ #include <type_traits> #include "common/compiler_util.h" // IWYU pragma: keep +#include "common/status.h" // #include "util/string_parser.hpp" @@ -172,17 +169,14 @@ constexpr unsigned int ARRAY_CODE = 1; */ class JsonbDocument { public: - // Prepare a document in the buffer - static JsonbDocument* makeDocument(char* pb, uint32_t size, JsonbType type); - static JsonbDocument* makeDocument(char* pb, uint32_t size, const JsonbValue* rval); - // create an JsonbDocument object from JSONB packed bytes - static JsonbDocument* checkAndCreateDocument(const char* pb, size_t size); + [[nodiscard]] static Status checkAndCreateDocument(const char* pb, size_t size, + JsonbDocument** doc); // create an JsonbValue from JSONB packed bytes static JsonbValue* createValue(const char* pb, uint32_t size); - uint8_t version() { return header_.ver_; } + uint8_t version() const { return header_.ver_; } JsonbValue* getValue() { return ((JsonbValue*)payload_); } @@ -194,7 +188,6 @@ public: const ObjectVal* operator->() const { return ((const ObjectVal*)payload_); } -public: bool operator==(const JsonbDocument& other) const { assert(false); return false; @@ -409,11 +402,10 @@ private: template <class Iter_Type, class Cont_Type> class JsonbFwdIteratorT { public: - typedef Iter_Type iterator; - typedef typename std::iterator_traits<Iter_Type>::pointer pointer; - typedef typename std::iterator_traits<Iter_Type>::reference reference; + using iterator = Iter_Type; + using pointer = typename std::iterator_traits<Iter_Type>::pointer; + using reference = typename std::iterator_traits<Iter_Type>::reference; -public: explicit JsonbFwdIteratorT() : current_(nullptr) {} explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {} @@ -452,10 +444,10 @@ private: iterator current_; }; -typedef int (*hDictInsert)(const char* key, unsigned len); -typedef int (*hDictFind)(const char* key, unsigned len); +using hDictInsert = int (*)(const char*, unsigned int); +using hDictFind = int (*)(const char*, unsigned int); -typedef std::underlying_type<JsonbType>::type JsonbTypeUnder; +using JsonbTypeUnder = std::underlying_type_t<JsonbType>; /* * JsonbKeyValue class defines JSONB key type, as described below. @@ -484,7 +476,9 @@ class JsonbKeyValue { public: // now we use sMaxKeyId to represent an empty key static const int sMaxKeyId = 65535; - typedef uint16_t keyid_type; + using keyid_type = uint16_t; + + JsonbKeyValue() = delete; static const uint8_t sMaxKeyLen = 64; @@ -512,8 +506,6 @@ private: keyid_type id_; char str_[1]; } key_; - - JsonbKeyValue(); }; /* @@ -620,7 +612,7 @@ private: NumberValT(); }; -typedef NumberValT<int8_t> JsonbInt8Val; +using JsonbInt8Val = NumberValT<int8_t>; // override setVal for Int8Val template <> @@ -633,7 +625,7 @@ inline bool JsonbInt8Val::setVal(int8_t value) { return true; } -typedef NumberValT<int16_t> JsonbInt16Val; +using JsonbInt16Val = NumberValT<int16_t>; // override setVal for Int16Val template <> @@ -645,7 +637,7 @@ inline bool JsonbInt16Val::setVal(int16_t value) { num_ = value; return true; } -typedef NumberValT<int32_t> JsonbInt32Val; +using JsonbInt32Val = NumberValT<int32_t>; // override setVal for Int32Val template <> @@ -658,7 +650,7 @@ inline bool JsonbInt32Val::setVal(int32_t value) { return true; } -typedef NumberValT<int64_t> JsonbInt64Val; +using JsonbInt64Val = NumberValT<int64_t>; // override setVal for Int64Val template <> @@ -671,7 +663,7 @@ inline bool JsonbInt64Val::setVal(int64_t value) { return true; } -typedef NumberValT<int128_t> JsonbInt128Val; +using JsonbInt128Val = NumberValT<int128_t>; // override setVal for Int128Val template <> @@ -684,7 +676,7 @@ inline bool JsonbInt128Val::setVal(int128_t value) { return true; } -typedef NumberValT<double> JsonbDoubleVal; +using JsonbDoubleVal = NumberValT<double>; // override setVal for DoubleVal template <> @@ -697,7 +689,7 @@ inline bool JsonbDoubleVal::setVal(double value) { return true; } -typedef NumberValT<float> JsonbFloatVal; +using JsonbFloatVal = NumberValT<float>; // override setVal for DoubleVal template <> @@ -733,18 +725,21 @@ public: switch (type_) { case JsonbType::T_Int8: if (val < std::numeric_limits<int8_t>::min() || - val > std::numeric_limits<int8_t>::max()) + val > std::numeric_limits<int8_t>::max()) { return false; + } return ((JsonbInt8Val*)this)->setVal((int8_t)val); case JsonbType::T_Int16: if (val < std::numeric_limits<int16_t>::min() || - val > std::numeric_limits<int16_t>::max()) + val > std::numeric_limits<int16_t>::max()) { return false; + } return ((JsonbInt16Val*)this)->setVal((int16_t)val); case JsonbType::T_Int32: if (val < std::numeric_limits<int32_t>::min() || - val > std::numeric_limits<int32_t>::max()) + val > std::numeric_limits<int32_t>::max()) { return false; + } return ((JsonbInt32Val*)this)->setVal((int32_t)val); case JsonbType::T_Int64: return ((JsonbInt64Val*)this)->setVal((int64_t)val); @@ -833,7 +828,9 @@ public: */ size_t length() { // It's an empty string - if (0 == size_) return size_; + if (0 == size_) { + return size_; + } // The string stored takes all the spaces in payload_ if (payload_[size_ - 1] != 0) { return size_; @@ -847,14 +844,15 @@ public: // all other strings: -1 int getBoolVal() { if (size_ == 4 && tolower(payload_[0]) == 't' && tolower(payload_[1]) == 'r' && - tolower(payload_[2]) == 'u' && tolower(payload_[3]) == 'e') + tolower(payload_[2]) == 'u' && tolower(payload_[3]) == 'e') { return 1; - else if (size_ == 5 && tolower(payload_[0]) == 'f' && tolower(payload_[1]) == 'a' && - tolower(payload_[2]) == 'l' && tolower(payload_[3]) == 's' && - tolower(payload_[4]) == 'e') + } else if (size_ == 5 && tolower(payload_[0]) == 'f' && tolower(payload_[1]) == 'a' && + tolower(payload_[2]) == 'l' && tolower(payload_[3]) == 's' && + tolower(payload_[4]) == 'e') { return 0; - else + } else { return -1; + } } private: @@ -889,13 +887,12 @@ protected: */ class ObjectVal : public ContainerVal { public: - typedef JsonbKeyValue value_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef JsonbFwdIteratorT<pointer, ObjectVal> iterator; - typedef JsonbFwdIteratorT<const_pointer, ObjectVal> const_iterator; + using value_type = JsonbKeyValue; + using pointer = value_type*; + using const_pointer = const value_type*; + using iterator = JsonbFwdIteratorT<pointer, ObjectVal>; + using const_iterator = JsonbFwdIteratorT<const_pointer, ObjectVal>; -public: const_iterator search(const char* key, hDictFind handler = nullptr) const { return const_cast<ObjectVal*>(this)->search(key, handler); } @@ -913,7 +910,9 @@ public: } iterator search(const char* key, unsigned int klen, hDictFind handler = nullptr) { - if (!key || !klen) return end(); + if (!key || !klen) { + return end(); + } int key_id = -1; if (handler && (key_id = handler(key, klen)) >= 0) { @@ -923,13 +922,15 @@ public: } iterator search(int key_id) { - if (key_id < 0 || key_id > JsonbKeyValue::sMaxKeyId) return end(); + if (key_id < 0 || key_id > JsonbKeyValue::sMaxKeyId) { + return end(); + } const char* pch = payload_; const char* fence = payload_ + size_; while (pch < fence) { - JsonbKeyValue* pkey = (JsonbKeyValue*)(pch); + auto* pkey = (JsonbKeyValue*)(pch); if (!pkey->klen() && key_id == pkey->getKeyId()) { return iterator(pkey); } @@ -947,7 +948,7 @@ public: unsigned int num = 0; while (pch < fence) { - JsonbKeyValue* pkey = (JsonbKeyValue*)(pch); + auto* pkey = (JsonbKeyValue*)(pch); ++num; pch += pkey->numPackedBytes(); } @@ -963,8 +964,10 @@ public: unsigned int num = 0; while (pch < fence) { - JsonbKeyValue* pkey = (JsonbKeyValue*)(pch); - if (num == i) return pkey; + auto* pkey = (JsonbKeyValue*)(pch); + if (num == i) { + return pkey; + } ++num; pch += pkey->numPackedBytes(); } @@ -985,21 +988,27 @@ public: // find the JSONB value by a key string (null terminated) JsonbValue* find(const char* key, hDictFind handler = nullptr) { - if (!key) return nullptr; + if (!key) { + return nullptr; + } return find(key, (unsigned int)strlen(key), handler); } // find the JSONB value by a key string (with length) JsonbValue* find(const char* key, unsigned int klen, hDictFind handler = nullptr) { iterator kv = search(key, klen, handler); - if (end() == kv) return nullptr; + if (end() == kv) { + return nullptr; + } return kv->value(); } // find the JSONB value by a key dictionary ID JsonbValue* find(int key_id) { iterator kv = search(key_id); - if (end() == kv) return nullptr; + if (end() == kv) { + return nullptr; + } return kv->value(); } @@ -1017,7 +1026,7 @@ private: const char* fence = payload_ + size_; while (pch < fence) { - JsonbKeyValue* pkey = (JsonbKeyValue*)(pch); + auto* pkey = (JsonbKeyValue*)(pch); if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) == 0) { return iterator(pkey); } @@ -1038,15 +1047,27 @@ private: */ class ArrayVal : public ContainerVal { public: + using value_type = JsonbValue; + using pointer = value_type*; + using const_pointer = const value_type*; + using iterator = JsonbFwdIteratorT<pointer, ArrayVal>; + using const_iterator = JsonbFwdIteratorT<const_pointer, ArrayVal>; + // get the JSONB value at index JsonbValue* get(int idx) const { - if (idx < 0) return nullptr; + if (idx < 0) { + return nullptr; + } const char* pch = payload_; const char* fence = payload_ + size_; - while (pch < fence && idx-- > 0) pch += ((JsonbValue*)pch)->numPackedBytes(); - if (idx > 0 || pch == fence) return nullptr; + while (pch < fence && idx-- > 0) { + pch += ((JsonbValue*)pch)->numPackedBytes(); + } + if (idx > 0 || pch == fence) { + return nullptr; + } return (JsonbValue*)pch; } @@ -1067,12 +1088,6 @@ public: return num; } - typedef JsonbValue value_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef JsonbFwdIteratorT<pointer, ArrayVal> iterator; - typedef JsonbFwdIteratorT<const_pointer, ArrayVal> const_iterator; - iterator begin() { return iterator((pointer)payload_); } const_iterator begin() const { return const_iterator((pointer)payload_); } @@ -1085,76 +1100,29 @@ private: ArrayVal(); }; -// Prepare an empty document -// input: pb - buuffer/packed bytes for jsonb document -// size - size of the buffer -// type - value type in the document -inline JsonbDocument* JsonbDocument::makeDocument(char* pb, uint32_t size, JsonbType type) { +inline Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size, + JsonbDocument** doc) { + *doc = nullptr; if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { - return nullptr; + return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer", + size); } - if (type < JsonbType::T_Null || type >= JsonbType::NUM_TYPES) { - return nullptr; - } - JsonbDocument* doc = (JsonbDocument*)pb; - // Write header - doc->header_.ver_ = JSONB_VER; - JsonbValue* value = doc->getValue(); - // Write type - value->type_ = type; - - // Set empty JsonbValue - if (type == JsonbType::T_Object || type == JsonbType::T_Array) - ((ContainerVal*)value)->size_ = 0; - if (type == JsonbType::T_String || type == JsonbType::T_Binary) - ((JsonbBlobVal*)value)->size_ = 0; - return doc; -} - -// Prepare a document from an JsonbValue -// input: pb - buuffer/packed bytes for jsonb document -// size - size of the buffer -// rval - jsonb value to be copied into the document -inline JsonbDocument* JsonbDocument::makeDocument(char* pb, uint32_t size, const JsonbValue* rval) { - // checking if the buffer is big enough to store the value - if (!pb || !rval || size < sizeof(JsonbHeader) + rval->numPackedBytes()) { - return nullptr; - } - - JsonbType type = rval->type(); - if (type < JsonbType::T_Null || type >= JsonbType::NUM_TYPES) { - return nullptr; + auto* doc_ptr = (JsonbDocument*)pb; + if (doc_ptr->header_.ver_ != JSONB_VER) { + return Status::InvalidArgument("Invalid JSONB document: invalid version({})", + doc_ptr->header_.ver_); } - JsonbDocument* doc = (JsonbDocument*)pb; - // Write header - doc->header_.ver_ = JSONB_VER; - // get the starting byte of the value - JsonbValue* value = doc->getValue(); - // binary copy of the rval - if (value != rval) // copy not necessary if values are the same - memmove(value, rval, rval->numPackedBytes()); - return doc; -} - -inline JsonbDocument* JsonbDocument::checkAndCreateDocument(const char* pb, size_t size) { - if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { - return nullptr; - } - - JsonbDocument* doc = (JsonbDocument*)pb; - if (doc->header_.ver_ != JSONB_VER) { - return nullptr; - } - - JsonbValue* val = (JsonbValue*)doc->payload_; + auto* val = (JsonbValue*)doc_ptr->payload_; if (val->type() < JsonbType::T_Null || val->type() >= JsonbType::NUM_TYPES || size != sizeof(JsonbHeader) + val->numPackedBytes()) { - return nullptr; + return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})", + static_cast<JsonbTypeUnder>(val->type()), size); } - return doc; + *doc = doc_ptr; + return Status::OK(); } inline void JsonbDocument::setValue(const JsonbValue* value) { memcpy(payload_, value, value->numPackedBytes()); @@ -1165,12 +1133,12 @@ inline JsonbValue* JsonbDocument::createValue(const char* pb, uint32_t size) { return nullptr; } - JsonbDocument* doc = (JsonbDocument*)pb; + auto* doc = (JsonbDocument*)pb; if (doc->header_.ver_ != JSONB_VER) { return nullptr; } - JsonbValue* val = (JsonbValue*)doc->payload_; + auto* val = (JsonbValue*)doc->payload_; if (size != sizeof(JsonbHeader) + val->numPackedBytes()) { return nullptr; } @@ -1184,7 +1152,7 @@ inline unsigned int JsonbDocument::numPackedBytes() const { inline unsigned int JsonbKeyValue::numPackedBytes() const { unsigned int ks = keyPackedBytes(); - JsonbValue* val = (JsonbValue*)(((char*)this) + ks); + auto* val = (JsonbValue*)(((char*)this) + ks); return ks + val->numPackedBytes(); } @@ -1323,8 +1291,8 @@ inline bool JsonbValue::contains(JsonbValue* rhs) const { case JsonbType::T_String: case JsonbType::T_Binary: { if (rhs->isString()) { - auto str_value1 = (JsonbStringVal*)this; - auto str_value2 = (JsonbStringVal*)rhs; + auto* str_value1 = (JsonbStringVal*)this; + auto* str_value2 = (JsonbStringVal*)rhs; return str_value1->length() == str_value2->length() && std::memcmp(str_value1->getBlob(), str_value2->getBlob(), str_value1->length()) == 0; @@ -1335,7 +1303,9 @@ inline bool JsonbValue::contains(JsonbValue* rhs) const { int lhs_num = ((ArrayVal*)this)->numElem(); if (rhs->isArray()) { int rhs_num = ((ArrayVal*)rhs)->numElem(); - if (rhs_num > lhs_num) return false; + if (rhs_num > lhs_num) { + return false; + } int contains_num = 0; for (int i = 0; i < lhs_num; ++i) { for (int j = 0; j < rhs_num; ++j) { @@ -1356,13 +1326,14 @@ inline bool JsonbValue::contains(JsonbValue* rhs) const { } case JsonbType::T_Object: { if (rhs->isObject()) { - auto str_value1 = (ObjectVal*)this; - auto str_value2 = (ObjectVal*)rhs; + auto* str_value1 = (ObjectVal*)this; + auto* str_value2 = (ObjectVal*)rhs; for (int i = 0; i < str_value2->numElem(); ++i) { JsonbKeyValue* key = str_value2->getJsonbKeyValue(i); JsonbValue* value = str_value1->find(key->getKeyStr(), key->klen()); - if (key != nullptr && value != nullptr && !value->contains(key->value())) + if (key != nullptr && value != nullptr && !value->contains(key->value())) { return false; + } } return true; } @@ -1411,7 +1382,9 @@ inline const char* JsonbValue::getValuePtr() const { inline bool JsonbPath::seek(const char* key_path, size_t kp_len) { //path invalid - if (!key_path || kp_len == 0) return false; + if (!key_path || kp_len == 0) { + return false; + } Stream stream(key_path, kp_len); stream.skip_whitespace(); if (stream.exhausted() || stream.read() != SCOPE) { @@ -1447,7 +1420,9 @@ inline JsonbValue* JsonbValue::findValue(JsonbPath& path, hDictFind handler) { ->find(path.get_leg_from_leg_vector(i)->leg_ptr, path.get_leg_from_leg_vector(i)->leg_len, handler); - if (!pval) return nullptr; + if (!pval) { + return nullptr; + } continue; } else { return nullptr; @@ -1470,8 +1445,9 @@ inline JsonbValue* JsonbValue::findValue(JsonbPath& path, hDictFind handler) { if (pval->type_ != JsonbType::T_Array || path.get_leg_from_leg_vector(i)->leg_ptr != nullptr || - path.get_leg_from_leg_vector(i)->leg_len != 0) + path.get_leg_from_leg_vector(i)->leg_len != 0) { return nullptr; + } if (path.get_leg_from_leg_vector(i)->array_index >= 0) { pval = ((ArrayVal*)pval)->get(path.get_leg_from_leg_vector(i)->array_index); @@ -1481,7 +1457,9 @@ inline JsonbValue* JsonbValue::findValue(JsonbPath& path, hDictFind handler) { path.get_leg_from_leg_vector(i)->array_index); } - if (!pval) return nullptr; + if (!pval) { + return nullptr; + } continue; } } diff --git a/be/src/util/jsonb_utils.h b/be/src/util/jsonb_utils.h index c32588e2610..a16d8ba69d2 100644 --- a/be/src/util/jsonb_utils.h +++ b/be/src/util/jsonb_utils.h @@ -21,8 +21,7 @@ #ifndef JSONB_JSONBUTIL_H #define JSONB_JSONBUTIL_H -#include <sstream> - +#include "common/exception.h" #include "jsonb_document.h" #include "jsonb_stream.h" #include "jsonb_writer.h" @@ -39,15 +38,17 @@ public: JsonbToJson() : os_(buffer_, OUT_BUF_SIZE) {} // get json string - const std::string to_json_string(const char* data, size_t size) { - JsonbDocument* pdoc = doris::JsonbDocument::checkAndCreateDocument(data, size); - if (!pdoc) { - LOG(FATAL) << "invalid json binary value: " << std::string_view(data, size); + + std::string to_json_string(const char* data, size_t size) { + JsonbDocument* pdoc; + auto st = doris::JsonbDocument::checkAndCreateDocument(data, size, &pdoc); + if (!st.ok()) { + throw Exception(st); } return to_json_string(pdoc->getValue()); } - const std::string to_json_string(const JsonbValue* val) { + std::string to_json_string(const JsonbValue* val) { os_.clear(); os_.seekp(0); @@ -61,7 +62,7 @@ public: return json_string; } - static const std::string jsonb_to_json_string(const char* data, size_t size) { + static std::string jsonb_to_json_string(const char* data, size_t size) { JsonbToJson jsonb_to_json; return jsonb_to_json.to_json_string(data, size); } @@ -141,9 +142,9 @@ private: } char char_buffer[16]; for (const char* ptr = str; ptr != str + len && *ptr; ++ptr) { - if ((unsigned char)*ptr > 31 && *ptr != '\"' && *ptr != '\\') + if ((unsigned char)*ptr > 31 && *ptr != '\"' && *ptr != '\\') { os_.put(*ptr); - else { + } else { os_.put('\\'); unsigned char token; switch (token = *ptr) { @@ -235,7 +236,6 @@ private: os_.put(']'); } -private: JsonbOutStream os_; char buffer_[OUT_BUF_SIZE]; }; @@ -294,7 +294,7 @@ private: } JsonbWriterT<OS_TYPE> writer_; }; -typedef JsonbValueCreaterT<JsonbOutStream> JsonbValueCreater; +using JsonbValueCreater = JsonbValueCreaterT<JsonbOutStream>; } // namespace doris #endif // JSONB_JSONBUTIL_H diff --git a/be/src/util/jsonb_writer.h b/be/src/util/jsonb_writer.h index f92d8a4096b..b71b6ef6aa4 100644 --- a/be/src/util/jsonb_writer.h +++ b/be/src/util/jsonb_writer.h @@ -40,6 +40,7 @@ #include <stack> #include <string> +#include "common/exception.h" #include "jsonb_document.h" #include "jsonb_stream.h" @@ -479,8 +480,14 @@ public: OS_TYPE* getOutput() { return os_; } JsonbDocument* getDocument() { - return JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(), - getOutput()->getSize()); + JsonbDocument* doc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(), + getOutput()->getSize(), &doc); + if (st.ok()) { + return doc; + } else { + throw doris::Exception(st); + } } JsonbValue* getValue() { diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp b/be/src/vec/exprs/table_function/vexplode_json_array.cpp index 7594d9a5cc6..0742d497c7b 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp +++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp @@ -63,8 +63,9 @@ void VExplodeJsonArrayTableFunction<DataImpl>::process_row(size_t row_idx) { StringRef text = _text_column->get_data_at(row_idx); if (text.data != nullptr) { if (WhichDataType(_text_datatype).is_json()) { - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(text.data, text.size); - if (doc && doc->getValue() && doc->getValue()->isArray()) { + JsonbDocument* doc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(text.data, text.size, &doc); + if (st.ok() && doc && doc->getValue() && doc->getValue()->isArray()) { auto* a = (ArrayVal*)doc->getValue(); if (a->numElem() > 0) { _cur_size = _parsed_data.set_output(*a, a->numElem()); diff --git a/be/src/vec/exprs/table_function/vexplode_json_object.cpp b/be/src/vec/exprs/table_function/vexplode_json_object.cpp index 38a00d60b19..aa92d8238ae 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_object.cpp +++ b/be/src/vec/exprs/table_function/vexplode_json_object.cpp @@ -54,8 +54,9 @@ void VExplodeJsonObjectTableFunction::process_row(size_t row_idx) { StringRef text = _json_object_column->get_data_at(row_idx); if (text.data != nullptr) { - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(text.data, text.size); - if (!doc || !doc->getValue()) [[unlikely]] { + JsonbDocument* doc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(text.data, text.size, &doc); + if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { // error jsonb, put null into output, cur_size = 0 , we will insert_default return; } diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 251a711e64c..99f5f625971 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -718,8 +718,9 @@ struct ConvertImplGenericFromJsonb { const bool is_dst_string = is_string_or_fixed_string(data_type_to); for (size_t i = 0; i < size; ++i) { const auto& val = col_from_string->get_data_at(i); - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(val.data, val.size); - if (UNLIKELY(!doc || !doc->getValue())) { + JsonbDocument* doc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(val.data, val.size, &doc); + if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { (*vec_null_map_to)[i] = 1; col_to->insert_default(); continue; @@ -762,7 +763,7 @@ struct ConvertImplGenericFromJsonb { continue; } ReadBuffer read_buffer((char*)(input_str.data()), input_str.size()); - Status st = data_type_to->from_string(read_buffer, col_to); + st = data_type_to->from_string(read_buffer, col_to); // if parsing failed, will return null (*vec_null_map_to)[i] = !st.ok(); if (!st.ok()) { @@ -881,8 +882,9 @@ struct ConvertImplFromJsonb { } // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(val.data, val.size); - if (UNLIKELY(!doc || !doc->getValue())) { + JsonbDocument* doc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(val.data, val.size, &doc); + if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { null_map[i] = 1; res[i] = 0; continue; diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp index 08d2c974db0..af4aeed9e5d 100644 --- a/be/src/vec/functions/function_jsonb.cpp +++ b/be/src/vec/functions/function_jsonb.cpp @@ -557,8 +557,9 @@ private: continue; } const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, l_size); - if (UNLIKELY(!doc || !doc->getValue())) { + JsonbDocument* doc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); + if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { dst_arr.clear(); return Status::InvalidArgument("jsonb data is invalid"); } @@ -665,8 +666,9 @@ private: static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const char* l_raw_str, int l_str_size, JsonbPath& path) { // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size); - if (UNLIKELY(!doc || !doc->getValue())) { + JsonbDocument* doc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc); + if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { return; } @@ -760,8 +762,9 @@ private: } // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, l_size); - if (UNLIKELY(!doc || !doc->getValue())) { + JsonbDocument* doc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); + if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { StringOP::push_null_string(i, res_data, res_offsets, null_map); return; } @@ -886,10 +889,11 @@ public: writer->writeStartArray(); // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, l_size); + JsonbDocument* doc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { - if (UNLIKELY(!doc || !doc->getValue())) { + if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { writer->writeNull(); continue; } @@ -1027,8 +1031,9 @@ private: } // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size); - if (UNLIKELY(!doc || !doc->getValue())) { + JsonbDocument* doc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc); + if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { null_map[i] = 1; res[i] = 0; return; @@ -1406,8 +1411,9 @@ struct JsonbLengthUtil { } auto jsonb_value = jsonb_data_column->get_data_at(i); // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = - JsonbDocument::checkAndCreateDocument(jsonb_value.data, jsonb_value.size); + JsonbDocument* doc = nullptr; + RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value.data, + jsonb_value.size, &doc)); JsonbValue* value = doc->getValue()->findValue(path, nullptr); if (UNLIKELY(!value)) { null_map->get_data()[i] = 1; @@ -1541,10 +1547,12 @@ struct JsonbContainsUtil { continue; } // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc1 = - JsonbDocument::checkAndCreateDocument(jsonb_value1.data, jsonb_value1.size); - JsonbDocument* doc2 = - JsonbDocument::checkAndCreateDocument(jsonb_value2.data, jsonb_value2.size); + JsonbDocument* doc1 = nullptr; + RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value1.data, + jsonb_value1.size, &doc1)); + JsonbDocument* doc2 = nullptr; + RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value2.data, + jsonb_value2.size, &doc2)); JsonbValue* value1 = doc1->getValue()->findValue(path, nullptr); JsonbValue* value2 = doc2->getValue(); diff --git a/be/src/vec/jsonb/serialize.cpp b/be/src/vec/jsonb/serialize.cpp index d75d332f40c..cd97e3bf5ca 100644 --- a/be/src/vec/jsonb/serialize.cpp +++ b/be/src/vec/jsonb/serialize.cpp @@ -24,6 +24,7 @@ #include <unordered_set> #include <vector> +#include "common/status.h" #include "olap/tablet_schema.h" #include "runtime/descriptors.h" #include "runtime/jsonb_value.h" @@ -91,7 +92,9 @@ void JsonbSerializeUtil::jsonb_to_block(const DataTypeSerDeSPtrs& serdes, const const std::unordered_map<uint32_t, uint32_t>& col_id_to_idx, Block& dst, const std::vector<std::string>& default_values, const std::unordered_set<int>& include_cids) { - auto pdoc = JsonbDocument::checkAndCreateDocument(data, size); + JsonbDocument* pdoc = nullptr; + THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(data, size, &pdoc)); + JsonbDocument& doc = *pdoc; size_t num_rows = dst.rows(); size_t filled_columns = 0; diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index db441b671e9..79b0866bc77 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -668,10 +668,10 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap( "`string_type_length_soft_limit_bytes` in vec engine."); } // Make sure that the json binary data written in is the correct jsonb value. - if (_is_jsonb && - !doris::JsonbDocument::checkAndCreateDocument(slice->data, slice->size)) { - return Status::InvalidArgument("invalid json binary value: {}", - std::string_view(slice->data, slice->size)); + if (_is_jsonb) { + JsonbDocument* doc = nullptr; + RETURN_IF_ERROR(doris::JsonbDocument::checkAndCreateDocument( + slice->data, slice->size, &doc)); } } else { // TODO: this may not be necessary, check and remove later @@ -695,10 +695,10 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap( " in vec engine."); } // Make sure that the json binary data written in is the correct jsonb value. - if (_is_jsonb && - !doris::JsonbDocument::checkAndCreateDocument(slice->data, slice->size)) { - return Status::InvalidArgument("invalid json binary value: {}", - std::string_view(slice->data, slice->size)); + if (_is_jsonb) { + JsonbDocument* doc = nullptr; + RETURN_IF_ERROR(doris::JsonbDocument::checkAndCreateDocument(slice->data, + slice->size, &doc)); } string_offset = *offset_cur; ++slice; diff --git a/be/test/vec/data_types/common_data_type_serder_test.h b/be/test/vec/data_types/common_data_type_serder_test.h index ef8d07323df..f8c3488e684 100644 --- a/be/test/vec/data_types/common_data_type_serder_test.h +++ b/be/test/vec/data_types/common_data_type_serder_test.h @@ -292,7 +292,10 @@ public: EXPECT_EQ(jsonb_column->size(), load_cols[0]->size()); for (size_t r = 0; r < jsonb_column->size(); ++r) { StringRef jsonb_data = jsonb_column->get_data_at(r); - auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size); + JsonbDocument* pdoc = nullptr; + auto st = + JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size, &pdoc); + ASSERT_TRUE(st.ok()) << "checkAndCreateDocument failed: " << st.to_string(); JsonbDocument& doc = *pdoc; size_t cIdx = 0; for (auto it = doc->begin(); it != doc->end(); ++it) { diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp b/be/test/vec/data_types/serde/data_type_serde_test.cpp index 3c9498f1d6d..24d89953459 100644 --- a/be/test/vec/data_types/serde/data_type_serde_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp @@ -240,7 +240,9 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) { jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(), jsonb_writer.getOutput()->getSize()); StringRef jsonb_data = jsonb_column->get_data_at(0); - auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size); + JsonbDocument* pdoc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size, &pdoc); + ASSERT_TRUE(st.ok()) << "checkAndCreateDocument failed: " << st.to_string(); JsonbDocument& doc = *pdoc; for (auto it = doc->begin(); it != doc->end(); ++it) { serde->read_one_cell_from_jsonb(*vec, it->value()); @@ -270,7 +272,9 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) { jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(), jsonb_writer.getOutput()->getSize()); StringRef jsonb_data = jsonb_column->get_data_at(0); - auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size); + JsonbDocument* pdoc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size, &pdoc); + ASSERT_TRUE(st.ok()) << "checkAndCreateDocument failed: " << st.to_string(); JsonbDocument& doc = *pdoc; for (auto it = doc->begin(); it != doc->end(); ++it) { serde->read_one_cell_from_jsonb(*vec, it->value()); diff --git a/be/test/vec/olap/jsonb_value_test.cpp b/be/test/vec/olap/jsonb_value_test.cpp index 3111163c0be..d6b5db784e2 100644 --- a/be/test/vec/olap/jsonb_value_test.cpp +++ b/be/test/vec/olap/jsonb_value_test.cpp @@ -189,7 +189,8 @@ TEST(JsonbValueConvertorTest, JsonbValueInvalid) { auto [status, column] = _olap_data_convertor->convert_column_data(0); // invalid will make error ASSERT_FALSE(status.ok()); - ASSERT_TRUE(status.to_string().find("invalid json binary value") != std::string::npos); + ASSERT_TRUE(status.to_string().find("Invalid JSONB document") != std::string::npos) + << status.to_string(); ASSERT_NE(column, nullptr); // test with null map @@ -235,7 +236,8 @@ TEST(JsonbValueConvertorTest, JsonbValueInvalid) { _olap_data_convertor->set_source_content(&block, 0, 5); auto [status1, column1] = _olap_data_convertor->convert_column_data(0); ASSERT_FALSE(status.ok()); - ASSERT_TRUE(status.to_string().find("invalid json binary value") != std::string::npos); + ASSERT_TRUE(status.to_string().find("Invalid JSONB document") != std::string::npos) + << status.to_string(); ASSERT_NE(column, nullptr); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org