github-actions[bot] commented on code in PR #24554:
URL: https://github.com/apache/doris/pull/24554#discussion_r1378395778


##########
be/src/vec/columns/column_array.cpp:
##########
@@ -481,6 +481,10 @@ void ColumnArray::insert_range_from(const IColumn& src, 
size_t start, size_t len
     }
 }
 
+double ColumnArray::get_ratio_of_default_rows(double sample_ratio) const {

Review Comment:
   warning: method 'get_ratio_of_default_rows' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   double ColumnArray::get_ratio_of_default_rows(double sample_ratio) {
   ```
   
   be/src/vec/columns/column_array.h:268:
   ```diff
   -     double get_ratio_of_default_rows(double sample_ratio) const override;
   +     static double get_ratio_of_default_rows(double sample_ratio) override;
   ```
   



##########
be/src/olap/rowset/beta_rowset_writer.cpp:
##########
@@ -544,6 +551,24 @@ bool BetaRowsetWriter::_is_segment_overlapping(
     return false;
 }
 
+// update tablet schema when meet variant columns, before commit_txn
+// Eg. rowset schema:       A(int),    B(float),  C(int), D(int)
+// _tabelt->tablet_schema:  A(bigint), B(double)
+//  => update_schema:       A(bigint), B(double), C(int), D(int)
+void BetaRowsetWriter::update_rowset_schema(TabletSchemaSPtr flush_schema) {

Review Comment:
   warning: method 'update_rowset_schema' can be made static [readability-convert-member-functions-to-static]
   
   be/src/olap/rowset/beta_rowset_writer.h:173:
   ```diff
   -     void update_rowset_schema(TabletSchemaSPtr flush_schema);
   +     static void update_rowset_schema(TabletSchemaSPtr flush_schema);
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -926,10 +1320,85 @@
     num_rows = target_num_rows;
 }
 
+void ColumnObject::create_root() {
+    auto type = is_nullable ? make_nullable(std::make_shared<MostCommonType>())
+                            : std::make_shared<MostCommonType>();
+    add_sub_column({}, type->create_column(), type);
+}
+
+void ColumnObject::create_root(const DataTypePtr& type, MutableColumnPtr&& 
column) {
+    if (num_rows == 0) {
+        num_rows = column->size();
+    }
+    add_sub_column({}, std::move(column), type);
+}
+
+bool ColumnObject::is_null_root() const {
+    auto* root = subcolumns.get_root();
+    if (root == nullptr) {
+        return true;
+    }
+    if (root->data.num_of_defaults_in_prefix == 0 &&
+        (root->data.data.empty() || 
is_nothing(root->data.get_least_common_type()))) {
+        return true;
+    }
+    return false;
+}
+
+bool ColumnObject::is_scalar_variant() const {
+    // Only root itself
+    return !is_null_root() && subcolumns.get_leaves().size() == 1;
+}
+
+DataTypePtr ColumnObject::get_root_type() const {
+    return subcolumns.get_root()->data.get_least_common_type();
+}
+
+#define SANITIZE_ROOT()                                                        
                    \
+    if (is_null_root()) {                                                      
                    \
+        return Status::InternalError("No root column, path {}", 
path.get_path());                  \
+    }                                                                          
                    \
+    if 
(!WhichDataType(remove_nullable(subcolumns.get_root()->data.get_least_common_type()))
       \
+                 .is_json()) {                                                 
                    \
+        return Status::InternalError(                                          
                    \
+                "Root column is not jsonb type but {}, path {}",               
                    \
+                
subcolumns.get_root()->data.get_least_common_type()->get_name(), 
path.get_path()); \
+    }
+
+Status ColumnObject::extract_root(const PathInData& path) {

Review Comment:
   warning: method 'extract_root' can be made const [readability-make-member-function-const]
   
   be/src/vec/columns/column_object.h:453:
   ```diff
   -     Status extract_root(const PathInData& path);
   +     Status extract_root(const PathInData& path) const;
   ```
   
   ```suggestion
   Status ColumnObject::extract_root(const PathInData& path) const {
   ```
   



##########
be/src/vec/columns/column_object.h:
##########
@@ -188,23 +215,68 @@
     const bool is_nullable;
     Subcolumns subcolumns;
     size_t num_rows;
+    // sparse columns will be merge and encoded into root column
+    Subcolumns sparse_columns;
+    // The rapidjson document format of Subcolumns tree structure
+    // the leaves is null.In order to display whole document, copy
+    // this structure and fill with Subcolumns sub items
+    mutable std::shared_ptr<rapidjson::Document> doc_structure;
 
 public:
     static constexpr auto COLUMN_NAME_DUMMY = "_dummy";
 
-    explicit ColumnObject(bool is_nullable_);
+    explicit ColumnObject(bool is_nullable_, bool create_root = true);
 
     ColumnObject(Subcolumns&& subcolumns_, bool is_nullable_);
 
     ~ColumnObject() override = default;
 
-    bool can_be_inside_nullable() const override { return true; }
+    bool can_be_inside_nullable() const override { return false; }
 
     /// Checks that all subcolumns have consistent sizes.
     void check_consistency() const;
 
+    MutableColumnPtr get_root() {
+        if (subcolumns.empty() || 
is_nothing(subcolumns.get_root()->data.get_least_common_type())) {
+            return nullptr;
+        }
+        return 
subcolumns.get_mutable_root()->data.get_finalized_column_ptr()->assume_mutable();
+    }
+
+    bool serialize_one_row_to_string(int row, std::string* output) const;
+
+    bool serialize_one_row_to_string(int row, BufferWritable& output) const;
+
+    // serialize one row to json format
+    bool serialize_one_row_to_json_format(int row, rapidjson::StringBuffer* 
output,
+                                          bool* is_null) const;
+
+    // merge multiple sub sparse columns into root
+    void merge_sparse_to_root_column();
+
+    // ensure root node is a certain type
+    void ensure_root_node_type(const DataTypePtr& type);
+
+    // create jsonb root if missing
+    void create_root();
+
+    // create root with type and column if missing
+    void create_root(const DataTypePtr& type, MutableColumnPtr&& column);
+
+    // root is null or type nothing
+    bool is_null_root() const;
+
+    // Only single scalar root column
+    bool is_scalar_variant() const;
+
+    ColumnPtr get_root() const { return 
subcolumns.get_root()->data.get_finalized_column_ptr(); }
+
     bool has_subcolumn(const PathInData& key) const;
 
+    DataTypePtr get_root_type() const;
+
+    bool is_variant() const override { return true; }

Review Comment:
   warning: method 'is_variant' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static bool is_variant() override { return true; }
   ```
   



##########
be/src/vec/data_types/data_type_object.h:
##########
@@ -60,27 +61,33 @@ class DataTypeObject : public IDataType {
     TPrimitiveType::type get_type_as_tprimitive_type() const override {
         return TPrimitiveType::VARIANT;
     }
+    doris::FieldType get_type_as_field_type() const override {
+        return doris::FieldType::OLAP_FIELD_TYPE_VARIANT;
+    }
     MutableColumnPtr create_column() const override { return 
ColumnObject::create(is_nullable); }
     bool is_object() const override { return true; }
     bool equals(const IDataType& rhs) const override;
     bool hasNullableSubcolumns() const { return is_nullable; }
     bool get_is_parametric() const override { return true; }
-    bool can_be_inside_nullable() const override { return true; }
     bool have_subtypes() const override { return true; };
+    bool can_be_inside_nullable() const override { return false; }
     int64_t get_uncompressed_serialized_bytes(const IColumn& column,
                                               int be_exec_version) const 
override;
-    std::string to_string(const IColumn& column, size_t row_num) const 
override {
-        const auto& column_object = assert_cast<const ColumnObject&>(column);
-        return "Variant: " + column_object.get_keys_str();
-    }
+    std::string to_string(const IColumn& column, size_t row_num) const 
override;
+    void to_string(const IColumn& column, size_t row_num, BufferWritable& 
ostr) const override;
     char* serialize(const IColumn& column, char* buf, int be_exec_version) 
const override;
     const char* deserialize(const char* buf, IColumn* column, int 
be_exec_version) const override;
-    [[noreturn]] Field get_default() const override {
-        LOG(FATAL) << "Method getDefault() is not implemented for data type " 
<< get_name();
-    }
+    Field get_default() const override { return VariantMap(); }
 
-    [[noreturn]] Field get_field(const TExprNode& node) const override {
-        LOG(FATAL) << "Unimplemented get_field for object";
+    Field get_field(const TExprNode& node) const override {

Review Comment:
   warning: method 'get_field' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static Field get_field(const TExprNode& node) override {
   ```
   



##########
be/src/vec/columns/column_string.h:
##########
@@ -589,6 +604,10 @@ class ColumnString final : public COWHelper<IColumn, 
ColumnString> {
     }
 
     ColumnPtr index(const IColumn& indexes, size_t limit) const override;
+
+    double get_ratio_of_default_rows(double sample_ratio) const override {

Review Comment:
   warning: method 'get_ratio_of_default_rows' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static double get_ratio_of_default_rows(double sample_ratio) override {
   ```
   



##########
be/src/vec/data_types/data_type_object.h:
##########
@@ -60,27 +61,33 @@
     TPrimitiveType::type get_type_as_tprimitive_type() const override {
         return TPrimitiveType::VARIANT;
     }
+    doris::FieldType get_type_as_field_type() const override {
+        return doris::FieldType::OLAP_FIELD_TYPE_VARIANT;
+    }
     MutableColumnPtr create_column() const override { return 
ColumnObject::create(is_nullable); }
     bool is_object() const override { return true; }
     bool equals(const IDataType& rhs) const override;
     bool hasNullableSubcolumns() const { return is_nullable; }
     bool get_is_parametric() const override { return true; }
-    bool can_be_inside_nullable() const override { return true; }
     bool have_subtypes() const override { return true; };
+    bool can_be_inside_nullable() const override { return false; }
     int64_t get_uncompressed_serialized_bytes(const IColumn& column,
                                               int be_exec_version) const 
override;
-    std::string to_string(const IColumn& column, size_t row_num) const 
override {
-        const auto& column_object = assert_cast<const ColumnObject&>(column);
-        return "Variant: " + column_object.get_keys_str();
-    }
+    std::string to_string(const IColumn& column, size_t row_num) const 
override;
+    void to_string(const IColumn& column, size_t row_num, BufferWritable& 
ostr) const override;
     char* serialize(const IColumn& column, char* buf, int be_exec_version) 
const override;
     const char* deserialize(const char* buf, IColumn* column, int 
be_exec_version) const override;
-    [[noreturn]] Field get_default() const override {
-        LOG(FATAL) << "Method getDefault() is not implemented for data type " 
<< get_name();
-    }
+    Field get_default() const override { return VariantMap(); }
 
-    [[noreturn]] Field get_field(const TExprNode& node) const override {
-        LOG(FATAL) << "Unimplemented get_field for object";
+    Field get_field(const TExprNode& node) const override {
+        if (node.__isset.string_literal) {
+            return node.string_literal.value;
+        }
+        if (node.node_type == TExprNodeType::NULL_LITERAL) {
+            return Field();

Review Comment:
   warning: avoid repeating the return type from the declaration; use a braced initializer list instead [modernize-return-braced-init-list]
   
   ```suggestion
               return {};
   ```
   



##########
be/src/vec/data_types/data_type_time_v2.h:
##########
@@ -118,6 +121,9 @@
     TPrimitiveType::type get_type_as_tprimitive_type() const override {
         return TPrimitiveType::DATETIMEV2;
     }
+    doris::FieldType get_type_as_field_type() const override {

Review Comment:
   warning: method 'get_type_as_field_type' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static doris::FieldType get_type_as_field_type() override {
   ```
   



##########
be/src/vec/data_types/serde/data_type_array_serde.cpp:
##########
@@ -225,6 +225,35 @@ void DataTypeArraySerDe::write_one_cell_to_jsonb(const 
IColumn& column, JsonbWri
     result.writeEndBinary();
 }
 
+void DataTypeArraySerDe::write_one_cell_to_json(const IColumn& column, 
rapidjson::Value& result,

Review Comment:
   warning: method 'write_one_cell_to_json' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static void DataTypeArraySerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result,
   ```
   
   be/src/vec/data_types/serde/data_type_array_serde.cpp:229:
   ```diff
   -                                                 int row_num) const {
   +                                                 int row_num) {
   ```
   



##########
be/src/vec/data_types/serde/data_type_array_serde.cpp:
##########
@@ -225,6 +225,35 @@
     result.writeEndBinary();
 }
 
+void DataTypeArraySerDe::write_one_cell_to_json(const IColumn& column, 
rapidjson::Value& result,
+                                                
rapidjson::Document::AllocatorType& allocator,
+                                                int row_num) const {
+    // vectorized::Field array = column[row_num];
+    // Use allocator instead of stack memory, since rapidjson hold the 
reference of String value
+    // otherwise causes stack use after free
+    auto& column_array = static_cast<const ColumnArray&>(column);
+    void* mem = allocator.Malloc(sizeof(vectorized::Field));
+    vectorized::Field* array = new (mem) 
vectorized::Field(column_array[row_num]);
+
+    convert_field_to_rapidjson(*array, result, allocator);
+}
+
+void DataTypeArraySerDe::read_one_cell_from_json(IColumn& column,
+                                                 const rapidjson::Value& 
result) const {

Review Comment:
   warning: method 'read_one_cell_from_json' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static void DataTypeArraySerDe::read_one_cell_from_json(IColumn& column,
                                                    const rapidjson::Value& result) {
   ```
   



##########
be/src/vec/data_types/serde/data_type_jsonb_serde.cpp:
##########
@@ -122,5 +129,107 @@
     return Status::NotSupported("write_column_to_orc with type [{}]", 
column.get_name());
 }
 
+static void convert_jsonb_to_rapidjson(const JsonbValue& val, 
rapidjson::Value& target,
+                                       rapidjson::Document::AllocatorType& 
allocator) {
+    // convert type of jsonb to rapidjson::Value
+    switch (val.type()) {
+    case JsonbType::T_True:
+        target.SetBool(true);
+        break;
+    case JsonbType::T_False:
+        target.SetBool(false);
+        break;
+    case JsonbType::T_Null:
+        target.SetNull();
+        break;
+    case JsonbType::T_Float:
+        target.SetFloat(static_cast<const JsonbFloatVal&>(val).val());
+        break;
+    case JsonbType::T_Double:
+        target.SetDouble(static_cast<const JsonbDoubleVal&>(val).val());
+        break;
+    case JsonbType::T_Int64:
+        target.SetInt64(static_cast<const JsonbInt64Val&>(val).val());
+        break;
+    case JsonbType::T_Int32:
+        target.SetInt(static_cast<const JsonbInt32Val&>(val).val());
+        break;
+    case JsonbType::T_Int16:
+        target.SetInt(static_cast<const JsonbInt16Val&>(val).val());
+        break;
+    case JsonbType::T_Int8:
+        target.SetInt(static_cast<const JsonbInt8Val&>(val).val());
+        break;
+    case JsonbType::T_String:
+        target.SetString(static_cast<const JsonbStringVal&>(val).getBlob(),
+                         static_cast<const JsonbStringVal&>(val).getBlobLen());
+        break;
+    case JsonbType::T_Array: {
+        target.SetArray();
+        const ArrayVal& array = static_cast<const ArrayVal&>(val);
+        if (array.numElem() == 0) {
+            target.SetNull();
+            break;
+        }
+        target.Reserve(array.numElem(), allocator);
+        for (auto it = array.begin(); it != array.end(); ++it) {
+            rapidjson::Value val;
+            convert_jsonb_to_rapidjson(*static_cast<const JsonbValue*>(it), 
val, allocator);
+            target.PushBack(val, allocator);
+        }
+        break;
+    }
+    case JsonbType::T_Object: {
+        target.SetObject();
+        const ObjectVal& obj = static_cast<const ObjectVal&>(val);

Review Comment:
   warning: use auto when initializing with a cast to avoid duplicating the type name [modernize-use-auto]
   
   ```suggestion
           const auto& obj = static_cast<const ObjectVal&>(val);
   ```
   



##########
be/src/vec/data_types/data_type_date_time.h:
##########
@@ -80,6 +80,10 @@ class DataTypeDateTime final : public 
DataTypeNumberBase<Int64> {
         return TPrimitiveType::DATETIME;
     }
 
+    doris::FieldType get_type_as_field_type() const override {

Review Comment:
   warning: method 'get_type_as_field_type' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static doris::FieldType get_type_as_field_type() override {
   ```
   



##########
be/src/vec/data_types/data_type_time_v2.h:
##########
@@ -64,6 +64,9 @@ class DataTypeDateV2 final : public 
DataTypeNumberBase<UInt32> {
     TPrimitiveType::type get_type_as_tprimitive_type() const override {
         return TPrimitiveType::DATEV2;
     }
+    doris::FieldType get_type_as_field_type() const override {

Review Comment:
   warning: method 'get_type_as_field_type' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static doris::FieldType get_type_as_field_type() override {
   ```
   



##########
be/src/vec/data_types/data_type_time.h:
##########
@@ -59,6 +59,9 @@ class DataTypeTime final : public DataTypeNumberBase<Float64> 
{
     TPrimitiveType::type get_type_as_tprimitive_type() const override {
         return TPrimitiveType::TIME;
     }
+    doris::FieldType get_type_as_field_type() const override {

Review Comment:
   warning: method 'get_type_as_field_type' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static doris::FieldType get_type_as_field_type() override {
   ```
   



##########
be/src/vec/columns/column_string.cpp:
##########
@@ -207,6 +207,19 @@ size_t ColumnString::filter(const Filter& filter) {
     return filter_arrays_impl<UInt8, Offset>(chars, offsets, filter);
 }
 
+Status ColumnString::filter_by_selector(const uint16_t* sel, size_t sel_size, 
IColumn* col_ptr) {

Review Comment:
   warning: method 'filter_by_selector' can be made static [readability-convert-member-functions-to-static]
   
   be/src/vec/columns/column_string.h:493:
   ```diff
   -     Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) override;
   +     static Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) override;
   ```
   



##########
be/src/vec/core/field.h:
##########
@@ -708,6 +674,41 @@ class Field {
         }
     }
 
+private:
+    std::aligned_union_t<DBMS_MIN_FIELD_SIZE - sizeof(Types::Which), Null, 
UInt64, UInt128, Int64,
+                         Int128, Float64, String, JsonbField, Array, Tuple, 
Map, VariantMap,
+                         DecimalField<Decimal32>, DecimalField<Decimal64>, 
DecimalField<Decimal128>,
+                         DecimalField<Decimal128I>, DecimalField<Decimal256>, 
BitmapValue,
+                         HyperLogLog, QuantileState>
+            storage;
+
+    Types::Which which;
+
+    /// Assuming there was no allocated state or it was deallocated (see 
destroy).
+    template <typename T>
+    void create_concrete(T&& x) {
+        using UnqualifiedType = std::decay_t<T>;
+
+        // In both Field and PODArray, small types may be stored as wider 
types,
+        // e.g. char is stored as UInt64. Field can return this extended value
+        // with get<StorageType>(). To avoid uninitialized results from get(),
+        // we must initialize the entire wide stored type, and not just the
+        // nominal type.
+        using StorageType = NearestFieldType<UnqualifiedType>;
+        new (&storage) StorageType(std::forward<T>(x));
+        which = TypeToEnum<UnqualifiedType>::value;
+    }
+
+    /// Assuming same types.
+    template <typename T>
+    void assign_concrete(T&& x) {
+        using JustT = std::decay_t<T>;
+        assert(which == TypeToEnum<JustT>::value);
+        JustT* MAY_ALIAS ptr = reinterpret_cast<JustT*>(&storage);
+        *ptr = std::forward<T>(x);
+    }
+
+private:

Review Comment:
   warning: redundant access specifier has the same accessibility as the previous access specifier [readability-redundant-access-specifiers]
   
   ```suggestion
   
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/vec/core/field.h:676:** previously declared here
   ```cpp
   private:
   ^
   ```
   
   </details>
   



##########
be/src/vec/columns/column_set.h:
##########
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnSet.h
+// and modified by Doris
+
+#pragma once
+
+#include "exprs/hybrid_set.h"
+#include "vec/columns/column_dummy.h"
+
+namespace doris::vectorized {
+
+using ConstSetPtr = std::shared_ptr<HybridSetBase>;
+
+/** A column containing multiple values in the `IN` section.
+  * Behaves like a constant-column (because the set is one, not its own for 
each line).
+  * This column has a nonstandard value, so it can not be obtained via a 
normal interface.
+  */
+class ColumnSet final : public COWHelper<IColumnDummy, ColumnSet> {
+public:
+    friend class COWHelper<IColumnDummy, ColumnSet>;
+
+    ColumnSet(size_t s_, const ConstSetPtr& data_) : data(data_) { s = s_; }
+    ColumnSet(const ColumnSet&) = default;
+
+    const char* get_family_name() const override { return "Set"; }

Review Comment:
   warning: method 'get_family_name' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static const char* get_family_name() override { return "Set"; }
   ```
   



##########
be/src/vec/data_types/serde/data_type_jsonb_serde.cpp:
##########
@@ -122,5 +129,107 @@
     return Status::NotSupported("write_column_to_orc with type [{}]", 
column.get_name());
 }
 
+static void convert_jsonb_to_rapidjson(const JsonbValue& val, 
rapidjson::Value& target,
+                                       rapidjson::Document::AllocatorType& 
allocator) {
+    // convert type of jsonb to rapidjson::Value
+    switch (val.type()) {
+    case JsonbType::T_True:
+        target.SetBool(true);
+        break;
+    case JsonbType::T_False:
+        target.SetBool(false);
+        break;
+    case JsonbType::T_Null:
+        target.SetNull();
+        break;
+    case JsonbType::T_Float:
+        target.SetFloat(static_cast<const JsonbFloatVal&>(val).val());
+        break;
+    case JsonbType::T_Double:
+        target.SetDouble(static_cast<const JsonbDoubleVal&>(val).val());
+        break;
+    case JsonbType::T_Int64:
+        target.SetInt64(static_cast<const JsonbInt64Val&>(val).val());
+        break;
+    case JsonbType::T_Int32:
+        target.SetInt(static_cast<const JsonbInt32Val&>(val).val());
+        break;
+    case JsonbType::T_Int16:
+        target.SetInt(static_cast<const JsonbInt16Val&>(val).val());
+        break;
+    case JsonbType::T_Int8:
+        target.SetInt(static_cast<const JsonbInt8Val&>(val).val());
+        break;
+    case JsonbType::T_String:
+        target.SetString(static_cast<const JsonbStringVal&>(val).getBlob(),
+                         static_cast<const JsonbStringVal&>(val).getBlobLen());
+        break;
+    case JsonbType::T_Array: {
+        target.SetArray();
+        const ArrayVal& array = static_cast<const ArrayVal&>(val);
+        if (array.numElem() == 0) {
+            target.SetNull();
+            break;
+        }
+        target.Reserve(array.numElem(), allocator);
+        for (auto it = array.begin(); it != array.end(); ++it) {
+            rapidjson::Value val;
+            convert_jsonb_to_rapidjson(*static_cast<const JsonbValue*>(it), 
val, allocator);
+            target.PushBack(val, allocator);
+        }
+        break;
+    }
+    case JsonbType::T_Object: {
+        target.SetObject();
+        const ObjectVal& obj = static_cast<const ObjectVal&>(val);
+        for (auto it = obj.begin(); it != obj.end(); ++it) {
+            rapidjson::Value val;
+            convert_jsonb_to_rapidjson(*it->value(), val, allocator);
+            target.AddMember(rapidjson::GenericStringRef(it->getKeyStr(), 
it->klen()), val,
+                             allocator);
+        }
+        break;
+    }
+    default:
+        CHECK(false) << "unkown type " << static_cast<int>(val.type());
+        break;
+    }
+}
+
+void DataTypeJsonbSerDe::write_one_cell_to_json(const IColumn& column, 
rapidjson::Value& result,

Review Comment:
   warning: method 'write_one_cell_to_json' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static void DataTypeJsonbSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result,
   ```
   
   be/src/vec/data_types/serde/data_type_jsonb_serde.cpp:200:
   ```diff
   -                                                 int row_num) const {
   +                                                 int row_num) {
   ```
   



##########
be/src/vec/data_types/data_type_date.h:
##########
@@ -53,6 +53,10 @@ class DataTypeDate final : public DataTypeNumberBase<Int64> {
     TPrimitiveType::type get_type_as_tprimitive_type() const override {
         return TPrimitiveType::DATE;
     }
+
+    doris::FieldType get_type_as_field_type() const override {

Review Comment:
   warning: method 'get_type_as_field_type' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static doris::FieldType get_type_as_field_type() override {
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -926,10 +1320,85 @@ void ColumnObject::revise_to(int target_num_rows) {
     num_rows = target_num_rows;
 }
 
+void ColumnObject::create_root() {
+    auto type = is_nullable ? make_nullable(std::make_shared<MostCommonType>())
+                            : std::make_shared<MostCommonType>();
+    add_sub_column({}, type->create_column(), type);
+}
+
+void ColumnObject::create_root(const DataTypePtr& type, MutableColumnPtr&& 
column) {
+    if (num_rows == 0) {
+        num_rows = column->size();
+    }
+    add_sub_column({}, std::move(column), type);
+}
+
+bool ColumnObject::is_null_root() const {

Review Comment:
   warning: method 'is_null_root' can be made static [readability-convert-member-functions-to-static]
   
   be/src/vec/columns/column_object.h:266:
   ```diff
   -     bool is_null_root() const;
   +     static bool is_null_root() ;
   ```
   
   ```suggestion
   bool ColumnObject::is_null_root() {
   ```
   



##########
be/src/vec/columns/column_object.h:
##########
@@ -188,23 +215,68 @@ class ColumnObject final : public COWHelper<IColumn, 
ColumnObject> {
     const bool is_nullable;
     Subcolumns subcolumns;
     size_t num_rows;
+    // sparse columns will be merge and encoded into root column
+    Subcolumns sparse_columns;
+    // The rapidjson document format of Subcolumns tree structure
+    // the leaves is null.In order to display whole document, copy
+    // this structure and fill with Subcolumns sub items
+    mutable std::shared_ptr<rapidjson::Document> doc_structure;
 
 public:
     static constexpr auto COLUMN_NAME_DUMMY = "_dummy";
 
-    explicit ColumnObject(bool is_nullable_);
+    explicit ColumnObject(bool is_nullable_, bool create_root = true);
 
     ColumnObject(Subcolumns&& subcolumns_, bool is_nullable_);
 
     ~ColumnObject() override = default;
 
-    bool can_be_inside_nullable() const override { return true; }
+    bool can_be_inside_nullable() const override { return false; }

Review Comment:
   warning: method 'can_be_inside_nullable' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static bool can_be_inside_nullable() override { return false; }
   ```
   



##########
be/src/vec/data_types/serde/data_type_jsonb_serde.cpp:
##########
@@ -122,5 +129,107 @@ Status DataTypeJsonbSerDe::write_column_to_orc(const 
std::string& timezone, cons
     return Status::NotSupported("write_column_to_orc with type [{}]", 
column.get_name());
 }
 
+static void convert_jsonb_to_rapidjson(const JsonbValue& val, 
rapidjson::Value& target,
+                                       rapidjson::Document::AllocatorType& 
allocator) {
+    // convert type of jsonb to rapidjson::Value
+    switch (val.type()) {
+    case JsonbType::T_True:
+        target.SetBool(true);
+        break;
+    case JsonbType::T_False:
+        target.SetBool(false);
+        break;
+    case JsonbType::T_Null:
+        target.SetNull();
+        break;
+    case JsonbType::T_Float:
+        target.SetFloat(static_cast<const JsonbFloatVal&>(val).val());
+        break;
+    case JsonbType::T_Double:
+        target.SetDouble(static_cast<const JsonbDoubleVal&>(val).val());
+        break;
+    case JsonbType::T_Int64:
+        target.SetInt64(static_cast<const JsonbInt64Val&>(val).val());
+        break;
+    case JsonbType::T_Int32:
+        target.SetInt(static_cast<const JsonbInt32Val&>(val).val());
+        break;
+    case JsonbType::T_Int16:
+        target.SetInt(static_cast<const JsonbInt16Val&>(val).val());
+        break;
+    case JsonbType::T_Int8:
+        target.SetInt(static_cast<const JsonbInt8Val&>(val).val());
+        break;
+    case JsonbType::T_String:
+        target.SetString(static_cast<const JsonbStringVal&>(val).getBlob(),
+                         static_cast<const JsonbStringVal&>(val).getBlobLen());
+        break;
+    case JsonbType::T_Array: {
+        target.SetArray();
+        const ArrayVal& array = static_cast<const ArrayVal&>(val);

Review Comment:
   warning: use auto when initializing with a cast to avoid duplicating the type name [modernize-use-auto]
   
   ```suggestion
           const auto& array = static_cast<const ArrayVal&>(val);
   ```
   



##########
be/src/vec/data_types/serde/data_type_array_serde.cpp:
##########
@@ -225,6 +225,35 @@
     result.writeEndBinary();
 }
 
+void DataTypeArraySerDe::write_one_cell_to_json(const IColumn& column, 
rapidjson::Value& result,
+                                                
rapidjson::Document::AllocatorType& allocator,
+                                                int row_num) const {
+    // vectorized::Field array = column[row_num];
+    // Use allocator instead of stack memory, since rapidjson hold the 
reference of String value
+    // otherwise causes stack use after free
+    auto& column_array = static_cast<const ColumnArray&>(column);
+    void* mem = allocator.Malloc(sizeof(vectorized::Field));
+    vectorized::Field* array = new (mem) 
vectorized::Field(column_array[row_num]);

Review Comment:
   warning: use auto when initializing with new to avoid duplicating the type name [modernize-use-auto]
   
   ```suggestion
       auto* array = new (mem) vectorized::Field(column_array[row_num]);
   ```
   



##########
be/src/vec/data_types/serde/data_type_jsonb_serde.cpp:
##########
@@ -122,5 +129,107 @@
     return Status::NotSupported("write_column_to_orc with type [{}]", 
column.get_name());
 }
 
+static void convert_jsonb_to_rapidjson(const JsonbValue& val, 
rapidjson::Value& target,
+                                       rapidjson::Document::AllocatorType& 
allocator) {
+    // convert type of jsonb to rapidjson::Value
+    switch (val.type()) {
+    case JsonbType::T_True:
+        target.SetBool(true);
+        break;
+    case JsonbType::T_False:
+        target.SetBool(false);
+        break;
+    case JsonbType::T_Null:
+        target.SetNull();
+        break;
+    case JsonbType::T_Float:
+        target.SetFloat(static_cast<const JsonbFloatVal&>(val).val());
+        break;
+    case JsonbType::T_Double:
+        target.SetDouble(static_cast<const JsonbDoubleVal&>(val).val());
+        break;
+    case JsonbType::T_Int64:
+        target.SetInt64(static_cast<const JsonbInt64Val&>(val).val());
+        break;
+    case JsonbType::T_Int32:
+        target.SetInt(static_cast<const JsonbInt32Val&>(val).val());
+        break;
+    case JsonbType::T_Int16:
+        target.SetInt(static_cast<const JsonbInt16Val&>(val).val());
+        break;
+    case JsonbType::T_Int8:
+        target.SetInt(static_cast<const JsonbInt8Val&>(val).val());
+        break;
+    case JsonbType::T_String:
+        target.SetString(static_cast<const JsonbStringVal&>(val).getBlob(),
+                         static_cast<const JsonbStringVal&>(val).getBlobLen());
+        break;
+    case JsonbType::T_Array: {
+        target.SetArray();
+        const ArrayVal& array = static_cast<const ArrayVal&>(val);
+        if (array.numElem() == 0) {
+            target.SetNull();
+            break;
+        }
+        target.Reserve(array.numElem(), allocator);
+        for (auto it = array.begin(); it != array.end(); ++it) {
+            rapidjson::Value val;
+            convert_jsonb_to_rapidjson(*static_cast<const JsonbValue*>(it), 
val, allocator);
+            target.PushBack(val, allocator);
+        }
+        break;
+    }
+    case JsonbType::T_Object: {
+        target.SetObject();
+        const ObjectVal& obj = static_cast<const ObjectVal&>(val);
+        for (auto it = obj.begin(); it != obj.end(); ++it) {
+            rapidjson::Value val;
+            convert_jsonb_to_rapidjson(*it->value(), val, allocator);
+            target.AddMember(rapidjson::GenericStringRef(it->getKeyStr(), 
it->klen()), val,
+                             allocator);
+        }
+        break;
+    }
+    default:
+        CHECK(false) << "unkown type " << static_cast<int>(val.type());
+        break;
+    }
+}
+
+void DataTypeJsonbSerDe::write_one_cell_to_json(const IColumn& column, 
rapidjson::Value& result,
+                                                
rapidjson::Document::AllocatorType& allocator,
+                                                int row_num) const {
+    auto& data = assert_cast<const ColumnString&>(column);
+    const auto jsonb_val = data.get_data_at(row_num);
+    if (jsonb_val.empty()) {
+        result.SetNull();
+    }
+    JsonbValue* val = JsonbDocument::createValue(jsonb_val.data, 
jsonb_val.size);
+    if (val == nullptr) {
+        throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Failed to get json 
document from jsonb");
+    }
+    rapidjson::Value value;
+    convert_jsonb_to_rapidjson(*val, value, allocator);
+    if (val->isObject() && result.IsObject()) {
+        JsonFunctions::merge_objects(result, value, allocator);
+    } else {
+        result = std::move(value);
+    }
+}
+
+void DataTypeJsonbSerDe::read_one_cell_from_json(IColumn& column,
+                                                 const rapidjson::Value& 
result) const {

Review Comment:
   warning: method 'read_one_cell_from_json' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static void DataTypeJsonbSerDe::read_one_cell_from_json(IColumn& column,
                                                    const rapidjson::Value& 
result) {
   ```
   



##########
be/src/vec/data_types/serde/data_type_nullable_serde.cpp:
##########
@@ -342,5 +342,30 @@ Status DataTypeNullableSerDe::write_column_to_orc(const 
std::string& timezone,
 const std::string DataTypeNullableSerDe::NULL_IN_CSV_FOR_ORDINARY_TYPE = "\\N";
 const std::string DataTypeNullableSerDe::NULL_IN_CSV_FOR_NESTED_TYPE = "null";
 
+void DataTypeNullableSerDe::write_one_cell_to_json(const IColumn& column, 
rapidjson::Value& result,

Review Comment:
   warning: method 'write_one_cell_to_json' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static 
   ```
   
   be/src/vec/data_types/serde/data_type_nullable_serde.cpp:346:
   ```diff
   - ,
   - {
   + ,
   + {
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to