xiaokang commented on code in PR #33493:
URL: https://github.com/apache/doris/pull/33493#discussion_r1560364148


##########
be/src/vec/columns/column.h:
##########
@@ -673,6 +673,10 @@ class IColumn : public COW<IColumn> {
 
     virtual void replace_column_null_data(const uint8_t* __restrict null_map) 
{}
 
+    virtual void replace(const Field& f, size_t self_row) {
+        LOG(FATAL) << "Method replace is not supported for " << get_name();

Review Comment:
   It's better to define it as a pure virtual function like replace_column_data 
to enforce impl at compile time and avoid coredump at runtime.



##########
be/src/vec/columns/column_object.h:
##########
@@ -456,13 +460,11 @@ class ColumnObject final : public COWHelper<IColumn, 
ColumnObject> {
         LOG(FATAL) << "should not call the method in column object";
     }
 
-    void replace_column_data(const IColumn&, size_t row, size_t self_row) 
override {
-        LOG(FATAL) << "should not call the method in column object";
-    }
+    bool is_variable_length() const override { return true; }
 
-    void replace_column_data_default(size_t self_row) override {
-        LOG(FATAL) << "should not call the method in column object";
-    }
+    void replace_column_data(const IColumn&, size_t row, size_t self_row) 
override;
+
+    void replace_column_data_default(size_t self_row) override;

Review Comment:
   replace() is not defined



##########
be/src/olap/tablet_schema.h:
##########
@@ -426,6 +429,22 @@ class TabletSchema {
         return str;
     }
 
+    string dump() const {
+        string str = "[";

Review Comment:
   dump_xxx 



##########
be/src/vec/columns/column.h:
##########
@@ -673,6 +673,10 @@ class IColumn : public COW<IColumn> {
 
     virtual void replace_column_null_data(const uint8_t* __restrict null_map) 
{}
 
+    virtual void replace(const Field& f, size_t self_row) {
+        LOG(FATAL) << "Method replace is not supported for " << get_name();

Review Comment:
   If it's not used by external class, define it as protected.



##########
be/src/vec/columns/column_array.cpp:
##########
@@ -412,6 +412,24 @@ void ColumnArray::insert(const Field& x) {
     }
 }
 
+void ColumnArray::replace(const Field& f, size_t self_row) {
+    const auto& array = f.get<const Array&>();
+    if (self_row == 0) {
+        // we should clear data because we call resize() before 
replace_column_data()
+        data->clear();
+    }
+    if (f.is_null()) {
+        get_data().insert(Null());
+        get_offsets().push_back(get_offsets().back() + 1);

Review Comment:
   I think the offset should not +1, just like insert_default() 



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -735,6 +735,7 @@ void ColumnObject::get(size_t n, Field& res) const {
     if (!is_finalized()) {
         const_cast<ColumnObject*>(this)->finalize();
     }
+    res = VariantMap();

Review Comment:
   Why change it? If you replace res with a new VariantMap(), it's more simple 
to call res = operator[](n)



##########
be/src/vec/columns/column_array.cpp:
##########
@@ -412,6 +412,24 @@ void ColumnArray::insert(const Field& x) {
     }
 }
 
+void ColumnArray::replace(const Field& f, size_t self_row) {
+    const auto& array = f.get<const Array&>();
+    if (self_row == 0) {
+        // we should clear data because we call resize() before 
replace_column_data()
+        data->clear();
+    }
+    if (f.is_null()) {
+        get_data().insert(Null());
+        get_offsets().push_back(get_offsets().back() + 1);
+    } else {
+        get_offsets()[self_row] = get_offsets()[self_row - 1] + array.size();
+        // we make sure call replace_column_data() by order so, here we just 
insert for nested
+        for (const auto& i : array) {

Review Comment:
   i -> item



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -1513,4 +1514,78 @@ Status ColumnObject::sanitize() const {
     return Status::OK();
 }
 
+void ColumnObject::Subcolumn::replace(Field field, int row_num) {
+    finalize();
+    Field new_field;
+    if (!field.is_null()) {
+        convert_field_to_type(field, *least_common_type.get(), &new_field);
+        data.back()->replace(new_field, row_num);
+    } else {
+        data.back()->replace(Null(), row_num);
+    }
+}
+
+void ColumnObject::replace_column_data(const IColumn& col, size_t row, size_t 
self_row) {
+    DCHECK(size() > self_row);
+    DCHECK(col.size() > row);
+#ifndef NDEBUG
+    check_consistency();
+#endif
+    Field var_filed = VariantMap();
+    col.get(row, var_filed);
+    const auto& object = var_filed.get<const VariantMap&>();
+    phmap::flat_hash_set<std::string> inserted;
+    size_t old_size = size();
+    for (const auto& [key_str, value] : object) {
+        PathInData key;
+        if (!key_str.empty()) {
+            key = PathInData(key_str);
+        }
+        if (value.is_null()) {
+            // ignore nulls
+            continue;
+        }
+        inserted.insert(key_str);
+        if (!has_subcolumn(key)) {
+            auto most_common_type = std::make_shared<MostCommonType>();
+            auto type = is_nullable ? make_nullable(most_common_type) : 
most_common_type;
+            auto column = type->create_column();
+            column->resize(old_size);
+            bool succ = add_sub_column(key, std::move(column), type);
+            if (!succ) {
+                throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
+                                       "Failed to add sub column {}", 
key.get_path());
+            }
+        }
+        auto* subcolumn = get_subcolumn(key);
+        if (!subcolumn) {
+            doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
+                             fmt::format("Failed to find sub column {}", 
key.get_path()));
+        }
+        subcolumn->replace(value, self_row);
+    }
+    for (auto& entry : subcolumns) {
+        if (!inserted.contains(entry->path.get_path())) {
+            entry->data.replace(Null(), self_row);
+        }
+    }
+
+#ifndef NDEBUG
+    check_consistency();
+#endif
+}
+
+void ColumnObject::replace_column_data_default(size_t self_row) {
+#ifndef NDEBUG
+    check_consistency();
+#endif
+    if (!is_finalized()) {
+        finalize();
+    }
+    Field null;
+    for (auto& entry : subcolumns) {
+        entry->data.data[0]->replace(null, self_row);

Review Comment:
   Should the default be null for entire variant column? If true, you should 
clear subcolumns here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to