This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.0-alpha
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 7d2a96468b251e55fc5264bad5986c4158722a23
Author: lihangyu <15605149...@163.com>
AuthorDate: Fri Apr 21 11:19:00 2023 +0800

    [Fix](dynamic table) fix dynamic table with insert into and column al… (#18808)
    
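    1. Set num_rows of the destination ColumnObject correctly when aligning
       variant columns.
    2. A block written by INSERT INTO has no dynamic column, so skip unfolding
       the variant column when it is absent.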
---
 be/src/olap/memtable.cpp                           | 11 +++++---
 be/src/vec/columns/column_object.cpp               | 15 ++++++-----
 be/src/vec/columns/column_object.h                 | 30 ++++++++++++++++++++++
 .../suites/dynamic_table_p0/load.groovy            |  4 +--
 4 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index db18782dc3..2b4bb79324 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -393,12 +393,14 @@ void MemTable::unfold_variant_column(vectorized::Block& block) {
     if (block.rows() == 0) {
         return;
     }
-    vectorized::ColumnWithTypeAndName variant_column =
-            block.get_by_name(BeConsts::DYNAMIC_COLUMN_NAME);
+    vectorized::ColumnWithTypeAndName* variant_column =
+            block.try_get_by_name(BeConsts::DYNAMIC_COLUMN_NAME);
+    if (!variant_column) {
+        return;
+    }
     // remove it
-    block.erase(BeConsts::DYNAMIC_COLUMN_NAME);
     vectorized::ColumnObject& object_column =
-            assert_cast<vectorized::ColumnObject&>(variant_column.column->assume_mutable_ref());
+            assert_cast<vectorized::ColumnObject&>(variant_column->column->assume_mutable_ref());
     // extend
     for (auto& entry : object_column.get_subcolumns()) {
         if (entry->path.get_path() == vectorized::ColumnObject::COLUMN_NAME_DUMMY) {
@@ -407,6 +409,7 @@ void MemTable::unfold_variant_column(vectorized::Block& block) {
         block.insert({entry->data.get_finalized_column().get_ptr(),
                       entry->data.get_least_common_type(), entry->path.get_path()});
     }
+    block.erase(BeConsts::DYNAMIC_COLUMN_NAME);
 }
 
 void MemTable::serialize_block_to_row_column(vectorized::Block& block) {
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 1af9f3f630..44acacd535 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -516,6 +516,12 @@ const ColumnPtr& ColumnObject::Subcolumn::get_finalized_column_ptr() const {
     return data[0];
 }
 
+void ColumnObject::Subcolumn::remove_nullable() {
+    assert(is_finalized());
+    data[0] = doris::vectorized::remove_nullable(data[0]);
+    least_common_type.remove_nullable();
+}
+
 ColumnObject::Subcolumn::LeastCommonType::LeastCommonType(DataTypePtr type_)
         : type(std::move(type_)),
           base_type(getBaseTypeOfArray(type)),
@@ -903,7 +909,6 @@ void align_variant_by_name_and_type(ColumnObject& dst, const ColumnObject& src,
     // if src and dst is empty, we just increase the num_rows of dst and fill
     // num_rows of default values when meet new data
     size_t num_rows = dst.rows();
-    bool need_inc_row_num = true;
     for (auto& entry : dst.get_subcolumns()) {
         const auto* src_subcol = src.get_subcolumn(entry->path);
         if (src_subcol == nullptr) {
@@ -915,6 +920,7 @@ void align_variant_by_name_and_type(ColumnObject& dst, const ColumnObject& src,
             const auto& src_column = src_subcol->get_finalized_column();
             inserter(src_column, &entry->data.get_finalized_column());
         }
+        dst.set_num_rows(entry->data.get_finalized_column().size());
     }
     for (const auto& entry : src.get_subcolumns()) {
         // encounter a new column
@@ -924,15 +930,12 @@ void align_variant_by_name_and_type(ColumnObject& dst, const ColumnObject& src,
             auto new_column = type->create_column();
             new_column->insert_many_defaults(num_rows);
             inserter(entry->data.get_finalized_column(), new_column.get());
-            if (dst.empty()) {
-                // add_sub_column updated num_rows of dst object
-                need_inc_row_num = false;
-            }
+            dst.set_num_rows(new_column->size());
             dst.add_sub_column(entry->path, std::move(new_column));
         }
     }
     num_rows += row_cnt;
-    if (need_inc_row_num) {
+    if (dst.empty()) {
         dst.incr_num_rows(row_cnt);
     }
 #ifndef NDEBUG
diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h
index af5cb5cc9a..290ec0ec1e 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -29,6 +29,24 @@
 #include <vec/json/json_parser.h>
 
 #include "common/status.h"
+#include "vec/columns/column.h"
+#include "vec/columns/subcolumn_tree.h"
+#include "vec/common/cow.h"
+#include "vec/common/string_ref.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/json/path_in_data.h"
+
+class SipHash;
+
+namespace doris {
+namespace vectorized {
+class Arena;
+} // namespace vectorized
+} // namespace doris
+
 namespace doris::vectorized {
 
 /// Info that represents a scalar or array field in a decomposed view.
@@ -120,6 +138,8 @@ public:
 
         const ColumnPtr& get_finalized_column_ptr() const;
 
+        void remove_nullable();
+
         friend class ColumnObject;
 
     private:
@@ -135,6 +155,8 @@ public:
 
             size_t get_dimensions() const { return num_dimensions; }
 
+            void remove_nullable() { type = doris::vectorized::remove_nullable(type); }
+
         private:
             DataTypePtr type;
             DataTypePtr base_type;
@@ -183,6 +205,12 @@ public:
     // return null if not found
     const Subcolumn* get_subcolumn(const PathInData& key) const;
 
+    /** More efficient methods of manipulation */
+    [[noreturn]] IColumn& get_data() { LOG(FATAL) << "Not implemented method get_data()"; }
+    [[noreturn]] const IColumn& get_data() const {
+        LOG(FATAL) << "Not implemented method get_data()";
+    }
+
     // return null if not found
     Subcolumn* get_subcolumn(const PathInData& key);
 
@@ -190,6 +218,8 @@ public:
 
     void incr_num_rows(size_t n) { num_rows += n; }
 
+    void set_num_rows(size_t n) { num_rows = n; }
+
     size_t rows() const { return num_rows; }
 
     /// Adds a subcolumn from existing IColumn.
diff --git a/regression-test/suites/dynamic_table_p0/load.groovy b/regression-test/suites/dynamic_table_p0/load.groovy
index f4bd89aba6..4837ca3cd1 100644
--- a/regression-test/suites/dynamic_table_p0/load.groovy
+++ b/regression-test/suites/dynamic_table_p0/load.groovy
@@ -123,8 +123,8 @@ suite("regression_test_dynamic_table", "dynamic_table"){
     json_load_unique("btc_transactions.json", "test_btc_json")
     json_load_unique("ghdata_sample.json", "test_ghdata_json")
     json_load_unique("nbagames_sample.json", "test_nbagames_json")
-    // sql """insert into test_ghdata_json_unique select * from test_ghdata_json_unique"""
-    // sql """insert into test_btc_json_unique select * from test_btc_json_unique"""
+    sql """insert into test_ghdata_json_unique select * from test_ghdata_json"""
+    sql """insert into test_btc_json_unique select * from test_btc_json"""
 
     // load more
     table_name = "gharchive";


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to