This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-2.0-alpha in repository https://gitbox.apache.org/repos/asf/doris.git
commit 7d2a96468b251e55fc5264bad5986c4158722a23 Author: lihangyu <15605149...@163.com> AuthorDate: Fri Apr 21 11:19:00 2023 +0800 [Fix](dynamic table) fix dynamic table with insert into and column al… (#18808) 1. The num_rows should be correctly set 2. insert into has no dynamic column --- be/src/olap/memtable.cpp | 11 +++++--- be/src/vec/columns/column_object.cpp | 15 ++++++----- be/src/vec/columns/column_object.h | 30 ++++++++++++++++++++++ .../suites/dynamic_table_p0/load.groovy | 4 +-- 4 files changed, 48 insertions(+), 12 deletions(-) diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index db18782dc3..2b4bb79324 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -393,12 +393,14 @@ void MemTable::unfold_variant_column(vectorized::Block& block) { if (block.rows() == 0) { return; } - vectorized::ColumnWithTypeAndName variant_column = - block.get_by_name(BeConsts::DYNAMIC_COLUMN_NAME); + vectorized::ColumnWithTypeAndName* variant_column = + block.try_get_by_name(BeConsts::DYNAMIC_COLUMN_NAME); + if (!variant_column) { + return; + } // remove it - block.erase(BeConsts::DYNAMIC_COLUMN_NAME); vectorized::ColumnObject& object_column = - assert_cast<vectorized::ColumnObject&>(variant_column.column->assume_mutable_ref()); + assert_cast<vectorized::ColumnObject&>(variant_column->column->assume_mutable_ref()); // extend for (auto& entry : object_column.get_subcolumns()) { if (entry->path.get_path() == vectorized::ColumnObject::COLUMN_NAME_DUMMY) { @@ -407,6 +409,7 @@ void MemTable::unfold_variant_column(vectorized::Block& block) { block.insert({entry->data.get_finalized_column().get_ptr(), entry->data.get_least_common_type(), entry->path.get_path()}); } + block.erase(BeConsts::DYNAMIC_COLUMN_NAME); } void MemTable::serialize_block_to_row_column(vectorized::Block& block) { diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 1af9f3f630..44acacd535 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -516,6 +516,12 @@ const ColumnPtr& ColumnObject::Subcolumn::get_finalized_column_ptr() const { return data[0]; } +void ColumnObject::Subcolumn::remove_nullable() { + assert(is_finalized()); + data[0] = doris::vectorized::remove_nullable(data[0]); + least_common_type.remove_nullable(); +} + ColumnObject::Subcolumn::LeastCommonType::LeastCommonType(DataTypePtr type_) : type(std::move(type_)), base_type(getBaseTypeOfArray(type)), @@ -903,7 +909,6 @@ void align_variant_by_name_and_type(ColumnObject& dst, const ColumnObject& src, // if src and dst is empty, we just increase the num_rows of dst and fill // num_rows of default values when meet new data size_t num_rows = dst.rows(); - bool need_inc_row_num = true; for (auto& entry : dst.get_subcolumns()) { const auto* src_subcol = src.get_subcolumn(entry->path); if (src_subcol == nullptr) { @@ -915,6 +920,7 @@ void align_variant_by_name_and_type(ColumnObject& dst, const ColumnObject& src, const auto& src_column = src_subcol->get_finalized_column(); inserter(src_column, &entry->data.get_finalized_column()); } + dst.set_num_rows(entry->data.get_finalized_column().size()); } for (const auto& entry : src.get_subcolumns()) { // encounter a new column @@ -924,15 +930,12 @@ void align_variant_by_name_and_type(ColumnObject& dst, const ColumnObject& src, auto new_column = type->create_column(); new_column->insert_many_defaults(num_rows); inserter(entry->data.get_finalized_column(), new_column.get()); - if (dst.empty()) { - // add_sub_column updated num_rows of dst object - need_inc_row_num = false; - } + dst.set_num_rows(new_column->size()); dst.add_sub_column(entry->path, std::move(new_column)); } } num_rows += row_cnt; - if (need_inc_row_num) { + if (dst.empty()) { dst.incr_num_rows(row_cnt); } #ifndef NDEBUG diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index af5cb5cc9a..290ec0ec1e 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -29,6 +29,24 @@ #include <vec/json/json_parser.h> #include "common/status.h" +#include "vec/columns/column.h" +#include "vec/columns/subcolumn_tree.h" +#include "vec/common/cow.h" +#include "vec/common/string_ref.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/json/path_in_data.h" + +class SipHash; + +namespace doris { +namespace vectorized { +class Arena; +} // namespace vectorized +} // namespace doris + namespace doris::vectorized { /// Info that represents a scalar or array field in a decomposed view. @@ -120,6 +138,8 @@ public: const ColumnPtr& get_finalized_column_ptr() const; + void remove_nullable(); + friend class ColumnObject; private: @@ -135,6 +155,8 @@ public: size_t get_dimensions() const { return num_dimensions; } + void remove_nullable() { type = doris::vectorized::remove_nullable(type); } + private: DataTypePtr type; DataTypePtr base_type; @@ -183,6 +205,12 @@ public: // return null if not found const Subcolumn* get_subcolumn(const PathInData& key) const; + /** More efficient methods of manipulation */ + [[noreturn]] IColumn& get_data() { LOG(FATAL) << "Not implemented method get_data()"; } + [[noreturn]] const IColumn& get_data() const { + LOG(FATAL) << "Not implemented method get_data()"; + } + // return null if not found Subcolumn* get_subcolumn(const PathInData& key); @@ -190,6 +218,8 @@ public: void incr_num_rows(size_t n) { num_rows += n; } + void set_num_rows(size_t n) { num_rows = n; } + size_t rows() const { return num_rows; } /// Adds a subcolumn from existing IColumn. diff --git a/regression-test/suites/dynamic_table_p0/load.groovy b/regression-test/suites/dynamic_table_p0/load.groovy index f4bd89aba6..4837ca3cd1 100644 --- a/regression-test/suites/dynamic_table_p0/load.groovy +++ b/regression-test/suites/dynamic_table_p0/load.groovy @@ -123,8 +123,8 @@ suite("regression_test_dynamic_table", "dynamic_table"){ json_load_unique("btc_transactions.json", "test_btc_json") json_load_unique("ghdata_sample.json", "test_ghdata_json") json_load_unique("nbagames_sample.json", "test_nbagames_json") - // sql """insert into test_ghdata_json_unique select * from test_ghdata_json_unique""" - // sql """insert into test_btc_json_unique select * from test_btc_json_unique""" + sql """insert into test_ghdata_json_unique select * from test_ghdata_json""" + sql """insert into test_btc_json_unique select * from test_btc_json""" // load more table_name = "gharchive"; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org