This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new 576c087cd41 [opt] optimize insert_range_from (#46675)
576c087cd41 is described below

commit 576c087cd41134f74029f90defced955e4a2528f
Author: lihangyu <lihan...@selectdb.com>
AuthorDate: Thu Jan 9 12:36:06 2025 +0800

    [opt] optimize insert_range_from (#46675)
---
 be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp |  2 ++
 be/src/vec/columns/column_object.cpp                         | 12 ++++++++++--
 be/src/vec/data_types/data_type_nothing.h                    |  7 +------
 regression-test/data/variant_p0/test_sub_path_pruning.out    |  4 ++--
 .../variant_p1/compaction/compaction_sparse_column.groovy    |  2 +-
 5 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
index a26d3d4eab8..442595eb0c0 100644
--- a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
+++ b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
@@ -311,6 +311,8 @@ void VariantStatistics::to_pb(VariantStatisticsPB* stats) const {
     for (const auto& [path, value] : sparse_column_non_null_size) {
         stats->mutable_sparse_column_non_null_size()->emplace(path, value);
     }
+    LOG(INFO) << "num subcolumns " << subcolumns_non_null_size.size() << ", 
num sparse columns "
+              << sparse_column_non_null_size.size();
 }
 
 void VariantStatistics::from_pb(const VariantStatisticsPB& stats) {
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index c60146a869f..535e4332bbe 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1318,9 +1318,10 @@ void ColumnObject::insert_range_from(const IColumn& src, size_t start, size_t le
     // We can reach the limit of subcolumns, and in this case
     // the rest of subcolumns from src will be inserted into sparse column.
     std::map<std::string_view, Subcolumn> 
src_path_and_subcoumn_for_sparse_column;
+    int idx_hint = 0;
     for (const auto& entry : src_object.subcolumns) {
         // Check if we already have such dense column path.
-        if (auto* subcolumn = get_subcolumn(entry->path); subcolumn != 
nullptr) {
+        if (auto* subcolumn = get_subcolumn(entry->path, idx_hint); subcolumn 
!= nullptr) {
             subcolumn->insert_range_from(entry->data, start, length);
         } else if (try_add_new_subcolumn(entry->path)) {
             subcolumn = get_subcolumn(entry->path);
@@ -1329,6 +1330,7 @@ void ColumnObject::insert_range_from(const IColumn& src, size_t start, size_t le
         } else {
             
src_path_and_subcoumn_for_sparse_column.emplace(entry->path.get_path(), 
entry->data);
         }
+        ++idx_hint;
     }
 
     // Paths in sparse column are sorted, so paths from 
src_dense_column_path_for_sparse_column should be inserted properly
@@ -1345,7 +1347,7 @@ void ColumnObject::insert_range_from(const IColumn& src, size_t start, size_t le
             src_object, std::move(sorted_src_subcolumn_for_sparse_column), 
start, length);
 
     num_rows += length;
-    finalize();
+    // finalize();
     ENABLE_CHECK_CONSISTENCY(this);
 }
 
@@ -1946,6 +1948,12 @@ void ColumnObject::clear_sparse_column() {
 }
 
 Status ColumnObject::finalize(FinalizeMode mode) {
+    if (is_finalized() && mode == FinalizeMode::READ_MODE) {
+        doc_structure = nullptr;
+        _prev_positions.clear();
+        ENABLE_CHECK_CONSISTENCY(this);
+        return Status::OK();
+    }
     Subcolumns new_subcolumns;
 
     if (auto root = subcolumns.get_mutable_root(); root == nullptr) {
diff --git a/be/src/vec/data_types/data_type_nothing.h b/be/src/vec/data_types/data_type_nothing.h
index bb0e095b5a5..6741fbd5031 100644
--- a/be/src/vec/data_types/data_type_nothing.h
+++ b/be/src/vec/data_types/data_type_nothing.h
@@ -78,12 +78,7 @@ public:
     const char* deserialize(const char* buf, MutableColumnPtr* column,
                             int be_exec_version) const override;
 
-    [[noreturn]] Field get_default() const override {
-        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
-                               "Method get_default() is not implemented for 
data type {}.",
-                               get_name());
-        __builtin_unreachable();
-    }
+    Field get_default() const override { return Null(); }
 
     [[noreturn]] Field get_field(const TExprNode& node) const override {
         throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
diff --git a/regression-test/data/variant_p0/test_sub_path_pruning.out b/regression-test/data/variant_p0/test_sub_path_pruning.out
index d1089d6f4cd..ae75160a91d 100644
--- a/regression-test/data/variant_p0/test_sub_path_pruning.out
+++ b/regression-test/data/variant_p0/test_sub_path_pruning.out
@@ -229,7 +229,7 @@
 1      {"b":{"c":{"d":{"e":11}}},"c":{"d":{"e":12}},"d":{"e":13},"e":14}
 
 -- !sql --
-"1"
+1
 {"b":{"c":{"d":{"e":11}}},"c":{"d":{"e":12}},"d":{"e":13},"e":14}
 
 -- !sql --
@@ -241,7 +241,7 @@
 1      {"d":{"e":11}}
 
 -- !sql --
-"1"
+1
 {"d":{"e":11}}
 
 -- !sql --
diff --git a/regression-test/suites/variant_p1/compaction/compaction_sparse_column.groovy b/regression-test/suites/variant_p1/compaction/compaction_sparse_column.groovy
index 82f60e594cf..5d753b97382 100644
--- a/regression-test/suites/variant_p1/compaction/compaction_sparse_column.groovy
+++ b/regression-test/suites/variant_p1/compaction/compaction_sparse_column.groovy
@@ -47,7 +47,7 @@ suite("test_compaction_sparse_column", "p1,nonConcurrent") {
 
     try {
         set_be_config.call("write_buffer_size", "10240")
-        set_be_config.call("variant_max_subcolumns_count", "3")
+        set_be_config.call("variant_max_subcolumns_count", "2")
 
         sql """ DROP TABLE IF EXISTS ${tableName} """
         sql """


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to