This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 2cf90ddfc5 [fix](scanner) remove useless _src_block_mem_reuse to avoid 
core dump while loading (#17559)
2cf90ddfc5 is described below

commit 2cf90ddfc590738a2e5ad1d59bc3566fce971317
Author: zhannngchen <48427519+zhannngc...@users.noreply.github.com>
AuthorDate: Thu Mar 9 09:26:32 2023 +0800

    [fix](scanner) remove useless _src_block_mem_reuse to avoid core dump while 
loading (#17559)
    
    The _src_block_mem_reuse variable does not actually work, since _src_block 
is cleared each time we call get_block.
    But the current code may cause a core dump, see issue #17587. This is 
because we insert some result columns generated by exprs into the dest block, 
and such a column holds a pointer to some column in the original schema. When 
the data of _src_block is cleared, the data of those columns in the dest block 
is also cleared.
    
    E.g. coalesce returns a result column which holds a pointer to some 
original column, see issue #17588.
---
 be/src/vec/exec/scan/vfile_scanner.cpp | 18 +++---------------
 be/src/vec/exec/scan/vfile_scanner.h   |  1 -
 2 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp 
b/be/src/vec/exec/scan/vfile_scanner.cpp
index 72522119f3..48ff807bbd 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -81,7 +81,6 @@ Status VFileScanner::prepare(
     _io_ctx->enable_file_cache = _state->query_options().enable_file_cache;
 
     if (_is_load) {
-        _src_block_mem_reuse = true;
         _src_row_desc.reset(new RowDescriptor(_state->desc_tbl(),
                                               
std::vector<TupleId>({_input_tuple_desc->id()}),
                                               std::vector<bool>({false})));
@@ -403,10 +402,9 @@ Status VFileScanner::_convert_to_output_block(Block* 
block) {
     size_t rows = _src_block.rows();
     auto filter_column = vectorized::ColumnUInt8::create(rows, 1);
     auto& filter_map = filter_column->get_data();
-    auto origin_column_num = _src_block.columns();
 
     // Set block dynamic, block maybe merge or add_rows
-    // in in later process.
+    // in later process.
     if (_is_dynamic_schema) {
         block->set_block_type(BlockType::DYNAMIC);
     }
@@ -437,11 +435,7 @@ Status VFileScanner::_convert_to_output_block(Block* 
block) {
             int result_column_id = -1;
             // PT1 => dest primitive type
             RETURN_IF_ERROR(ctx->execute(&_src_block, &result_column_id));
-            bool is_origin_column = result_column_id < origin_column_num;
-            column_ptr = is_origin_column && _src_block_mem_reuse
-                                 ? _src_block.get_by_position(result_column_id)
-                                           .column->clone_resized(rows)
-                                 : 
_src_block.get_by_position(result_column_id).column;
+            column_ptr = _src_block.get_by_position(result_column_id).column;
         }
         // column_ptr maybe a ColumnConst, convert it to a normal column
         column_ptr = column_ptr->convert_to_full_column_if_const();
@@ -533,11 +527,7 @@ Status VFileScanner::_convert_to_output_block(Block* 
block) {
     }
 
     // after do the dest block insert operation, clear _src_block to remove 
the reference of origin column
-    if (_src_block_mem_reuse) {
-        _src_block.clear_column_data(origin_column_num);
-    } else {
-        _src_block.clear();
-    }
+    _src_block.clear();
 
     size_t dest_size = block->columns();
     // do filter
@@ -809,8 +799,6 @@ Status VFileScanner::_init_expr_ctxes() {
     _is_dynamic_schema =
             _output_tuple_desc && 
_output_tuple_desc->slots().back()->type().is_variant_type();
     if (_is_dynamic_schema) {
-        // should not resuse Block since Block is variable
-        _src_block_mem_reuse = false;
         _full_base_schema_view.reset(new 
vectorized::schema_util::FullBaseSchemaView);
         _full_base_schema_view->db_name = 
_output_tuple_desc->table_desc()->database();
         _full_base_schema_view->table_name = 
_output_tuple_desc->table_desc()->name();
diff --git a/be/src/vec/exec/scan/vfile_scanner.h 
b/be/src/vec/exec/scan/vfile_scanner.h
index 096892e8e6..dc65db6c75 100644
--- a/be/src/vec/exec/scan/vfile_scanner.h
+++ b/be/src/vec/exec/scan/vfile_scanner.h
@@ -106,7 +106,6 @@ protected:
     int _rows = 0;
     int _num_of_columns_from_file;
 
-    bool _src_block_mem_reuse = false;
     bool _strict_mode;
 
     bool _src_block_init = false;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to