This is an automated email from the ASF dual-hosted git repository. zhaoc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 78bf825 Optimize the convert of row block v2 to v1 #2011 (#2058) 78bf825 is described below commit 78bf825e734664ac258e944b2f353c6f92caa044 Author: kangpinghuang <kangpinghu...@126.com> AuthorDate: Thu Oct 24 22:36:30 2019 +0800 Optimize the convert of row block v2 to v1 #2011 (#2058) Use MemPool exchange to avoid string copy Use batch convert to replace row by row --- be/src/olap/row_block2.cpp | 54 +++++++++++++++---------------- be/src/olap/row_block2.h | 12 ++----- be/src/olap/rowset/beta_rowset_reader.cpp | 20 +----------- 3 files changed, 31 insertions(+), 55 deletions(-) diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index 69d5c1d..5ba1689 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -56,37 +56,37 @@ RowBlockV2::~RowBlockV2() { delete[] _selection_vector; } -Status RowBlockV2::copy_to_row_cursor(size_t row_idx, RowCursor* cursor) { - if (row_idx >= _num_rows) { - return Status::InvalidArgument( - Substitute("invalid row index $0 (num_rows=$1)", row_idx, _num_rows)); - } +Status RowBlockV2::convert_to_row_block(RowCursor* helper, RowBlock* dst) { for (auto cid : _schema.column_ids()) { - bool is_null = _schema.column(cid)->is_nullable() && BitmapTest(_column_null_bitmaps[cid], row_idx); - if (is_null) { - cursor->set_null(cid); + bool is_nullable = _schema.column(cid)->is_nullable(); + if (is_nullable) { + for (uint16_t i = 0; i < _selected_size; ++i) { + uint16_t row_idx = _selection_vector[i]; + dst->get_row(row_idx, helper); + bool is_null = BitmapTest(_column_null_bitmaps[cid], row_idx); + if (is_null) { + helper->set_null(cid); + } else { + helper->set_not_null(cid); + helper->set_field_content_shallow(cid, + reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx))); + } + } } else { - cursor->set_not_null(cid); - cursor->set_field_content_shallow(cid, reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx))); + for (uint16_t i = 0; i < _selected_size; ++i) { + uint16_t row_idx = _selection_vector[i]; + dst->get_row(row_idx, helper); + helper->set_not_null(cid); + helper->set_field_content_shallow(cid, + reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx))); + } } } - return Status::OK(); -} - -Status RowBlockV2::deep_copy_to_row_cursor(size_t row_idx, RowCursor* cursor, MemPool* mem_pool) { - if (row_idx >= _num_rows) { - return Status::InvalidArgument( - Substitute("invalid row index $0 (num_rows=$1)", row_idx, _num_rows)); - } - for (auto cid : _schema.column_ids()) { - bool is_null = _schema.column(cid)->is_nullable() && BitmapTest(_column_null_bitmaps[cid], row_idx); - if (is_null) { - cursor->set_null(cid); - } else { - cursor->set_not_null(cid); - cursor->set_field_content(cid, reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx)), mem_pool); - } - } + // swap MemPool to copy string content + dst->mem_pool()->exchange_data(_pool.get()); + dst->set_pos(0); + dst->set_limit(_selected_size); + dst->finalize(_selected_size); return Status::OK(); } diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h index ce3d5b6..27ea633 100644 --- a/be/src/olap/row_block2.h +++ b/be/src/olap/row_block2.h @@ -26,6 +26,7 @@ #include "olap/schema.h" #include "olap/types.h" #include "olap/selection_vector.h" +#include "olap/row_block.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" @@ -63,15 +64,8 @@ public: } } - // Copy the row_idx row's data into given row_cursor. - // This function will use shallow copy, so the client should - // notice the life time of returned value - Status copy_to_row_cursor(size_t row_idx, RowCursor* row_cursor); - - // Copy the row_idx row's data into given row_cursor. - // This function will use deep copy. - // This function is used to convert RowBlockV2 to RowBlock - Status deep_copy_to_row_cursor(size_t row_idx, RowCursor* cursor, MemPool* mem_pool); + // convert RowBlockV2 to RowBlock + Status convert_to_row_block(RowCursor* helper, RowBlock* dst); // Get the column block for one of the columns in this row block. // `cid` must be one of `schema()->column_ids()`. diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index e2620dc..ace0f0a 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -123,28 +123,10 @@ OLAPStatus BetaRowsetReader::next_block(RowBlock** block) { // convert to output block _output_block->clear(); - size_t rows_read = 0; - uint16_t* selection_vector = _input_block->selection_vector(); { SCOPED_RAW_TIMER(&_context->stats->block_convert_ns); - for (uint16_t i = 0; i < _input_block->selected_size(); ++i) { - uint16_t row_idx = selection_vector[i]; - // deep copy row from input block to output block because - // RowBlock use MemPool and RowBlockV2 use Arena - // TODO(hkp): unify RowBlockV2 to use MemPool to boost performance - _output_block->get_row(row_idx, _row.get()); - // convert return_columns to seek_columns - auto s = _input_block->deep_copy_to_row_cursor(row_idx, _row.get(), _output_block->mem_pool()); - if (!s.ok()) { - LOG(WARNING) << "failed to copy row: " << s.to_string(); - return OLAP_ERR_ROWSET_READ_FAILED; - } - ++rows_read; - } + _input_block->convert_to_row_block(_row.get(), _output_block.get()); } - _output_block->set_pos(0); - _output_block->set_limit(rows_read); - _output_block->finalize(rows_read); *block = _output_block.get(); return OLAP_SUCCESS; } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org