This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 274a0f2603 [fix] do not read seq column when reading a compacted rowset (#10344) 274a0f2603 is described below commit 274a0f26035dfe1f91a5c3bb0f8213bef9a4d7c1 Author: Yongqiang YANG <98214048+dataroar...@users.noreply.github.com> AuthorDate: Thu Jun 23 08:44:43 2022 +0800 [fix] do not read seq column when reading a compacted rowset (#10344) SEQ_COL is used on tables with a unique key to order data within one transaction (rowset). When there is only one rowset and the rowset is compacted, rows in the rowset are sorted and rows with the same keys are resolved by compaction, so a scanner sets direct_mode to optimize the read iterator to avoid sorting and aggregating, and the iterators do not need SEQ_COL. However, init_return_columns adds SEQ_COL to return_columns, which is passed to SegmentIterator. The segment iterator is then called via get_next with a block without SEQ_COL, and it creates the columns that are included in return_columns but not in the block. SEQ_COL is nullable and the segment iterator does not handle that, so a core dump happens. Actually, in the above case, the segment iterator does not need to read SEQ_COL. When SEQ_COL is really needed, the iterators create the SEQ_COL column in the block, so the segment iterator does not need to create SEQ_COL at all. 
--- be/src/exec/olap_scanner.cpp | 43 ++++++++++++++++++++------------------- be/src/exec/olap_scanner.h | 2 +- be/src/vec/exec/volap_scanner.cpp | 42 ++++++++++++++++++++------------------ be/src/vec/exec/volap_scanner.h | 2 +- 4 files changed, 46 insertions(+), 43 deletions(-) diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index 3bd2783ea0..d78c21e996 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -147,7 +147,25 @@ Status OlapScanner::_init_tablet_reader_params( const std::vector<OlapScanRange*>& key_ranges, const std::vector<TCondition>& filters, const std::vector<std::pair<string, std::shared_ptr<IBloomFilterFuncBase>>>& bloom_filters) { - RETURN_IF_ERROR(_init_return_columns()); + // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty + bool single_version = + (_tablet_reader_params.rs_readers.size() == 1 && + _tablet_reader_params.rs_readers[0]->rowset()->start_version() == 0 && + !_tablet_reader_params.rs_readers[0] + ->rowset() + ->rowset_meta() + ->is_segments_overlapping()) || + (_tablet_reader_params.rs_readers.size() == 2 && + _tablet_reader_params.rs_readers[0]->rowset()->rowset_meta()->num_rows() == 0 && + _tablet_reader_params.rs_readers[1]->rowset()->start_version() == 2 && + !_tablet_reader_params.rs_readers[1] + ->rowset() + ->rowset_meta() + ->is_segments_overlapping()); + + _tablet_reader_params.direct_mode = single_version || _aggregation; + + RETURN_IF_ERROR(_init_return_columns(!_tablet_reader_params.direct_mode)); _tablet_reader_params.tablet = _tablet; _tablet_reader_params.reader_type = READER_QUERY; @@ -179,28 +197,11 @@ Status OlapScanner::_init_tablet_reader_params( // TODO(zc) _tablet_reader_params.profile = _parent->runtime_profile(); _tablet_reader_params.runtime_state = _runtime_state; - // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty - bool single_version = - (_tablet_reader_params.rs_readers.size() == 1 && - 
_tablet_reader_params.rs_readers[0]->rowset()->start_version() == 0 && - !_tablet_reader_params.rs_readers[0] - ->rowset() - ->rowset_meta() - ->is_segments_overlapping()) || - (_tablet_reader_params.rs_readers.size() == 2 && - _tablet_reader_params.rs_readers[0]->rowset()->rowset_meta()->num_rows() == 0 && - _tablet_reader_params.rs_readers[1]->rowset()->start_version() == 2 && - !_tablet_reader_params.rs_readers[1] - ->rowset() - ->rowset_meta() - ->is_segments_overlapping()); - _tablet_reader_params.origin_return_columns = &_return_columns; _tablet_reader_params.tablet_columns_convert_to_null_set = &_tablet_columns_convert_to_null_set; - if (_aggregation || single_version) { + if (_tablet_reader_params.direct_mode) { _tablet_reader_params.return_columns = _return_columns; - _tablet_reader_params.direct_mode = true; } else { // we need to fetch all key columns to do the right aggregation on storage engine side. for (size_t i = 0; i < _tablet->num_key_columns(); ++i) { @@ -236,7 +237,7 @@ Status OlapScanner::_init_tablet_reader_params( return Status::OK(); } -Status OlapScanner::_init_return_columns() { +Status OlapScanner::_init_return_columns(bool need_seq_col) { for (auto slot : _tuple_desc->slots()) { if (!slot->is_materialized()) { continue; @@ -255,7 +256,7 @@ Status OlapScanner::_init_return_columns() { } // expand the sequence column - if (_tablet->tablet_schema().has_sequence_col()) { + if (_tablet->tablet_schema().has_sequence_col() && need_seq_col) { bool has_replace_col = false; for (auto col : _return_columns) { if (_tablet->tablet_schema().column(col).aggregation() == diff --git a/be/src/exec/olap_scanner.h b/be/src/exec/olap_scanner.h index 92b8c07205..65086eaceb 100644 --- a/be/src/exec/olap_scanner.h +++ b/be/src/exec/olap_scanner.h @@ -93,7 +93,7 @@ protected: const std::vector<OlapScanRange*>& key_ranges, const std::vector<TCondition>& filters, const std::vector<std::pair<string, std::shared_ptr<IBloomFilterFuncBase>>>& bloom_filters); - Status 
_init_return_columns(); + Status _init_return_columns(bool need_seq_col); void _convert_row_to_tuple(Tuple* tuple); // Update profile that need to be reported in realtime. diff --git a/be/src/vec/exec/volap_scanner.cpp b/be/src/vec/exec/volap_scanner.cpp index f1c15408e4..6c942dfd0d 100644 --- a/be/src/vec/exec/volap_scanner.cpp +++ b/be/src/vec/exec/volap_scanner.cpp @@ -135,7 +135,25 @@ Status VOlapScanner::_init_tablet_reader_params( const std::vector<OlapScanRange*>& key_ranges, const std::vector<TCondition>& filters, const std::vector<std::pair<string, std::shared_ptr<IBloomFilterFuncBase>>>& bloom_filters) { - RETURN_IF_ERROR(_init_return_columns()); + // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty + bool single_version = + (_tablet_reader_params.rs_readers.size() == 1 && + _tablet_reader_params.rs_readers[0]->rowset()->start_version() == 0 && + !_tablet_reader_params.rs_readers[0] + ->rowset() + ->rowset_meta() + ->is_segments_overlapping()) || + (_tablet_reader_params.rs_readers.size() == 2 && + _tablet_reader_params.rs_readers[0]->rowset()->rowset_meta()->num_rows() == 0 && + _tablet_reader_params.rs_readers[1]->rowset()->start_version() == 2 && + !_tablet_reader_params.rs_readers[1] + ->rowset() + ->rowset_meta() + ->is_segments_overlapping()); + + _tablet_reader_params.direct_mode = _aggregation || single_version; + + RETURN_IF_ERROR(_init_return_columns(!_tablet_reader_params.direct_mode)); _tablet_reader_params.tablet = _tablet; _tablet_reader_params.reader_type = READER_QUERY; @@ -166,28 +184,12 @@ Status VOlapScanner::_init_tablet_reader_params( _tablet_reader_params.profile = _parent->runtime_profile(); _tablet_reader_params.runtime_state = _runtime_state; - // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty - bool single_version = - (_tablet_reader_params.rs_readers.size() == 1 && - _tablet_reader_params.rs_readers[0]->rowset()->start_version() == 0 && - !_tablet_reader_params.rs_readers[0] - ->rowset() - 
->rowset_meta() - ->is_segments_overlapping()) || - (_tablet_reader_params.rs_readers.size() == 2 && - _tablet_reader_params.rs_readers[0]->rowset()->rowset_meta()->num_rows() == 0 && - _tablet_reader_params.rs_readers[1]->rowset()->start_version() == 2 && - !_tablet_reader_params.rs_readers[1] - ->rowset() - ->rowset_meta() - ->is_segments_overlapping()); _tablet_reader_params.origin_return_columns = &_return_columns; _tablet_reader_params.tablet_columns_convert_to_null_set = &_tablet_columns_convert_to_null_set; - if (_aggregation || single_version) { + if (_tablet_reader_params.direct_mode) { _tablet_reader_params.return_columns = _return_columns; - _tablet_reader_params.direct_mode = true; } else { // we need to fetch all key columns to do the right aggregation on storage engine side. for (size_t i = 0; i < _tablet->num_key_columns(); ++i) { @@ -223,7 +225,7 @@ Status VOlapScanner::_init_tablet_reader_params( return Status::OK(); } -Status VOlapScanner::_init_return_columns() { +Status VOlapScanner::_init_return_columns(bool need_seq_col) { for (auto slot : _tuple_desc->slots()) { if (!slot->is_materialized()) { continue; @@ -242,7 +244,7 @@ Status VOlapScanner::_init_return_columns() { } // expand the sequence column - if (_tablet->tablet_schema().has_sequence_col()) { + if (_tablet->tablet_schema().has_sequence_col() && need_seq_col) { bool has_replace_col = false; for (auto col : _return_columns) { if (_tablet->tablet_schema().column(col).aggregation() == diff --git a/be/src/vec/exec/volap_scanner.h b/be/src/vec/exec/volap_scanner.h index 22cab48006..5786a388ad 100644 --- a/be/src/vec/exec/volap_scanner.h +++ b/be/src/vec/exec/volap_scanner.h @@ -98,7 +98,7 @@ private: const std::vector<OlapScanRange*>& key_ranges, const std::vector<TCondition>& filters, const std::vector<std::pair<string, std::shared_ptr<IBloomFilterFuncBase>>>& bloom_filters); - Status _init_return_columns(); + Status _init_return_columns(bool need_seq_col); // Update profile that need to 
be reported in realtime. void _update_realtime_counter(); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org