xinyiZzz commented on code in PR #15917: URL: https://github.com/apache/doris/pull/15917#discussion_r1116608487
########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -1160,58 +1189,86 @@ void SegmentIterator::_vec_init_lazy_materialization() { _is_need_short_eval = true; } - // Step 2: check non-predicate read costs to determine whether need lazy materialization - // fill _non_predicate_columns. - // After some optimization, we suppose lazy materialization is better performance. + // make _schema_block_id_map + _schema_block_id_map.resize(_schema.columns().size()); + for (int i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_id(i); + _schema_block_id_map[cid] = i; + } + + // Step2: extract columns that can execute expr context + _is_remaining_expr_column.resize(_schema.columns().size(), false); + if (_enable_remaining_expr_pushdown && _remaining_vconjunct_root != nullptr) { + _extract_remaining_expr_columns(_remaining_vconjunct_root); + if (!_remaining_expr_columns.empty()) { + _is_need_expr_eval = true; + for (auto cid : _schema.column_ids()) { + // pred column also needs to be filtered by expr + if (_is_remaining_expr_column[cid] || _is_pred_column[cid]) { + auto loc = _schema_block_id_map[cid]; + _columns_to_filter.push_back(loc); + } + } + } + } + + // Step 3: fill non predicate columns and second read columns if (_schema.column_ids().size() > pred_column_ids.size()) { for (auto cid : _schema.column_ids()) { if (!_is_pred_column[cid]) { - _non_predicate_columns.push_back(cid); + if (!_is_remaining_expr_column[cid]) { + _non_predicate_columns.push_back(cid); + } else { + _second_read_column_ids.push_back(cid); + } if (_is_need_vec_eval || _is_need_short_eval) { _lazy_materialization_read = true; } } } } - // Step 3: fill column ids for read and output + // Step 4: fill first read columns if (_lazy_materialization_read) { // insert pred cid to first_read_columns for (auto cid : pred_column_ids) { _first_read_column_ids.push_back(cid); } - } else if (!_is_need_vec_eval && - !_is_need_short_eval) { // no pred exists, just read and 
output column + } else if (!_is_need_vec_eval && !_is_need_short_eval && + !_is_need_expr_eval) { // no pred exists, just read and output column for (int i = 0; i < _schema.num_column_ids(); i++) { auto cid = _schema.column_id(i); _first_read_column_ids.push_back(cid); } - } else { // pred exits, but we can eliminate lazy materialization - // insert pred/non-pred cid to first read columns - std::set<ColumnId> pred_id_set; - pred_id_set.insert(_short_cir_pred_column_ids.begin(), _short_cir_pred_column_ids.end()); - pred_id_set.insert(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end()); - std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(), - _non_predicate_columns.end()); - - for (int i = 0; i < _schema.num_column_ids(); i++) { - auto cid = _schema.column_id(i); - if (pred_id_set.find(cid) != pred_id_set.end()) { - _first_read_column_ids.push_back(cid); - } else if (non_pred_set.find(cid) != non_pred_set.end()) { + } else { + if (_is_need_vec_eval || _is_need_short_eval) { + // TODO To refactor, because we suppose lazy materialization is better performance. 
+ // pred exits, but we can eliminate lazy materialization + // insert pred/non-pred cid to first read columns + std::set<ColumnId> pred_id_set; + pred_id_set.insert(_short_cir_pred_column_ids.begin(), + _short_cir_pred_column_ids.end()); + pred_id_set.insert(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end()); + std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(), + _non_predicate_columns.end()); + + for (int i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_id(i); + if (pred_id_set.find(cid) != pred_id_set.end()) { + _first_read_column_ids.push_back(cid); + } else if (non_pred_set.find(cid) != non_pred_set.end()) { + _first_read_column_ids.push_back(cid); + // when _lazy_materialization_read = false, non-predicate column should also be filtered by sel idx, so we regard it as pred columns + _is_pred_column[cid] = true; + } + } + } else if (_is_need_expr_eval) { + DCHECK(!_is_need_vec_eval && !_is_need_short_eval); + for (auto cid : _remaining_expr_columns) { _first_read_column_ids.push_back(cid); - // when _lazy_materialization_read = false, non-predicate column should also be filtered by sel idx, so we regard it as pred columns - _is_pred_column[cid] = true; } Review Comment: In this case `_second_read_column_ids` must be empty. Otherwise `_lazy_materialization_read` could not be false, and execution would have entered the first `if` branch instead. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org