yiguolei commented on code in PR #15917: URL: https://github.com/apache/doris/pull/15917#discussion_r1126011291
########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -1196,58 +1220,91 @@ void SegmentIterator::_vec_init_lazy_materialization() { _is_need_short_eval = true; } - // Step 2: check non-predicate read costs to determine whether need lazy materialization - // fill _non_predicate_columns. - // After some optimization, we suppose lazy materialization is better performance. + // make _schema_block_id_map + _schema_block_id_map.resize(_schema.columns().size()); + for (int i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_id(i); + _schema_block_id_map[cid] = i; + } + + // Step2: extract columns that can execute expr context + _is_common_expr_column.resize(_schema.columns().size(), false); + if (_enable_common_expr_pushdown && _remaining_vconjunct_root != nullptr) { + _extract_common_expr_columns(_remaining_vconjunct_root); + if (!_common_expr_columns.empty()) { + _is_need_expr_eval = true; + for (auto cid : _schema.column_ids()) { + // pred column also needs to be filtered by expr + if (_is_common_expr_column[cid] || _is_pred_column[cid]) { + auto loc = _schema_block_id_map[cid]; + _columns_to_filter.push_back(loc); + } + } + } + } + + // Step 3: fill non predicate columns and second read column + // if _schema columns size equal to pred_column_ids size, lazy_materialization_read is false, + // all columns are lazy materialization columns without non predicte column. 
+ // If common expr pushdown exists, and expr column is not contained in lazy materialization columns, + // add to second read column, which will be read after lazy materialization if (_schema.column_ids().size() > pred_column_ids.size()) { for (auto cid : _schema.column_ids()) { if (!_is_pred_column[cid]) { - _non_predicate_columns.push_back(cid); if (_is_need_vec_eval || _is_need_short_eval) { _lazy_materialization_read = true; } + if (!_is_common_expr_column[cid]) { + _non_predicate_columns.push_back(cid); + } else { + _second_read_column_ids.push_back(cid); + } } } } - // Step 3: fill column ids for read and output + // Step 4: fill first read columns if (_lazy_materialization_read) { // insert pred cid to first_read_columns for (auto cid : pred_column_ids) { _first_read_column_ids.push_back(cid); } - } else if (!_is_need_vec_eval && - !_is_need_short_eval) { // no pred exists, just read and output column + } else if (!_is_need_vec_eval && !_is_need_short_eval && + !_is_need_expr_eval) { // no pred exists, just read and output column for (int i = 0; i < _schema.num_column_ids(); i++) { auto cid = _schema.column_id(i); _first_read_column_ids.push_back(cid); } - } else { // pred exits, but we can eliminate lazy materialization - // insert pred/non-pred cid to first read columns - std::set<ColumnId> pred_id_set; - pred_id_set.insert(_short_cir_pred_column_ids.begin(), _short_cir_pred_column_ids.end()); - pred_id_set.insert(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end()); - std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(), - _non_predicate_columns.end()); - - for (int i = 0; i < _schema.num_column_ids(); i++) { - auto cid = _schema.column_id(i); - if (pred_id_set.find(cid) != pred_id_set.end()) { - _first_read_column_ids.push_back(cid); - } else if (non_pred_set.find(cid) != non_pred_set.end()) { + } else { + if (_is_need_vec_eval || _is_need_short_eval) { + // TODO To refactor, because we suppose lazy materialization is better performance. 
+ // pred exits, but we can eliminate lazy materialization + // insert pred/non-pred cid to first read columns + std::set<ColumnId> pred_id_set; + pred_id_set.insert(_short_cir_pred_column_ids.begin(), + _short_cir_pred_column_ids.end()); + pred_id_set.insert(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end()); + std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(), + _non_predicate_columns.end()); + + // _second_read_column_ids must be empty. Otherwise _lazy_materialization_read must not false. Review Comment: Add CHECK(_second_read_column_ids.empty()) here so that the invariant stated in this comment is actually enforced. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org