wangbo commented on code in PR #8834: URL: https://github.com/apache/incubator-doris/pull/8834#discussion_r857408776
########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -646,75 +669,82 @@ void SegmentIterator::_vec_init_lazy_materialization() { if (!del_cond_id_set.empty()) { short_cir_pred_col_id_set.insert(del_cond_id_set.begin(), del_cond_id_set.end()); pred_column_ids.insert(del_cond_id_set.begin(), del_cond_id_set.end()); - _is_all_column_basic_type = false; for (auto cid : del_cond_id_set) { _is_pred_column[cid] = true; } } - if (_schema.column_ids().size() > pred_column_ids.size()) { - for (auto cid : _schema.column_ids()) { - if (!_is_pred_column[cid]) { - _non_predicate_columns.push_back(cid); - is_non_predicate_column_exists = true; - - // todo(wb) make a cost-based lazy-materialization framework - // check non-pred column type to decide whether using lazy-materialization - FieldType type = _schema.column(cid)->type(); - if (_is_all_column_basic_type && - (type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT || - type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR || - type == OLAP_FIELD_TYPE_STRING)) { - _is_all_column_basic_type = false; - } - } - } - } - _vec_pred_column_ids.assign(vec_pred_col_id_set.cbegin(), vec_pred_col_id_set.cend()); - _short_cir_pred_column_ids.assign(short_cir_pred_col_id_set.cbegin(), - short_cir_pred_col_id_set.cend()); - } else { - _is_all_column_basic_type = false; - is_non_predicate_column_exists = true; + _short_cir_pred_column_ids.assign(short_cir_pred_col_id_set.cbegin(), short_cir_pred_col_id_set.cend()); + } + + if (!_vec_pred_column_ids.empty()) { + _is_need_vec_eval = true; + } + if (!_short_cir_pred_column_ids.empty()) { + _is_need_short_eval = true; + } + + // Step 2: check non-predicate read costs to determine whether need lazy materialization + // fill _non_predicate_columns. 
+ // note(wb) For block schema, query layer and storage layer may have some diff + // query layer block schema not contains delete column, but storage layer appends delete column to end of block schema + // When output block to query layer, delete column can be skipped. + // _schema.column_ids() stands for storage layer block schema, so it contains delete columnid + // we just regard delete column as common pred column here. + if (_schema.column_ids().size() > pred_column_ids.size()) { for (auto cid : _schema.column_ids()) { - _non_predicate_columns.push_back(cid); + if (!_is_pred_column[cid]) { + _non_predicate_columns.push_back(cid); + FieldType type = _schema.column(cid)->type(); + + // todo(wb) maybe we can make read char type faster + // todo(wb) support map/array type + // todo(wb) consider multiple integer columns cost, such as 1000 columns, maybe lazy materialization faster + if (!_lazy_materialization_read && + (_is_need_vec_eval || _is_need_short_eval) && // only when pred exists, we need to consider lazy materialization + (type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT || + type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_STRING || + type == OLAP_FIELD_TYPE_BOOL || type == OLAP_FIELD_TYPE_DATE || type == OLAP_FIELD_TYPE_DATETIME || Review Comment: I haven't run a performance test, but bool is read through a run-length decoder (runlenDecoder), which means bool values can only be read row by row, not in batches. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org