Copilot commented on code in PR #64010:
URL: https://github.com/apache/doris/pull/64010#discussion_r3339638205


##########
be/src/storage/segment/segment_iterator.cpp:
##########
@@ -586,6 +470,55 @@ Status SegmentIterator::_init_impl(const 
StorageReadOptions& opts) {
     return Status::OK();
 }
 
+Status SegmentIterator::_init_project_schema() {
+    _schema_block_id_map.assign(_schema->columns().size(), -1);
+    for (int i = 0; i < _schema->num_column_ids(); i++) {
+        auto cid = _schema->column_id(i);
+        _schema_block_id_map[cid] = i;
+    }
+
+    _project_schema = _opts.project_columns != nullptr
+                              ? std::make_shared<Schema>(_schema->columns(), 
*_opts.project_columns)
+                              : _schema;
+    return Status::OK();
+}
+
+Status SegmentIterator::_build_project_block(Block* block, uint16_t 
selected_size,
+                                             Block* project_block) {
+    project_block->clear();
+    DORIS_CHECK(_project_schema != nullptr);
+    for (auto cid : _project_schema->column_ids()) {
+        auto loc = _schema_block_id_map[cid];
+        auto& output_column = block->get_by_position(loc);
+        auto type = output_column.type;
+        auto column = output_column.column;
+        auto virtual_it = _vir_cid_to_idx_in_block.find(cid);
+        if (virtual_it != _vir_cid_to_idx_in_block.end()) {
+            auto type_it = _opts.vir_col_idx_to_type.find(virtual_it->second);
+            DORIS_CHECK(type_it != _opts.vir_col_idx_to_type.end());
+            type = type_it->second;
+            if (!column || check_and_get_column<const 
ColumnNothing>(column.get()) ||
+                column->size() != selected_size) {
+                column = ColumnNothing::create(selected_size);
+            }
+        } else {
+            if (!type) {
+                type = Schema::get_data_type_ptr(*_schema->column(cid));
+            }
+            if (!column) {
+                return Status::InternalError(
+                        "project column {} is not materialized before project 
block build", cid);
+            }
+            if (column->size() != selected_size) {
+                return Status::InternalError("project column {} has {} rows, 
expected {}", cid,
+                                             column->size(), selected_size);
+            }
+        }
+        project_block->insert({std::move(column), type, 
_schema->column(cid)->name()});
+    }
+    return Status::OK();
+}
+
 void SegmentIterator::_initialize_predicate_results() {
     // Initialize from _col_predicates
     for (auto pred : _col_predicates) {

Review Comment:
   _build_project_block() currently returns an InternalError when a non-virtual 
projected column is not yet materialized (its column pointer is nullptr) or 
when its size != selected_size. During lazy materialization, many projected 
columns are intentionally empty (size 0) at the time common expression pushdown 
runs, so this check can fail even though the expression does not reference 
those columns. Consider filling non-materialized columns with a typed 
placeholder (e.g., a const default column) so that the project block preserves 
slot ordinals without forcing early materialization.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to