Gabriel39 commented on code in PR #15642: URL: https://github.com/apache/doris/pull/15642#discussion_r1071057702
########## be/src/vec/exprs/vslot_ref.cpp: ########## @@ -50,6 +50,12 @@ Status VSlotRef::prepare(doris::RuntimeState* state, const doris::RowDescriptor& if (slot_desc == nullptr) { return Status::InternalError("couldn't resolve slot descriptor {}", _slot_id); } + if (slot_desc->invalid()) { + // invalid slot should be ignored manually + _column_id = -1; + _column_name = &slot_desc->col_name(); Review Comment: move this line to line 53 and remove line 56 and 64 ########## be/src/vec/exec/vexchange_node.cpp: ########## @@ -45,10 +48,14 @@ Status VExchangeNode::init(const TPlanNode& tnode, RuntimeState* state) { if (!_is_merging) { return Status::OK(); } - RETURN_IF_ERROR(_vsort_exec_exprs.init(tnode.exchange_node.sort_info, _pool)); _is_asc_order = tnode.exchange_node.sort_info.is_asc_order; _nulls_first = tnode.exchange_node.sort_info.nulls_first; + + if (tnode.exchange_node.__isset.nodes_info) { + _nodes_info = _pool->add(new DorisNodesInfo(tnode.exchange_node.nodes_info)); + } + _use_two_phase_read = tnode.exchange_node.sort_info.use_two_phase_read; Review Comment: ```suggestion _use_two_phase_read = tnode.exchange_node.sort_info.__isset.use_two_phase_read && tnode.exchange_node.sort_info.use_two_phase_read; ``` ########## be/src/vec/exec/vexchange_node.cpp: ########## @@ -122,6 +157,7 @@ Status VExchangeNode::get_next(RuntimeState* state, Block* block, bool* eos) { } COUNTER_SET(_rows_returned_counter, _num_rows_returned); } + RETURN_IF_ERROR(second_phase_fetch_data(state, block)); Review Comment: I think `status` should be check before calling `second_phase_fetch_data`. ########## be/src/vec/exec/vexchange_node.cpp: ########## @@ -89,6 +96,28 @@ Status VExchangeNode::open(RuntimeState* state) { return Status::OK(); } +Status VExchangeNode::second_phase_fetch_data(RuntimeState* state, Block* final_block) { + if (!_use_two_phase_read) { + return Status::OK(); + } + if (final_block->rows() == 0) { + return Status::OK(); + } + auto row_id_col = final_block->get_by_position(final_block->columns() - 1); + MonotonicStopWatch watch; + watch.start(); + auto tuple_desc = _row_descriptor.tuple_descriptors()[0]; + RowIDFetcher id_fetcher(tuple_desc, state); + RETURN_IF_ERROR(id_fetcher.init(_nodes_info)); + MutableBlock materialized_block(_row_descriptor.tuple_descriptors(), final_block->rows()); + // fetch will sort block by sequence of ROWID_COL + RETURN_IF_ERROR(id_fetcher.fetch(row_id_col.column, &materialized_block)); + // Notice swap may change the structure of final_block + final_block->swap(materialized_block.to_block()); + LOG(INFO) << "fetch_id finished, cost(ms):" << watch.elapsed_time() / 1000 / 1000; Review Comment: Use a profiler instead ########## gensrc/thrift/Descriptors.thrift: ########## @@ -51,6 +51,8 @@ struct TSlotDescriptor { 9: required i32 slotIdx 10: required bool isMaterialized 11: optional i32 col_unique_id = -1 + 12: optional bool is_key = false + 13: optional bool is_invalid = false Review Comment: add some comments for `is_invalid`. To be honest, I think this is a bit confusing. How about to rename it to `should_ignore` or `need_materialize`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org