Copilot commented on code in PR #60352:
URL: https://github.com/apache/doris/pull/60352#discussion_r3019826419
##########
be/src/exprs/table_function/vexplode_v2.cpp:
##########
@@ -108,6 +108,29 @@ Status VExplodeV2TableFunction::process_init(Block* block,
RuntimeState* state)
return Status::OK();
}
+bool VExplodeV2TableFunction::support_block_fast_path() const {
+ return !_is_outer && !_generate_row_index;
Review Comment:
`support_block_fast_path()` currently returns true even when the function
has multiple parameters (`_multi_detail.size() != 1`), but
`prepare_block_fast_path()` will return NotSupported in that case. Either align
`support_block_fast_path()` with the prepare-time constraint (e.g. include
`_multi_detail.size() == 1` once `_multi_detail` has been populated), or ensure
the operator treats NotSupported as a normal fallback; otherwise this function
advertises support it cannot deliver and can trigger an avoidable error path.
```suggestion
return !_is_outer && !_generate_row_index && _multi_detail.size() == 1;
```
##########
be/src/exec/operator/table_function_operator.cpp:
##########
@@ -160,6 +167,209 @@ bool TableFunctionLocalState::_is_inner_and_empty() {
return false;
}
+bool TableFunctionLocalState::_can_use_block_fast_path() const {
+ auto& p = _parent->cast<TableFunctionOperatorX>();
+ // Fast path is only valid when:
+ // - only one table function exists
+ // - there is an active child row to expand
+ // - the child block is non-empty
+ // - the table function can expose nested/offsets via
prepare_block_fast_path()
+ return p._fn_num == 1 && _cur_child_offset != -1 && _child_block->rows() >
0 &&
+ _fns[0]->support_block_fast_path();
+}
+
+void TableFunctionLocalState::_reset_block_fast_path_state() {
+ _block_fast_path_prepared = false;
+ _block_fast_path_enabled = false;
+ _block_fast_path_ctx = {};
+ _block_fast_path_row = 0;
+ _block_fast_path_in_row_offset = 0;
+}
+
+Status TableFunctionLocalState::_prepare_block_fast_path(RuntimeState* state) {
+ if (_block_fast_path_prepared) {
+ return Status::OK();
+ }
+
+ RETURN_IF_ERROR(
+ _fns[0]->prepare_block_fast_path(_child_block.get(), state,
&_block_fast_path_ctx));
+ if (_block_fast_path_ctx.offsets_ptr == nullptr ||
+ _block_fast_path_ctx.nested_col.get() == nullptr) {
+ return Status::InternalError("block fast path context is invalid");
+ }
+
+ const auto child_rows =
cast_set<int64_t>(_block_fast_path_ctx.offsets_ptr->size());
+ if (child_rows != cast_set<int64_t>(_child_block->rows())) {
+ return Status::InternalError("block fast path offsets size mismatch");
+ }
+
+ _block_fast_path_row = _cur_child_offset;
+ _block_fast_path_in_row_offset = 0;
+ _block_fast_path_enabled = _has_contiguous_block_fast_path_suffix();
+ _block_fast_path_prepared = true;
+ return Status::OK();
+}
+
+bool TableFunctionLocalState::_has_contiguous_block_fast_path_suffix() const {
+ const auto& offsets = *_block_fast_path_ctx.offsets_ptr;
+ const auto child_rows = cast_set<int64_t>(offsets.size());
+ int64_t child_row = _block_fast_path_row;
+ uint64_t in_row_offset = _block_fast_path_in_row_offset;
+ uint64_t expected_next_nested_idx = 0;
+ bool found_nested_range = false;
+
+ while (child_row < child_rows) {
+ if (_block_fast_path_ctx.array_nullmap_data &&
+ _block_fast_path_ctx.array_nullmap_data[child_row]) {
+ child_row++;
+ in_row_offset = 0;
+ continue;
+ }
+
+ const uint64_t prev_off = child_row == 0 ? 0 : offsets[child_row - 1];
+ const uint64_t cur_off = offsets[child_row];
+ const uint64_t nested_len = cur_off - prev_off;
+ if (in_row_offset >= nested_len) {
+ child_row++;
+ in_row_offset = 0;
+ continue;
+ }
+
+ const uint64_t nested_start = prev_off + in_row_offset;
+ if (!found_nested_range) {
+ found_nested_range = true;
+ } else if (nested_start != expected_next_nested_idx) {
+ return false;
+ }
+ expected_next_nested_idx = cur_off;
+ child_row++;
+ in_row_offset = 0;
+ }
+
+ return true;
+}
+
+Status TableFunctionLocalState::_get_expanded_block_block_fast_path(
+ RuntimeState* state, std::vector<MutableColumnPtr>& columns) {
+ auto& p = _parent->cast<TableFunctionOperatorX>();
+ DCHECK(_block_fast_path_prepared);
+ DCHECK(_block_fast_path_enabled);
+
+ const auto remaining_capacity =
+ state->batch_size() -
cast_set<int>(columns[p._child_slots.size()]->size());
+ if (remaining_capacity <= 0) {
+ return Status::OK();
+ }
+
+ const auto& offsets = *_block_fast_path_ctx.offsets_ptr;
+ const auto child_rows = cast_set<int64_t>(offsets.size());
+
+ std::vector<uint32_t> row_ids;
+ row_ids.reserve(remaining_capacity);
+ uint64_t first_nested_idx = 0;
+ uint64_t expected_next_nested_idx = 0;
+ bool found_nested_range = false;
+
+ int64_t child_row = _block_fast_path_row;
+ uint64_t in_row_offset = _block_fast_path_in_row_offset;
+ int produced_rows = 0;
+
+ while (produced_rows < remaining_capacity && child_row < child_rows) {
+ if (_block_fast_path_ctx.array_nullmap_data &&
+ _block_fast_path_ctx.array_nullmap_data[child_row]) {
+ // NULL array row: skip it here. Slow path will handle output
semantics if needed.
+ child_row++;
+ in_row_offset = 0;
+ continue;
+ }
+
+ const uint64_t prev_off = child_row == 0 ? 0 : offsets[child_row - 1];
+ const uint64_t cur_off = offsets[child_row];
+ const uint64_t nested_len = cur_off - prev_off;
+
+ if (in_row_offset >= nested_len) {
+ child_row++;
+ in_row_offset = 0;
+ continue;
+ }
+
+ const uint64_t remaining_in_row = nested_len - in_row_offset;
+ const int take_count =
+ std::min<int>(remaining_capacity - produced_rows,
cast_set<int>(remaining_in_row));
+ const uint64_t nested_start = prev_off + in_row_offset;
+
+ DCHECK_LE(nested_start + take_count, cur_off);
+ DCHECK_LE(nested_start + take_count,
_block_fast_path_ctx.nested_col->size());
+
+ if (!found_nested_range) {
+ found_nested_range = true;
+ first_nested_idx = nested_start;
+ expected_next_nested_idx = nested_start;
+ }
+ DCHECK_EQ(nested_start, expected_next_nested_idx);
+
+ // Map each produced output row back to its source child row for
copying non-table-function
+ // columns via insert_indices_from().
+ for (int j = 0; j < take_count; ++j) {
+ row_ids.push_back(cast_set<uint32_t>(child_row));
+ }
Review Comment:
The fast path builds `row_ids` by pushing one entry per produced output row,
then calls `insert_indices_from()` for each copied child column. Since
`take_count` is already grouped by `child_row`, the copy could instead be done
incrementally with `insert_many_from(*src_column, child_row, take_count)` per
segment, which would avoid materializing `row_ids` entirely. At minimum,
replace the per-row push loop with a single bulk insert (as in the suggestion
below); the per-row loop can become noticeable for large expansions.
```suggestion
row_ids.insert(row_ids.end(), take_count,
cast_set<uint32_t>(child_row));
```
##########
be/src/exec/operator/table_function_operator.cpp:
##########
@@ -160,6 +167,209 @@ bool TableFunctionLocalState::_is_inner_and_empty() {
return false;
}
+bool TableFunctionLocalState::_can_use_block_fast_path() const {
+ auto& p = _parent->cast<TableFunctionOperatorX>();
+ // Fast path is only valid when:
+ // - only one table function exists
+ // - there is an active child row to expand
+ // - the child block is non-empty
+ // - the table function can expose nested/offsets via
prepare_block_fast_path()
+ return p._fn_num == 1 && _cur_child_offset != -1 && _child_block->rows() >
0 &&
+ _fns[0]->support_block_fast_path();
+}
+
+void TableFunctionLocalState::_reset_block_fast_path_state() {
+ _block_fast_path_prepared = false;
+ _block_fast_path_enabled = false;
+ _block_fast_path_ctx = {};
+ _block_fast_path_row = 0;
+ _block_fast_path_in_row_offset = 0;
+}
+
+Status TableFunctionLocalState::_prepare_block_fast_path(RuntimeState* state) {
+ if (_block_fast_path_prepared) {
+ return Status::OK();
+ }
+
+ RETURN_IF_ERROR(
+ _fns[0]->prepare_block_fast_path(_child_block.get(), state,
&_block_fast_path_ctx));
Review Comment:
`prepare_block_fast_path()` can legitimately return
`Status::NotSupported(...)` (e.g. `VExplodeV2TableFunction` when there are
multiple parameters). Propagating that via `RETURN_IF_ERROR` will fail the
query instead of falling back to the existing row-wise path, which contradicts
the intended “fallback when unsupported” behavior. Treat
NOT_IMPLEMENTED/NotSupported as “fast path disabled” (set
`_block_fast_path_prepared=true`, `_block_fast_path_enabled=false`) and
continue with the slow path; only propagate real errors (e.g. INTERNAL_ERROR).
```suggestion
Status st =
_fns[0]->prepare_block_fast_path(_child_block.get(), state,
&_block_fast_path_ctx);
if (st.is_not_supported() || st.is_not_implemented()) {
// Fast path is not available for this function/input; mark as
prepared but disabled
// so that the operator can safely fall back to the row-wise path
without retrying.
_block_fast_path_prepared = true;
_block_fast_path_enabled = false;
return Status::OK();
}
RETURN_IF_ERROR(st);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]