This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 8ee24cb379d [chore](explain) Add algorithm item to VSORT explainition and modify dump_data of Block (#38543) 8ee24cb379d is described below commit 8ee24cb379d36ecf08fbfb928ffc53873beb908e Author: zclllhhjj <zhaochan...@selectdb.com> AuthorDate: Thu Aug 1 10:07:59 2024 +0800 [chore](explain) Add algorithm item to VSORT explainition and modify dump_data of Block (#38543) ## Proposed changes Issue Number: close #xxx 1. Add an algorithm record to `VSORT`'s explain string: before: ```sql | 1:VSORT(101) | | | order by: xxx | | | offset: 0 | | | distribute expr lists: | | | tuple ids: 2 | ``` after: ```sql | 1:VSORT(101) | | | order by: xxx | | | algorithm: full sort | | | offset: 0 | | | distribute expr lists: | | | tuple ids: 2 | ``` 2. Add a new parameter, `allow_null_mismatch`, so that `Block::dump_data()` works when the nullability of a column's data type differs from that of the column itself. This is useful inside functions called under `default_implementation_for_nulls()`. --- be/src/vec/core/block.cpp | 12 ++++++++++-- be/src/vec/core/block.h | 12 +++++++++--- be/src/vec/functions/function.cpp | 3 ++- .../src/main/java/org/apache/doris/planner/SortNode.java | 13 +++++++++++++ 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 35b8c0eb69a..fabd468ca4c 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -53,6 +53,7 @@ #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" #include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" class SipHash; @@ -476,7 +477,7 @@ std::string Block::dump_types() const { return out; } -std::string Block::dump_data(size_t begin, size_t row_limit) const { +std::string Block::dump_data(size_t begin, size_t row_limit, bool allow_null_mismatch) const { std::vector<std::string> headers; std::vector<size_t> headers_size; for (const auto& it : data) { @@ 
-515,7 +516,14 @@ std::string Block::dump_data(size_t begin, size_t row_limit) const { } std::string s; if (data[i].column) { - s = data[i].to_string(row_num); + if (data[i].type->is_nullable() && !data[i].column->is_nullable()) { + assert(allow_null_mismatch); + s = assert_cast<const DataTypeNullable*>(data[i].type.get()) + ->get_nested_type() + ->to_string(*data[i].column, row_num); + } else { + s = data[i].to_string(row_num); + } } if (s.length() > headers_size[i]) { s = s.substr(0, headers_size[i] - 3) + "..."; diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index 6f50ff0035a..741039e1465 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -255,14 +255,20 @@ public: bool empty() const { return rows() == 0; } - /** Updates SipHash of the Block, using update method of columns. + /** + * Updates SipHash of the Block, using update method of columns. * Returns hash for block, that could be used to differentiate blocks * with same structure, but different data. */ void update_hash(SipHash& hash) const; - /** Get block data in string. */ - std::string dump_data(size_t begin = 0, size_t row_limit = 100) const; + /** + * Get block data in string. + * If code is in default_implementation_for_nulls or something likely, type and column's nullity could + * temporarily be not same. set allow_null_mismatch to true to dump it correctly. 
+ */ + std::string dump_data(size_t begin = 0, size_t row_limit = 100, + bool allow_null_mismatch = false) const; static std::string dump_column(ColumnPtr col, DataTypePtr type) { ColumnWithTypeAndName type_name {col, type, ""}; diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp index cfc6a39f397..1fea4c70fc1 100644 --- a/be/src/vec/functions/function.cpp +++ b/be/src/vec/functions/function.cpp @@ -216,7 +216,8 @@ Status PreparedFunctionImpl::default_implementation_for_nulls( } RETURN_IF_ERROR(execute_without_low_cardinality_columns(context, block, new_args, result, block.rows(), dry_run)); - // after run with nested, wrap them in null. + // After run with nested, wrap them in null. Before this, block.get_by_position(result).type + // is not compatible with get_by_position(result).column block.get_by_position(result).column = wrap_in_nullable( block.get_by_position(result).column, block, args, result, input_rows_count); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java index 4cdc04d1f1b..5a8f9f628f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java @@ -198,6 +198,19 @@ public class SortNode extends PlanNode { if (useTwoPhaseReadOpt) { output.append(detailPrefix + "OPT TWO PHASE\n"); } + + output.append(detailPrefix + "algorithm: "); + boolean isFixedLength = info.getOrderingExprs().stream().allMatch(e -> !e.getType().isStringType() + && !e.getType().isCollectionType()); + if (limit > 0 && limit + offset < 1024 && (useTwoPhaseReadOpt || hasRuntimePredicate + || isFixedLength)) { + output.append("heap sort\n"); + } else if (limit > 0 && !isFixedLength && limit + offset < 256) { + output.append("topn sort\n"); + } else { + output.append("full sort\n"); + } + output.append(detailPrefix).append("offset: ").append(offset).append("\n"); return 
output.toString(); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org