This is an automated email from the ASF dual-hosted git repository. yangzhg pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 7d92bf095a [fix](expr) refractor create_tree_from_thrift to avoid stack overflow (#18214) 7d92bf095a is described below commit 7d92bf095a31233403bd2422f92359024749fa42 Author: camby <104178...@qq.com> AuthorDate: Fri Mar 31 10:38:20 2023 +0800 [fix](expr) refractor create_tree_from_thrift to avoid stack overflow (#18214) --- be/src/vec/exec/vanalytic_eval_node.cpp | 4 +- be/src/vec/exprs/vectorized_agg_fn.cpp | 3 +- be/src/vec/exprs/vexpr.cpp | 60 ++++++++++++++-------- be/src/vec/exprs/vexpr.h | 6 +-- regression-test/pipeline/p0/conf/be.conf | 1 + .../suites/query_p1/test_sql_depth.groovy | 44 ++++++++++++++++ 6 files changed, 90 insertions(+), 28 deletions(-) diff --git a/be/src/vec/exec/vanalytic_eval_node.cpp b/be/src/vec/exec/vanalytic_eval_node.cpp index f31fc03daf..932baa370d 100644 --- a/be/src/vec/exec/vanalytic_eval_node.cpp +++ b/be/src/vec/exec/vanalytic_eval_node.cpp @@ -115,8 +115,8 @@ Status VAnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) { ++node_idx; VExpr* expr = nullptr; VExprContext* ctx = nullptr; - RETURN_IF_ERROR(VExpr::create_tree_from_thrift(_pool, desc.nodes, nullptr, &node_idx, - &expr, &ctx)); + RETURN_IF_ERROR( + VExpr::create_tree_from_thrift(_pool, desc.nodes, &node_idx, &expr, &ctx)); _agg_expr_ctxs[i].emplace_back(ctx); } diff --git a/be/src/vec/exprs/vectorized_agg_fn.cpp b/be/src/vec/exprs/vectorized_agg_fn.cpp index ec7f2ef6f2..5c5ed94d17 100644 --- a/be/src/vec/exprs/vectorized_agg_fn.cpp +++ b/be/src/vec/exprs/vectorized_agg_fn.cpp @@ -63,8 +63,7 @@ Status AggFnEvaluator::create(ObjectPool* pool, const TExpr& desc, const TSortIn ++node_idx; VExpr* expr = nullptr; VExprContext* ctx = nullptr; - RETURN_IF_ERROR( - VExpr::create_tree_from_thrift(pool, desc.nodes, nullptr, &node_idx, &expr, &ctx)); + RETURN_IF_ERROR(VExpr::create_tree_from_thrift(pool, desc.nodes, &node_idx, &expr, &ctx)); agg_fn_evaluator->_input_exprs_ctxs.push_back(ctx); } diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index b21fa4db67..46297a724f 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -210,32 +210,50 @@ Status VExpr::create_expr(doris::ObjectPool* pool, const doris::TExprNode& texpr } Status VExpr::create_tree_from_thrift(doris::ObjectPool* pool, - const std::vector<doris::TExprNode>& nodes, VExpr* parent, - int* node_idx, VExpr** root_expr, VExprContext** ctx) { + const std::vector<doris::TExprNode>& nodes, int* node_idx, + VExpr** root_expr, VExprContext** ctx) { // propagate error case if (*node_idx >= nodes.size()) { return Status::InternalError("Failed to reconstruct expression tree from thrift."); } - int num_children = nodes[*node_idx].num_children; - VExpr* expr = nullptr; - RETURN_IF_ERROR(create_expr(pool, nodes[*node_idx], &expr)); - DCHECK(expr != nullptr); - if (parent != nullptr) { - parent->add_child(expr); - } else { - DCHECK(root_expr != nullptr); - DCHECK(ctx != nullptr); - *root_expr = expr; - *ctx = pool->add(new VExprContext(expr)); - } - for (int i = 0; i < num_children; i++) { - *node_idx += 1; - RETURN_IF_ERROR(create_tree_from_thrift(pool, nodes, expr, node_idx, nullptr, nullptr)); - // we are expecting a child, but have used all nodes - // this means we have been given a bad tree and must fail - if (*node_idx >= nodes.size()) { + + // create root expr + int root_children = nodes[*node_idx].num_children; + VExpr* root = nullptr; + RETURN_IF_ERROR(create_expr(pool, nodes[*node_idx], &root)); + DCHECK(root != nullptr); + + DCHECK(root_expr != nullptr); + DCHECK(ctx != nullptr); + *root_expr = root; + *ctx = pool->add(new VExprContext(root)); + // short path for leaf node + if (root_children <= 0) { + return Status::OK(); + } + + // non-recursive traversal + std::stack<std::pair<VExpr*, int>> s; + s.push({root, root_children}); + while (!s.empty()) { + auto& parent = s.top(); + if (parent.second > 1) { + parent.second -= 1; + } else { + s.pop(); + } + + if (++*node_idx >= nodes.size()) { return Status::InternalError("Failed to reconstruct expression tree from thrift."); } + VExpr* expr = nullptr; + RETURN_IF_ERROR(create_expr(pool, nodes[*node_idx], &expr)); + DCHECK(expr != nullptr); + parent.first->add_child(expr); + int num_children = nodes[*node_idx].num_children; + if (num_children > 0) { + s.push({expr, num_children}); + } } return Status::OK(); } @@ -248,7 +266,7 @@ Status VExpr::create_expr_tree(doris::ObjectPool* pool, const doris::TExpr& texp } int node_idx = 0; VExpr* e = nullptr; - Status status = create_tree_from_thrift(pool, texpr.nodes, nullptr, &node_idx, &e, ctx); + Status status = create_tree_from_thrift(pool, texpr.nodes, &node_idx, &e, ctx); if (status.ok() && node_idx + 1 != texpr.nodes.size()) { status = Status::InternalError( "Expression tree only partially reconstructed. Not all thrift nodes were " diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 2a3b24892a..8af3d2ad16 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -127,9 +127,9 @@ public: static Status create_expr(ObjectPool* pool, const TExprNode& texpr_node, VExpr** expr); - static Status create_tree_from_thrift(ObjectPool* pool, const std::vector<TExprNode>& nodes, - VExpr* parent, int* node_idx, VExpr** root_expr, - VExprContext** ctx); + static Status create_tree_from_thrift(doris::ObjectPool* pool, + const std::vector<doris::TExprNode>& nodes, int* node_idx, + VExpr** root_expr, VExprContext** ctx); const std::vector<VExpr*>& children() const { return _children; } void set_children(std::vector<VExpr*> children) { _children = children; } virtual std::string debug_string() const; diff --git a/regression-test/pipeline/p0/conf/be.conf b/regression-test/pipeline/p0/conf/be.conf index 036e8e26fa..a4db97eb77 100644 --- a/regression-test/pipeline/p0/conf/be.conf +++ b/regression-test/pipeline/p0/conf/be.conf @@ -69,3 +69,4 @@ tablet_map_shard_size=256 priority_networks=172.19.0.0/24 fragment_pool_thread_num_max=5000 enable_fuzzy_mode=true +max_depth_of_expr_tree=200 diff --git a/regression-test/suites/query_p1/test_sql_depth.groovy b/regression-test/suites/query_p1/test_sql_depth.groovy new file mode 100644 index 0000000000..12e738e5eb --- /dev/null +++ b/regression-test/suites/query_p1/test_sql_depth.groovy @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_sql_depth") { + def tableName = "tb_test_sql_depth" + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE ${tableName} ( + `k1` int NULL, + `k2` int NULL, + `k3` bigint NULL, + `k4` varchar(20) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`,`k2`,`k3`,`k4`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES("replication_num" = "1"); + """ + sql "insert into ${tableName} values(1,2,3,'4'),(5,6,7,'8')" + + // set a large number to prevent rewrite OR to IN predicate + sql "set rewrite_or_to_in_predicate_threshold=1000;" + + // try a SQL with too large tree depth + try { + sql "select * from ${tableName} where k1=1 OR k1=2 OR k1=3 OR k1=4 OR k1=5 OR k1=6 OR k1=7 OR k1=8 OR k1=9 OR k1=10 OR k1=11 OR k1=12 OR k1=13 OR k1=14 OR k1=15 OR k1=16 OR k1=17 OR k1=18 OR k1=19 OR k1=20 OR k1=21 OR k1=22 OR k1=23 OR k1=24 OR k1=25 OR k1=26 OR k1=27 OR k1=28 OR k1=29 OR k1=30 OR k1=31 OR k1=32 OR k1=33 OR k1=34 OR k1=35 OR k1=36 OR k1=37 OR k1=38 OR k1=39 OR k1=40 OR k1=41 OR k1=42 OR k1=43 OR k1=44 OR k1=45 OR k1=46 OR k1=47 OR k1=48 OR k1=49 OR k1=50 OR k1=51 [...] + } catch (Exception ex) { + assert("${ex}".contains("The depth of the expression tree is too big")) + } +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org