This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 1e65c24455e [regression](limit) Add group by limit regression test case (#37940) 1e65c24455e is described below commit 1e65c24455ebdf9570cd5f97a8bac1b0956a2f14 Author: HappenLee <happen...@hotmail.com> AuthorDate: Fri Jul 19 09:54:55 2024 +0800 [regression](limit) Add group by limit regression test case (#37940) Add group by limit regression test case --- be/src/common/config.cpp | 2 + be/src/common/config.h | 4 ++ be/src/pipeline/exec/aggregation_sink_operator.cpp | 3 +- .../data/query_p0/limit/test_group_by_limit.out | 66 ++++++++++++++++++++++ .../query_p0/limit/test_group_by_limit.groovy | 64 +++++++++++++++++++++ 5 files changed, 138 insertions(+), 1 deletion(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 3e9203987c2..b152111011e 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1343,6 +1343,8 @@ DEFINE_mBool(ignore_not_found_file_in_external_table, "true"); DEFINE_mBool(enable_hdfs_mem_limiter, "true"); +DEFINE_mInt16(topn_agg_limit_multiplier, "2"); + // clang-format off #ifdef BE_TEST // test s3 diff --git a/be/src/common/config.h b/be/src/common/config.h index 1ce9c66939c..f4ed1decaa0 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1435,6 +1435,10 @@ DECLARE_mBool(ignore_not_found_file_in_external_table); DECLARE_mBool(enable_hdfs_mem_limiter); +// Multiplier applied to the limit for sort-limit aggregation: the agg limit opt +// kicks in once the hash table size reaches limit * topn_agg_limit_multiplier +DECLARE_mInt16(topn_agg_limit_multiplier); + #ifdef BE_TEST // test s3 DECLARE_String(test_s3_resource); diff --git a/be/src/pipeline/exec/aggregation_sink_operator.cpp b/be/src/pipeline/exec/aggregation_sink_operator.cpp index 79ca07281d9..f3a6942c33f 100644 --- a/be/src/pipeline/exec/aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp @@ -503,7 +503,8 @@ Status AggSinkLocalState::_execute_with_serialized_key_helper(vectorized::Block* 
_shared_state->reach_limit = hash_table_size >= (_shared_state->do_sort_limit - ? Base::_parent->template cast<AggSinkOperatorX>()._limit * 5 + ? Base::_parent->template cast<AggSinkOperatorX>()._limit * + config::topn_agg_limit_multiplier : Base::_parent->template cast<AggSinkOperatorX>()._limit); if (_shared_state->reach_limit && _shared_state->do_sort_limit) { _shared_state->build_limit_heap(hash_table_size); diff --git a/regression-test/data/query_p0/limit/test_group_by_limit.out b/regression-test/data/query_p0/limit/test_group_by_limit.out new file mode 100644 index 00000000000..d9ac2a2481a --- /dev/null +++ b/regression-test/data/query_p0/limit/test_group_by_limit.out @@ -0,0 +1,66 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +253967024 8491 AIR +259556658 8641 FOB +260402265 8669 MAIL + +-- !select -- +449872500 15000 1 +386605746 12900 2 +320758616 10717 3 + +-- !select -- +198674527 6588 0.0 +198679731 6563 0.01 +198501055 6622 0.02 + +-- !select -- +27137 1 1992-02-02 +45697 1 1992-02-04 +114452 5 1992-02-05 + +-- !select -- +27137 1 1992-02-02T00:00 +45697 1 1992-02-04T00:00 +114452 5 1992-02-05T00:00 + +-- !select -- +139015016 4632 1 +130287219 4313 2 +162309750 5334 3 + +-- !select -- +64774969 2166 AIR 1 +54166166 1804 AIR 2 +45538267 1532 AIR 3 + +-- !select -- +6882631 228 AIR 1 0.0 +6756423 228 AIR 1 0.01 +7920028 254 AIR 1 0.02 + +-- !select -- +7618 1 AIR 1 0.0 1992-02-06 +2210 1 AIR 1 0.0 1992-03-24 +16807 1 AIR 1 0.0 1992-03-29 + +-- !select -- +6882631 228 AIR 1 0.0 +6756423 228 AIR 1 0.01 +7920028 254 AIR 1 0.02 + +-- !select -- +6882631 228 AIR 1 0.0 +6756423 228 AIR 1 0.01 +7920028 254 AIR 1 0.02 + +-- !select -- +7707018 238 TRUCK 1 0.0 +7467045 233 TRUCK 1 0.01 +6927206 245 TRUCK 1 0.02 + +-- !select -- +7661562 249 TRUCK 1 0.08 +6673139 228 TRUCK 1 0.07 +8333862 265 TRUCK 1 0.06 + diff --git a/regression-test/suites/query_p0/limit/test_group_by_limit.groovy 
b/regression-test/suites/query_p0/limit/test_group_by_limit.groovy new file mode 100644 index 00000000000..271619c4a93 --- /dev/null +++ b/regression-test/suites/query_p0/limit/test_group_by_limit.groovy @@ -0,0 +1,64 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_group_by_limit", "query") { + +sql 'set enable_agg_spill=false' + +sql 'set enable_force_spill=false' + +sql 'set topn_opt_limit_threshold=10' + + +// different types +qt_select """ select sum(orderkey), count(partkey), shipmode from tpch_tiny_lineitem group by shipmode limit 3; """ + +qt_select """ select sum(orderkey), count(partkey), linenumber from tpch_tiny_lineitem group by linenumber limit 3; """ + +qt_select """ select sum(orderkey), count(partkey), tax from tpch_tiny_lineitem group by tax limit 3; """ + +qt_select """ select sum(orderkey), count(partkey), commitdate from tpch_tiny_lineitem group by commitdate limit 3; """ + + +// group by functions +qt_select """ select sum(orderkey), count(partkey), cast(commitdate as datetime) from tpch_tiny_lineitem group by cast(commitdate as datetime) limit 3; """ + +qt_select """ select sum(orderkey), count(partkey), month(commitdate) from tpch_tiny_lineitem group by month(commitdate) limit 3; """ + + +// multi column +qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber from tpch_tiny_lineitem group by shipmode, linenumber limit 3; """ + +qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber , tax from tpch_tiny_lineitem group by shipmode, linenumber, tax limit 3; """ + +qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber , tax , commitdate from tpch_tiny_lineitem group by shipmode, linenumber, tax, commitdate limit 3; """ + + +// group by + order by + +// group by columns eq order by columns +qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber , tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by shipmode, linenumber, tax limit 3; """ + +// group by columns contains order by columns +qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber , tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by shipmode limit 3; """ + +// desc order by column +qt_select """ select 
sum(orderkey), count(partkey), shipmode, linenumber , tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by shipmode desc, linenumber, tax limit 3; """ + +qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber , tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by shipmode desc, linenumber, tax desc limit 3; """ + +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org