This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1e65c24455e [regression](limit) Add group by limit regression test 
case (#37940)
1e65c24455e is described below

commit 1e65c24455ebdf9570cd5f97a8bac1b0956a2f14
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Fri Jul 19 09:54:55 2024 +0800

    [regression](limit) Add group by limit regression test case (#37940)
    
    Add group by limit regression test case
---
 be/src/common/config.cpp                           |  2 +
 be/src/common/config.h                             |  4 ++
 be/src/pipeline/exec/aggregation_sink_operator.cpp |  3 +-
 .../data/query_p0/limit/test_group_by_limit.out    | 66 ++++++++++++++++++++++
 .../query_p0/limit/test_group_by_limit.groovy      | 64 +++++++++++++++++++++
 5 files changed, 138 insertions(+), 1 deletion(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 3e9203987c2..b152111011e 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1343,6 +1343,8 @@ DEFINE_mBool(ignore_not_found_file_in_external_table, 
"true");
 
 DEFINE_mBool(enable_hdfs_mem_limiter, "true");
 
+DEFINE_mInt16(topn_agg_limit_multiplier, "2");
+
 // clang-format off
 #ifdef BE_TEST
 // test s3
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 1ce9c66939c..f4ed1decaa0 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1435,6 +1435,10 @@ DECLARE_mBool(ignore_not_found_file_in_external_table);
 
 DECLARE_mBool(enable_hdfs_mem_limiter);
 
+// When sort limit is enabled, the agg limit optimization kicks in once the
+// hash table size reaches limit * topn_agg_limit_multiplier.
+DECLARE_mInt16(topn_agg_limit_multiplier);
+
 #ifdef BE_TEST
 // test s3
 DECLARE_String(test_s3_resource);
diff --git a/be/src/pipeline/exec/aggregation_sink_operator.cpp 
b/be/src/pipeline/exec/aggregation_sink_operator.cpp
index 79ca07281d9..f3a6942c33f 100644
--- a/be/src/pipeline/exec/aggregation_sink_operator.cpp
+++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp
@@ -503,7 +503,8 @@ Status 
AggSinkLocalState::_execute_with_serialized_key_helper(vectorized::Block*
                 _shared_state->reach_limit =
                         hash_table_size >=
                         (_shared_state->do_sort_limit
-                                 ? Base::_parent->template 
cast<AggSinkOperatorX>()._limit * 5
+                                 ? Base::_parent->template 
cast<AggSinkOperatorX>()._limit *
+                                           config::topn_agg_limit_multiplier
                                  : Base::_parent->template 
cast<AggSinkOperatorX>()._limit);
                 if (_shared_state->reach_limit && 
_shared_state->do_sort_limit) {
                     _shared_state->build_limit_heap(hash_table_size);
diff --git a/regression-test/data/query_p0/limit/test_group_by_limit.out 
b/regression-test/data/query_p0/limit/test_group_by_limit.out
new file mode 100644
index 00000000000..d9ac2a2481a
--- /dev/null
+++ b/regression-test/data/query_p0/limit/test_group_by_limit.out
@@ -0,0 +1,66 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+253967024      8491    AIR
+259556658      8641    FOB
+260402265      8669    MAIL
+
+-- !select --
+449872500      15000   1
+386605746      12900   2
+320758616      10717   3
+
+-- !select --
+198674527      6588    0.0
+198679731      6563    0.01
+198501055      6622    0.02
+
+-- !select --
+27137  1       1992-02-02
+45697  1       1992-02-04
+114452 5       1992-02-05
+
+-- !select --
+27137  1       1992-02-02T00:00
+45697  1       1992-02-04T00:00
+114452 5       1992-02-05T00:00
+
+-- !select --
+139015016      4632    1
+130287219      4313    2
+162309750      5334    3
+
+-- !select --
+64774969       2166    AIR     1
+54166166       1804    AIR     2
+45538267       1532    AIR     3
+
+-- !select --
+6882631        228     AIR     1       0.0
+6756423        228     AIR     1       0.01
+7920028        254     AIR     1       0.02
+
+-- !select --
+7618   1       AIR     1       0.0     1992-02-06
+2210   1       AIR     1       0.0     1992-03-24
+16807  1       AIR     1       0.0     1992-03-29
+
+-- !select --
+6882631        228     AIR     1       0.0
+6756423        228     AIR     1       0.01
+7920028        254     AIR     1       0.02
+
+-- !select --
+6882631        228     AIR     1       0.0
+6756423        228     AIR     1       0.01
+7920028        254     AIR     1       0.02
+
+-- !select --
+7707018        238     TRUCK   1       0.0
+7467045        233     TRUCK   1       0.01
+6927206        245     TRUCK   1       0.02
+
+-- !select --
+7661562        249     TRUCK   1       0.08
+6673139        228     TRUCK   1       0.07
+8333862        265     TRUCK   1       0.06
+
diff --git a/regression-test/suites/query_p0/limit/test_group_by_limit.groovy 
b/regression-test/suites/query_p0/limit/test_group_by_limit.groovy
new file mode 100644
index 00000000000..271619c4a93
--- /dev/null
+++ b/regression-test/suites/query_p0/limit/test_group_by_limit.groovy
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_group_by_limit", "query") {
+
+sql 'set enable_agg_spill=false'
+
+sql 'set enable_force_spill=false'
+
+sql 'set topn_opt_limit_threshold=10'
+
+
+// different types
+qt_select """ select  sum(orderkey), count(partkey), shipmode from 
tpch_tiny_lineitem group by shipmode limit 3; """
+
+qt_select """ select  sum(orderkey), count(partkey),  linenumber from 
tpch_tiny_lineitem group by linenumber limit 3; """
+
+qt_select """ select  sum(orderkey), count(partkey),  tax from 
tpch_tiny_lineitem group by tax limit 3; """
+
+qt_select """ select  sum(orderkey), count(partkey),  commitdate from 
tpch_tiny_lineitem group by commitdate limit 3; """
+
+
+// group by functions
+qt_select """ select  sum(orderkey), count(partkey),  cast(commitdate as 
datetime) from tpch_tiny_lineitem group by cast(commitdate as datetime) limit 
3; """
+
+qt_select """ select  sum(orderkey), count(partkey),  month(commitdate) from 
tpch_tiny_lineitem group by month(commitdate) limit 3; """
+
+
+// multi column
+qt_select """ select  sum(orderkey), count(partkey), shipmode, linenumber from 
tpch_tiny_lineitem group by shipmode, linenumber limit 3; """
+
+qt_select """ select  sum(orderkey), count(partkey), shipmode, linenumber , 
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax limit 3; """
+
+qt_select """ select  sum(orderkey), count(partkey), shipmode, linenumber , 
tax , commitdate from tpch_tiny_lineitem group by shipmode, linenumber, tax, 
commitdate  limit 3; """
+
+
+// group by + order by 
+
+// group by columns eq order by columns
+qt_select """ select  sum(orderkey), count(partkey), shipmode, linenumber , 
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by 
shipmode, linenumber, tax limit 3; """
+
+// group by columns contain order by columns
+qt_select """ select  sum(orderkey), count(partkey), shipmode, linenumber , 
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by 
shipmode limit 3; """
+
+// desc order by column
+qt_select """ select  sum(orderkey), count(partkey), shipmode, linenumber , 
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by 
shipmode desc, linenumber, tax limit 3; """
+
+qt_select """ select  sum(orderkey), count(partkey), shipmode, linenumber , 
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by 
shipmode desc, linenumber, tax desc limit 3; """
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to