This is an automated email from the ASF dual-hosted git repository.

jacktengg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d0717242e1e [opt](spill) add session variable of 'enable_force_spill' 
(#34664)
d0717242e1e is described below

commit d0717242e1e4ad76816f149bfe3f9403e0f9a092
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Sat May 11 14:16:03 2024 +0800

    [opt](spill) add session variable of 'enable_force_spill' (#34664)
---
 be/src/pipeline/pipeline_task.cpp                  |   8 +
 be/src/runtime/runtime_state.h                     |  13 +-
 .../java/org/apache/doris/qe/SessionVariable.java  |  13 +-
 gensrc/thrift/PaloInternalService.thrift           |   2 +
 .../data/tpcds_sf1_p1/spill_test/q23.out           |  10 +
 .../suites/tpcds_sf1_p1/spill_test/q23.groovy      | 220 +++++++++++++++++++++
 6 files changed, 262 insertions(+), 4 deletions(-)

diff --git a/be/src/pipeline/pipeline_task.cpp 
b/be/src/pipeline/pipeline_task.cpp
index 3e7e503c18d..f9c5ecf5295 100644
--- a/be/src/pipeline/pipeline_task.cpp
+++ b/be/src/pipeline/pipeline_task.cpp
@@ -302,6 +302,14 @@ bool PipelineTask::should_revoke_memory(RuntimeState* 
state, int64_t revocable_m
         return false;
     }
     const auto min_revocable_mem_bytes = state->min_revocable_mem();
+
+    if (UNLIKELY(state->enable_force_spill())) {
+        if (revocable_mem_bytes >= min_revocable_mem_bytes) {
+            LOG_ONCE(INFO) << "spill force, query: " << 
print_id(state->query_id());
+            return true;
+        }
+    }
+
     bool is_wg_mem_low_water_mark = false;
     bool is_wg_mem_high_water_mark = false;
     wg->check_mem_used(&is_wg_mem_low_water_mark, &is_wg_mem_high_water_mark);
diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index 7b6b7cc779b..8edfe50429a 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -605,15 +605,22 @@ public:
     bool is_nereids() const;
 
     bool enable_join_spill() const {
-        return _query_options.__isset.enable_join_spill && 
_query_options.enable_join_spill;
+        return (_query_options.__isset.enable_force_spill && 
_query_options.enable_force_spill) ||
+               (_query_options.__isset.enable_join_spill && 
_query_options.enable_join_spill);
     }
 
     bool enable_sort_spill() const {
-        return _query_options.__isset.enable_sort_spill && 
_query_options.enable_sort_spill;
+        return (_query_options.__isset.enable_force_spill && 
_query_options.enable_force_spill) ||
+               (_query_options.__isset.enable_sort_spill && 
_query_options.enable_sort_spill);
     }
 
     bool enable_agg_spill() const {
-        return _query_options.__isset.enable_agg_spill && 
_query_options.enable_agg_spill;
+        return (_query_options.__isset.enable_force_spill && 
_query_options.enable_force_spill) ||
+               (_query_options.__isset.enable_agg_spill && 
_query_options.enable_agg_spill);
+    }
+
+    bool enable_force_spill() const {
+        return _query_options.__isset.enable_force_spill && 
_query_options.enable_force_spill;
     }
 
     int64_t min_revocable_mem() const {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 1e4563962d8..768f54e3d7d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -508,6 +508,7 @@ public class SessionVariable implements Serializable, 
Writable {
     public static final String ENABLE_JOIN_SPILL = "enable_join_spill";
     public static final String ENABLE_SORT_SPILL = "enable_sort_spill";
     public static final String ENABLE_AGG_SPILL = "enable_agg_spill";
+    public static final String ENABLE_FORCE_SPILL = "enable_force_spill";
     public static final String DATA_QUEUE_MAX_BLOCKS = "data_queue_max_blocks";
 
     public static final String GENERATE_STATS_FACTOR = "generate_stats_factor";
@@ -1799,6 +1800,15 @@ public class SessionVariable implements Serializable, 
Writable {
             needForward = true, fuzzy = true)
     public boolean enableAggSpill = false;
 
+    @VariableMgr.VarAttr(
+            name = ENABLE_FORCE_SPILL,
+            description = {"控制是否开启强制落盘(即使在内存足够的情况),默认为 false。",
+                    "Controls whether enable force spill."
+            },
+            needForward = true, fuzzy = true
+    )
+    public boolean enableForceSpill = false;
+
     @VariableMgr.VarAttr(
             name = DATA_QUEUE_MAX_BLOCKS,
             description = {"DataQueue 中每个子队列允许最大的 block 个数",
@@ -1982,7 +1992,7 @@ public class SessionVariable implements Serializable, 
Writable {
                         this.minRevocableMem = 1024 * 1024;
                         break;
                     default:
-                        this.minRevocableMem = 100 * 1024 * 1024 * 1024;
+                        this.minRevocableMem = 100L * 1024 * 1024 * 1024;
                         break;
                 }
             } else {
@@ -3270,6 +3280,7 @@ public class SessionVariable implements Serializable, 
Writable {
         tResult.setEnableJoinSpill(enableJoinSpill);
         tResult.setEnableSortSpill(enableSortSpill);
         tResult.setEnableAggSpill(enableAggSpill);
+        tResult.setEnableForceSpill(enableForceSpill);
         tResult.setMinRevocableMem(minRevocableMem);
         tResult.setDataQueueMaxBlocks(dataQueueMaxBlocks);
 
diff --git a/gensrc/thrift/PaloInternalService.thrift 
b/gensrc/thrift/PaloInternalService.thrift
index 39e5e03f534..8d0c25086fb 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -292,6 +292,8 @@ struct TQueryOptions {
   // expr pushdown for index filter rows
   107: optional bool enable_common_expr_pushdown_for_inverted_index = false;
   108: optional i64 local_exchange_free_blocks_limit;
+
+  109: optional bool enable_force_spill = false;
   
   // For cloud, to control if the content would be written into file cache
   1000: optional bool disable_file_cache = false
diff --git a/regression-test/data/tpcds_sf1_p1/spill_test/q23.out 
b/regression-test/data/tpcds_sf1_p1/spill_test/q23.out
new file mode 100644
index 00000000000..bfe03c337f6
--- /dev/null
+++ b/regression-test/data/tpcds_sf1_p1/spill_test/q23.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select1 --
+17030.91
+
+-- !select2 --
+       Robert  598.86
+Brown  Monika  6031.52
+Collins        Gordon  727.57
+Green  Jesse   9672.96
+
diff --git a/regression-test/suites/tpcds_sf1_p1/spill_test/q23.groovy 
b/regression-test/suites/tpcds_sf1_p1/spill_test/q23.groovy
new file mode 100644
index 00000000000..cc5408d6b81
--- /dev/null
+++ b/regression-test/suites/tpcds_sf1_p1/spill_test/q23.groovy
@@ -0,0 +1,220 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("q23") {
+    sql """ set enable_force_spill  =true; """
+    sql """ set min_revocable_mem = 65536; """
+    sql """ use regression_test_tpcds_sf1_p1; """
+
+    qt_select1 """
+    WITH
+      frequent_ss_items AS (
+       SELECT
+         substr(i_item_desc, 1, 30) itemdesc
+       , i_item_sk item_sk
+       , d_date solddate
+       , count(*) cnt
+       FROM
+         store_sales
+       , date_dim
+       , item
+       WHERE (ss_sold_date_sk = d_date_sk)
+          AND (ss_item_sk = i_item_sk)
+          AND (d_year IN (2000   , (2000 + 1)   , (2000 + 2)   , (2000 + 3)))
+       GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
+       HAVING (count(*) > 4)
+    )
+    , max_store_sales AS (
+       SELECT max(csales) tpcds_cmax
+       FROM
+         (
+          SELECT
+            c_customer_sk
+          , sum((ss_quantity * ss_sales_price)) csales
+          FROM
+            store_sales
+          , customer
+          , date_dim
+          WHERE (ss_customer_sk = c_customer_sk)
+             AND (ss_sold_date_sk = d_date_sk)
+             AND (d_year IN (2000      , (2000 + 1)      , (2000 + 2)      , 
(2000 + 3)))
+          GROUP BY c_customer_sk
+       ) x
+    )
+    , best_ss_customer AS (
+       SELECT
+         c_customer_sk
+       , sum((ss_quantity * ss_sales_price)) ssales
+       FROM
+         store_sales
+       , customer
+       WHERE (ss_customer_sk = c_customer_sk)
+       GROUP BY c_customer_sk
+       HAVING (sum((ss_quantity * ss_sales_price)) > ((50 / CAST('100.0' AS 
DECIMAL(5,2))) * (
+                SELECT *
+                FROM
+                  max_store_sales
+             )))
+    )
+    SELECT sum(sales)
+    FROM
+      (
+       SELECT (cs_quantity * cs_list_price) sales
+       FROM
+         catalog_sales
+       , date_dim
+       WHERE (d_year = 2000)
+          AND (d_moy = 2)
+          AND (cs_sold_date_sk = d_date_sk)
+          AND (cs_item_sk IN (
+          SELECT item_sk
+          FROM
+            frequent_ss_items
+       ))
+          AND (cs_bill_customer_sk IN (
+          SELECT c_customer_sk
+          FROM
+            best_ss_customer
+       ))
+    UNION ALL    SELECT (ws_quantity * ws_list_price) sales
+       FROM
+         web_sales
+       , date_dim
+       WHERE (d_year = 2000)
+          AND (d_moy = 2)
+          AND (ws_sold_date_sk = d_date_sk)
+          AND (ws_item_sk IN (
+          SELECT item_sk
+          FROM
+            frequent_ss_items
+       ))
+          AND (ws_bill_customer_sk IN (
+          SELECT c_customer_sk
+          FROM
+            best_ss_customer
+       ))
+    ) y
+    LIMIT 100
+    """
+
+    qt_select2 """
+        WITH
+          frequent_ss_items AS (
+           SELECT
+             substr(i_item_desc, 1, 30) itemdesc
+           , i_item_sk item_sk
+           , d_date solddate
+           , count(*) cnt
+           FROM
+             store_sales
+           , date_dim
+           , item
+           WHERE (ss_sold_date_sk = d_date_sk)
+              AND (ss_item_sk = i_item_sk)
+              AND (d_year IN (2000   , (2000 + 1)   , (2000 + 2)   , (2000 + 
3)))
+           GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
+           HAVING (count(*) > 4)
+        )
+        , max_store_sales AS (
+           SELECT max(csales) tpcds_cmax
+           FROM
+             (
+              SELECT
+                c_customer_sk
+              , sum((ss_quantity * ss_sales_price)) csales
+              FROM
+                store_sales
+              , customer
+              , date_dim
+              WHERE (ss_customer_sk = c_customer_sk)
+                 AND (ss_sold_date_sk = d_date_sk)
+                 AND (d_year IN (2000      , (2000 + 1)      , (2000 + 2)      
, (2000 + 3)))
+              GROUP BY c_customer_sk
+           ) x
+        )
+        , best_ss_customer AS (
+           SELECT
+             c_customer_sk
+           , sum((ss_quantity * ss_sales_price)) ssales
+           FROM
+             store_sales
+           , customer
+           WHERE (ss_customer_sk = c_customer_sk)
+           GROUP BY c_customer_sk
+           HAVING (sum((ss_quantity * ss_sales_price)) > ((50 / CAST('100.0' 
AS DECIMAL(5,2))) * (
+                    SELECT *
+                    FROM
+                      max_store_sales
+                 )))
+        )
+        SELECT
+          c_last_name
+        , c_first_name
+        , sales
+        FROM
+          (
+           SELECT
+             c_last_name
+           , c_first_name
+           , sum((cs_quantity * cs_list_price)) sales
+           FROM
+             catalog_sales
+           , customer
+           , date_dim
+           WHERE (d_year = 2000)
+              AND (d_moy = 2)
+              AND (cs_sold_date_sk = d_date_sk)
+              AND (cs_item_sk IN (
+              SELECT item_sk
+              FROM
+                frequent_ss_items
+           ))
+              AND (cs_bill_customer_sk IN (
+              SELECT c_customer_sk
+              FROM
+                best_ss_customer
+           ))
+              AND (cs_bill_customer_sk = c_customer_sk)
+           GROUP BY c_last_name, c_first_name
+        UNION ALL    SELECT
+             c_last_name
+           , c_first_name
+           , sum((ws_quantity * ws_list_price)) sales
+           FROM
+             web_sales
+           , customer
+           , date_dim
+           WHERE (d_year = 2000)
+              AND (d_moy = 2)
+              AND (ws_sold_date_sk = d_date_sk)
+              AND (ws_item_sk IN (
+              SELECT item_sk
+              FROM
+                frequent_ss_items
+           ))
+              AND (ws_bill_customer_sk IN (
+              SELECT c_customer_sk
+              FROM
+                best_ss_customer
+           ))
+              AND (ws_bill_customer_sk = c_customer_sk)
+           GROUP BY c_last_name, c_first_name
+        ) z
+        ORDER BY c_last_name ASC, c_first_name ASC, sales ASC
+        LIMIT 100
+    """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to