This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7372c99eca1 [Improvement](sort) add session variable 
force_sort_algorithm and adjust some parameter about sort (#39334)
7372c99eca1 is described below

commit 7372c99eca1ce79bae320c0c2caba85bb9e88355
Author: Pxl <pxl...@qq.com>
AuthorDate: Fri Aug 16 15:13:30 2024 +0800

    [Improvement](sort) add session variable force_sort_algorithm and adjust 
some parameter about sort (#39334)
    
    1. add force_sort_algorithm to set sort algorithm
    2. do not use partial sort on string columns
    ```sql
    select count(*) from (select lo_orderpriority from lineorder order by 
lo_orderpriority limit 100000)t;
    partial sort: 22s
    pdq sort: 8s
    ```
    3. enlarge topn_opt_limit_threshold to 10240000
    ```sql
    select count(*) from (select * from lineorder order by lo_linenumber limit 
100000)t;
    heap 1s
    set topn_opt_limit_threshold=10240000; heap  0.4s
    
    select count(*) from (select * from lineorder order by lo_linenumber limit 
10000000)t;
    heap 13s
    set topn_opt_limit_threshold=10240000; heap 12s
    
    select count(*) from (select * from lineorder order by lo_linenumber limit 
100000000)t;
    heap 2min13s
    set topn_opt_limit_threshold=102400000;  heap 2 min 22.56 sec
    
    select count(*) from (select lo_orderpriority from lineorder order by 
lo_orderpriority limit 100000)t;
    heap 2.4s
    set topn_opt_limit_threshold=102400000;  heap 1s
    
    select count(*) from (select lo_orderpriority from lineorder order by 
lo_orderpriority limit 10000000)t;
    heap 21s
    set topn_opt_limit_threshold=102400000; heap 20s
    
    ```
---
 be/src/vec/columns/column_string.cpp               | 17 +++----------
 .../java/org/apache/doris/planner/SortNode.java    | 28 +++++++++++++++-------
 .../java/org/apache/doris/qe/SessionVariable.java  |  9 ++++++-
 3 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/be/src/vec/columns/column_string.cpp 
b/be/src/vec/columns/column_string.cpp
index c3cf6dadf0a..d8fd42e36c7 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -483,21 +483,10 @@ void ColumnStr<T>::get_permutation(bool reverse, size_t 
limit, int /*nan_directi
         res[i] = i;
     }
 
-    // std::partial_sort need limit << s can get performance benefit
-    if (limit > (s / 8.0)) limit = 0;
-
-    if (limit) {
-        if (reverse) {
-            std::partial_sort(res.begin(), res.begin() + limit, res.end(), 
less<false>(*this));
-        } else {
-            std::partial_sort(res.begin(), res.begin() + limit, res.end(), 
less<true>(*this));
-        }
+    if (reverse) {
+        pdqsort(res.begin(), res.end(), less<false>(*this));
     } else {
-        if (reverse) {
-            pdqsort(res.begin(), res.end(), less<false>(*this));
-        } else {
-            pdqsort(res.begin(), res.end(), less<true>(*this));
-        }
+        pdqsort(res.begin(), res.end(), less<true>(*this));
     }
 }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
index 5a8f9f628f8..34cbcf9f620 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
@@ -29,6 +29,7 @@ import org.apache.doris.analysis.SlotRef;
 import org.apache.doris.analysis.SortInfo;
 import org.apache.doris.common.NotImplementedException;
 import org.apache.doris.common.UserException;
+import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.statistics.StatisticalType;
 import org.apache.doris.statistics.StatsRecursiveDerive;
 import org.apache.doris.thrift.TExplainLevel;
@@ -339,16 +340,27 @@ public class SortNode extends PlanNode {
         msg.sort_node.setIsAnalyticSort(isAnalyticSort);
         msg.sort_node.setIsColocate(isColocate);
 
-        boolean isFixedLength = info.getOrderingExprs().stream().allMatch(e -> 
!e.getType().isStringType()
-                && !e.getType().isCollectionType());
+        boolean isFixedLength = info.getOrderingExprs().stream()
+                .allMatch(e -> !e.getType().isStringType() && 
!e.getType().isCollectionType());
+        ConnectContext connectContext = ConnectContext.get();
         TSortAlgorithm algorithm;
-        if (limit > 0 && limit + offset < 1024 && (useTwoPhaseReadOpt || 
hasRuntimePredicate
-                || isFixedLength)) {
-            algorithm = TSortAlgorithm.HEAP_SORT;
-        } else if (limit > 0 && !isFixedLength && limit + offset < 256) {
-            algorithm = TSortAlgorithm.TOPN_SORT;
+        if (connectContext != null && 
!connectContext.getSessionVariable().forceSortAlgorithm.isEmpty()) {
+            String algo = 
connectContext.getSessionVariable().forceSortAlgorithm;
+            if (algo.equals("heap")) {
+                algorithm = TSortAlgorithm.HEAP_SORT;
+            } else if (algo.equals("topn")) {
+                algorithm = TSortAlgorithm.TOPN_SORT;
+            } else {
+                algorithm = TSortAlgorithm.FULL_SORT;
+            }
         } else {
-            algorithm = TSortAlgorithm.FULL_SORT;
+            if (limit > 0 && limit + offset < 1024 && (useTwoPhaseReadOpt || 
hasRuntimePredicate || isFixedLength)) {
+                algorithm = TSortAlgorithm.HEAP_SORT;
+            } else if (limit > 0 && !isFixedLength && limit + offset < 256) {
+                algorithm = TSortAlgorithm.TOPN_SORT;
+            } else {
+                algorithm = TSortAlgorithm.FULL_SORT;
+            }
         }
         msg.sort_node.setAlgorithm(algorithm);
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index f72cfb14e35..350f8329881 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -178,6 +178,8 @@ public class SessionVariable implements Serializable, 
Writable {
     // when true, the partition column must be set to NOT NULL.
     public static final String ALLOW_PARTITION_COLUMN_NULLABLE = 
"allow_partition_column_nullable";
 
+    public static final String FORCE_SORT_ALGORITHM = "force_sort_algorithm";
+
     // runtime filter run mode
     public static final String RUNTIME_FILTER_MODE = "runtime_filter_mode";
     // Size in bytes of Bloom Filters used for runtime filters. Actual size of 
filter will
@@ -1065,6 +1067,11 @@ public class SessionVariable implements Serializable, 
Writable {
 
     @VariableMgr.VarAttr(name = ENABLE_REWRITE_ELEMENT_AT_TO_SLOT, fuzzy = 
true)
     private boolean enableRewriteElementAtToSlot = true;
+
+    @VariableMgr.VarAttr(name = FORCE_SORT_ALGORITHM, needForward = true, 
description = { "强制指定SortNode的排序算法",
+            "Force the sort algorithm of SortNode to be specified" })
+    public String forceSortAlgorithm = "";
+
     @VariableMgr.VarAttr(name = RUNTIME_FILTER_MODE, needForward = true)
     private String runtimeFilterMode = "GLOBAL";
 
@@ -1458,7 +1465,7 @@ public class SessionVariable implements Serializable, 
Writable {
     @VariableMgr.VarAttr(name = ENABLE_TWO_PHASE_READ_OPT, fuzzy = true)
     public boolean enableTwoPhaseReadOpt = true;
     @VariableMgr.VarAttr(name = TOPN_OPT_LIMIT_THRESHOLD)
-    public long topnOptLimitThreshold = 1024;
+    public long topnOptLimitThreshold = 10240000;
     @VariableMgr.VarAttr(name = ENABLE_SNAPSHOT_POINT_QUERY)
     public boolean enableSnapshotPointQuery = true;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to