This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5aee0cc8d12 [fix](planner) query should be cancelled if limit reached (#44338)
5aee0cc8d12 is described below

commit 5aee0cc8d122f30499f39c060ff20747ce362cbf
Author: Mingyu Chen (Rayner) <morning...@163.com>
AuthorDate: Mon Dec 9 16:55:19 2024 -0800

    [fix](planner) query should be cancelled if limit reached (#44338)
    
    ### What problem does this PR solve?
    
    Problem Summary:
    When a SQL query contains a `limit` clause and the FE has already received
    more than `limit` rows, it should send a cancel command to the BEs to stop
    them from reading more data.
    However, this mechanism is broken in the current code and does not work.
    Especially for external table queries, this can cause a large amount of
    unnecessary network I/O.
    
    1. `isBlockQuery`
    
    In the old optimizer, `isBlockQuery` is marked true if the query statement
    contains a `sort` or `agg` node, and false otherwise.
    In the new optimizer, this value is always true.

    Regardless of old or new optimizer, this logic is wrong, yet the reach-limit
    logic is only triggered when `isBlockQuery = false`.
    
    2. Calling problem of the reach-limit logic

    The reach-limit check is only performed when the rowBatch returned by a BE
    has `eos = true`. This is wrong: for a `limit N` query, each BE applies its
    own limit of N, so the FE may receive far more than N rows in total before
    every BE reaches eos. For the FE, the reach-limit logic can be triggered as
    soon as the total number of rows returned by all BEs reaches N, so the check
    should not be performed only when `eos = true`. A minimal sketch of the
    intended check is shown below.
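
    The following sketch is illustrative only: the class and the simulated batch
    sizes are made up, and the helper is a simplified stand-in for the
    `LimitUtils.cancelIfReachLimit` added by this PR. It shows the intended
    FE-side behavior: accumulate row counts from all BE batches and cancel the
    query as soon as the total reaches the limit, instead of waiting for `eos`.

    ```java
    import java.util.List;
    import java.util.function.Consumer;

    public class ReachLimitSketch {
        // Cancel once the total number of received rows reaches the limit.
        // A limit <= 0 means "no limit", so nothing is cancelled in that case.
        static boolean cancelIfReachLimit(long limitRows, long numReceivedRows,
                                          Consumer<String> cancelFunc) {
            if (limitRows > 0 && numReceivedRows >= limitRows) {
                cancelFunc.accept("LIMIT_REACH: query reach limit");
                return true;
            }
            return false;
        }

        public static void main(String[] args) {
            long limit = 10;
            long received = 0;
            // Simulate batches arriving from several BEs; each BE applies its own
            // "limit 10", so without this check the FE could receive all 24 rows.
            for (int rows : List.of(4, 4, 4, 4, 4, 4)) {
                received += rows;
                if (cancelIfReachLimit(limit, received,
                        reason -> System.out.println("cancel: " + reason))) {
                    break; // stop fetching; the remaining BE work is cancelled
                }
            }
            System.out.println("rows received before cancel: " + received); // 12
        }
    }
    ```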
    
    The PR makes the following main changes:
    
    1. Remove `isBlockQuery`
    
    `isBlockQuery` is only used in the reach-limit logic and is not needed, so it
    is removed completely.
    
    2. Modify where the reach-limit check is performed.

        The reach-limit logic is now checked whenever the number of rows received
        by the FE exceeds the limit, not only when `eos = true`.
    
    3. Fix wrong `limitRows` in `QueryProcessor`.

        `limitRows` should be taken from the first fragment, not the last one.
    
    4. In the scanner scheduler on the BE side, if the scanner has a limit,
    ignore the per-round scan-bytes threshold (see the sketch after this list).
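
    The rule in change 4 is implemented in C++ in `scanner_scheduler.cpp` (see the
    diff below); the Java sketch here only models the idea, and the byte threshold
    and batch size values are assumptions, not Doris defaults.

    ```java
    public class ScannerRoundSketch {
        public static void main(String[] args) {
            long rawBytesThreshold = 4 * 1024 * 1024; // per-round byte budget (assumed value)
            int batchSize = 4096;                     // ctx->batch_size() (assumed value)
            long limit = 10;                          // scanner->limit(); <= 0 means no limit
            long rawBytesRead = 0;
            boolean eos = false;
            int blocksRead = 0;

            while (!eos && rawBytesRead < rawBytesThreshold) {
                rawBytesRead += 1024; // pretend one small block was read
                blocksRead++;
                // New rule: a limit smaller than the batch size means a handful of rows
                // is enough, so end this round immediately instead of reading blocks
                // until the byte threshold is filled.
                if (limit > 0 && limit < batchSize) {
                    break;
                }
            }
            System.out.println("blocks read this round: " + blocksRead); // 1 instead of ~4096
        }
    }
    ```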
    
    ### Release note
    
    [fix](planner) query should be cancelled if limit reached
---
 be/src/vec/exec/scan/scanner_scheduler.cpp         | 13 ++++-
 be/src/vec/exec/scan/vscanner.h                    |  2 +
 .../org/apache/doris/nereids/NereidsPlanner.java   |  5 --
 .../org/apache/doris/planner/OriginalPlanner.java  | 15 ------
 .../java/org/apache/doris/planner/Planner.java     |  6 ---
 .../main/java/org/apache/doris/qe/Coordinator.java | 25 ++++-----
 .../org/apache/doris/qe/CoordinatorContext.java    |  6 +--
 .../main/java/org/apache/doris/qe/LimitUtils.java  | 54 ++++++++++++++++++++
 .../qe/runtime/MultiFragmentsPipelineTask.java     |  5 +-
 .../apache/doris/qe/runtime/QueryProcessor.java    | 25 ++++-----
 .../java/org/apache/doris/qe/LimitUtilsTest.java   | 59 ++++++++++++++++++++++
 11 files changed, 152 insertions(+), 63 deletions(-)

diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp 
b/be/src/vec/exec/scan/scanner_scheduler.cpp
index 7c5aa8db0a7..f419f58037a 100644
--- a/be/src/vec/exec/scan/scanner_scheduler.cpp
+++ b/be/src/vec/exec/scan/scanner_scheduler.cpp
@@ -268,7 +268,7 @@ void 
ScannerScheduler::_scanner_scan(std::shared_ptr<ScannerContext> ctx,
             }
 
             size_t raw_bytes_threshold = config::doris_scanner_row_bytes;
-            size_t raw_bytes_read = 0; bool first_read = true;
+            size_t raw_bytes_read = 0; bool first_read = true; int64_t limit = scanner->limit();
             while (!eos && raw_bytes_read < raw_bytes_threshold) {
                 if (UNLIKELY(ctx->done())) {
                     eos = true;
@@ -316,6 +316,17 @@ void 
ScannerScheduler::_scanner_scan(std::shared_ptr<ScannerContext> ctx,
                     ctx->inc_block_usage(free_block->allocated_bytes());
                     
scan_task->cached_blocks.emplace_back(std::move(free_block), free_block_bytes);
                 }
+                if (limit > 0 && limit < ctx->batch_size()) {
+                    // If this scanner has a limit and it is less than the batch size,
+                    // return immediately instead of waiting for raw_bytes_threshold.
+                    // This saves time when each scanner only returns a small number of rows
+                    // but the rows from all scanners together are enough.
+                    // Without this break, a query like "select * from tbl where id=1 limit 10"
+                    // may scan a lot of data when the filter ratio of "id=1" is high.
+                    // If the limit is larger than the batch size, this rule is skipped
+                    // to avoid a large user-specified limit producing too many small blocks.
+                    break;
+                }
             } // end for while
 
             if (UNLIKELY(!status.ok())) {
diff --git a/be/src/vec/exec/scan/vscanner.h b/be/src/vec/exec/scan/vscanner.h
index 6c4f3294ce1..bb68055e1f0 100644
--- a/be/src/vec/exec/scan/vscanner.h
+++ b/be/src/vec/exec/scan/vscanner.h
@@ -156,6 +156,8 @@ public:
         _query_statistics = query_statistics;
     }
 
+    int64_t limit() const { return _limit; }
+
 protected:
     void _discard_conjuncts() {
         for (auto& conjunct : _conjuncts) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index 58af5cd3e92..16fe1353fac 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -722,11 +722,6 @@ public class NereidsPlanner extends Planner {
         return plan;
     }
 
-    @Override
-    public boolean isBlockQuery() {
-        return true;
-    }
-
     @Override
     public DescriptorTable getDescTable() {
         return descTable;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java
index 503cb181766..feb8c45787a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java
@@ -79,10 +79,6 @@ public class OriginalPlanner extends Planner {
         this.analyzer = analyzer;
     }
 
-    public boolean isBlockQuery() {
-        return isBlockQuery;
-    }
-
     public PlannerContext getPlannerContext() {
         return plannerContext;
     }
@@ -276,17 +272,6 @@ public class OriginalPlanner extends Planner {
 
         if (queryStmt instanceof SelectStmt) {
             SelectStmt selectStmt = (SelectStmt) queryStmt;
-            if (queryStmt.getSortInfo() != null || selectStmt.getAggInfo() != 
null) {
-                isBlockQuery = true;
-                if (LOG.isDebugEnabled()) {
-                    LOG.debug("this is block query");
-                }
-            } else {
-                isBlockQuery = false;
-                if (LOG.isDebugEnabled()) {
-                    LOG.debug("this isn't block query");
-                }
-            }
             if (selectStmt.isTwoPhaseReadOptEnabled()) {
                 // Optimize query like `SELECT ... FROM <tbl> WHERE ... ORDER 
BY ... LIMIT ...`
                 if (singleNodePlan instanceof SortNode
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/Planner.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/Planner.java
index 5617ad57e8f..cfcd27af8fa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/Planner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/Planner.java
@@ -44,8 +44,6 @@ public abstract class Planner {
 
     protected ArrayList<PlanFragment> fragments = Lists.newArrayList();
 
-    protected boolean isBlockQuery = false;
-
     protected TQueryOptions queryOptions;
 
     public abstract List<ScanNode> getScanNodes();
@@ -116,10 +114,6 @@ public abstract class Planner {
         return fragments;
     }
 
-    public boolean isBlockQuery() {
-        return isBlockQuery;
-    }
-
     public TQueryOptions getQueryOptions() {
         return queryOptions;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
index 262b5836689..4905050e6e8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
@@ -235,8 +235,6 @@ public class Coordinator implements CoordInterface {
     // same as backend_exec_states_.size() after Exec()
     private final Set<TUniqueId> instanceIds = Sets.newHashSet();
 
-    private final boolean isBlockQuery;
-
     private int numReceivedRows = 0;
 
     private List<String> deltaUrls;
@@ -332,7 +330,6 @@ public class Coordinator implements CoordInterface {
     // Used for query/insert/test
     public Coordinator(ConnectContext context, Analyzer analyzer, Planner 
planner) {
         this.context = context;
-        this.isBlockQuery = planner.isBlockQuery();
         this.queryId = context.queryId();
         this.fragments = planner.getFragments();
         this.scanNodes = planner.getScanNodes();
@@ -375,7 +372,6 @@ public class Coordinator implements CoordInterface {
     // Constructor of Coordinator is too complicated.
     public Coordinator(Long jobId, TUniqueId queryId, DescriptorTable 
descTable, List<PlanFragment> fragments,
             List<ScanNode> scanNodes, String timezone, boolean 
loadZeroTolerance, boolean enableProfile) {
-        this.isBlockQuery = true;
         this.jobId = jobId;
         this.queryId = queryId;
         this.descTable = descTable.toThrift();
@@ -1202,23 +1198,22 @@ public class Coordinator implements CoordInterface {
             numReceivedRows += resultBatch.getBatch().getRowsSize();
         }
 
+        // if reached limit rows, cancel this query immediately
+        // to avoid BE from reading more data.
+        // ATTN: if change here, also need to change the same logic in QueryProcessor.getNext();
+        Long limitRows = fragments.get(0).getPlanRoot().getLimit();
+        boolean reachedLimit = LimitUtils.cancelIfReachLimit(
+                resultBatch, limitRows, numReceivedRows, this::cancelInternal);
+
         if (resultBatch.isEos()) {
             receivers.remove(receiver);
             if (receivers.isEmpty()) {
                 returnedAllResults = true;
-            } else {
+            } else if (!reachedLimit) {
+                // if reachedLimit is true, which means this query has been cancelled.
+                // so no need to set eos to false again.
                 resultBatch.setEos(false);
             }
-
-            // if this query is a block query do not cancel.
-            Long numLimitRows = fragments.get(0).getPlanRoot().getLimit();
-            boolean hasLimit = numLimitRows > 0;
-            if (!isBlockQuery && instanceIds.size() > 1 && hasLimit && 
numReceivedRows >= numLimitRows) {
-                if (LOG.isDebugEnabled()) {
-                    LOG.debug("no block query, return num >= limit rows, need 
cancel");
-                }
-                cancelInternal(new Status(TStatusCode.LIMIT_REACH, "query 
reach limit"));
-            }
         }
 
         if (!returnedAllResults) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/qe/CoordinatorContext.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/CoordinatorContext.java
index aed0fd9c98c..343456c296a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/CoordinatorContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/CoordinatorContext.java
@@ -84,7 +84,6 @@ public class CoordinatorContext {
     // these are some constant parameters
     public final NereidsCoordinator coordinator;
     public final List<PlanFragment> fragments;
-    public final boolean isBlockQuery;
     public final DataSink dataSink;
     public final ExecutionProfile executionProfile;
     public final ConnectContext connectContext;
@@ -120,7 +119,6 @@ public class CoordinatorContext {
     private CoordinatorContext(
             NereidsCoordinator coordinator,
             ConnectContext connectContext,
-            boolean isBlockQuery,
             List<PipelineDistributedPlan> distributedPlans,
             List<PlanFragment> fragments,
             List<RuntimeFilter> runtimeFilters,
@@ -131,7 +129,6 @@ public class CoordinatorContext {
             TQueryOptions queryOptions,
             TDescriptorTable descriptorTable) {
         this.connectContext = connectContext;
-        this.isBlockQuery = isBlockQuery;
         this.fragments = fragments;
         this.distributedPlans = distributedPlans;
         this.topDistributedPlan = distributedPlans.get(distributedPlans.size() 
- 1);
@@ -161,7 +158,6 @@ public class CoordinatorContext {
             TDescriptorTable descriptorTable,
             ExecutionProfile executionProfile) {
         this.coordinator = coordinator;
-        this.isBlockQuery = true;
         this.fragments = fragments;
         this.distributedPlans = distributedPlans;
         this.topDistributedPlan = distributedPlans.get(distributedPlans.size() 
- 1);
@@ -290,7 +286,7 @@ public class CoordinatorContext {
                         .collect(Collectors.toList())
         );
         return new CoordinatorContext(
-                coordinator, connectContext, planner.isBlockQuery(),
+                coordinator, connectContext,
                 planner.getDistributedPlans().valueList(),
                 planner.getFragments(), planner.getRuntimeFilters(), 
planner.getTopnFilters(),
                 planner.getScanNodes(), executionProfile, queryGlobals, 
queryOptions, descriptorTable
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/LimitUtils.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/LimitUtils.java
new file mode 100644
index 00000000000..cbbe5c71a0f
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/LimitUtils.java
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.qe;
+
+import org.apache.doris.common.Status;
+import org.apache.doris.thrift.TStatusCode;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.function.Consumer;
+
+/**
+ * This is a utility class for limit related operations.
+ * Because there are currently 2 places that need to check limit rows, the logic is put here for unification.
+ *  - Coordinator.getNext();
+ *  - QueryProcessor.getNext();
+ */
+public class LimitUtils {
+    private static final Logger LOG = LogManager.getLogger(LimitUtils.class);
+    private static final Status LIMIT_REACH_STATUS = new Status(TStatusCode.LIMIT_REACH, "query reach limit");
+
+    // if reached limit rows, cancel this query immediately
+    // to avoid BE from reading more data.
+    public static boolean cancelIfReachLimit(RowBatch resultBatch, long limitRows, long numReceivedRows,
+            Consumer<Status> cancelFunc) {
+        boolean reachedLimit = false;
+        if (limitRows > 0 && numReceivedRows >= limitRows) {
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("reach limit rows: {}, received rows: {}, cancel query", limitRows, numReceivedRows);
+            }
+            cancelFunc.accept(LIMIT_REACH_STATUS);
+            // set eos so the caller treats this as the last batch and stops fetching
+            resultBatch.setEos(true);
+            reachedLimit = true;
+        }
+        return reachedLimit;
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/MultiFragmentsPipelineTask.java
 
b/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/MultiFragmentsPipelineTask.java
index a1a91aab517..92b2a00597b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/MultiFragmentsPipelineTask.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/MultiFragmentsPipelineTask.java
@@ -130,9 +130,10 @@ public class MultiFragmentsPipelineTask extends 
AbstractRuntimeTask<Integer, Sin
                                 LOG.warn("Failed to cancel query {} backend: 
{}, reason: {}",
                                         DebugUtil.printId(queryId), backend, 
status.toString());
                             }
+                        } else {
+                            LOG.warn("Failed to cancel query {} backend: {} reason: {}",
+                                    DebugUtil.printId(queryId), backend, "without status");
                         }
-                        LOG.warn("Failed to cancel query {} backend: {} 
reason: {}",
-                                DebugUtil.printId(queryId), backend, "without 
status");
                     }
 
                     public void onFailure(Throwable t) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/QueryProcessor.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/QueryProcessor.java
index a5a5100faec..6eb19d250ed 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/QueryProcessor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/QueryProcessor.java
@@ -28,12 +28,12 @@ import org.apache.doris.planner.ResultSink;
 import org.apache.doris.qe.AbstractJobProcessor;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.CoordinatorContext;
+import org.apache.doris.qe.LimitUtils;
 import org.apache.doris.qe.ResultReceiver;
 import org.apache.doris.qe.RowBatch;
 import org.apache.doris.rpc.RpcException;
 import org.apache.doris.thrift.TNetworkAddress;
 import org.apache.doris.thrift.TReportExecStatusParams;
-import org.apache.doris.thrift.TStatusCode;
 
 import com.google.common.base.Strings;
 import com.google.common.collect.Lists;
@@ -64,7 +64,7 @@ public class QueryProcessor extends AbstractJobProcessor {
                 Objects.requireNonNull(runningReceivers, "runningReceivers can 
not be null")
         );
 
-        this.limitRows = 
coordinatorContext.fragments.get(coordinatorContext.fragments.size() - 1)
+        this.limitRows = coordinatorContext.fragments.get(0)
                 .getPlanRoot()
                 .getLimit();
     }
@@ -145,22 +145,19 @@ public class QueryProcessor extends AbstractJobProcessor {
             numReceivedRows += resultBatch.getBatch().getRowsSize();
         }
 
+        // if reached limit rows, cancel this query immediately
+        // to avoid BE from reading more data.
+        // ATTN: if change here, also need to change the same logic in Coordinator.getNext();
+        boolean reachedLimit = LimitUtils.cancelIfReachLimit(
+                resultBatch, limitRows, numReceivedRows, coordinatorContext::cancelSchedule);
+
         if (resultBatch.isEos()) {
             runningReceivers.remove(receiver);
-            if (!runningReceivers.isEmpty()) {
+            // if reachedLimit is true, which means this query has been cancelled.
+            // so no need to set eos to false again.
+            if (!runningReceivers.isEmpty() && !reachedLimit) {
                 resultBatch.setEos(false);
             }
-
-            // if this query is a block query do not cancel.
-            boolean hasLimit = limitRows > 0;
-            if (!coordinatorContext.isBlockQuery
-                    && coordinatorContext.instanceNum.get() > 1
-                    && hasLimit && numReceivedRows >= limitRows) {
-                if (LOG.isDebugEnabled()) {
-                    LOG.debug("no block query, return num >= limit rows, need 
cancel");
-                }
-                coordinatorContext.cancelSchedule(new 
Status(TStatusCode.LIMIT_REACH, "query reach limit"));
-            }
         }
 
         if (!runningReceivers.isEmpty()) {
diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/LimitUtilsTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/qe/LimitUtilsTest.java
new file mode 100644
index 00000000000..012fbad18a5
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/qe/LimitUtilsTest.java
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.qe;
+
+
+import org.apache.doris.common.Status;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.function.Consumer;
+
+public class LimitUtilsTest {
+
+    private static int res = 0;
+
+    @Test
+    public void testUpperBound() {
+        Consumer<Status> cancelFunc = batch -> res = 666;
+        RowBatch rowBatch = new RowBatch();
+        rowBatch.setEos(false);
+        // - no limit
+        Assert.assertFalse(LimitUtils.cancelIfReachLimit(rowBatch, 0, 10, cancelFunc));
+        Assert.assertFalse(rowBatch.isEos());
+        Assert.assertEquals(0, res);
+
+        // - not reach limit
+        Assert.assertFalse(LimitUtils.cancelIfReachLimit(rowBatch, 10, 1, cancelFunc));
+        Assert.assertFalse(rowBatch.isEos());
+        Assert.assertEquals(0, res);
+
+        // - reach limit
+        Assert.assertTrue(LimitUtils.cancelIfReachLimit(rowBatch, 10, 10, cancelFunc));
+        Assert.assertTrue(rowBatch.isEos());
+        Assert.assertEquals(666, res);
+
+        // - reach limit
+        res = 0;
+        rowBatch.setEos(false);
+        Assert.assertTrue(LimitUtils.cancelIfReachLimit(rowBatch, 10, 100, cancelFunc));
+        Assert.assertTrue(rowBatch.isEos());
+        Assert.assertEquals(666, res);
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org
