This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 5aee0cc8d12 [fix](planner) query should be cancelled if limit reached (#44338) 5aee0cc8d12 is described below commit 5aee0cc8d122f30499f39c060ff20747ce362cbf Author: Mingyu Chen (Rayner) <morning...@163.com> AuthorDate: Mon Dec 9 16:55:19 2024 -0800 [fix](planner) query should be cancelled if limit reached (#44338) ### What problem does this PR solve? Problem Summary: When there is a `limit` clause in SQL, if FE has obtained data with more than the `limit` number of rows, it should send a cancel command to BE to cancel the query to prevent BE from reading more data. However, this function has problems in the current code and does not work. Especially in external table query, this may result in lots of unnecessary network I/O reads. 1. `isBlockQuery` In the old optimizer, if a query statement contains a `sort` or `agg` node, `isBlockQuery` will be marked as true, otherwise it will be false. In the new optimizer, this value is always true. Regardless of the old or new optimizer, this logic is wrong. But only when `isBlockQuery = false` will the reach limit logic be triggered. 2. Calling problem of reach limit logic The reach limit logic judgment will only be performed when `eos = true` in the rowBatch returned by BE. This is wrong. Because for `limit N` queries, each BE's own `limit` is N. But for FE, as long as the total number of rows returned by all BEs exceeds N, the reach limit logic can be triggered. So it should not be processed only when `eos = true`. The PR mainly changes: 1. Remove `isBlockQuery` `isBlockQuery` is only used in the reach limit logic. And it is not needed. Remove it completely. 2. Modify the judgment position of reach limit. When the number of rows obtained by FE is greater than the limit, it will check the reach limit logic. 3. fix wrong `limitRows` in `QueryProcessor` the limitRows should be obtained from the first fragment, not the last. 4. 
In scanner scheduler on BE side, if scanner has limit, ignore the scan bytes threshold per round. ### Release note [fix](planner) query should be cancelled if limit reached --- be/src/vec/exec/scan/scanner_scheduler.cpp | 13 ++++- be/src/vec/exec/scan/vscanner.h | 2 + .../org/apache/doris/nereids/NereidsPlanner.java | 5 -- .../org/apache/doris/planner/OriginalPlanner.java | 15 ------ .../java/org/apache/doris/planner/Planner.java | 6 --- .../main/java/org/apache/doris/qe/Coordinator.java | 25 ++++----- .../org/apache/doris/qe/CoordinatorContext.java | 6 +-- .../main/java/org/apache/doris/qe/LimitUtils.java | 54 ++++++++++++++++++++ .../qe/runtime/MultiFragmentsPipelineTask.java | 5 +- .../apache/doris/qe/runtime/QueryProcessor.java | 25 ++++----- .../java/org/apache/doris/qe/LimitUtilsTest.java | 59 ++++++++++++++++++++++ 11 files changed, 152 insertions(+), 63 deletions(-) diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp b/be/src/vec/exec/scan/scanner_scheduler.cpp index 7c5aa8db0a7..f419f58037a 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.cpp +++ b/be/src/vec/exec/scan/scanner_scheduler.cpp @@ -268,7 +268,7 @@ void ScannerScheduler::_scanner_scan(std::shared_ptr<ScannerContext> ctx, } size_t raw_bytes_threshold = config::doris_scanner_row_bytes; - size_t raw_bytes_read = 0; bool first_read = true; + size_t raw_bytes_read = 0; bool first_read = true; int64_t limit = scanner->limit(); while (!eos && raw_bytes_read < raw_bytes_threshold) { if (UNLIKELY(ctx->done())) { eos = true; @@ -316,6 +316,17 @@ void ScannerScheduler::_scanner_scan(std::shared_ptr<ScannerContext> ctx, ctx->inc_block_usage(free_block->allocated_bytes()); scan_task->cached_blocks.emplace_back(std::move(free_block), free_block_bytes); } + if (limit > 0 && limit < ctx->batch_size()) { + // If this scanner has limit, and less than batch size, + // return immediately and no need to wait raw_bytes_threshold. 
+ // This can save time that each scanner may only return a small number of rows, + // but rows are enough from all scanners. + // If not break, the query like "select * from tbl where id=1 limit 10" + // may scan a lot data when the "id=1"'s filter ratio is high. + // If limit is larger than batch size, this rule is skipped, + // to avoid user specify a large limit and causing too much small blocks. + break; + } } // end for while if (UNLIKELY(!status.ok())) { diff --git a/be/src/vec/exec/scan/vscanner.h b/be/src/vec/exec/scan/vscanner.h index 6c4f3294ce1..bb68055e1f0 100644 --- a/be/src/vec/exec/scan/vscanner.h +++ b/be/src/vec/exec/scan/vscanner.h @@ -156,6 +156,8 @@ public: _query_statistics = query_statistics; } + int64_t limit() const { return _limit; } + protected: void _discard_conjuncts() { for (auto& conjunct : _conjuncts) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index 58af5cd3e92..16fe1353fac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -722,11 +722,6 @@ public class NereidsPlanner extends Planner { return plan; } - @Override - public boolean isBlockQuery() { - return true; - } - @Override public DescriptorTable getDescTable() { return descTable; diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java index 503cb181766..feb8c45787a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java @@ -79,10 +79,6 @@ public class OriginalPlanner extends Planner { this.analyzer = analyzer; } - public boolean isBlockQuery() { - return isBlockQuery; - } - public PlannerContext getPlannerContext() { return plannerContext; } @@ -276,17 
+272,6 @@ public class OriginalPlanner extends Planner { if (queryStmt instanceof SelectStmt) { SelectStmt selectStmt = (SelectStmt) queryStmt; - if (queryStmt.getSortInfo() != null || selectStmt.getAggInfo() != null) { - isBlockQuery = true; - if (LOG.isDebugEnabled()) { - LOG.debug("this is block query"); - } - } else { - isBlockQuery = false; - if (LOG.isDebugEnabled()) { - LOG.debug("this isn't block query"); - } - } if (selectStmt.isTwoPhaseReadOptEnabled()) { // Optimize query like `SELECT ... FROM <tbl> WHERE ... ORDER BY ... LIMIT ...` if (singleNodePlan instanceof SortNode diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/Planner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/Planner.java index 5617ad57e8f..cfcd27af8fa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/Planner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/Planner.java @@ -44,8 +44,6 @@ public abstract class Planner { protected ArrayList<PlanFragment> fragments = Lists.newArrayList(); - protected boolean isBlockQuery = false; - protected TQueryOptions queryOptions; public abstract List<ScanNode> getScanNodes(); @@ -116,10 +114,6 @@ public abstract class Planner { return fragments; } - public boolean isBlockQuery() { - return isBlockQuery; - } - public TQueryOptions getQueryOptions() { return queryOptions; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index 262b5836689..4905050e6e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -235,8 +235,6 @@ public class Coordinator implements CoordInterface { // same as backend_exec_states_.size() after Exec() private final Set<TUniqueId> instanceIds = Sets.newHashSet(); - private final boolean isBlockQuery; - private int numReceivedRows = 0; private List<String> deltaUrls; @@ -332,7 +330,6 @@ public class 
Coordinator implements CoordInterface { // Used for query/insert/test public Coordinator(ConnectContext context, Analyzer analyzer, Planner planner) { this.context = context; - this.isBlockQuery = planner.isBlockQuery(); this.queryId = context.queryId(); this.fragments = planner.getFragments(); this.scanNodes = planner.getScanNodes(); @@ -375,7 +372,6 @@ public class Coordinator implements CoordInterface { // Constructor of Coordinator is too complicated. public Coordinator(Long jobId, TUniqueId queryId, DescriptorTable descTable, List<PlanFragment> fragments, List<ScanNode> scanNodes, String timezone, boolean loadZeroTolerance, boolean enableProfile) { - this.isBlockQuery = true; this.jobId = jobId; this.queryId = queryId; this.descTable = descTable.toThrift(); @@ -1202,23 +1198,22 @@ public class Coordinator implements CoordInterface { numReceivedRows += resultBatch.getBatch().getRowsSize(); } + // if reached limit rows, cancel this query immediately + // to avoid BE from reading more data. + // ATTN: if change here, also need to change the same logic in QueryProcessor.getNext(); + Long limitRows = fragments.get(0).getPlanRoot().getLimit(); + boolean reachedLimit = LimitUtils.cancelIfReachLimit( + resultBatch, limitRows, numReceivedRows, this::cancelInternal); + if (resultBatch.isEos()) { receivers.remove(receiver); if (receivers.isEmpty()) { returnedAllResults = true; - } else { + } else if (!reachedLimit) { + // if reachedLimit is true, which means this query has been cancelled. + // so no need to set eos to false again. resultBatch.setEos(false); } - - // if this query is a block query do not cancel. 
- Long numLimitRows = fragments.get(0).getPlanRoot().getLimit(); - boolean hasLimit = numLimitRows > 0; - if (!isBlockQuery && instanceIds.size() > 1 && hasLimit && numReceivedRows >= numLimitRows) { - if (LOG.isDebugEnabled()) { - LOG.debug("no block query, return num >= limit rows, need cancel"); - } - cancelInternal(new Status(TStatusCode.LIMIT_REACH, "query reach limit")); - } } if (!returnedAllResults) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/CoordinatorContext.java b/fe/fe-core/src/main/java/org/apache/doris/qe/CoordinatorContext.java index aed0fd9c98c..343456c296a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/CoordinatorContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/CoordinatorContext.java @@ -84,7 +84,6 @@ public class CoordinatorContext { // these are some constant parameters public final NereidsCoordinator coordinator; public final List<PlanFragment> fragments; - public final boolean isBlockQuery; public final DataSink dataSink; public final ExecutionProfile executionProfile; public final ConnectContext connectContext; @@ -120,7 +119,6 @@ public class CoordinatorContext { private CoordinatorContext( NereidsCoordinator coordinator, ConnectContext connectContext, - boolean isBlockQuery, List<PipelineDistributedPlan> distributedPlans, List<PlanFragment> fragments, List<RuntimeFilter> runtimeFilters, @@ -131,7 +129,6 @@ public class CoordinatorContext { TQueryOptions queryOptions, TDescriptorTable descriptorTable) { this.connectContext = connectContext; - this.isBlockQuery = isBlockQuery; this.fragments = fragments; this.distributedPlans = distributedPlans; this.topDistributedPlan = distributedPlans.get(distributedPlans.size() - 1); @@ -161,7 +158,6 @@ public class CoordinatorContext { TDescriptorTable descriptorTable, ExecutionProfile executionProfile) { this.coordinator = coordinator; - this.isBlockQuery = true; this.fragments = fragments; this.distributedPlans = distributedPlans; this.topDistributedPlan = 
distributedPlans.get(distributedPlans.size() - 1); @@ -290,7 +286,7 @@ public class CoordinatorContext { .collect(Collectors.toList()) ); return new CoordinatorContext( - coordinator, connectContext, planner.isBlockQuery(), + coordinator, connectContext, planner.getDistributedPlans().valueList(), planner.getFragments(), planner.getRuntimeFilters(), planner.getTopnFilters(), planner.getScanNodes(), executionProfile, queryGlobals, queryOptions, descriptorTable diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/LimitUtils.java b/fe/fe-core/src/main/java/org/apache/doris/qe/LimitUtils.java new file mode 100644 index 00000000000..cbbe5c71a0f --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/LimitUtils.java @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.qe; + +import org.apache.doris.common.Status; +import org.apache.doris.thrift.TStatusCode; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.function.Consumer; + +/** + * This is a utility class for limit related operations. + * Because current there are 2 places need to check limit rows, so put the logic here for unification. 
+ * - Coordinator.getNext(); + * - QueryProcessor.getNext(); + */ +public class LimitUtils { + private static final Logger LOG = LogManager.getLogger(LimitUtils.class); + private static final Status LIMIT_REACH_STATUS = new Status(TStatusCode.LIMIT_REACH, "query reach limit"); + + // if reached limit rows, cancel this query immediately + // to avoid BE from reading more data. + public static boolean cancelIfReachLimit(RowBatch resultBatch, long limitRows, long numReceivedRows, + Consumer<Status> cancelFunc) { + boolean reachedLimit = false; + if (limitRows > 0 && numReceivedRows >= limitRows) { + if (LOG.isDebugEnabled()) { + LOG.debug("reach limit rows: {}, received rows: {}, cancel query", limitRows, numReceivedRows); + } + cancelFunc.accept(LIMIT_REACH_STATUS); + // set this + resultBatch.setEos(true); + reachedLimit = true; + } + return reachedLimit; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/MultiFragmentsPipelineTask.java b/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/MultiFragmentsPipelineTask.java index a1a91aab517..92b2a00597b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/MultiFragmentsPipelineTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/MultiFragmentsPipelineTask.java @@ -130,9 +130,10 @@ public class MultiFragmentsPipelineTask extends AbstractRuntimeTask<Integer, Sin LOG.warn("Failed to cancel query {} backend: {}, reason: {}", DebugUtil.printId(queryId), backend, status.toString()); } + } else { + LOG.warn("Failed to cancel query {} backend: {} reason: {}", + DebugUtil.printId(queryId), backend, "without status"); } - LOG.warn("Failed to cancel query {} backend: {} reason: {}", - DebugUtil.printId(queryId), backend, "without status"); } public void onFailure(Throwable t) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/QueryProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/QueryProcessor.java index a5a5100faec..6eb19d250ed 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/QueryProcessor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/QueryProcessor.java @@ -28,12 +28,12 @@ import org.apache.doris.planner.ResultSink; import org.apache.doris.qe.AbstractJobProcessor; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.CoordinatorContext; +import org.apache.doris.qe.LimitUtils; import org.apache.doris.qe.ResultReceiver; import org.apache.doris.qe.RowBatch; import org.apache.doris.rpc.RpcException; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TReportExecStatusParams; -import org.apache.doris.thrift.TStatusCode; import com.google.common.base.Strings; import com.google.common.collect.Lists; @@ -64,7 +64,7 @@ public class QueryProcessor extends AbstractJobProcessor { Objects.requireNonNull(runningReceivers, "runningReceivers can not be null") ); - this.limitRows = coordinatorContext.fragments.get(coordinatorContext.fragments.size() - 1) + this.limitRows = coordinatorContext.fragments.get(0) .getPlanRoot() .getLimit(); } @@ -145,22 +145,19 @@ public class QueryProcessor extends AbstractJobProcessor { numReceivedRows += resultBatch.getBatch().getRowsSize(); } + // if reached limit rows, cancel this query immediately + // to avoid BE from reading more data. + // ATTN: if change here, also need to change the same logic in Coordinator.getNext(); + boolean reachedLimit = LimitUtils.cancelIfReachLimit( + resultBatch, limitRows, numReceivedRows, coordinatorContext::cancelSchedule); + if (resultBatch.isEos()) { runningReceivers.remove(receiver); - if (!runningReceivers.isEmpty()) { + // if reachedLimit is true, which means this query has been cancelled. + // so no need to set eos to false again. + if (!runningReceivers.isEmpty() && !reachedLimit) { resultBatch.setEos(false); } - - // if this query is a block query do not cancel. 
- boolean hasLimit = limitRows > 0; - if (!coordinatorContext.isBlockQuery - && coordinatorContext.instanceNum.get() > 1 - && hasLimit && numReceivedRows >= limitRows) { - if (LOG.isDebugEnabled()) { - LOG.debug("no block query, return num >= limit rows, need cancel"); - } - coordinatorContext.cancelSchedule(new Status(TStatusCode.LIMIT_REACH, "query reach limit")); - } } if (!runningReceivers.isEmpty()) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/LimitUtilsTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/LimitUtilsTest.java new file mode 100644 index 00000000000..012fbad18a5 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/qe/LimitUtilsTest.java @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.qe; + + +import org.apache.doris.common.Status; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.function.Consumer; + +public class LimitUtilsTest { + + private static int res = 0; + + @Test + public void testUpperBound() { + Consumer<Status> cancelFunc = batch -> res = 666; + RowBatch rowBatch = new RowBatch(); + rowBatch.setEos(false); + // - no limit + Assert.assertFalse(LimitUtils.cancelIfReachLimit(rowBatch, 0, 10, cancelFunc)); + Assert.assertFalse(rowBatch.isEos()); + Assert.assertEquals(0, res); + + // - not reach limit + Assert.assertFalse(LimitUtils.cancelIfReachLimit(rowBatch, 10, 1, cancelFunc)); + Assert.assertFalse(rowBatch.isEos()); + Assert.assertEquals(0, res); + + // - reach limit + Assert.assertTrue(LimitUtils.cancelIfReachLimit(rowBatch, 10, 10, cancelFunc)); + Assert.assertTrue(rowBatch.isEos()); + Assert.assertEquals(666, res); + + // - reach limit + res = 0; + rowBatch.setEos(false); + Assert.assertTrue(LimitUtils.cancelIfReachLimit(rowBatch, 10, 100, cancelFunc)); + Assert.assertTrue(rowBatch.isEos()); + Assert.assertEquals(666, res); + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org