This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9c7d8d2  [Bug] Fix bug that isPreAggregation is incorrectly set (#5608)
9c7d8d2 is described below

commit 9c7d8d2e989b5e706e8f940b34e672bfba5d4158
Author: Mingyu Chen <morningman....@gmail.com>
AuthorDate: Fri Apr 9 14:13:06 2021 +0800

    [Bug] Fix bug that isPreAggregation is incorrectly set (#5608)
    
    1. The MaterializedViewSelector should be reset for each scan node
    2. On the BE side, columns with delete conditions must be added to the 
return column.
---
 be/src/exec/olap_scanner.cpp                       |  1 +
 be/src/olap/reader.cpp                             |  3 ++-
 .../doris/planner/MaterializedViewSelector.java    | 21 ++++++++++++----
 .../org/apache/doris/planner/OlapScanNode.java     | 28 +++++++++++++++-------
 4 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 8d9b8a4..197febb 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -172,6 +172,7 @@ Status OlapScanner::_init_params(const 
std::vector<OlapScanRange*>& key_ranges,
     if (_aggregation || single_version) {
         _params.return_columns = _return_columns;
     } else {
+        // we need to fetch all key columns to do the right aggregation on 
storage engine side.
         for (size_t i = 0; i < _tablet->num_key_columns(); ++i) {
             _params.return_columns.push_back(i);
         }
diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp
index cb2c6e2..39081ee 100644
--- a/be/src/olap/reader.cpp
+++ b/be/src/olap/reader.cpp
@@ -458,7 +458,8 @@ OLAPStatus Reader::_init_params(const ReaderParams& 
read_params) {
 OLAPStatus Reader::_init_return_columns(const ReaderParams& read_params) {
     if (read_params.reader_type == READER_QUERY) {
         _return_columns = read_params.return_columns;
-        if (!_delete_handler.empty() && read_params.aggregation) {
+        if (!_delete_handler.empty()) {
+            // We need to fetch columns which there are deletion conditions on 
them.
             set<uint32_t> column_set(_return_columns.begin(), 
_return_columns.end());
             for (const auto& conds : _delete_handler.get_delete_conditions()) {
                 for (const auto& cond_column : conds.del_cond->columns()) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializedViewSelector.java
 
b/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializedViewSelector.java
index 13a63d4..ef30f91 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializedViewSelector.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializedViewSelector.java
@@ -83,8 +83,14 @@ public class MaterializedViewSelector {
     private Map<Long, Set<String>> columnNamesInQueryOutput = 
Maps.newHashMap();
 
     private boolean disableSPJGView;
-    private String reasonOfDisable;
+
+    // The Following 2 variables should be reset each time before calling 
selectBestMV();
+    // Unlike the "isPreAggregation" in OlapScanNode which defaults to false,
+    // it defaults to true here. It is because in this class, we started to 
choose MV under the premise
+    // that the default base tables are duplicate key tables. For the 
aggregation key table,
+    // this variable will be set to false compensatively at the end.
     private boolean isPreAggregation = true;
+    private String reasonOfDisable;
 
     public MaterializedViewSelector(SelectStmt selectStmt, Analyzer analyzer) {
         this.selectStmt = selectStmt;
@@ -105,6 +111,7 @@ public class MaterializedViewSelector {
      * @return
      */
     public BestIndexInfo selectBestMV(ScanNode scanNode) throws UserException {
+        resetPreAggregationVariables();
         long start = System.currentTimeMillis();
         Preconditions.checkState(scanNode instanceof OlapScanNode);
         OlapScanNode olapScanNode = (OlapScanNode) scanNode;
@@ -113,11 +120,18 @@ public class MaterializedViewSelector {
             return null;
         }
         long bestIndexId = priorities(olapScanNode, candidateIndexIdToSchema);
-        LOG.debug("The best materialized view is {} for scan node {} in query 
{}, cost {}",
-                 bestIndexId, scanNode.getId(), selectStmt.toSql(), 
(System.currentTimeMillis() - start));
+        LOG.debug("The best materialized view is {} for scan node {} in query 
{}, " +
+                        "isPreAggregation: {}, reasonOfDisable: {}, cost {}",
+                bestIndexId, scanNode.getId(), selectStmt.toSql(), 
isPreAggregation, reasonOfDisable,
+                (System.currentTimeMillis() - start));
         return new BestIndexInfo(bestIndexId, isPreAggregation, 
reasonOfDisable);
     }
 
+    private void resetPreAggregationVariables() {
+        isPreAggregation = true;
+        reasonOfDisable = null;
+    }
+
     private Map<Long, List<Column>> predicates(OlapScanNode scanNode) throws 
AnalysisException {
         // Step1: all of predicates is compensating predicates
         Map<Long, MaterializedIndexMeta> candidateIndexIdToMeta = 
scanNode.getOlapTable().getVisibleIndexIdToMeta();
@@ -448,7 +462,6 @@ public class MaterializedViewSelector {
         for (TupleId tupleId : tupleIds) {
             TupleDescriptor tupleDescriptor = analyzer.getTupleDesc(tupleId);
             tupleDescriptor.getTableIdToColumnNames(columnNamesInQueryOutput);
-
         }
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
index b2c4a61..4d260ae 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@@ -65,9 +65,6 @@ import org.apache.doris.thrift.TScanRange;
 import org.apache.doris.thrift.TScanRangeLocation;
 import org.apache.doris.thrift.TScanRangeLocations;
 
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-
 import com.google.common.base.Joiner;
 import com.google.common.base.MoreObjects;
 import com.google.common.base.Preconditions;
@@ -76,6 +73,9 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Range;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -106,6 +106,19 @@ public class OlapScanNode extends ScanNode {
      * Query2: select k1, min(v1) from table group by k1
      * This aggregation function in query is min which different from the 
schema.
      * So the data stored in storage engine need to be merged firstly before 
returning to scan node.
+     *
+     * There are currently two places to modify this variable:
+     * 1. The turnOffPreAgg() method of SingleNodePlanner.
+     *      This method will only be called on the left deepest OlapScanNode 
the plan tree,
+     *      while other nodes are false by default (because the Aggregation 
operation is executed after Join,
+     *      we cannot judge whether other OlapScanNodes can close the 
pre-aggregation).
+     *      So even the Duplicate key table, if it is not the left deepest 
node, it will remain false too.
+     *
+     * 2. After MaterializedViewSelector selects the materialized view, the 
updateScanRangeInfoByNewMVSelector()\
+     *    method of OlapScanNode may be called to update this variable.
+     *      This call will be executed on all ScanNodes in the plan tree. In 
this step,
+     *      for the DuplicateKey table, the variable will be set to true.
+     *      See comment of "isPreAggregation" variable in 
MaterializedViewSelector for details.
      */
     private boolean isPreAggregation = false;
     private String reasonOfPreAggregation = null;
@@ -227,8 +240,7 @@ public class OlapScanNode extends ScanNode {
 
         if (update) {
             this.selectedIndexId = selectedIndexId;
-            this.isPreAggregation = isPreAggregation;
-            this.reasonOfPreAggregation = reasonOfDisable;
+            setIsPreAggregation(isPreAggregation, reasonOfDisable);
             updateColumnType();
             LOG.info("Using the new scan range info instead of the old one. 
{}, {}", situation ,scanRangeInfo);
         } else {
@@ -630,11 +642,11 @@ public class OlapScanNode extends ScanNode {
         }
         msg.node_type = TPlanNodeType.OLAP_SCAN_NODE;
         msg.olap_scan_node =
-              new TOlapScanNode(desc.getId().asInt(), keyColumnNames, 
keyColumnTypes, isPreAggregation);
+                new TOlapScanNode(desc.getId().asInt(), keyColumnNames, 
keyColumnTypes, isPreAggregation);
         if (null != sortColumn) {
             msg.olap_scan_node.setSortColumn(sortColumn);
         }
-       msg.olap_scan_node.setKeyType(olapTable.getKeysType().toThrift());
+        msg.olap_scan_node.setKeyType(olapTable.getKeysType().toThrift());
     }
 
     // export some tablets
@@ -647,7 +659,7 @@ public class OlapScanNode extends ScanNode {
         olapScanNode.selectedPartitionNum = 1;
         olapScanNode.selectedTabletsNum = 1;
         olapScanNode.totalTabletsNum = 1;
-        olapScanNode.isPreAggregation = false;
+        olapScanNode.setIsPreAggregation(false, "Export job");
         olapScanNode.isFinalized = true;
         olapScanNode.result.addAll(locationsList);
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to