This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d67b398b53 [opt](nereids) catch all exceptions in StatsCalculator 
(#49415)
7d67b398b53 is described below

commit 7d67b398b53772e17c267984c24c4ab8bc3a8a1b
Author: minghong <zhoumingh...@selectdb.com>
AuthorDate: Thu Apr 3 17:51:21 2025 +0800

    [opt](nereids) catch all exceptions in StatsCalculator (#49415)
    
    ### What problem does this PR solve?
    catch all exceptions in ExpressionEstimation so that any error in
    ExpressionEstimation does not break the query.
---
 .../doris/nereids/stats/ExpressionEstimation.java  | 22 +++++++++++++-----
 .../doris/nereids/stats/StatsCalculator.java       | 24 ++++++++++++++++++--
 .../doris/nereids/trees/plans/GroupPlan.java       |  2 +-
 .../java/org/apache/doris/qe/SessionVariable.java  |  7 ++++++
 .../apache/doris/statistics/ColumnStatistic.java   | 26 ++++++++++++++++++++++
 .../nereids/stats/ExpressionEstimationTest.java    |  9 ++++++++
 6 files changed, 82 insertions(+), 8 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
index 86c1c08463e..e0779970784 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
@@ -95,12 +95,15 @@ import 
org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.ColumnStatisticBuilder;
 import org.apache.doris.statistics.Statistics;
 
 import com.google.common.base.Preconditions;
 import org.apache.commons.collections.CollectionUtils;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
 
 import java.time.Instant;
 import java.time.LocalDate;
@@ -112,7 +115,7 @@ import java.util.List;
  * Used to estimate for expressions that not producing boolean value.
  */
 public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, 
Statistics> {
-
+    public static final Logger LOG = 
LogManager.getLogger(ExpressionEstimation.class);
     public static final long DAYS_FROM_0_TO_1970 = 719528;
     public static final long DAYS_FROM_0_TO_9999 = 3652424;
     private static final ExpressionEstimation INSTANCE = new 
ExpressionEstimation();
@@ -121,11 +124,20 @@ public class ExpressionEstimation extends 
ExpressionVisitor<ColumnStatistic, Sta
      * returned columnStat is newly created or a copy of stats
      */
     public static ColumnStatistic estimate(Expression expression, Statistics 
stats) {
-        ColumnStatistic columnStatistic = expression.accept(INSTANCE, stats);
-        if (columnStatistic == null) {
-            return ColumnStatistic.UNKNOWN;
+        try {
+            ColumnStatistic columnStatistic = expression.accept(INSTANCE, 
stats);
+            if (columnStatistic == null) {
+                return 
ColumnStatistic.createUnknownByDataType(expression.getDataType());
+            }
+            return columnStatistic;
+        } catch (Exception e) {
+            // in regression test, feDebug is true so that the exception is 
thrown in order to detect problems.
+            if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().feDebug) {
+                throw e;
+            }
+            LOG.warn("ExpressionEstimation failed : " + expression, e);
+            return 
ColumnStatistic.createUnknownByDataType(expression.getDataType());
         }
-        return columnStatistic;
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 72f5b997cba..6640e968c9e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -301,13 +301,33 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
 
     private void estimate() {
         Plan plan = groupExpression.getPlan();
-        Statistics newStats = plan.accept(this, null);
+        Statistics newStats;
+        try {
+            newStats = plan.accept(this, null);
+        } catch (Exception e) {
+            // throw exception in debug mode
+            if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().feDebug) {
+                throw e;
+            }
+            LOG.warn("stats calculation failed, plan " + plan.toString(), e);
+            // use unknown stats or the first child's stats
+            if (plan.children().isEmpty() || !(plan.child(0) instanceof 
GroupPlan)) {
+                Map<Expression, ColumnStatistic> columnStatisticMap = new 
HashMap<>();
+                for (Slot slot : plan.getOutput()) {
+                    columnStatisticMap.put(slot, 
ColumnStatistic.createUnknownByDataType(slot.getDataType()));
+                }
+                newStats = new Statistics(1, 1, columnStatisticMap);
+            } else {
+                newStats = ((GroupPlan) plan.child(0)).getStats();
+            }
+        }
         newStats.normalizeColumnStatistics();
 
         // We ensure that the rowCount remains unchanged in order to make the 
cost of each plan comparable.
+        final Statistics tmpStats = newStats;
         if (groupExpression.getOwnerGroup().getStatistics() == null) {
             boolean isReliable = 
groupExpression.getPlan().getExpressions().stream()
-                    .noneMatch(e -> 
newStats.isInputSlotsUnknown(e.getInputSlots()));
+                    .noneMatch(e -> 
tmpStats.isInputSlotsUnknown(e.getInputSlots()));
             groupExpression.getOwnerGroup().setStatsReliable(isReliable);
             groupExpression.getOwnerGroup().setStatistics(newStats);
         } else {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java
index 49056add395..b45610da53e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java
@@ -62,7 +62,7 @@ public class GroupPlan extends LogicalLeaf implements 
BlockFuncDepsPropagation {
 
     @Override
     public Statistics getStats() {
-        throw new IllegalStateException("GroupPlan can not invoke getStats()");
+        return group.getStatistics();
     }
 
     @Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index be0019c79c9..b7985bcfa53 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -2269,6 +2269,12 @@ public class SessionVariable implements Serializable, 
Writable {
                 "use other health replica when the use_fix_replica meet error" 
})
     public boolean fallbackOtherReplicaWhenFixedCorrupt = false;
 
+    public static final String FE_DEBUG = "fe_debug";
+    @VariableMgr.VarAttr(name = FE_DEBUG, needForward = true, fuzzy = true,
+            description = {"when set true, FE will throw exceptions instead 
swallow them. This is used for test",
+                    "when set true, FE will throw exceptions instead swallow 
them. This is used for test"})
+    public boolean feDebug = false;
+
     @VariableMgr.VarAttr(name = SHOW_ALL_FE_CONNECTION,
             description = {"when it's true show processlist statement list all 
fe's connection",
                     "当变量为true时,show processlist命令展示所有fe的连接"})
@@ -2496,6 +2502,7 @@ public class SessionVariable implements Serializable, 
Writable {
     @SuppressWarnings("checkstyle:Indentation")
     public void initFuzzyModeVariables() {
         Random random = new SecureRandom();
+        this.feDebug = true;
         this.parallelPipelineTaskNum = random.nextInt(8);
         this.parallelPrepareThreshold = random.nextInt(32) + 1;
         this.enableCommonExprPushdown = random.nextBoolean();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index 43b1a994f72..e0e99fcdfad 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -22,6 +22,8 @@ import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.datasource.InternalCatalog;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.types.coercion.CharacterType;
 import org.apache.doris.persist.gson.GsonUtils;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
@@ -369,4 +371,28 @@ public class ColumnStatistic {
     public ColumnStatistic withAvgSizeByte(double avgSizeByte) {
         return new 
ColumnStatisticBuilder(this).setAvgSizeByte(avgSizeByte).build();
     }
+
+    public static ColumnStatistic createUnknownByDataType(DataType dataType) {
+        if (dataType instanceof CharacterType) {
+            return new ColumnStatisticBuilder(1)
+                    .setAvgSizeByte(Math.max(1, Math.min(dataType.width(), 
CharacterType.DEFAULT_WIDTH)))
+                    .setNdv(1)
+                    .setNumNulls(1)
+                    .setMaxValue(Double.POSITIVE_INFINITY)
+                    .setMinValue(Double.NEGATIVE_INFINITY)
+                    .setIsUnknown(true)
+                    .setUpdatedTime("")
+                    .build();
+        } else {
+            return new ColumnStatisticBuilder(1)
+                    .setAvgSizeByte(dataType.width())
+                    .setNdv(1)
+                    .setNumNulls(1)
+                    .setMaxValue(Double.POSITIVE_INFINITY)
+                    .setMinValue(Double.NEGATIVE_INFINITY)
+                    .setIsUnknown(true)
+                    .setUpdatedTime("")
+                    .build();
+        }
+    }
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java
index 91da5192b48..39ff8a10d4c 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java
@@ -447,4 +447,13 @@ class ExpressionEstimationTest {
         Assertions.assertEquals(est.avgSizeByte, 1);
         Assertions.assertEquals(est.numNulls, 1);
     }
+
+    @Test
+    public void testThrowException() {
+        SlotReference a = new SlotReference("a", StringType.INSTANCE);
+        Cast cast = new Cast(a, DateType.INSTANCE);
+        // do not throw any exception
+        ColumnStatistic est = ExpressionEstimation.estimate(cast, null);
+        Assertions.assertTrue(est.isUnKnown());
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to