This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c74a21c7af8 [fix](mv) Fix sync mv could not be chosen by cbo stable
because stats upload from be not in time (#58720)
c74a21c7af8 is described below
commit c74a21c7af88a9e53e202b743899aeab4c87b227
Author: seawinde <[email protected]>
AuthorDate: Fri Dec 5 18:01:27 2025 +0800
[fix](mv) Fix sync mv could not be chosen by cbo stable because stats
upload from be not in time (#58720)
This PR introduces two key modifications to improve the reliability of
the optimizer (CBO) and the testing pipeline concerning materialized
views:
1. Fixes unstable selection of synchronous materialized views in the
pipeline: The root cause was that the statistical information of the
base table was not reported promptly. This delay could lead the CBO to
estimate selectedPartitionsRowCount as zero for the materialized view,
causing it to be incorrectly bypassed during query planning. The fix
involves manually injecting the statistical information for the
synchronous materialized view when available, ensuring the CBO can make
a stable and accurate assessment of its cost.
2. Removes the fuzzy value setting for
preMaterializedViewRewriteStrategy in SessionVariable: The test cases
for the TRY_IN_RBO strategy have already been extended to cover
scenarios equivalent to both NOT_IN_RBO and FORCE_IN_RBO. Retaining the
fuzzy option can introduce unnecessary instability into the pipeline.
For instance, a PR might pass tests when the strategy is TRY_IN_RBO but
fail if the pipeline randomly selects NOT_IN_RBO, leading to
inconsistent results. Removing this fuzzy setting helps eliminate such
non-deterministic failures and enhances pipeline stability.
---
.../org/apache/doris/nereids/stats/StatsCalculator.java | 6 ++++++
.../main/java/org/apache/doris/qe/SessionVariable.java | 15 ---------------
.../mv_p0/ssb/multiple_no_where/multiple_no_where.groovy | 2 ++
.../suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy | 4 ++++
.../ssb/multiple_ssb_between/multiple_ssb_between.groovy | 6 +++++-
regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy | 1 +
regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy | 1 +
regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy | 1 +
regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy | 1 +
regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy | 1 +
10 files changed, 22 insertions(+), 16 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 0d71f690676..652a4801eb2 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -543,6 +543,12 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
optStats.isPresent(), tableRowCount,
olapTable.getQualifiedName());
if (optStats.isPresent()) {
double selectedPartitionsRowCount =
getSelectedPartitionRowCount(olapScan, tableRowCount);
+ if (isRegisteredRowCount(olapScan)) {
+ // If a row count is injected for the materialized view,
use it to fix the issue where
+ // the materialized view cannot be selected by cbo stable
due to selectedPartitionsRowCount being 0,
+ // which is caused by delayed statistics reporting.
+ selectedPartitionsRowCount = tableRowCount;
+ }
LOG.info("computeOlapScan optStats is {},
selectedPartitionsRowCount is {}", optStats.get(),
selectedPartitionsRowCount);
// if estimated mv rowCount is more than actual row count,
fall back to base table stats
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 0f23ab57185..3fe022b2244 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -32,7 +32,6 @@ import org.apache.doris.nereids.metrics.Event;
import org.apache.doris.nereids.metrics.EventSwitchParser;
import org.apache.doris.nereids.parser.Dialect;
import org.apache.doris.nereids.rules.RuleType;
-import
org.apache.doris.nereids.rules.exploration.mv.PreMaterializedViewRewriter.PreRewriteStrategy;
import org.apache.doris.nereids.rules.expression.ExpressionRuleType;
import org.apache.doris.planner.GroupCommitBlockSink;
import org.apache.doris.qe.VariableMgr.VarAttr;
@@ -3301,20 +3300,6 @@ public class SessionVariable implements Serializable,
Writable {
this.enableReserveMemory = randomInt % 5 != 0;
}
- // random pre materialized view rewrite strategy
- randomInt = random.nextInt(3);
- switch (randomInt % 3) {
- case 0:
- this.preMaterializedViewRewriteStrategy =
PreRewriteStrategy.NOT_IN_RBO.name();
- break;
- case 1:
- this.preMaterializedViewRewriteStrategy =
PreRewriteStrategy.TRY_IN_RBO.name();
- break;
- case 2:
- default:
- this.preMaterializedViewRewriteStrategy =
PreRewriteStrategy.FORCE_IN_RBO.name();
- break;
- }
setFuzzyForCatalog(random);
}
diff --git
a/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy
b/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy
index a7b3145dddc..0213a993be0 100644
---
a/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy
+++
b/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy
@@ -107,6 +107,8 @@ suite ("multiple_no_where") {
qt_select_star "select * from lineorder_flat order by 1,2, P_MFGR;"
sql """analyze table lineorder_flat with sync;"""
+ sql """alter table lineorder_flat modify column C_CITY set stats
('row_count'='7');"""
+ sql """alter table lineorder_flat modify column a3 set stats
('row_count'='1');"""
sql """set enable_stats=false;"""
mv_rewrite_success("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
diff --git a/regression-test/suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy
b/regression-test/suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy
index 22e53a58a2d..97f31ed3b58 100644
--- a/regression-test/suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy
+++ b/regression-test/suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy
@@ -154,6 +154,10 @@ suite ("multiple_ssb") {
sql """analyze table lineorder_flat with sync;"""
sql """alter table lineorder_flat modify column LO_ORDERDATE set stats
('row_count'='8');"""
+ sql """alter table lineorder_flat modify column a1 set stats
('row_count'='1');"""
+ sql """alter table lineorder_flat modify column a4 set stats
('row_count'='1');"""
+ sql """alter table lineorder_flat modify column a6 set stats
('row_count'='1');"""
+ sql """alter table lineorder_flat modify column x2 set stats
('row_count'='1');"""
sql """set enable_stats=false;"""
mv_rewrite_success("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
diff --git
a/regression-test/suites/mv_p0/ssb/multiple_ssb_between/multiple_ssb_between.groovy
b/regression-test/suites/mv_p0/ssb/multiple_ssb_between/multiple_ssb_between.groovy
index 2eec2f8907a..6127b97dbf8 100644
---
a/regression-test/suites/mv_p0/ssb/multiple_ssb_between/multiple_ssb_between.groovy
+++
b/regression-test/suites/mv_p0/ssb/multiple_ssb_between/multiple_ssb_between.groovy
@@ -153,7 +153,11 @@ suite ("multiple_ssb_between") {
sql """set enable_stats=true;"""
sql """alter table lineorder_flat modify column LO_ORDERDATE set stats
('row_count'='8');"""
-
+ sql """alter table lineorder_flat modify column a1 set stats
('row_count'='1');"""
+ sql """alter table lineorder_flat modify column a4 set stats
('row_count'='1');"""
+ sql """alter table lineorder_flat modify column a6 set stats
('row_count'='1');"""
+ sql """alter table lineorder_flat modify column x2 set stats
('row_count'='1');"""
+
mv_rewrite_success("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE
diff --git a/regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy
b/regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy
index d5abacca118..2ba3eb6d9b4 100644
--- a/regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy
+++ b/regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy
@@ -102,6 +102,7 @@ suite ("mv_ssb_q_1_1") {
sql "analyze table lineorder_flat with sync;"
sql """alter table lineorder_flat modify column C_CITY set stats
('row_count'='6');"""
+ sql """alter table lineorder_flat modify column a1 set stats
('row_count'='1');"""
mv_rewrite_success("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE
diff --git a/regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy
b/regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy
index c40d2842228..e40c84f9b1f 100644
--- a/regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy
+++ b/regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy
@@ -94,6 +94,7 @@ suite ("mv_ssb_q_2_1") {
qt_select_star "select * from lineorder_flat order by 1,2,P_MFGR;"
sql """alter table lineorder_flat modify column LO_ORDERDATE set stats
('row_count'='2');"""
+ sql """alter table lineorder_flat modify column a2 set stats
('row_count'='1');"""
mv_rewrite_success("""SELECT
SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR,
diff --git a/regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy
b/regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy
index 564a5a1dbc7..0497c3609fd 100644
--- a/regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy
+++ b/regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy
@@ -99,6 +99,7 @@ suite ("mv_ssb_q_3_1") {
sql """analyze table lineorder_flat with sync;"""
sql """alter table lineorder_flat modify column LO_ORDERDATE set stats
('row_count'='2');"""
+ sql """alter table lineorder_flat modify column a1 set stats
('row_count'='1');"""
mv_rewrite_success("""SELECT
C_NATION,
diff --git a/regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy
b/regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy
index b5fa2332a9f..0f0bbc50a71 100644
--- a/regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy
+++ b/regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy
@@ -93,6 +93,7 @@ suite ("mv_ssb_q_4_1") {
sql """analyze table lineorder_flat with sync;"""
sql """alter table lineorder_flat modify column LO_ORDERDATE set stats
('row_count'='2');"""
+ sql """alter table lineorder_flat modify column a2 set stats
('row_count'='1');"""
mv_rewrite_success("""SELECT (LO_ORDERDATE DIV 10000) AS YEAR,
C_NATION,
diff --git a/regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy
b/regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy
index e36cd614520..1b9ed83d4ff 100644
--- a/regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy
+++ b/regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy
@@ -96,6 +96,7 @@ suite ("q_4_1_r1") {
sql """analyze table lineorder_flat with sync;"""
sql """alter table lineorder_flat modify column LO_ORDERDATE set stats
('row_count'='8');"""
+ sql """alter table lineorder_flat modify column a1 set stats
('row_count'='1');"""
sql """set enable_stats=false;"""
mv_rewrite_success("""SELECT (LO_ORDERDATE DIV 10000) AS YEAR,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]