This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new c7f20198e93 [fix](mv) Fix stats unknown when calc sync mv plan
statistics (#58426) (#59232)
c7f20198e93 is described below
commit c7f20198e9339dadc3e9b4b3ca1ddbe48729d099
Author: seawinde <[email protected]>
AuthorDate: Tue Dec 23 16:49:40 2025 +0800
[fix](mv) Fix stats unknown when calc sync mv plan statistics (#58426)
(#59232)
pr: #58426
commitId: f6cae1d8
---
.../doris/nereids/stats/StatsCalculator.java | 4 +-
.../java/org/apache/doris/qe/SessionVariable.java | 2 +-
.../apache/doris/statistics/StatisticsCache.java | 10 +--
.../doris/nereids/stats/StatsCalculatorTest.java | 36 +++++++++
.../doris/statistics/StatisticsCacheTest.java | 90 ++++++++++++++++++++++
.../availability/materialized_view_switch.groovy | 16 +++-
6 files changed, 147 insertions(+), 11 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 652a4801eb2..fc95e3a4078 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -1155,7 +1155,7 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
}
private ColumnStatistic getColumnStatistic(TableIf table, String colName,
long idxId) {
- if (connectContext != null && connectContext.getState().isInternal()) {
+ if (connectContext != null &&
connectContext.getState().isPlanWithUnKnownColumnStats()) {
return ColumnStatistic.UNKNOWN;
}
long catalogId;
@@ -1187,7 +1187,7 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
private ColumnStatistic getColumnStatistic(
OlapTableStatistics olapTableStatistics, String colName,
List<String> partitionNames) {
- if (connectContext != null && connectContext.getState().isInternal()) {
+ if (connectContext != null &&
connectContext.getState().isPlanWithUnKnownColumnStats()) {
return ColumnStatistic.UNKNOWN;
}
OlapTable table = olapTableStatistics.olapTable;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index e9a1b534c8b..acd0f33aace 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -2495,7 +2495,7 @@ public class SessionVariable implements Serializable,
Writable {
public boolean enableMaterializedViewRewrite = true;
@VariableMgr.VarAttr(name = PRE_MATERIALIZED_VIEW_REWRITE_STRATEGY,
needForward = true, fuzzy = true,
- description = {"在 RBO 阶段基于结构信息的物化视图透明改写的策略,FORCE_IN_ROB:强制在 RBO
阶段透明改写,"
+ description = {"在 RBO 阶段基于结构信息的物化视图透明改写的策略,FORCE_IN_RBO:强制在 RBO
阶段透明改写,"
+ "TRY_IN_RBO:如果在 NEED_PRE_REWRITE_RULE_TYPES
中的规则改写成功了,那么就会尝试在 RBO 阶段透明改写"
+ "NOT_IN_RBO:不尝试在 RBO 阶段改写,只在 CBO 阶段改写",
"Whether to enable pre materialized view rewriting based
on struct info,"
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
index 4ba53cf1d91..e8a5c519660 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
@@ -93,7 +93,7 @@ public class StatisticsCache {
public ColumnStatistic getColumnStatistics(
long catalogId, long dbId, long tblId, long idxId, String colName,
ConnectContext ctx) {
- if (ctx != null && ctx.getState().isInternal()) {
+ if (ctx != null && ctx.getState().isPlanWithUnKnownColumnStats()) {
return ColumnStatistic.UNKNOWN;
}
// Need to change base index id to -1 for OlapTable.
@@ -130,7 +130,7 @@ public class StatisticsCache {
public PartitionColumnStatistic getPartitionColumnStatistics(long
catalogId, long dbId, long tblId, long idxId,
String partName, String
colName, ConnectContext ctx) {
- if (ctx != null && ctx.getState().isInternal()) {
+ if (ctx != null && ctx.getState().isPlanWithUnKnownColumnStats()) {
return PartitionColumnStatistic.UNKNOWN;
}
// Need to change base index id to -1 for OlapTable.
@@ -178,7 +178,7 @@ public class StatisticsCache {
private Optional<Histogram> getHistogram(long ctlId, long dbId, long
tblId, long idxId, String colName) {
ConnectContext ctx = ConnectContext.get();
- if (ctx != null && ctx.getState().isInternal()) {
+ if (ctx != null && ctx.getState().isPlanWithUnKnownColumnStats()) {
return Optional.empty();
}
StatisticsCacheKey k = new StatisticsCacheKey(ctlId, dbId, tblId,
idxId, colName);
@@ -398,7 +398,7 @@ public class StatisticsCache {
// this method can avoid compute table and select index id
public ColumnStatistic getColumnStatistics(String colName,
ConnectContext ctx) {
- if (ctx != null && ctx.getState().isInternal()) {
+ if (ctx != null && ctx.getState().isPlanWithUnKnownColumnStats()) {
return ColumnStatistic.UNKNOWN;
}
return doGetColumnStatistics(
@@ -408,7 +408,7 @@ public class StatisticsCache {
public PartitionColumnStatistic getPartitionColumnStatistics(
String partName, String colName, ConnectContext ctx) {
- if (ctx != null && ctx.getState().isInternal()) {
+ if (ctx != null && ctx.getState().isPlanWithUnKnownColumnStats()) {
return PartitionColumnStatistic.UNKNOWN;
}
return doGetPartitionColumnStatistics(
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
index c5d2f2fa737..075e3731977 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
@@ -51,6 +51,7 @@ import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.util.LogicalPlanBuilder;
import org.apache.doris.nereids.util.MemoTestUtils;
import org.apache.doris.nereids.util.PlanConstructor;
+import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.SessionVariable;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
@@ -572,4 +573,39 @@ public class StatsCalculatorTest {
// because table row count is -1, so disable join reorder
Assertions.assertTrue(cascadesContext.getConnectContext().getSessionVariable().isDisableJoinReorder());
}
+
+ @Test
+ public void testOlapScanWithPlanWithUnknownColumnStats() {
+ boolean prevFlag = false;
+ if (ConnectContext.get() != null) {
+ prevFlag =
ConnectContext.get().getState().isPlanWithUnKnownColumnStats();
+
ConnectContext.get().getState().setPlanWithUnKnownColumnStats(true);
+ }
+ try {
+ long tableId1 = 100;
+ OlapTable table1 = PlanConstructor.newOlapTable(tableId1,
"t_unknown", 0);
+ List<String> qualifier = ImmutableList.of("test", "t");
+ SlotReference slot1 = new SlotReference(new ExprId(0), "c1",
IntegerType.INSTANCE, true, qualifier,
+ table1, new Column("c1", PrimitiveType.INT),
+ table1, new Column("c1", PrimitiveType.INT));
+
+ LogicalOlapScan logicalOlapScan1 = (LogicalOlapScan) new
LogicalOlapScan(
+ StatementScopeIdGenerator.newRelationId(), table1,
+
Collections.emptyList()).withGroupExprLogicalPropChildren(Optional.empty(),
+ Optional.of(new LogicalProperties(() ->
ImmutableList.of(slot1), () -> DataTrait.EMPTY_TRAIT)), ImmutableList.of());
+
+ GroupExpression groupExpression = new
GroupExpression(logicalOlapScan1, ImmutableList.of());
+ Group ownerGroup = new Group(null, groupExpression, null);
+ StatsCalculator.estimate(groupExpression, null);
+ Statistics stats = ownerGroup.getStatistics();
+ Assertions.assertEquals(1, stats.columnStatistics().size());
+ ColumnStatistic colStat = stats.columnStatistics().get(slot1);
+ Assertions.assertTrue(colStat.isUnKnown);
+ } finally {
+ if (ConnectContext.get() != null) {
+
ConnectContext.get().getState().setPlanWithUnKnownColumnStats(prevFlag);
+ }
+ }
+ }
+
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsCacheTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsCacheTest.java
new file mode 100644
index 00000000000..cb1dffbe106
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsCacheTest.java
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.qe.ConnectContext;
+import org.apache.doris.utframe.UtFrameUtils;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+public class StatisticsCacheTest {
+
+ private ConnectContext ctx;
+
+ @BeforeEach
+ public void setUp() throws Exception {
+ if (ConnectContext.get() == null) {
+ ctx = UtFrameUtils.createDefaultCtx();
+ } else {
+ ctx = ConnectContext.get();
+ }
+ }
+
+ @Test
+ public void testGetColumnStatistics_withPlanWithUnknownColumnStats() {
+ Assumptions.assumeTrue(ConnectContext.get() != null, "ConnectContext
not available");
+
+ boolean prevFlag =
ConnectContext.get().getState().isPlanWithUnKnownColumnStats();
+ ConnectContext.get().getState().setPlanWithUnKnownColumnStats(true);
+ try {
+ StatisticsCache cache = new StatisticsCache();
+ ColumnStatistic stat = cache.getColumnStatistics(
+ 1L, 1L, 1L, -1L, "col", ConnectContext.get());
+ Assertions.assertEquals(ColumnStatistic.UNKNOWN, stat,
+ "Expect UNKNOWN when plan has unknown column stats");
+ } finally {
+
ConnectContext.get().getState().setPlanWithUnKnownColumnStats(prevFlag);
+ }
+ }
+
+ @Test
+ public void testGetHistogram_withPlanWithUnknownColumnStats() {
+ Assumptions.assumeTrue(ConnectContext.get() != null, "ConnectContext
not available");
+
+ boolean prevFlag =
ConnectContext.get().getState().isPlanWithUnKnownColumnStats();
+ ConnectContext.get().getState().setPlanWithUnKnownColumnStats(true);
+ try {
+ StatisticsCache cache = new StatisticsCache();
+ // public getHistogram returns null when underlying optional is
empty
+ Histogram hist = cache.getHistogram(1L, 1L, 1L, "col");
+ Assertions.assertNull(hist, "Expect null histogram when plan has
unknown column stats");
+ } finally {
+
ConnectContext.get().getState().setPlanWithUnKnownColumnStats(prevFlag);
+ }
+ }
+
+ @Test
+ public void
testGetPartitionColumnStatistics_withPlanWithUnknownColumnStats() {
+ Assumptions.assumeTrue(ConnectContext.get() != null, "ConnectContext
not available");
+
+ boolean prevFlag =
ConnectContext.get().getState().isPlanWithUnKnownColumnStats();
+ ConnectContext.get().getState().setPlanWithUnKnownColumnStats(true);
+ try {
+ StatisticsCache cache = new StatisticsCache();
+ PartitionColumnStatistic pstat =
cache.getPartitionColumnStatistics(
+ 1L, 1L, 1L, -1L, "p", "col", ConnectContext.get());
+ Assertions.assertEquals(PartitionColumnStatistic.UNKNOWN, pstat,
+ "Expect UNKNOWN partition col stat when plan has unknown
column stats");
+ } finally {
+
ConnectContext.get().getState().setPlanWithUnKnownColumnStats(prevFlag);
+ }
+ }
+}
diff --git
a/regression-test/suites/nereids_rules_p0/mv/availability/materialized_view_switch.groovy
b/regression-test/suites/nereids_rules_p0/mv/availability/materialized_view_switch.groovy
index 4af1d778c00..acfc699bbe3 100644
---
a/regression-test/suites/nereids_rules_p0/mv/availability/materialized_view_switch.groovy
+++
b/regression-test/suites/nereids_rules_p0/mv/availability/materialized_view_switch.groovy
@@ -143,7 +143,10 @@ suite("materialized_view_switch") {
where o_orderdate = '2023-12-10' order by 1, 2, 3, 4, 5;
"""
- async_mv_rewrite_success(db, mv_name, query, "mv_name_1")
+ async_mv_rewrite_success(db, mv_name, query, "mv_name_1", [NOT_IN_RBO])
+ // because compare total tree, mv filter can not push down to scan base
table in RBO mv rewrite as CBO mv prewrite,
+ // row count would be bigger than before
+ async_mv_rewrite_success_without_check_chosen(db, mv_name, query,
"mv_name_1", [TRY_IN_RBO, FORCE_IN_RBO])
sql """ DROP MATERIALIZED VIEW IF EXISTS mv_name_1"""
sql "SET enable_materialized_view_rewrite=false"
@@ -152,7 +155,10 @@ suite("materialized_view_switch") {
sql """ DROP MATERIALIZED VIEW IF EXISTS mv_name_2"""
sql "SET enable_materialized_view_rewrite=true"
- async_mv_rewrite_success(db, mv_name, query, "mv_name_3")
+ async_mv_rewrite_success(db, mv_name, query, "mv_name_3", [NOT_IN_RBO])
+ // because compare total tree, mv filter can not push down to scan base
table in RBO mv rewrite as CBO mv prewrite,
+ // row count would be bigger than before
+ async_mv_rewrite_success_without_check_chosen(db, mv_name, query,
"mv_name_3", [TRY_IN_RBO, FORCE_IN_RBO])
sql """ DROP MATERIALIZED VIEW IF EXISTS mv_name_3"""
// test when materialized_view_relation_mapping_max_count is 8
@@ -167,7 +173,11 @@ suite("materialized_view_switch") {
inner join lineitem t2 on t1.L_ORDERKEY = t2.L_ORDERKEY;
"""
order_qt_query1_0_before "${query1_0}"
- async_mv_rewrite_success(db, mv1_0, query1_0, "mv1_0")
+ async_mv_rewrite_success(db, mv1_0, query1_0, "mv1_0", [NOT_IN_RBO])
+ // because compare total tree, mv filter can not push down to scan base
table in RBO mv rewrite as CBO mv prewrite,
+ // row count would be bigger than before
+ async_mv_rewrite_success_without_check_chosen(db, mv1_0, query1_0,
"mv1_0", [TRY_IN_RBO, FORCE_IN_RBO])
+
order_qt_query1_0_after "${query1_0}"
sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_0"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]