github-actions[bot] commented on code in PR #64559:
URL: https://github.com/apache/doris/pull/64559#discussion_r3488871516


##########
fe/fe-core/src/test/java/org/apache/doris/nereids/stats/MemoStatsAndCostRecomputerTest.java:
##########
@@ -0,0 +1,477 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.stats;
+
+import org.apache.doris.common.IdGenerator;
+import org.apache.doris.nereids.memo.Group;
+import org.apache.doris.nereids.memo.GroupExpression;
+import org.apache.doris.nereids.memo.GroupId;
+import org.apache.doris.nereids.properties.DataTrait;
+import org.apache.doris.nereids.properties.LogicalProperties;
+import org.apache.doris.nereids.trees.expressions.EqualTo;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.plans.JoinType;
+import org.apache.doris.nereids.trees.plans.RelationId;
+import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
+import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.statistics.ColumnStatistic;
+import org.apache.doris.statistics.ColumnStatisticBuilder;
+import org.apache.doris.statistics.Statistics;
+import org.apache.doris.statistics.StatisticsBuilder;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Tests for MemoStatsAndCostRecomputer trust join counting.
+ */
+public class MemoStatsAndCostRecomputerTest {
+
+    private MemoStatsAndCostRecomputer recomputer;
+    private IdGenerator<GroupId> idGenerator;
+
+    @BeforeEach
+    public void setUp() {
+        recomputer = new MemoStatsAndCostRecomputer(null, null);
+        idGenerator = GroupId.createGenerator();
+    }
+
+    // Use LogicalOneRowRelation as a leaf plan — it's a real concrete plan 
class
+    // that doesn't need catalog setup and is not a Join (so isTrustJoin 
returns false).
+
+    /**
+     * Create a leaf Group (no join) with row count statistics.
+     */
+    private Group createLeafGroup(int rowCount) {
+        LogicalOneRowRelation leafPlan = new LogicalOneRowRelation(
+                new RelationId(idGenerator.getNextId().asInt()), 
ImmutableList.of());
+        GroupExpression leafExpr = new GroupExpression(leafPlan);
+        Group group = new Group(idGenerator.getNextId(), leafExpr,
+                new LogicalProperties(ArrayList::new, () -> 
DataTrait.EMPTY_TRAIT));
+        group.setStatistics(new 
StatisticsBuilder().setRowCount(rowCount).build());
+        return group;
+    }
+
+    /**
+     * Create a Group containing a LogicalJoin with the given children, join 
type,
+     * equal predicates, and statistics for the children.
+     *
+     * The created Group contains a single logical expression (the join).
+     * Child Groups must already have statistics set.
+     */
+    private Group createJoinGroup(Group leftChild, Group rightChild,
+            JoinType joinType, List<SlotReference> leftKeys, 
List<SlotReference> rightKeys,
+            ColumnStatistic leftKeyStats, ColumnStatistic rightKeyStats) {
+        // Build equal predicates
+        List<EqualTo> equalPredicates = new ArrayList<>();
+        for (int i = 0; i < leftKeys.size(); i++) {
+            equalPredicates.add(new EqualTo(leftKeys.get(i), 
rightKeys.get(i)));
+        }
+
+        // Set column statistics on child groups
+        Statistics leftStats = leftChild.getStatistics();
+        if (leftStats != null && leftKeyStats != null) {
+            for (SlotReference key : leftKeys) {
+                leftStats.addColumnStats(key, leftKeyStats);
+            }
+        }
+        Statistics rightStats = rightChild.getStatistics();
+        if (rightStats != null && rightKeyStats != null) {
+            for (SlotReference key : rightKeys) {
+                rightStats.addColumnStats(key, rightKeyStats);
+            }
+        }
+
+        LogicalJoin join = new LogicalJoin(joinType, equalPredicates,
+                leftChild.getGroupPlan(), rightChild.getGroupPlan(), null);
+        GroupExpression joinExpr = new GroupExpression(join, 
Lists.newArrayList(leftChild, rightChild));
+        Group joinGroup = new Group(idGenerator.getNextId(), joinExpr,
+                new LogicalProperties(ArrayList::new, () -> 
DataTrait.EMPTY_TRAIT));
+        return joinGroup;
+    }
+
+    /**
+     * Create a Group containing the given logical expressions (multiple 
alternatives).
+     */
+    private Group createGroupWithAlternatives(List<GroupExpression> 
expressions) {
+        Assertions.assertFalse(expressions.isEmpty());
+        Group group = new Group(idGenerator.getNextId(), expressions.get(0),
+                new LogicalProperties(ArrayList::new, () -> 
DataTrait.EMPTY_TRAIT));
+        for (int i = 1; i < expressions.size(); i++) {
+            group.addLogicalExpression(expressions.get(i));
+        }
+        return group;
+    }
+
+    // ========== Test cases ==========
+
+    @Test
+    public void testIsTrustJoin_knownStats_returnsTrue() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+
+        Group leftGroup = createLeafGroup(100);
+        Group rightGroup = createLeafGroup(80);
+
+        // NDV/rowCount = 95/100 = 0.95 > 0.9 threshold → trustable
+        ColumnStatistic trustableStats = new ColumnStatisticBuilder(100)
+                .setNdv(95).setAvgSizeByte(4).setNumNulls(0)
+                .setMinValue(1).setMaxValue(100).build();
+
+        Group joinGroup = createJoinGroup(leftGroup, rightGroup,
+                JoinType.INNER_JOIN,
+                ImmutableList.of(a), ImmutableList.of(b),
+                trustableStats, trustableStats);
+
+        GroupExpression joinExpr = joinGroup.getFirstLogicalExpression();
+        Assertions.assertTrue(recomputer.isTrustJoin(joinExpr));
+    }
+
+    @Test
+    public void testIsTrustJoin_crossJoin_returnsFalse() {
+        Group leftGroup = createLeafGroup(100);
+        Group rightGroup = createLeafGroup(80);
+
+        // Cross join has no equal predicates
+        LogicalJoin crossJoin = new LogicalJoin(JoinType.CROSS_JOIN,
+                ImmutableList.of(), leftGroup.getGroupPlan(), 
rightGroup.getGroupPlan(), null);
+        GroupExpression crossJoinExpr = new GroupExpression(crossJoin,
+                Lists.newArrayList(leftGroup, rightGroup));
+        Group crossJoinGroup = new Group(idGenerator.getNextId(), 
crossJoinExpr,
+                new LogicalProperties(ArrayList::new, () -> 
DataTrait.EMPTY_TRAIT));
+
+        GroupExpression expr = crossJoinGroup.getFirstLogicalExpression();
+        Assertions.assertFalse(recomputer.isTrustJoin(expr));
+    }
+
+    @Test
+    public void testIsTrustJoin_unknownStats_returnsFalse() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+
+        Group leftGroup = createLeafGroup(100);
+        Group rightGroup = createLeafGroup(80);
+
+        // NDV/rowCount = 10/100 = 0.1 < 0.9 threshold → not trustable
+        ColumnStatistic lowNdvStats = new ColumnStatisticBuilder(100)

Review Comment:
   This test still does not cover the unknown-stats case that the production 
guard is fixing. `lowNdvStats` is a normal known statistic (`isUnKnown=false`), 
and with the current 100/80 child row counts the NDV ratio stays below the 
trust threshold even if the `eqLeftColStats.isUnKnown || 
eqRightColStats.isUnKnown` check in 
`JoinEstimation.hasTrustableEqualCondition()` is removed. The regression needs 
to use `ColumnStatistic.UNKNOWN` (or `setIsUnknown(true)`) on a one-row side, 
e.g. `leftGroup = createLeafGroup(1)`, so the old behavior would evaluate `1 / 
nonZeroDivisor(1) > 0.9` and incorrectly mark the join as trusted.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to