github-actions[bot] commented on code in PR #64440:
URL: https://github.com/apache/doris/pull/64440#discussion_r3400425055
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java:
##########
@@ -380,20 +380,50 @@ private static boolean
isFunctionNullCheckPath(List<String> suffixPath) {
@Override
public Void visitMapContainsKey(MapContainsKey mapContainsKey,
CollectorContext context) {
+ // MAP_CONTAINS_KEY(<map>, <key>)
+ // Map argument: only the key sub-column is needed.
context.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_MAP_KEYS);
- return continueCollectAccessPath(mapContainsKey.getArgument(0),
context);
+ continueCollectAccessPath(mapContainsKey.getArgument(0), context);
+ // Key argument: visit with a fresh context to register its data
access paths.
+ // The key may reference nested sub-columns (e.g. element_at(s, 'a'))
whose
+ // full-data paths must be collected; otherwise an IS NULL / OFFSET
path on
+ // the same slot would cause NestedColumnPruning to prune to
metadata-only.
+ Expression keyArg = mapContainsKey.getArgument(1);
+ if (keyArg != null) {
+ CollectorContext keyCtx = new
CollectorContext(context.statementContext, context.bottomFilter);
+ continueCollectAccessPath(keyArg, keyCtx);
+ }
+ return null;
}
@Override
public Void visitMapContainsValue(MapContainsValue mapContainsValue,
CollectorContext context) {
+ // MAP_CONTAINS_VALUE(<map>, <value>)
+ // Map argument: only the value sub-column is needed.
context.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_MAP_VALUES);
- return continueCollectAccessPath(mapContainsValue.getArgument(0),
context);
+ continueCollectAccessPath(mapContainsValue.getArgument(0), context);
+ // Value argument: visit with a fresh context to register its data
access paths.
+ Expression valueArg = mapContainsValue.getArgument(1);
+ if (valueArg != null) {
+ CollectorContext valueCtx = new
CollectorContext(context.statementContext, context.bottomFilter);
+ continueCollectAccessPath(valueArg, valueCtx);
+ }
+ return null;
}
@Override
public Void visitMapContainsEntry(MapContainsEntry mapContainsEntry,
CollectorContext context) {
+ // MAP_CONTAINS_ENTRY(<map>, <entry>)
+ // Map argument: full access is needed (both keys and values).
context.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_ALL);
- return continueCollectAccessPath(mapContainsEntry.getArgument(0),
context);
+ continueCollectAccessPath(mapContainsEntry.getArgument(0), context);
+ // Entry argument: visit with a fresh context to register its data
access paths.
+ Expression entryArg = mapContainsEntry.getArgument(1);
Review Comment:
`MapContainsEntry` is a ternary function (`map_contains_entry(map, key,
value)`; the class implements `TernaryExpression` and its signature reads
`getArgument(2)`), but this visitor only collects argument 1. A query such as
`map_contains_entry(m, 'hello', element_at(s, 'b'))` together with
`element_at(s, 'b') IS NULL` still only records the `s.b.NULL` path for the
value expression and can prune `s.b` to null-only access, which is the same
wrong-result class this PR is fixing. Please visit both search arguments, for
example by collecting arguments `1..arity-1` with fresh contexts, and update
the comment to reflect the actual `(map, key, value)` shape.
##########
regression-test/suites/nereids_rules_p0/column_pruning/map_contains_arg_pruning.groovy:
##########
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression tests for map_contains_key / map_contains_value /
map_contains_entry
+// when the key/value/entry argument references nested sub-columns.
+//
+// Bug: visitMapContainsKey/Value/Entry only visited the map argument and
skipped
+// the key/value/entry argument. When the key is a nested sub-column expression
+// (e.g. element_at(s, 'a')) whose data path was not registered, and the same
+// sub-column also appears in IS NULL, NestedColumnPruning would prune it to
+// null-only metadata access, causing wrong results.
+
+suite("map_contains_arg_pruning") {
+ sql """ DROP TABLE IF EXISTS map_contains_arg_pruning_tbl """
+ sql """
+ CREATE TABLE map_contains_arg_pruning_tbl (
+ id INT,
+ s STRUCT<a: STRING, b: INT> NULL,
+ m MAP<STRING, INT> NULL,
+ v VARIANT NULL
+ ) ENGINE = OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES ("replication_allocation" = "tag.location.default: 1")
+ """
+ sql """
+ INSERT INTO map_contains_arg_pruning_tbl VALUES
+ (1, named_struct('a', 'hello', 'b', 100), {'hello': 1, 'world':
2}, '{"k": 42}'),
+ (2, named_struct('a', 'doris', 'b', 200), {'doris': 3},
NULL),
+ (3, named_struct('a', null, 'b', 300), NULL,
'{"x": 1}'),
+ (4, NULL, {},
'{}')
+ """
+
+ // ================================================================
+ // Case 1: map_contains_key + element_at IS NULL (original bug)
+ // map_contains_key(m, element_at(s, 'a')) needs full access to s.a
+ // as the key lookup value. Without fix, only [s.a.NULL] from
+ // element_at(s, 'a') IS NULL is registered.
+ // ================================================================
+ explain {
+ sql """
+ SELECT id,
+ element_at(s, 'a') IS NULL,
+ map_contains_key(m, element_at(s, 'a'))
+ FROM map_contains_arg_pruning_tbl ORDER BY id
+ """
+ contains "nested columns"
+ contains "s.a" // s.a should appear in access
paths
+ notContains "s.a.NULL" // should NOT be null-only
+ contains "m.KEYS" // map_contains_key needs KEYS
path
+ }
+
+ order_qt_case1 """
+ SELECT id,
+ element_at(s, 'a') IS NULL,
+ map_contains_key(m, element_at(s, 'a'))
+ FROM map_contains_arg_pruning_tbl ORDER BY id
+ """
+
+ // ================================================================
+ // Case 2: map_contains_value, value arg references sub-column
+ // map_contains_value(m, element_at(s, 'b')) needs s.b as value
+ // ================================================================
+ explain {
+ sql """
+ SELECT id,
+ element_at(s, 'b') IS NULL,
+ map_contains_value(m, element_at(s, 'b'))
+ FROM map_contains_arg_pruning_tbl ORDER BY id
+ """
+ contains "nested columns"
+ contains "s.b" // s.b should appear in access
paths
+ notContains "s.b.NULL" // should NOT be null-only
+ }
+
+ order_qt_case2 """
+ SELECT id,
+ element_at(s, 'b') IS NULL,
+ map_contains_value(m, element_at(s, 'b'))
+ FROM map_contains_arg_pruning_tbl ORDER BY id
+ """
+
+ // ================================================================
+ // Case 3: map_contains_entry, entry arg references sub-column
+ // map_contains_entry(m, element_at(s, 'a'), element_at(s, 'b'))
+ // needs both s.a and s.b
+ // Note: map_contains_entry syntax may vary; using the 2-arg form
+ // ================================================================
+ explain {
+ sql """
+ SELECT id,
+ element_at(s, 'a') IS NULL,
+ map_contains_key(m, element_at(s, 'a')),
Review Comment:
This case is labeled as `map_contains_entry`, but the query calls
`map_contains_key` and `map_contains_value`; it never exercises
`visitMapContainsEntry`. Because the entry function is ternary
(`map_contains_entry(m, key, value)`), this test would not catch the current
missing traversal of argument 2. Please add a real `map_contains_entry(m,
element_at(s, 'a'), element_at(s, 'b'))` case and assert both nested argument
paths are collected.
##########
regression-test/suites/nereids_rules_p0/column_pruning/map_contains_arg_pruning.groovy:
##########
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression tests for map_contains_key / map_contains_value /
map_contains_entry
+// when the key/value/entry argument references nested sub-columns.
+//
+// Bug: visitMapContainsKey/Value/Entry only visited the map argument and
skipped
+// the key/value/entry argument. When the key is a nested sub-column expression
+// (e.g. element_at(s, 'a')) whose data path was not registered, and the same
+// sub-column also appears in IS NULL, NestedColumnPruning would prune it to
+// null-only metadata access, causing wrong results.
+
+suite("map_contains_arg_pruning") {
+ sql """ DROP TABLE IF EXISTS map_contains_arg_pruning_tbl """
+ sql """
+ CREATE TABLE map_contains_arg_pruning_tbl (
+ id INT,
+ s STRUCT<a: STRING, b: INT> NULL,
+ m MAP<STRING, INT> NULL,
+ v VARIANT NULL
+ ) ENGINE = OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES ("replication_allocation" = "tag.location.default: 1")
+ """
+ sql """
+ INSERT INTO map_contains_arg_pruning_tbl VALUES
+ (1, named_struct('a', 'hello', 'b', 100), {'hello': 1, 'world':
2}, '{"k": 42}'),
+ (2, named_struct('a', 'doris', 'b', 200), {'doris': 3},
NULL),
+ (3, named_struct('a', null, 'b', 300), NULL,
'{"x": 1}'),
+ (4, NULL, {},
'{}')
+ """
+
+ // ================================================================
+ // Case 1: map_contains_key + element_at IS NULL (original bug)
+ // map_contains_key(m, element_at(s, 'a')) needs full access to s.a
+ // as the key lookup value. Without fix, only [s.a.NULL] from
+ // element_at(s, 'a') IS NULL is registered.
+ // ================================================================
+ explain {
+ sql """
+ SELECT id,
+ element_at(s, 'a') IS NULL,
+ map_contains_key(m, element_at(s, 'a'))
+ FROM map_contains_arg_pruning_tbl ORDER BY id
+ """
+ contains "nested columns"
+ contains "s.a" // s.a should appear in access
paths
+ notContains "s.a.NULL" // should NOT be null-only
+ contains "m.KEYS" // map_contains_key needs KEYS
path
+ }
+
+ order_qt_case1 """
Review Comment:
This new suite adds `order_qt_case1`/`case2`/`case3`, but the PR does not
add
`regression-test/data/nereids_rules_p0/column_pruning/map_contains_arg_pruning.out`.
In normal regression mode `quickRunTest` throws `Missing outputFile` when the
suite `.out` file is absent, so `run-regression-test.sh -d
nereids_rules_p0/column_pruning -s map_contains_arg_pruning` will fail unless
run with output generation enabled. Please add the generated `.out` file for
these quick-test blocks.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]