github-actions[bot] commented on code in PR #64535:
URL: https://github.com/apache/doris/pull/64535#discussion_r3418223773


##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java:
##########
@@ -409,49 +338,160 @@ && 
containsDataSkippingOnlyAccessPath(collectAccessPathResults)) {
         return result;
     }
 
-    private static boolean containsDataSkippingOnlyAccessPath(
+    private static boolean containsMetaPath(
             List<CollectAccessPathResult> collectAccessPathResults) {
         for (CollectAccessPathResult collectAccessPathResult : 
collectAccessPathResults) {
-            if 
(isDataSkippingOnlyAccessPath(collectAccessPathResult.getPath())) {
+            if (isMetaPath(collectAccessPathResult.getPath())) {
                 return true;
             }
         }
         return false;
     }
 
-    private static boolean isDataSkippingOnlyAccessPath(List<String> path) {
+    private static boolean isMetaPath(List<String> path) {
         if (path.isEmpty()) {
             return false;
         }
         String lastComponent = path.get(path.size() - 1);
         return AccessPathInfo.ACCESS_NULL.equals(lastComponent)
-                || AccessPathInfo.ACCESS_STRING_OFFSET.equals(lastComponent);
+                || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent);
+    }
+
+    private static void normalizeMapValueMetaOnlyAccessPaths(
+            Slot slot, DataTypeAccessTree accessTree,
+            Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> 
accessPaths) {
+        int slotId = slot.getExprId().asInt();
+        normalizeMapValueMetaPathHelper(slotId,
+                
accessTree.collectMapValueMetaOnlyAccessPaths(AccessPathInfo.ACCESS_OFFSET),
+                accessPaths, AccessPathInfo.ACCESS_OFFSET);
+        normalizeMapValueMetaPathHelper(slotId,
+                
accessTree.collectMapValueMetaOnlyAccessPaths(AccessPathInfo.ACCESS_NULL),
+                accessPaths, AccessPathInfo.ACCESS_NULL);
     }
 
     /**
-     * Decide whether an OFFSET-suffix path can be removed because another 
non-OFFSET path
-     * already covers the same container.
+     * Normalize map value meta-only (OFFSET or NULL) star access paths for a 
single meta type.
+     * <p>For each map prefix where {@code [prefix, *, META]} exists, replaces 
it with
+     * {@code [prefix, KEYS]} plus {@code [prefix, VALUES, META]}, so that 
keys are read fully
+     * for element lookup while values only read the metadata.
+     */
+    private static void normalizeMapValueMetaPathHelper(
+            int slotId, List<List<String>> mapPrefixes,
+            Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> 
accessPaths,
+            String metaSuffix) {
+        if (mapPrefixes.isEmpty()) {
+            return;
+        }
+
+        Collection<Pair<ColumnAccessPathType, List<String>>> slotPaths = 
accessPaths.get(slotId);
+        List<Pair<ColumnAccessPathType, List<String>>> pathsToRemove = new 
ArrayList<>();
+        List<Pair<ColumnAccessPathType, List<String>>> pathsToAdd = new 
ArrayList<>();
+        for (List<String> mapPrefix : mapPrefixes) {
+            List<String> starMetaPath = new ArrayList<>(mapPrefix);
+            starMetaPath.add(AccessPathInfo.ACCESS_ALL);
+            starMetaPath.add(metaSuffix);
+
+            for (Pair<ColumnAccessPathType, List<String>> p : slotPaths) {
+                if (!p.second.equals(starMetaPath)) {
+                    continue;
+                }
+                pathsToRemove.add(p);
+
+                List<String> keysPath = new ArrayList<>(mapPrefix);
+                keysPath.add(AccessPathInfo.ACCESS_MAP_KEYS);
+                pathsToAdd.add(Pair.of(p.first, keysPath));
+
+                List<String> valuesMetaPath = new ArrayList<>(mapPrefix);
+                valuesMetaPath.add(AccessPathInfo.ACCESS_MAP_VALUES);
+                valuesMetaPath.add(metaSuffix);
+                pathsToAdd.add(Pair.of(p.first, valuesMetaPath));
+            }
+        }
+        slotPaths.removeAll(pathsToRemove);
+        slotPaths.addAll(pathsToAdd);
+    }
+
+    /**
+     * Strip redundant metadata-only NULL/OFFSET paths while keeping enough 
real paths for BE readers.
+     *
+     * <p>The strip cases are:
+     * <ul>
+     *   <li>Map element lookup with value meta-only (OFFSET or NULL) access 
is normalized first,
+     *       e.g. {@code [m, *, OFFSET]} becomes {@code [m, KEYS]} plus
+     *       {@code [m, VALUES, OFFSET]}, and {@code [m, *, NULL]} becomes
+     *       {@code [m, KEYS]} plus {@code [m, VALUES, NULL]}. This is handled 
by
+     *       {@link #normalizeMapValueMetaOnlyAccessPaths}.</li>
+     *   <li>Full/data access covers metadata access, e.g. {@code [a]} removes
+     *       {@code [a, NULL]} and {@code [a, OFFSET]}. This is handled by
+     *       {@link #stripExactCoveredDataSkippingSuffixPaths}.</li>
+     *   <li>A deeper data access covers an upper metadata path, e.g. {@code 
[a, b, c]}
+     *       removes {@code [a, b, NULL]}, and {@code [a, *, field]} removes
+     *       {@code [a, OFFSET]}. Exact NULL/data coverage is handled by
+     *       {@link #stripNullSuffixPaths}; OFFSET/container coverage is 
handled by
+     *       {@link #stripCoveredOffsetByDataPaths}.</li>
+     *   <li>A deeper OFFSET access covers an upper OFFSET path, e.g.
+     *       {@code [a, *, OFFSET]} removes {@code [a, OFFSET]}. This is 
handled by
+     *       {@link #stripCoveredOffsetByDeeperOffsetPaths}.</li>
+     *   <li>OFFSET access covers NULL access with the same prefix, e.g.
+     *       {@code [a, OFFSET]} removes {@code [a, NULL]}. This is handled by
+     *       {@link #stripNullSuffixPaths}.</li>
+     *   <li>A deeper NULL access covers an upper NULL path, e.g.
+     *       {@code [a, *, NULL]} removes {@code [a, NULL]}. This is handled by
+     *       {@link #stripNullSuffixPaths}.</li>
+     *   <li>Map value-side coverage may need a supplemental key path, e.g.
+     *       {@code [m, *, OFFSET]} plus {@code [m, VALUES]} becomes
+     *       {@code [m, KEYS]} plus {@code [m, VALUES]}. This is handled by
+     *       {@link #stripCoveredOffsetByDataPaths} via {@link 
#compareOffsetPrefixCoverage}
+     *       and {@link #buildMapKeysOnlyPath}.</li>
+     *   <li>Array NULL-only paths covered by value/data access may also need 
supplemental
+     *       map keys, e.g. {@code [m, *, NULL]} plus {@code [m, VALUES, *, 
field]}
+     *       becomes {@code [m, KEYS]} plus {@code [m, VALUES, *, field]}. 
This is
+     *       handled by {@link #stripCoveredArrayNullSuffixPaths}.</li>
+     * </ul>
+     */
+    private static void stripCoveredMetaSuffixPaths(
+            Slot slot, Multimap<Integer, Pair<ColumnAccessPathType, 
List<String>>> targetAccessPaths,
+            Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> 
coveringAccessPaths) {
+        stripExactCoveredDataSkippingSuffixPaths(slot, targetAccessPaths, 
coveringAccessPaths);
+        stripCoveredOffsetByDataPaths(slot, targetAccessPaths, 
coveringAccessPaths);
+        stripCoveredOffsetByDeeperOffsetPaths(slot, targetAccessPaths, 
coveringAccessPaths);

Review Comment:
   This still leaves an invalid `OFFSET` + deeper `NULL` combination. Reduced 
plan:
   
   ```text
   Project(cardinality(a), is_null(element_at(a, 1)))
     OlapScan(a: ARRAY<ARRAY<INT>>)
   ```
   
   `AccessPathExpressionCollector` emits `[a, OFFSET]` for `cardinality(a)` and 
`[a, *, NULL]` for the `IS NULL`. `stripCoveredOffsetByDataPaths` sees no data 
path, and `stripCoveredOffsetByDeeperOffsetPaths` only considers deeper 
`OFFSET` paths, so both paths survive this cleanup. On the BE side, 
`ArrayFileColumnIterator::set_access_paths` strips the root name and 
`_check_and_set_meta_read_mode` sees `OFFSET`, sets the root array to 
`OFFSET_ONLY`, and skips `_item_iterator`; the `[*, NULL]` path is then never 
applied. The null predicate is evaluated from default-filled inner arrays/null 
maps and can return wrong results.
   
   Please make deeper `NULL` paths cover the shallower `OFFSET` path for the 
same container prefix, and add a regression such as `select cardinality(a), 
element_at(a, 1) is null from nested_array_tbl` that asserts `[a, OFFSET]` is 
stripped while `[a, *, NULL]` remains.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to