github-actions[bot] commented on code in PR #63736:
URL: https://github.com/apache/doris/pull/63736#discussion_r3310758931
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazyMaterializeTopN.java:
##########
@@ -92,29 +87,206 @@ private Plan computeTopN(PhysicalTopN topN,
CascadesContext ctx) {
if (SessionVariable.getTopNLazyMaterializationThreshold() <
topN.getLimit()) {
return topN;
}
- /*
- topn(output=[x] orderkey=[b])
- ->project(a as x)
- ->T(a, b)
- 'x' can be lazy materialized.
- materializeMap: x->(T, a)
- */
+ try {
+ List<Slot> userVisibleOutput =
ImmutableList.copyOf(topN.getOutput());
+
+ // Find projects below TopN with PreferPushDownProject expressions.
+ // Simplify them and expose hidden columns as lazy candidates.
+ List<NamedExpression> pulledUpExprs = new ArrayList<>();
+ List<Slot> lazyCandidates = new ArrayList<>();
+ PhysicalProject<? extends Plan> leafProject =
findLeafProject(topN);
+ boolean restructured = false;
+
+ if (leafProject != null &&
hasPreferPushDownProjectExprs(leafProject)) {
+ // Split: remove PPD/subPath expressions from lower project
+ List<NamedExpression> simplified = new ArrayList<>();
+ for (NamedExpression ne : leafProject.getProjects()) {
+ if (isNestedLazyExpression(ne)) {
+ pulledUpExprs.add(ne);
+ // For PPD expressions, input slots ARE the lazy
candidates
+ // (e.g., struct_col for struct_element,
payload['name'] for variant)
+ lazyCandidates.addAll(ne.getInputSlots());
+ } else {
+ simplified.add(ne);
+ }
+ }
+ // Add lazy candidate slots to lower project so
LazySlotPruning can remove them
+ for (Slot slot : lazyCandidates) {
+ if (!containsSlotInExprs(simplified, slot)) {
+ simplified.add(slot);
+ }
+ }
+ topN = replaceLeafProject(topN, leafProject, simplified);
+ restructured = true;
+ }
+
+ // Build effective probe list: TopN output (excl. pulled-up slots)
+ lazy candidates
+ Set<ExprId> pulledUpExprIds = new HashSet<>();
+ for (NamedExpression ne : pulledUpExprs) {
+ pulledUpExprIds.add(ne.getExprId());
+ }
+ List<Slot> effectiveOutput = new ArrayList<>();
+ for (Slot slot : topN.getOutput()) {
+ if (!pulledUpExprIds.contains(slot.getExprId())) {
+ effectiveOutput.add(slot);
+ }
+ }
+ if (restructured) {
+ for (Slot candidate : lazyCandidates) {
+ if (!containsSlot(effectiveOutput, candidate)) {
+ effectiveOutput.add(candidate);
+ }
+ }
+ }
+
+ Plan result = doComputeTopN(topN, ctx,
ImmutableList.copyOf(effectiveOutput),
+ restructured ? leafProject : null, lazyCandidates);
+
+ if (pulledUpExprs.isEmpty()) {
+ result = new
PhysicalProject(ImmutableList.copyOf(userVisibleOutput), null, result);
+ } else {
+ List<NamedExpression> outputExprs = new ArrayList<>();
+ for (Slot slot : userVisibleOutput) {
+ if (!pulledUpExprIds.contains(slot.getExprId())) {
+ outputExprs.add(slot);
+ }
+ }
+ outputExprs.addAll(pulledUpExprs);
+ result = new
PhysicalProject(ImmutableList.copyOf(outputExprs), null, result);
Review Comment:
This rebuilds the final projection as all non-pulled slots followed by all
pulled-up expressions, which changes the user-visible column order whenever a
pulled-up nested expression precedes a non-pulled column in the select list.
(The pulled-up expressions are also appended in the lower project's iteration
order, which may differ from their order in `userVisibleOutput`.) For
example `select substring(struct_element(struct_col, 'city'), 1) as city, id
... order by id limit 3` has `userVisibleOutput = [city, id]`, but this code
produces `[id, city]`, so the result schema and row values are swapped relative
to the SQL. Please preserve the original `userVisibleOutput` order by replacing
each pulled slot in-place with its corresponding pulled-up expression, and add
a regression case with the nested expression before another selected column.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: notifications-unsubscribe@infra.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org