Copilot commented on code in PR #17168: URL: https://github.com/apache/pinot/pull/17168#discussion_r2565047192
########## pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/UnnestOperator.java: ########## @@ -0,0 +1,296 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.query.runtime.operator; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.pinot.common.datatable.StatMap; +import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.query.planner.logical.RexExpression; +import org.apache.pinot.query.planner.plannode.UnnestNode; +import org.apache.pinot.query.runtime.blocks.MseBlock; +import org.apache.pinot.query.runtime.blocks.RowHeapDataBlock; +import org.apache.pinot.query.runtime.operator.operands.TransformOperand; +import org.apache.pinot.query.runtime.operator.operands.TransformOperandFactory; +import org.apache.pinot.query.runtime.plan.OpChainExecutionContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * UnnestOperator expands array/collection values per input row into zero or more output rows. + * Supports multiple arrays, aligning them by index (like a zip operation). + * If arrays have different lengths, shorter arrays are padded with null values. + * The output schema is provided by the associated UnnestNode's data schema. + */ +public class UnnestOperator extends MultiStageOperator { + private static final Logger LOGGER = LoggerFactory.getLogger(UnnestOperator.class); + private static final String EXPLAIN_NAME = "UNNEST"; + + private final MultiStageOperator _input; + private final List<TransformOperand> _arrayExprOperands; + private final DataSchema _resultSchema; + private final boolean _withOrdinality; + private final List<Integer> _elementIndexes; + private final int _ordinalityIndex; + private final StatMap<StatKey> _statMap = new StatMap<>(StatKey.class); + + public UnnestOperator(OpChainExecutionContext context, MultiStageOperator input, DataSchema inputSchema, + UnnestNode node) { + super(context); + _input = input; + List<RexExpression> arrayExprs = node.getArrayExprs(); + _arrayExprOperands = new ArrayList<>(arrayExprs.size()); + for (RexExpression arrayExpr : arrayExprs) { + _arrayExprOperands.add(TransformOperandFactory.getTransformOperand(arrayExpr, inputSchema)); + } + _resultSchema = node.getDataSchema(); + _withOrdinality = node.isWithOrdinality(); + _elementIndexes = node.getElementIndexes(); + _ordinalityIndex = node.getOrdinalityIndex(); + } + + @Override + public void registerExecution(long time, int numRows, long memoryUsedBytes, long gcTimeMs) { + _statMap.merge(StatKey.EXECUTION_TIME_MS, time); + _statMap.merge(StatKey.EMITTED_ROWS, numRows); + _statMap.merge(StatKey.ALLOCATED_MEMORY_BYTES, memoryUsedBytes); + _statMap.merge(StatKey.GC_TIME_MS, gcTimeMs); + } + + @Override + protected Logger logger() { + return LOGGER; + } + + @Override + public List<MultiStageOperator> getChildOperators() { + return List.of(_input); + } + + @Override + public Type getOperatorType() { + return Type.UNNEST; + } + + @Override + public String toExplainString() { + return EXPLAIN_NAME; + } + + @Override + protected MseBlock getNextBlock() { + MseBlock block = _input.nextBlock(); + if (block.isEos()) { + return block; + } + MseBlock.Data dataBlock = (MseBlock.Data) block; + List<Object[]> inputRows = dataBlock.asRowHeap().getRows(); + List<Object[]> outRows = new ArrayList<>(); + + for (Object[] row : inputRows) { + // Extract all arrays from the input row + List<List<Object>> arrays = new ArrayList<>(); + for (TransformOperand operand : _arrayExprOperands) { + Object value = operand.apply(row); + List<Object> elements = extractArrayElements(value); + arrays.add(elements); + } + // Align arrays by index (zip operation) + alignArraysByIndex(row, arrays, outRows); + } + + return new RowHeapDataBlock(outRows, _resultSchema); + } + + private List<Object> extractArrayElements(Object value) { + List<Object> elements = new ArrayList<>(); + if (value == null) { + return elements; + } + if (value instanceof List) { + elements.addAll((List<?>) value); + } else if (value.getClass().isArray()) { + if (value instanceof int[]) { + int[] arr = (int[]) value; + for (int v : arr) { + elements.add(v); + } + } else if (value instanceof long[]) { + long[] arr = (long[]) value; + for (long v : arr) { + elements.add(v); + } + } else if (value instanceof double[]) { + double[] arr = (double[]) value; + for (double v : arr) { + elements.add(v); + } + } else if (value instanceof boolean[]) { + boolean[] arr = (boolean[]) value; + for (boolean v : arr) { + elements.add(v); + } + } else if (value instanceof char[]) { + char[] arr = (char[]) value; + for (char v : arr) { + elements.add(v); + } + } else if (value instanceof short[]) { + short[] arr = (short[]) value; + for (short v : arr) { + elements.add(v); + } + } else if (value instanceof byte[]) { + byte[] arr = (byte[]) value; + for (byte v : arr) { + elements.add(v); + } + } else if (value instanceof String[]) { + String[] arr = (String[]) value; + Collections.addAll(elements, arr); + } else if (value instanceof Object[]) { + Object[] arr = (Object[]) value; + Collections.addAll(elements, arr); + } else { + int length = java.lang.reflect.Array.getLength(value); + for (int i = 0; i < length; i++) { + elements.add(java.lang.reflect.Array.get(value, i)); + } Review Comment: Using reflection for array element extraction is inefficient and should be avoided in hot paths. This fallback case handles uncommon array types (e.g., `float[]`, arrays of wrapper types) but impacts performance for all array types not explicitly handled. Consider profiling to determine if this fallback is frequently used. If it is, add explicit handling for the most common types. If not, document that this is intentionally a slow fallback for rare cases. ########## pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/UnnestOperator.java: ########## @@ -0,0 +1,296 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.query.runtime.operator; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.pinot.common.datatable.StatMap; +import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.query.planner.logical.RexExpression; +import org.apache.pinot.query.planner.plannode.UnnestNode; +import org.apache.pinot.query.runtime.blocks.MseBlock; +import org.apache.pinot.query.runtime.blocks.RowHeapDataBlock; +import org.apache.pinot.query.runtime.operator.operands.TransformOperand; +import org.apache.pinot.query.runtime.operator.operands.TransformOperandFactory; +import org.apache.pinot.query.runtime.plan.OpChainExecutionContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * UnnestOperator expands array/collection values per input row into zero or more output rows. + * Supports multiple arrays, aligning them by index (like a zip operation). + * If arrays have different lengths, shorter arrays are padded with null values. + * The output schema is provided by the associated UnnestNode's data schema. + */ +public class UnnestOperator extends MultiStageOperator { + private static final Logger LOGGER = LoggerFactory.getLogger(UnnestOperator.class); + private static final String EXPLAIN_NAME = "UNNEST"; + + private final MultiStageOperator _input; + private final List<TransformOperand> _arrayExprOperands; + private final DataSchema _resultSchema; + private final boolean _withOrdinality; + private final List<Integer> _elementIndexes; + private final int _ordinalityIndex; + private final StatMap<StatKey> _statMap = new StatMap<>(StatKey.class); + + public UnnestOperator(OpChainExecutionContext context, MultiStageOperator input, DataSchema inputSchema, + UnnestNode node) { + super(context); + _input = input; + List<RexExpression> arrayExprs = node.getArrayExprs(); + _arrayExprOperands = new ArrayList<>(arrayExprs.size()); + for (RexExpression arrayExpr : arrayExprs) { + _arrayExprOperands.add(TransformOperandFactory.getTransformOperand(arrayExpr, inputSchema)); + } + _resultSchema = node.getDataSchema(); + _withOrdinality = node.isWithOrdinality(); + _elementIndexes = node.getElementIndexes(); + _ordinalityIndex = node.getOrdinalityIndex(); + } + + @Override + public void registerExecution(long time, int numRows, long memoryUsedBytes, long gcTimeMs) { + _statMap.merge(StatKey.EXECUTION_TIME_MS, time); + _statMap.merge(StatKey.EMITTED_ROWS, numRows); + _statMap.merge(StatKey.ALLOCATED_MEMORY_BYTES, memoryUsedBytes); + _statMap.merge(StatKey.GC_TIME_MS, gcTimeMs); + } + + @Override + protected Logger logger() { + return LOGGER; + } + + @Override + public List<MultiStageOperator> getChildOperators() { + return List.of(_input); + } + + @Override + public Type getOperatorType() { + return Type.UNNEST; + } + + @Override + public String toExplainString() { + return EXPLAIN_NAME; + } + + @Override + protected MseBlock getNextBlock() { + MseBlock block = _input.nextBlock(); + if (block.isEos()) { + return block; + } + MseBlock.Data dataBlock = (MseBlock.Data) block; + List<Object[]> inputRows = dataBlock.asRowHeap().getRows(); + List<Object[]> outRows = new ArrayList<>(); + + for (Object[] row : inputRows) { + // Extract all arrays from the input row + List<List<Object>> arrays = new ArrayList<>(); + for (TransformOperand operand : _arrayExprOperands) { + Object value = operand.apply(row); + List<Object> elements = extractArrayElements(value); + arrays.add(elements); + } + // Align arrays by index (zip operation) + alignArraysByIndex(row, arrays, outRows); + } + + return new RowHeapDataBlock(outRows, _resultSchema); + } + + private List<Object> extractArrayElements(Object value) { + List<Object> elements = new ArrayList<>(); + if (value == null) { + return elements; + } + if (value instanceof List) { + elements.addAll((List<?>) value); + } else if (value.getClass().isArray()) { + if (value instanceof int[]) { + int[] arr = (int[]) value; + for (int v : arr) { + elements.add(v); + } + } else if (value instanceof long[]) { + long[] arr = (long[]) value; + for (long v : arr) { + elements.add(v); + } + } else if (value instanceof double[]) { + double[] arr = (double[]) value; + for (double v : arr) { + elements.add(v); + } + } else if (value instanceof boolean[]) { + boolean[] arr = (boolean[]) value; + for (boolean v : arr) { + elements.add(v); + } + } else if (value instanceof char[]) { + char[] arr = (char[]) value; + for (char v : arr) { + elements.add(v); + } + } else if (value instanceof short[]) { + short[] arr = (short[]) value; + for (short v : arr) { + elements.add(v); + } + } else if (value instanceof byte[]) { + byte[] arr = (byte[]) value; + for (byte v : arr) { + elements.add(v); + } + } else if (value instanceof String[]) { + String[] arr = (String[]) value; + Collections.addAll(elements, arr); + } else if (value instanceof Object[]) { + Object[] arr = (Object[]) value; + Collections.addAll(elements, arr); + } else { + int length = java.lang.reflect.Array.getLength(value); + for (int i = 0; i < length; i++) { + elements.add(java.lang.reflect.Array.get(value, i)); + } + } + } else { + // If not array-like, treat as a single element + elements.add(value); + } Review Comment: The fallback behavior of treating non-array values as single elements is not clearly specified in the class or method documentation. This could lead to unexpected behavior if a scalar value is passed where an array is expected. Add documentation explaining this fallback behavior and whether it's intentional for SQL semantics or defensive programming. ########## pinot-query-planner/src/test/java/org/apache/pinot/query/planner/logical/RelToPlanNodeConverterTest.java: ########## @@ -203,4 +212,291 @@ public void testConvertEnrichedJoinNodeTest() { EnrichedJoinNode enrichedJoinNode = (EnrichedJoinNode) node; Assert.assertEquals(enrichedJoinNode.getFilterProjectRexes().size(), 2); } + + @Test + public void testConvertLogicalCorrelateProducesUnnestMetadata() { + TypeFactory typeFactory = TypeFactory.INSTANCE; + RelOptCluster cluster = createCluster(typeFactory); + RelDataType leftRowType = typeFactory.builder() + .add("id", SqlTypeName.INTEGER) + .add("arr", typeFactory.createArrayType(typeFactory.createSqlType(SqlTypeName.INTEGER), -1)) + .build(); + LogicalValues left = LogicalValues.create(cluster, leftRowType, ImmutableList.of()); + + CorrelationId correlationId = new CorrelationId(0); + LogicalProject project = buildCorrelatedProject(cluster, leftRowType, correlationId, "arr"); + Uncollect uncollect = Uncollect.create(project.getTraitSet(), project, false, List.of()); + LogicalCorrelate correlate = + LogicalCorrelate.create(left, uncollect, correlationId, ImmutableBitSet.of(1), JoinRelType.INNER); + + RelToPlanNodeConverter converter = new RelToPlanNodeConverter(null, + CommonConstants.Broker.DEFAULT_BROKER_DEFAULT_HASH_FUNCTION); + PlanNode planNode = converter.toPlanNode(correlate); + + Assert.assertTrue(planNode instanceof UnnestNode); + UnnestNode unnestNode = (UnnestNode) planNode; + Assert.assertEquals(((RexExpression.InputRef) unnestNode.getArrayExpr()).getIndex(), 1); + Assert.assertEquals(unnestNode.getColumnAlias(), "arr"); + Assert.assertEquals(unnestNode.getElementIndex(), 2); + Assert.assertFalse(unnestNode.isWithOrdinality()); + } + + @Test + public void testConvertLogicalCorrelateWithFilterAndOrdinality() { + TypeFactory typeFactory = TypeFactory.INSTANCE; + RelOptCluster cluster = createCluster(typeFactory); + RelDataType leftRowType = typeFactory.builder() + .add("id", SqlTypeName.INTEGER) + .add("arr", typeFactory.createArrayType(typeFactory.createSqlType(SqlTypeName.INTEGER), -1)) + .build(); + LogicalValues left = LogicalValues.create(cluster, leftRowType, ImmutableList.of()); + + CorrelationId correlationId = new CorrelationId(1); + LogicalProject project = buildCorrelatedProject(cluster, leftRowType, correlationId, "arr"); + Uncollect uncollect = Uncollect.create(project.getTraitSet(), project, true, List.of()); + RexBuilder rexBuilder = cluster.getRexBuilder(); + RexNode ordRef = rexBuilder.makeInputRef(uncollect.getRowType(), 1); + RexNode literal = rexBuilder.makeExactLiteral(BigDecimal.ONE, typeFactory.createSqlType(SqlTypeName.INTEGER)); + RexNode condition = rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN, ordRef, literal); + LogicalFilter filter = LogicalFilter.create(uncollect, condition); + LogicalCorrelate correlate = + LogicalCorrelate.create(left, filter, correlationId, ImmutableBitSet.of(1), JoinRelType.LEFT); + + RelToPlanNodeConverter converter = new RelToPlanNodeConverter(null, + CommonConstants.Broker.DEFAULT_BROKER_DEFAULT_HASH_FUNCTION); + PlanNode planNode = converter.toPlanNode(correlate); + + Assert.assertTrue(planNode instanceof FilterNode); + FilterNode filterNode = (FilterNode) planNode; + Assert.assertEquals(filterNode.getInputs().size(), 1); + Assert.assertTrue(filterNode.getInputs().get(0) instanceof UnnestNode); + UnnestNode child = (UnnestNode) filterNode.getInputs().get(0); + Assert.assertTrue(child.isWithOrdinality()); + Assert.assertEquals(child.getElementIndex(), 2); + Assert.assertEquals(child.getOrdinalityIndex(), 3); + + RexExpression.FunctionCall conditionExpr = (RexExpression.FunctionCall) filterNode.getCondition(); + Assert.assertEquals(conditionExpr.getFunctionName(), SqlStdOperatorTable.GREATER_THAN.getKind().toString()); + RexExpression.InputRef rewrittenOrdinal = + (RexExpression.InputRef) conditionExpr.getFunctionOperands().get(0); + Assert.assertEquals(rewrittenOrdinal.getIndex(), child.getOrdinalityIndex()); + } + + private static RelOptCluster createCluster(TypeFactory typeFactory) { + HepProgramBuilder hepProgramBuilder = new HepProgramBuilder(); + HepPlanner planner = new HepPlanner(hepProgramBuilder.build()); + RexBuilder rexBuilder = new RexBuilder(typeFactory); + RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder); + cluster.setMetadataProvider(DefaultRelMetadataProvider.INSTANCE); + return cluster; + } + + @Test + public void testConvertLogicalUncollectWithOrdinalityAndAliases() { + // This test is disabled because Calcite's Uncollect.create() doesn't support aliases parameter + // for single arrays with ordinality. The aliases parameter only works for ROW types. + // In practice, single array UNNEST with ordinality is handled via LogicalCorrelate. Review Comment: Test method `testConvertLogicalUncollectWithOrdinalityAndAliases` has a comment indicating it's disabled due to Calcite limitations, but the test is not marked with `@Test(enabled = false)` or similar annotation. If this test is meant to be skipped, add the appropriate annotation. If the comment is outdated and the test should run, remove the comment. Currently, the test will execute despite the comment suggesting otherwise. ########## pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RelToPlanNodeConverter.java: ########## @@ -159,6 +170,492 @@ public PlanNode toPlanNode(RelNode node) { return result; } + private UnnestNode convertLogicalUncollect(Uncollect node) { + // Extract array expressions (typically from a Project with one or more expressions) + List<RexExpression> arrayExprs = new ArrayList<>(); + List<String> columnAliases = new ArrayList<>(); + RelNode input = node.getInput(); + boolean withOrdinality = node.withOrdinality; + String ordinalityAlias = withOrdinality ? SqlUnnestOperator.ORDINALITY_COLUMN_NAME : null; + + if (input instanceof Project) { + Project p = (Project) input; + List<RelDataTypeField> outputFields = node.getRowType().getFieldList(); + List<RelDataTypeField> projectFields = p.getRowType().getFieldList(); + int numProjects = p.getProjects().size(); + int numOutputFields = outputFields.size(); + + // Check if WITH ORDINALITY is present: output fields = project expressions + ordinality + if (numOutputFields > numProjects) { + withOrdinality = true; + String ordAlias = outputFields.get(numOutputFields - 1).getName(); + // Use the alias from Uncollect's rowType if available, otherwise use default + ordinalityAlias = (ordAlias != null && !ordAlias.isEmpty()) ? ordAlias : "ordinality"; + } + + // Extract all array expressions from the Project + // Field names come from Uncollect's rowType (which has the aliases from AS clause) + // Fall back to Project's field names if Uncollect's field names are empty/default + for (int i = 0; i < numProjects; i++) { + arrayExprs.add(RexExpressionUtils.fromRexNode(p.getProjects().get(i))); + String alias = null; + if (i < outputFields.size()) { + alias = outputFields.get(i).getName(); + // If alias is empty or null, try Project's field name + if ((alias == null || alias.isEmpty()) && i < projectFields.size()) { + alias = projectFields.get(i).getName(); + } + } else if (i < projectFields.size()) { + alias = projectFields.get(i).getName(); + } + columnAliases.add(resolveElementAlias(alias, i)); + } + } + + if (arrayExprs.isEmpty()) { + // Fallback: refer to first input ref + arrayExprs.add(new RexExpression.InputRef(0)); + List<RelDataTypeField> fields = node.getRowType().getFieldList(); + if (!fields.isEmpty()) { + String alias = fields.get(0).getName(); + columnAliases.add(resolveElementAlias(alias, 0)); + // Check for ordinality in fallback case + if (fields.size() > 1 && !withOrdinality) { + withOrdinality = true; + String ordAlias = fields.get(1).getName(); + ordinalityAlias = (ordAlias != null && !ordAlias.isEmpty()) ? ordAlias : "ordinality"; + } + } else { + columnAliases.add(resolveElementAlias(null, 0)); + } + } + + return new UnnestNode(DEFAULT_STAGE_ID, toDataSchema(node.getRowType()), NodeHint.EMPTY, + convertInputs(node.getInputs()), arrayExprs, columnAliases, withOrdinality, ordinalityAlias); + } + + private BasePlanNode convertLogicalCorrelate(LogicalCorrelate node) { + // Pattern: Correlate(left, Uncollect(Project(correlatedFields...))) + RelNode right = node.getRight(); + RelDataType leftRowType = node.getLeft().getRowType(); + Project aliasProject = right instanceof Project ? (Project) right : null; + Project correlatedProject = findProjectUnderUncollect(right); + List<RexExpression> arrayExprs = new ArrayList<>(); + List<String> columnAliases = new ArrayList<>(); + if (correlatedProject != null) { + List<RelDataTypeField> outputFields = node.getRowType().getFieldList(); + // Extract all array expressions from the Project + // The output fields include: left columns + array elements + (ordinality if present) + // We need to extract only the array element columns (skip left columns, skip ordinality if present) + int leftColumnCount = leftRowType.getFieldCount(); + int numProjects = correlatedProject.getProjects().size(); + List<RelDataTypeField> projectFields = correlatedProject.getRowType().getFieldList(); + for (int i = 0; i < numProjects; i++) { + RexNode rex = correlatedProject.getProjects().get(i); + RexExpression arrayExpr = deriveArrayExpression(rex, correlatedProject, leftRowType); + if (arrayExpr == null) { + arrayExpr = RexExpressionUtils.fromRexNode(rex); + } + arrayExprs.add(arrayExpr); + // For LogicalCorrelate, aliases come from the Correlate's output row type (after left columns) + // This includes the aliases specified in Uncollect.create() + // However, if Calcite auto-generates a name (like "arr0"), prefer the Project field name + int aliasIndex = leftColumnCount + i; + String alias = null; + if (aliasIndex < outputFields.size()) { + alias = outputFields.get(aliasIndex).getName(); + // Check if the alias looks auto-generated (ends with a digit) and prefer Project field name + if (alias != null && i < projectFields.size() && alias.matches(".*\\d$")) { + String projectFieldName = projectFields.get(i).getName(); + if (projectFieldName != null && !projectFieldName.isEmpty()) { + // Use Project field name if it doesn't end with a digit (not auto-generated) + if (!projectFieldName.matches(".*\\d$")) { + alias = projectFieldName; + } + } + } + } + columnAliases.add(resolveElementAlias(alias, i)); + } + } + if (arrayExprs.isEmpty()) { + // Fallback: refer to first input ref + arrayExprs.add(new RexExpression.InputRef(0)); + List<RelDataTypeField> outputFields = node.getRowType().getFieldList(); + if (!outputFields.isEmpty()) { + columnAliases.add(resolveElementAlias(outputFields.get(0).getName(), 0)); + } else { + columnAliases.add(resolveElementAlias(null, 0)); + } + } + LogicalFilter correlateFilter = findCorrelateFilter(right); + boolean wrapWithFilter = correlateFilter != null; + RexNode filterCondition = wrapWithFilter ? correlateFilter.getCondition() : null; + // Use the entire correlate output schema + PlanNode inputNode = toPlanNode(node.getLeft()); + // Ensure inputs list is mutable because downstream visitors (e.g., withInputs methods) may modify the inputs list + List<PlanNode> inputs = new ArrayList<>(); + inputs.add(inputNode); + ElementOrdinalInfo ordinalInfo = deriveElementOrdinalInfo(right, leftRowType, node.getRowType(), arrayExprs.size()); + boolean withOrdinality = ordinalInfo.hasOrdinality(); + String ordinalityAlias = ordinalInfo.getOrdinalityAlias(); + if (ordinalityAlias == null || SqlUnnestOperator.ORDINALITY_COLUMN_NAME.equals(ordinalityAlias)) { + if (aliasProject != null) { + String projectOrdinalityAlias = getOrdinalityAliasFromProject(aliasProject, arrayExprs.size()); + if (projectOrdinalityAlias != null) { + ordinalityAlias = projectOrdinalityAlias; + } + } + if (ordinalityAlias == null || SqlUnnestOperator.ORDINALITY_COLUMN_NAME.equals(ordinalityAlias)) { + String rightOrdinalityAlias = getOrdinalityAliasFromRelNode(right, arrayExprs.size()); + if (rightOrdinalityAlias != null) { + ordinalityAlias = rightOrdinalityAlias; + } + } + } + List<Integer> elementIndexes = ordinalInfo.getElementIndexes(); + int ordinalityIndex = ordinalInfo.getOrdinalityIndex(); + UnnestNode unnest = new UnnestNode(DEFAULT_STAGE_ID, toDataSchema(node.getRowType()), NodeHint.EMPTY, + inputs, arrayExprs, columnAliases, withOrdinality, ordinalityAlias, elementIndexes, ordinalityIndex); + if (wrapWithFilter) { + // Wrap Unnest with a FilterNode; rewrite filter InputRefs to absolute output indexes + // For multiple arrays, we need to handle rewriting differently + RexExpression rewritten = rewriteInputRefsForMultipleArrays( + RexExpressionUtils.fromRexNode(filterCondition), elementIndexes, ordinalityIndex); + return new FilterNode(DEFAULT_STAGE_ID, toDataSchema(node.getRowType()), NodeHint.EMPTY, + new ArrayList<>(List.of(unnest)), rewritten); + } + return unnest; + } + + private static String resolveElementAlias(String proposedAlias, int idx) { + if (proposedAlias != null && !proposedAlias.isEmpty()) { + return proposedAlias; + } + return "unnest_col_" + idx; + } + + @Nullable + private static String getOrdinalityAliasFromProject(Project project, int numArrays) { + List<RelDataTypeField> fields = project.getRowType().getFieldList(); + if (fields.size() > numArrays) { + String alias = fields.get(numArrays).getName(); + if (alias != null && !alias.isEmpty() && !SqlUnnestOperator.ORDINALITY_COLUMN_NAME.equals(alias)) { + return alias; + } + } + return null; + } + + @Nullable + private static String getOrdinalityAliasFromRelNode(RelNode node, int numArrays) { + List<RelDataTypeField> fields = node.getRowType().getFieldList(); + if (fields.size() > numArrays) { + String alias = fields.get(fields.size() - 1).getName(); + if (alias != null && !alias.isEmpty() && !SqlUnnestOperator.ORDINALITY_COLUMN_NAME.equals(alias)) { + return alias; + } + } + return null; + } + + @Nullable + private static Project findProjectUnderUncollect(RelNode node) { + RelNode current = node; + while (current != null) { + if (current instanceof Uncollect) { + RelNode input = ((Uncollect) current).getInput(); + return input instanceof Project ? (Project) input : null; + } + if (current instanceof Project) { + current = ((Project) current).getInput(); + } else if (current instanceof LogicalFilter) { + current = ((LogicalFilter) current).getInput(); + } else { + return null; + } + } + return null; + } + + @Nullable + private static Uncollect findUncollect(RelNode node) { + RelNode current = node; + while (current != null) { + if (current instanceof Uncollect) { + return (Uncollect) current; + } + if (current instanceof Project) { + current = ((Project) current).getInput(); + } else if (current instanceof LogicalFilter) { + current = ((LogicalFilter) current).getInput(); + } else { + return null; + } + } + return null; + } + + @Nullable + private static RexExpression deriveArrayExpression(RexNode rex, Project project, RelDataType leftRowType) { + Integer idx = resolveInputRefFromCorrel(rex, leftRowType); + if (idx != null) { + return new RexExpression.InputRef(idx); + } + RexExpression candidate = RexExpressionUtils.fromRexNode(rex); + return candidate instanceof RexExpression.InputRef ? candidate : null; + } + + @Nullable + private static LogicalFilter findCorrelateFilter(RelNode node) { + RelNode current = node; + while (current instanceof Project || current instanceof LogicalFilter) { + if (current instanceof LogicalFilter) { + return (LogicalFilter) current; + } + current = ((Project) current).getInput(); + } + return null; + } + + private static ElementOrdinalInfo deriveElementOrdinalInfo(RelNode right, RelDataType leftRowType, + RelDataType correlateOutputRowType, int numArrays) { + Uncollect uncollect = findUncollect(right); + boolean hasOrdinality = uncollect != null && uncollect.withOrdinality; + ElementOrdinalAccumulator accumulator = + new ElementOrdinalAccumulator(leftRowType.getFieldCount(), numArrays, hasOrdinality); + if (correlateOutputRowType != null) { + // Use the Correlate's output row type which includes left columns + unnested elements + ordinality + accumulator.populateFromCorrelateOutput(correlateOutputRowType, leftRowType.getFieldCount()); + } else { + // Fallback to old logic for non-Correlate cases + if (right instanceof Uncollect) { + accumulator.populateFromRowType(right.getRowType()); + } else if (right instanceof Project) { + accumulator.populateFromProject((Project) right); + } else if (right instanceof LogicalFilter) { + LogicalFilter filter = (LogicalFilter) right; + RelNode filterInput = filter.getInput(); + if (filterInput instanceof Uncollect) { + accumulator.populateFromRowType(filter.getRowType()); + } else if (filterInput instanceof Project) { + accumulator.populateFromProject((Project) filterInput); + } + } + } + if (uncollect != null) { + accumulator.ensureOrdinalityFromRowType(uncollect.getRowType(), uncollect.withOrdinality); + } + return accumulator.toInfo(); + } + + private static final class ElementOrdinalAccumulator { + private final int _base; + private final int _numArrays; + private final boolean _hasOrdinality; + private final List<String> _elementAliases = new ArrayList<>(); + private String _ordinalityAlias; + private final List<Integer> _elementIndexes = new ArrayList<>(); + private int _ordinalityIndex = -1; + + ElementOrdinalAccumulator(int base, int numArrays, boolean hasOrdinality) { + _base = base; + _numArrays = numArrays; + _hasOrdinality = hasOrdinality; + } + + void populateFromRowType(RelDataType rowType) { + List<RelDataTypeField> fields = rowType.getFieldList(); + // Extract element aliases and indexes for all arrays + for (int i = 0; i < _numArrays && i < fields.size(); i++) { + _elementAliases.add(fields.get(i).getName()); + _elementIndexes.add(_base + i); + } + // Check if ordinality is present: fields.size() should be numArrays + 1 + if (fields.size() > _numArrays && _ordinalityIndex < 0) { + _ordinalityAlias = fields.get(_numArrays).getName(); + _ordinalityIndex = _base + _numArrays; + } + } + + void populateFromProject(Project project) { + List<RexNode> projects = project.getProjects(); + List<RelDataTypeField> projFields = project.getRowType().getFieldList(); + // Extract element aliases and indexes from project outputs + for (int j = 0; j < projects.size() && j < _numArrays; j++) { + String outName = projFields.get(j).getName(); + _elementAliases.add(outName); + _elementIndexes.add(_base + j); + } + // Check if ordinality is present: projFields.size() should be numArrays + 1 + if (projFields.size() > _numArrays && _ordinalityIndex < 0) { + _ordinalityAlias = projFields.get(_numArrays).getName(); + _ordinalityIndex = _base + _numArrays; + } + } + + void populateFromCorrelateOutput(RelDataType correlateOutputRowType, int leftColumnCount) { + List<RelDataTypeField> fields = correlateOutputRowType.getFieldList(); + int rightFieldCount = Math.min(_numArrays + (_hasOrdinality ? 1 : 0), fields.size()); + int actualLeftColumns = Math.max(0, fields.size() - rightFieldCount); + int missingLeftColumns = Math.max(0, leftColumnCount - actualLeftColumns); + int adjustedBase = Math.max(0, leftColumnCount - missingLeftColumns); + + for (int i = 0; i < _numArrays; i++) { + int fieldIndex = adjustedBase + i; + if (fieldIndex < fields.size()) { + _elementAliases.add(fields.get(fieldIndex).getName()); + _elementIndexes.add(fieldIndex); + } else { + _elementAliases.add(null); + _elementIndexes.add(_base + i); + } + } + int ordinalityFieldIndex = adjustedBase + _numArrays; + if (_hasOrdinality && ordinalityFieldIndex < fields.size() && _ordinalityIndex < 0) { + _ordinalityAlias = fields.get(ordinalityFieldIndex).getName(); + _ordinalityIndex = ordinalityFieldIndex; + } + } + + void ensureOrdinalityFromRowType(RelDataType rowType, boolean hasOrdinality) { + if (!hasOrdinality) { + return; + } + List<RelDataTypeField> fields = rowType.getFieldList(); + String ordAlias = fields.size() > _numArrays ? fields.get(_numArrays).getName() : null; + if (_ordinalityIndex < 0) { + _ordinalityIndex = _base + _numArrays; + } + if (_ordinalityAlias == null || SqlUnnestOperator.ORDINALITY_COLUMN_NAME.equals(_ordinalityAlias)) { + _ordinalityAlias = + (ordAlias != null && !ordAlias.isEmpty()) ? ordAlias : SqlUnnestOperator.ORDINALITY_COLUMN_NAME; + } + } + + ElementOrdinalInfo toInfo() { + // For backward compatibility, provide single element index if only one array + int singleElementIndex = _elementIndexes.isEmpty() ? -1 : _elementIndexes.get(0); + String singleElementAlias = _elementAliases.isEmpty() ? null : _elementAliases.get(0); + return new ElementOrdinalInfo(singleElementAlias, _ordinalityAlias, singleElementIndex, + _ordinalityIndex, _elementIndexes); + } + } + + private static final class ElementOrdinalInfo { + private final String _elementAlias; + private final String _ordinalityAlias; + private final int _elementIndex; + private final int _ordinalityIndex; + private final List<Integer> _elementIndexes; + + ElementOrdinalInfo(String elementAlias, String ordinalityAlias, int elementIndex, int ordinalityIndex) { + this(elementAlias, ordinalityAlias, elementIndex, ordinalityIndex, + elementIndex >= 0 ? List.of(elementIndex) : List.of()); + } + + ElementOrdinalInfo(String elementAlias, String ordinalityAlias, int elementIndex, int ordinalityIndex, + List<Integer> elementIndexes) { + _elementAlias = elementAlias; + _ordinalityAlias = ordinalityAlias; + _elementIndex = elementIndex; + _ordinalityIndex = ordinalityIndex; + _elementIndexes = elementIndexes; + } + + String getElementAlias() { + return _elementAlias; + } + + String getOrdinalityAlias() { + return _ordinalityAlias; + } + + int getElementIndex() { + return _elementIndex; + } + + List<Integer> getElementIndexes() { + return _elementIndexes; + } + + int getOrdinalityIndex() { + return _ordinalityIndex; + } + + boolean hasOrdinality() { + return _ordinalityIndex >= 0; + } + } + + private static RexExpression rewriteInputRefs(RexExpression expr, int elemOutIdx, int ordOutIdx) { + if (expr instanceof RexExpression.InputRef) { + int idx = ((RexExpression.InputRef) expr).getIndex(); + if (idx == 0 && elemOutIdx >= 0) { + return new RexExpression.InputRef(elemOutIdx); + } else if (idx == 1 && ordOutIdx >= 0) { + return new RexExpression.InputRef(ordOutIdx); + } else { + return expr; + } + } else if (expr instanceof RexExpression.FunctionCall) { + RexExpression.FunctionCall fc = (RexExpression.FunctionCall) expr; + List<RexExpression> ops = fc.getFunctionOperands(); + List<RexExpression> rewritten = new ArrayList<>(ops.size()); + for (RexExpression op : ops) { + rewritten.add(rewriteInputRefs(op, elemOutIdx, ordOutIdx)); + } + return new RexExpression.FunctionCall(fc.getDataType(), fc.getFunctionName(), rewritten); + } else { + return expr; + } + } + + private static RexExpression rewriteInputRefsForMultipleArrays(RexExpression expr, List<Integer> elemOutIdxs, + int ordOutIdx) { + if (expr instanceof RexExpression.InputRef) { + int idx = ((RexExpression.InputRef) expr).getIndex(); + // Map element indexes: 0 -> first element index, 1 -> second element index, etc. + if (idx >= 0 && idx < elemOutIdxs.size() && elemOutIdxs.get(idx) >= 0) { + return new RexExpression.InputRef(elemOutIdxs.get(idx)); + } else if (idx == elemOutIdxs.size() && ordOutIdx >= 0) { + // Ordinality index comes after all element indexes + return new RexExpression.InputRef(ordOutIdx); + } else { + return expr; + } + } else if (expr instanceof RexExpression.FunctionCall) { + RexExpression.FunctionCall fc = (RexExpression.FunctionCall) expr; + List<RexExpression> ops = fc.getFunctionOperands(); + List<RexExpression> rewritten = new ArrayList<>(ops.size()); + for (RexExpression op : ops) { + rewritten.add(rewriteInputRefsForMultipleArrays(op, elemOutIdxs, ordOutIdx)); + } + return new RexExpression.FunctionCall(fc.getDataType(), fc.getFunctionName(), rewritten); + } else { + return expr; + } + } + + private static Integer resolveInputRefFromCorrel(RexNode expr, RelDataType leftRowType) { + if (expr instanceof RexFieldAccess) { + RexFieldAccess fieldAccess = (RexFieldAccess) expr; + if (fieldAccess.getReferenceExpr() instanceof RexCorrelVariable) { + String fieldName = fieldAccess.getField().getName(); + List<RelDataTypeField> fields = leftRowType.getFieldList(); + // SQL field names are case-insensitive by default in Calcite, so we use equalsIgnoreCase for matching. + // NOTE: This assumes that the schema is configured with Calcite's default case-insensitivity. + // If the schema is case-sensitive, this approach may produce incorrect results. Update logic if needed. + for (int i = 0; i < fields.size(); i++) { + if (fields.get(i).getName().equalsIgnoreCase(fieldName)) { + return i; Review Comment: The case-insensitive field name matching includes a NOTE comment about potential incorrectness with case-sensitive schemas, but doesn't provide a way to detect or handle this case. Consider checking the schema's case sensitivity setting (if available via Calcite APIs) and adapting the comparison logic accordingly, or at minimum add a warning log when this method is used. ```suggestion String candidateName = fields.get(i).getName(); if (candidateName.equals(fieldName)) { return i; } else if (candidateName.equalsIgnoreCase(fieldName)) { // Log a warning if case-insensitive match is used, as this may be incorrect for case-sensitive schemas. LOGGER.warn("Case-insensitive field name match used for '{}'. This may be incorrect if the schema is case-sensitive.", fieldName); return i; ``` ########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/UnnestIntegrationTest.java: ########## @@ -0,0 +1,398 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.integration.tests.custom; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import java.io.File; +import java.util.List; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.Schema; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + + +@Test(suiteName = "CustomClusterIntegrationTest") +public class UnnestIntegrationTest extends CustomDataQueryClusterIntegrationTest { + + private static final String DEFAULT_TABLE_NAME = "UnnestIntegrationTest"; + private static final String INT_COLUMN = "intCol"; + private static final String LONG_COLUMN = "longCol"; + private static final String FLOAT_COLUMN = "floatCol"; + private static final String DOUBLE_COLUMN = "doubleCol"; + private static final String STRING_COLUMN = "stringCol"; + private static final String TIMESTAMP_COLUMN = "timestampCol"; + private static final String GROUP_BY_COLUMN = "groupKey"; + private static final String LONG_ARRAY_COLUMN = "longArrayCol"; + private static final String DOUBLE_ARRAY_COLUMN = "doubleArrayCol"; + private static final String STRING_ARRAY_COLUMN = "stringArrayCol"; + + @Test(dataProvider = "useV2QueryEngine") + public void testCountWithCrossJoinUnnest(boolean useMultiStageQueryEngine) + throws Exception { + setUseMultiStageQueryEngine(useMultiStageQueryEngine); + String query = String.format("SELECT COUNT(*) FROM %s CROSS JOIN UNNEST(longArrayCol) AS u(elem)", getTableName()); + JsonNode json = postQuery(query); + JsonNode rows = json.get("resultTable").get("rows"); + assertNotNull(rows); + long count = rows.get(0).get(0).asLong(); + assertEquals(count, 4 * getCountStarResult()); + } + + @Test(dataProvider = "useV2QueryEngine") + public void testSelectWithCrossJoinUnnest(boolean useMultiStageQueryEngine) + throws Exception { + setUseMultiStageQueryEngine(useMultiStageQueryEngine); + String query = String.format("SELECT intCol, u.elem FROM %s CROSS JOIN UNNEST(stringArrayCol) AS u(elem)" + + " ORDER BY intCol", getTableName()); + JsonNode json = postQuery(query); + JsonNode rows = json.get("resultTable").get("rows"); + assertNotNull(rows); + assertEquals(rows.size(), 3 * getCountStarResult()); + for (int i = 0; i < rows.size(); i++) { + JsonNode row = rows.get(i); + assertEquals(row.get(0).asInt(), i / 3); + switch (i % 3) { + case 0: + assertEquals(row.get(1).asText(), "a"); + break; + case 1: + assertEquals(row.get(1).asText(), "b"); + break; + case 2: + assertEquals(row.get(1).asText(), "c"); + break; + default: + break; + } + } + } + + @Test(dataProvider = "useV2QueryEngine") + public void testSelectWithCrossJoinUnnestOnMultiColumn(boolean useMultiStageQueryEngine) + throws Exception { + setUseMultiStageQueryEngine(useMultiStageQueryEngine); + String query = String.format( + "SELECT intCol, u.longValue, u.stringValue FROM %s CROSS JOIN UNNEST(longArrayCol, stringArrayCol) AS u" + + "(longValue, stringValue)" + + " ORDER BY intCol", getTableName()); + JsonNode json = postQuery(query); + System.out.println("json = " + json); Review Comment: Debug print statement using `System.out.println` should be removed or replaced with proper logging. Use the test framework's logging mechanism or remove if no longer needed for debugging. ```suggestion ``` ########## pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RelToPlanNodeConverter.java: ########## @@ -159,6 +170,492 @@ public PlanNode toPlanNode(RelNode node) { return result; } + private UnnestNode convertLogicalUncollect(Uncollect node) { + // Extract array expressions (typically from a Project with one or more expressions) + List<RexExpression> arrayExprs = new ArrayList<>(); + List<String> columnAliases = new ArrayList<>(); + RelNode input = node.getInput(); + boolean withOrdinality = node.withOrdinality; + String ordinalityAlias = withOrdinality ? SqlUnnestOperator.ORDINALITY_COLUMN_NAME : null; + + if (input instanceof Project) { + Project p = (Project) input; + List<RelDataTypeField> outputFields = node.getRowType().getFieldList(); + List<RelDataTypeField> projectFields = p.getRowType().getFieldList(); + int numProjects = p.getProjects().size(); + int numOutputFields = outputFields.size(); + + // Check if WITH ORDINALITY is present: output fields = project expressions + ordinality + if (numOutputFields > numProjects) { + withOrdinality = true; + String ordAlias = outputFields.get(numOutputFields - 1).getName(); + // Use the alias from Uncollect's rowType if available, otherwise use default + ordinalityAlias = (ordAlias != null && !ordAlias.isEmpty()) ? ordAlias : "ordinality"; + } + + // Extract all array expressions from the Project + // Field names come from Uncollect's rowType (which has the aliases from AS clause) + // Fall back to Project's field names if Uncollect's field names are empty/default + for (int i = 0; i < numProjects; i++) { + arrayExprs.add(RexExpressionUtils.fromRexNode(p.getProjects().get(i))); + String alias = null; + if (i < outputFields.size()) { + alias = outputFields.get(i).getName(); + // If alias is empty or null, try Project's field name + if ((alias == null || alias.isEmpty()) && i < projectFields.size()) { + alias = projectFields.get(i).getName(); + } + } else if (i < projectFields.size()) { + alias = projectFields.get(i).getName(); + } + columnAliases.add(resolveElementAlias(alias, i)); + } + } + + if (arrayExprs.isEmpty()) { + // Fallback: refer to first input ref + arrayExprs.add(new RexExpression.InputRef(0)); + List<RelDataTypeField> fields = node.getRowType().getFieldList(); + if (!fields.isEmpty()) { + String alias = fields.get(0).getName(); + columnAliases.add(resolveElementAlias(alias, 0)); + // Check for ordinality in fallback case + if (fields.size() > 1 && !withOrdinality) { + withOrdinality = true; + String ordAlias = fields.get(1).getName(); + ordinalityAlias = (ordAlias != null && !ordAlias.isEmpty()) ? ordAlias : "ordinality"; + } + } else { + columnAliases.add(resolveElementAlias(null, 0)); + } + } + + return new UnnestNode(DEFAULT_STAGE_ID, toDataSchema(node.getRowType()), NodeHint.EMPTY, + convertInputs(node.getInputs()), arrayExprs, columnAliases, withOrdinality, ordinalityAlias); + } + + private BasePlanNode convertLogicalCorrelate(LogicalCorrelate node) { + // Pattern: Correlate(left, Uncollect(Project(correlatedFields...))) + RelNode right = node.getRight(); + RelDataType leftRowType = node.getLeft().getRowType(); + Project aliasProject = right instanceof Project ? (Project) right : null; + Project correlatedProject = findProjectUnderUncollect(right); + List<RexExpression> arrayExprs = new ArrayList<>(); + List<String> columnAliases = new ArrayList<>(); + if (correlatedProject != null) { + List<RelDataTypeField> outputFields = node.getRowType().getFieldList(); + // Extract all array expressions from the Project + // The output fields include: left columns + array elements + (ordinality if present) + // We need to extract only the array element columns (skip left columns, skip ordinality if present) + int leftColumnCount = leftRowType.getFieldCount(); + int numProjects = correlatedProject.getProjects().size(); + List<RelDataTypeField> projectFields = correlatedProject.getRowType().getFieldList(); + for (int i = 0; i < numProjects; i++) { + RexNode rex = correlatedProject.getProjects().get(i); + RexExpression arrayExpr = deriveArrayExpression(rex, correlatedProject, leftRowType); + if (arrayExpr == null) { + arrayExpr = RexExpressionUtils.fromRexNode(rex); + } + arrayExprs.add(arrayExpr); + // For LogicalCorrelate, aliases come from the Correlate's output row type (after left columns) + // This includes the aliases specified in Uncollect.create() + // However, if Calcite auto-generates a name (like "arr0"), prefer the Project field name + int aliasIndex = leftColumnCount + i; + String alias = null; + if (aliasIndex < outputFields.size()) { + alias = outputFields.get(aliasIndex).getName(); + // Check if the alias looks auto-generated (ends with a digit) and prefer Project field name + if (alias != null && i < projectFields.size() && alias.matches(".*\\d$")) { + String projectFieldName = projectFields.get(i).getName(); + if (projectFieldName != null && !projectFieldName.isEmpty()) { + // Use Project field name if it doesn't end with a digit (not auto-generated) + if (!projectFieldName.matches(".*\\d$")) { + alias = projectFieldName; + } + } + } Review Comment: The heuristic of detecting auto-generated names by checking if they end with a digit (`.matches(\".*\\\\d$\")`) is fragile and could misclassify legitimate user-provided aliases like `column1` or `arr2`. This pattern matching approach may fail for schemas with numeric column names. Consider using a more robust method to detect auto-generated names, such as checking for specific Calcite naming patterns (e.g., `EXPR$0`, `arr0`) or maintaining a list of known auto-generated prefixes. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
