Copilot commented on code in PR #16497: URL: https://github.com/apache/pinot/pull/16497#discussion_r2253005989
########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MinStringAggregationFunction.java: ########## @@ -0,0 +1,173 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.aggregation.function; + +import java.util.List; +import java.util.Map; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.query.aggregation.AggregationResultHolder; +import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder; +import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.spi.exception.BadQueryRequestException; + + +public class MinStringAggregationFunction extends NullableSingleInputAggregationFunction<String, String> { + + public MinStringAggregationFunction(List<ExpressionContext> arguments, boolean nullHandlingEnabled) { + super(verifySingleArgument(arguments, "MINSTRING"), nullHandlingEnabled); + } + + @Override + public AggregationFunctionType getType() { + return AggregationFunctionType.MINSTRING; + } + + @Override + public AggregationResultHolder createAggregationResultHolder() { + return new ObjectAggregationResultHolder(); + } + + @Override + public GroupByResultHolder createGroupByResultHolder(int initialCapacity, int maxCapacity) { + return new ObjectGroupByResultHolder(initialCapacity, maxCapacity); + } + + @Override + public void aggregate(int length, AggregationResultHolder aggregationResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + if (blockValSet.getValueType().isNumeric()) { + throw new BadQueryRequestException("Cannot compute MINSTRING for numeric column: " + + blockValSet.getValueType()); + } + String[] stringValues = blockValSet.getStringValuesSV(); + forEachNotNull(length, blockValSet, (from, to) -> { + for (int i = from; i < to; i++) { + String value = stringValues[i]; + // Ignore null and "null" string literals + if (value == null || "null".equals(value)) { + continue; + } + String currentMin = aggregationResultHolder.getResult(); + // Update the currentMin if a smaller string value is found + if (currentMin == null || value.compareTo(currentMin) < 0) { + aggregationResultHolder.setValue(value); + } + } + }); + } + + @Override + public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHolder groupByResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + if (blockValSet.getValueType().isNumeric()) { + throw new BadQueryRequestException("Cannot compute MINSTRING for numeric column: " + + blockValSet.getValueType()); + } + String[] stringValues = blockValSet.getStringValuesSV(); + forEachNotNull(length, blockValSet, (from, to) -> { + for (int i = from; i < to; i++) { + String value = stringValues[i]; + // For SV, "null" as a string literal can exist and needs to be handled + if (value == null || "null".equals(value)) { + continue; + } + int groupKey = groupKeyArray[i]; + String currentMin = groupByResultHolder.getResult(groupKey); + if (currentMin == null || "null".equals(currentMin) || value.compareTo(currentMin) < 0) { Review Comment: The condition `"null".equals(currentMin)` is redundant here. Since the aggregation logic already skips "null" string literals in the input processing (lines 93-95), currentMin should never be "null" at this point. ```suggestion if (currentMin == null || value.compareTo(currentMin) < 0) { ``` ########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunction.java: ########## @@ -0,0 +1,173 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.aggregation.function; + +import java.util.List; +import java.util.Map; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.query.aggregation.AggregationResultHolder; +import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder; +import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.spi.exception.BadQueryRequestException; + + +public class MaxStringAggregationFunction extends NullableSingleInputAggregationFunction<String, String> { + + public MaxStringAggregationFunction(List<ExpressionContext> arguments, boolean nullHandlingEnabled) { + super(verifySingleArgument(arguments, "MAXSTRING"), nullHandlingEnabled); + } + + @Override + public AggregationFunctionType getType() { + return AggregationFunctionType.MAXSTRING; + } + + @Override + public AggregationResultHolder createAggregationResultHolder() { + return new ObjectAggregationResultHolder(); + } + + @Override + public GroupByResultHolder createGroupByResultHolder(int initialCapacity, int maxCapacity) { + return new ObjectGroupByResultHolder(initialCapacity, maxCapacity); + } + + @Override + public void aggregate(int length, AggregationResultHolder aggregationResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + if (blockValSet.getValueType().isNumeric()) { + throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric column: " + + blockValSet.getValueType()); + } + String[] stringValues = blockValSet.getStringValuesSV(); + forEachNotNull(length, blockValSet, (from, to) -> { + for (int i = from; i < to; i++) { + String value = stringValues[i]; + // Ignore null and "null" string literals + if (value == null || "null".equals(value)) { + continue; + } + String currentMax = aggregationResultHolder.getResult(); + // Update the currentMax if a larger string value is found + if (currentMax == null || value.compareTo(currentMax) > 0) { + aggregationResultHolder.setValue(value); + } + } + }); + } + + @Override + public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHolder groupByResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + if (blockValSet.getValueType().isNumeric()) { + throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric column: " + + blockValSet.getValueType()); + } + String[] stringValues = blockValSet.getStringValuesSV(); + forEachNotNull(length, blockValSet, (from, to) -> { + for (int i = from; i < to; i++) { + String value = stringValues[i]; + // For SV, "null" as a string literal can exist and needs to be handled + if (value == null || "null".equals(value)) { + continue; + } + int groupKey = groupKeyArray[i]; + String currentMax = groupByResultHolder.getResult(groupKey); + if (currentMax == null || "null".equals(currentMax) || value.compareTo(currentMax) > 0) { Review Comment: The condition `"null".equals(currentMax)` is redundant here. Since the aggregation logic already skips "null" string literals in the input processing (lines 93-95), currentMax should never be "null" at this point. ```suggestion if (currentMax == null || value.compareTo(currentMax) > 0) { ``` ########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MinStringAggregationFunction.java: ########## @@ -0,0 +1,173 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.aggregation.function; + +import java.util.List; +import java.util.Map; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.query.aggregation.AggregationResultHolder; +import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder; +import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.spi.exception.BadQueryRequestException; + + +public class MinStringAggregationFunction extends NullableSingleInputAggregationFunction<String, String> { + + public MinStringAggregationFunction(List<ExpressionContext> arguments, boolean nullHandlingEnabled) { + super(verifySingleArgument(arguments, "MINSTRING"), nullHandlingEnabled); + } + + @Override + public AggregationFunctionType getType() { + return AggregationFunctionType.MINSTRING; + } + + @Override + public AggregationResultHolder createAggregationResultHolder() { + return new ObjectAggregationResultHolder(); + } + + @Override + public GroupByResultHolder createGroupByResultHolder(int initialCapacity, int maxCapacity) { + return new ObjectGroupByResultHolder(initialCapacity, maxCapacity); + } + + @Override + public void aggregate(int length, AggregationResultHolder aggregationResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + if (blockValSet.getValueType().isNumeric()) { + throw new BadQueryRequestException("Cannot compute MINSTRING for numeric column: " + + blockValSet.getValueType()); + } + String[] stringValues = blockValSet.getStringValuesSV(); + forEachNotNull(length, blockValSet, (from, to) -> { + for (int i = from; i < to; i++) { + String value = stringValues[i]; + // Ignore null and "null" string literals + if (value == null || "null".equals(value)) { + continue; + } + String currentMin = aggregationResultHolder.getResult(); + // Update the currentMin if a smaller string value is found + if (currentMin == null || value.compareTo(currentMin) < 0) { + aggregationResultHolder.setValue(value); + } + } + }); + } + + @Override + public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHolder groupByResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + if (blockValSet.getValueType().isNumeric()) { + throw new BadQueryRequestException("Cannot compute MINSTRING for numeric column: " + + blockValSet.getValueType()); + } + String[] stringValues = blockValSet.getStringValuesSV(); + forEachNotNull(length, blockValSet, (from, to) -> { + for (int i = from; i < to; i++) { + String value = stringValues[i]; + // For SV, "null" as a string literal can exist and needs to be handled + if (value == null || "null".equals(value)) { + continue; + } + int groupKey = groupKeyArray[i]; + String currentMin = groupByResultHolder.getResult(groupKey); + if (currentMin == null || "null".equals(currentMin) || value.compareTo(currentMin) < 0) { + groupByResultHolder.setValueForKey(groupKey, value); + } + } + }); + } + + @Override + public void aggregateGroupByMV(int length, int[][] groupKeysArray, GroupByResultHolder groupByResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + if (blockValSet.getValueType().isNumeric()) { + throw new BadQueryRequestException("Cannot compute MINSTRING for numeric column: " + + blockValSet.getValueType()); + } + String[] stringValues = blockValSet.getStringValuesSV(); + forEachNotNull(length, blockValSet, (from, to) -> { + for (int i = from; i < to; i++) { + String value = stringValues[i]; + // For MV, "null" as a string literal can exist and needs to be handled + if (value == null || "null".equals(value)) { + continue; + } + for (int groupKey : groupKeysArray[i]) { + String currentMin = groupByResultHolder.getResult(groupKey); + if (currentMin == null || "null".equals(currentMin) || value.compareTo(currentMin) < 0) { Review Comment: The condition `"null".equals(currentMin)` is redundant here. Since the aggregation logic already skips "null" string literals in the input processing (lines 118-120), currentMin should never be "null" at this point. ```suggestion if (currentMin == null || value.compareTo(currentMin) < 0) { ``` ########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunction.java: ########## @@ -0,0 +1,173 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.aggregation.function; + +import java.util.List; +import java.util.Map; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.query.aggregation.AggregationResultHolder; +import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder; +import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.spi.exception.BadQueryRequestException; + + +public class MaxStringAggregationFunction extends NullableSingleInputAggregationFunction<String, String> { + + public MaxStringAggregationFunction(List<ExpressionContext> arguments, boolean nullHandlingEnabled) { + super(verifySingleArgument(arguments, "MAXSTRING"), nullHandlingEnabled); + } + + @Override + public AggregationFunctionType getType() { + return AggregationFunctionType.MAXSTRING; + } + + @Override + public AggregationResultHolder createAggregationResultHolder() { + return new ObjectAggregationResultHolder(); + } + + @Override + public GroupByResultHolder createGroupByResultHolder(int initialCapacity, int maxCapacity) { + return new ObjectGroupByResultHolder(initialCapacity, maxCapacity); + } + + @Override + public void aggregate(int length, AggregationResultHolder aggregationResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + if (blockValSet.getValueType().isNumeric()) { + throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric column: " + + blockValSet.getValueType()); + } + String[] stringValues = blockValSet.getStringValuesSV(); + forEachNotNull(length, blockValSet, (from, to) -> { + for (int i = from; i < to; i++) { + String value = stringValues[i]; + // Ignore null and "null" string literals + if (value == null || "null".equals(value)) { + continue; + } + String currentMax = aggregationResultHolder.getResult(); + // Update the currentMax if a larger string value is found + if (currentMax == null || value.compareTo(currentMax) > 0) { + aggregationResultHolder.setValue(value); + } + } + }); + } + + @Override + public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHolder groupByResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + if (blockValSet.getValueType().isNumeric()) { + throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric column: " + + blockValSet.getValueType()); + } + String[] stringValues = blockValSet.getStringValuesSV(); + forEachNotNull(length, blockValSet, (from, to) -> { + for (int i = from; i < to; i++) { + String value = stringValues[i]; + // For SV, "null" as a string literal can exist and needs to be handled + if (value == null || "null".equals(value)) { + continue; + } + int groupKey = groupKeyArray[i]; + String currentMax = groupByResultHolder.getResult(groupKey); + if (currentMax == null || "null".equals(currentMax) || value.compareTo(currentMax) > 0) { + groupByResultHolder.setValueForKey(groupKey, value); + } + } + }); + } + + @Override + public void aggregateGroupByMV(int length, int[][] groupKeysArray, GroupByResultHolder groupByResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + if (blockValSet.getValueType().isNumeric()) { + throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric column: " + + blockValSet.getValueType()); + } + String[] stringValues = blockValSet.getStringValuesSV(); + forEachNotNull(length, blockValSet, (from, to) -> { + for (int i = from; i < to; i++) { + String value = stringValues[i]; + // For MV, "null" as a string literal can exist and needs to be handled + if (value == null || "null".equals(value)) { + continue; + } + for (int groupKey : groupKeysArray[i]) { + String currentMax = groupByResultHolder.getResult(groupKey); + if (currentMax == null || "null".equals(currentMax) || value.compareTo(currentMax) > 0) { Review Comment: The condition `"null".equals(currentMax)` is redundant here. Since the aggregation logic already skips "null" string literals in the input processing (lines 118-120), currentMax should never be "null" at this point. ```suggestion if (currentMax == null || value.compareTo(currentMax) > 0) { ``` ########## pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunctionTest.java: ########## @@ -0,0 +1,238 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.aggregation.function; + +import java.util.Collections; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.RequestContextUtils; +import org.apache.pinot.queries.FluentQueryTest; +import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.Schema; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNull; + + +public class MaxStringAggregationFunctionTest extends AbstractAggregationFunctionTest { + + /** + * Helper method to create a FluentQueryTest builder for a table with a single String field. + * This is used to simulate the DataTypeScenario concept from numeric aggregation tests, + * but fixed for the STRING data type. + */ + protected FluentQueryTest.DeclaringTable getDeclaringTable(boolean enableColumnBasedNullHandling) { + return FluentQueryTest.withBaseDir(_baseDir) + .givenTable( + new Schema.SchemaBuilder() + .setSchemaName("testTable") + .setEnableColumnBasedNullHandling(enableColumnBasedNullHandling) + .addSingleValueDimension("myField", FieldSpec.DataType.STRING) + .build(), SINGLE_FIELD_TABLE_CONFIG); + } + + @Test + public void testFunctionBasics() { + ExpressionContext expression = RequestContextUtils.getExpression("column"); + MaxStringAggregationFunction function = new MaxStringAggregationFunction(Collections.singletonList(expression), + false); + + // Test function type + assertEquals(function.getType(), AggregationFunctionType.MAXSTRING); + + // Test string comparisons + assertEquals(function.merge("apple", "banana"), "banana"); + assertEquals(function.merge("banana", "apple"), "banana"); + assertEquals(function.merge("", "apple"), "apple"); + assertEquals(function.merge("apple", ""), "apple"); + + // Test null handling + assertEquals(function.merge("apple", null), "apple"); + assertEquals(function.merge(null, "apple"), "apple"); + assertNull(function.merge(null, null)); + + // Test final result merging + assertEquals(function.mergeFinalResult("apple", "banana"), "banana"); + } + + @Test + void aggregationAllNullsWithNullHandlingDisabled() { + // For MAXSTRING, when null handling is disabled, and all values are null, + // the result should be 'null' as there's no valid string to compare. + // This differs from numeric MAX/MIN which might return an initial default value. + getDeclaringTable(false) // nullHandlingEnabled = false + .onFirstInstance("myField", + "null", + "null" + ).andOnSecondInstance("myField", + "null" + ).whenQuery("select maxstring(myField) from testTable") + .thenResultIs("STRING", "\"null\""); // Asserting "null" as a string literal for the result + } + + @Test + void aggregationAllNullsWithNullHandlingEnabled() { + // When null handling is enabled, and all values are null, the result should also be 'null'. + getDeclaringTable(true) // nullHandlingEnabled = true + .onFirstInstance("myField", + "null", + "null" + ).andOnSecondInstance("myField", + "null" + ).whenQuery("select maxstring(myField) from testTable") + .thenResultIs("STRING", "\"null\""); // Asserting "null" as a string literal for the result + } + + @Test + void aggregationGroupBySVAllNullsWithNullHandlingDisabled() { + // For group by, if all values in a group are null and null handling is disabled, + // the group's result for MAXSTRING should be 'null'. + getDeclaringTable(false) // nullHandlingEnabled = false + .onFirstInstance("myField", + "null", + "null" + ).andOnSecondInstance("myField", + "null" + ).whenQuery("select 'literal', maxstring(myField) from testTable group by 'literal'") + // Expected "null" as a string literal for the aggregated column + .thenResultIs("STRING | STRING", "literal | \"null\""); + } + + @Test + void aggregationGroupBySVAllNullsWithNullHandlingEnabled() { + // For group by, if all values in a group are null and null handling is enabled, + // the group's result for MAXSTRING should be 'null'. + getDeclaringTable(true) // nullHandlingEnabled = true + .onFirstInstance("myField", + "null", + "null" + ).andOnSecondInstance("myField", + "null" + ).whenQuery("select 'literal', maxstring(myField) from testTable group by 'literal'") + .thenResultIs("STRING | STRING", "literal | \"null\""); + } + + @Test + void aggregationWithNullHandlingDisabled() { + // With null handling disabled, null values are effectively skipped, and the maximum non-null + // string should be found. The updated function handles this correctly. + getDeclaringTable(false) // nullHandlingEnabled = false + .onFirstInstance("myField", + "cat", + "null", + "apple" + ).andOnSecondInstance("myField", + "null", + "zebra", + "null" + ).whenQuery("select maxstring(myField) from testTable") + .thenResultIs("STRING", "zebra"); // Max of {"cat", "apple", "zebra"} is "zebra" + } + + @Test + void aggregationWithNullHandlingEnabled() { + // With null handling enabled, null values are explicitly ignored, and the maximum non-null + // string should be found. The updated function handles this correctly. + getDeclaringTable(true) // nullHandlingEnabled = true + .onFirstInstance("myField", + "cat", + "null", + "apple" + ).andOnSecondInstance("myField", + "null", + "zebra", + "null" + ).whenQuery("select maxstring(myField) from testTable") + .thenResultIs("STRING", "zebra"); // Max of {"cat", "apple", "zebra"} is "zebra" + } + + @Test + void aggregationGroupBySVWithNullHandlingDisabled() { + // Group By on a single value (SV) column with mixed nulls and non-nulls. + // Null handling disabled: nulls are ignored if there's at least one non-null value in the group. + // The updated function should now correctly find the max among non-nulls. + getDeclaringTable(false) // nullHandlingEnabled = false + .onFirstInstance("myField", + "alpha", // Grouped with 'literal' + "null", // Grouped with 'literal' + "gamma" // Grouped with 'literal' + ).andOnSecondInstance("myField", + "null", // Grouped with 'literal' + "beta", // Grouped with 'literal' + "null" // Grouped with 'literal' + ).whenQuery("select 'literal', maxstring(myField) from testTable group by 'literal'") + .thenResultIs("STRING | STRING", "literal | gamma"); // Max of {"alpha", "gamma", "beta"} is "gamma" + } + + @Test + void aggregationGroupBySVWithNullHandlingEnabled() { + // Group By on a single value (SV) column with mixed nulls and non-nulls. + // Null handling enabled: nulls are ignored. + // The updated function should now correctly find the max among non-nulls. + getDeclaringTable(true) // nullHandlingEnabled = true + .onFirstInstance("myField", + "alpha", // Grouped with 'literal' + "null", // Grouped with 'literal' + "gamma" // Grouped with 'literal' + ).andOnSecondInstance("myField", + "null", // Grouped with 'literal' + "beta", // Grouped with 'literal' + "null" // Grouped with 'literal' + ).whenQuery("select 'literal', maxstring(myField) from testTable group by 'literal'") + .thenResultIs("STRING | STRING", "literal | gamma"); // Max of {"alpha", "gamma", "beta"} is "gamma" + } + + @Test + void aggregationGroupByMV() { + FluentQueryTest.withBaseDir(_baseDir) + .givenTable( + new Schema.SchemaBuilder() + .setSchemaName("testTable") + .setEnableColumnBasedNullHandling(true) // Set at schema level for general behavior + .addMultiValueDimension("tags", FieldSpec.DataType.STRING) // Dimension for tags + .addDimensionField("value", FieldSpec.DataType.STRING) + .build(), SINGLE_FIELD_TABLE_CONFIG) + .onFirstInstance( + new Object[]{"tag1;tag2", "banana"}, // Row 1: tag1 -> "banana", tag2 -> "banana" + new Object[]{"tag2;tag3", null} // Row 2: tag2 -> null, tag3 -> null + ) + .andOnSecondInstance( + new Object[]{"tag1;tag2", "apple"}, // Row 3: tag1 -> "apple", tag2 -> "apple" + new Object[]{"tag2;tag3", "cherry"} // Row 4: tag2 -> "cherry", tag3 -> "cherry" + ) + // Query without explicit null handling enabled via query option (uses table schema setting or default) + .whenQuery("select tags, MAXSTRING(value) from testTable group by tags order by tags") + .thenResultIs( + "STRING | STRING", + "tag1 | banana", // Values for tag1: "banana", "apple". Max is "banana". + "tag2 | cherry", Review Comment: The comment on line 225 is incomplete. It should specify what values are being aggregated for tag2, similar to the comments for tag1 and tag3. ```suggestion "tag2 | cherry", // Values for tag2: "banana", null, "apple", "cherry". Max is "cherry". ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
