comphead commented on code in PR #4003:
URL: https://github.com/apache/datafusion-comet/pull/4003#discussion_r3150573805
##########
spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala:
##########
@@ -650,6 +650,87 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
}
+ // FIRST/LAST are order-dependent aggregates whose merge result depends on hash table
+ // processing order. In PartialMerge mode, DataFusion's hash table may process rows
+ // in a different order than Spark's, so we fall back to Spark for correctness.
+ test("partialMerge - FIRST/LAST with distinct aggregates falls back") {
+ val numValues = 10000
+ Seq(100).foreach { numGroups =>
+ Seq(128).foreach { batchSize =>
+ withSQLConf(
+ SQLConf.COALESCE_PARTITIONS_ENABLED.key -> "true",
+ CometConf.COMET_BATCH_SIZE.key -> batchSize.toString) {
+ withParquetTable(
+ (0 until numValues).map(i => (i, Random.nextInt() % numGroups)),
+ "tbl",
+ false) {
+ withView("v") {
+ sql("CREATE TEMP VIEW v AS SELECT _1, _2 FROM tbl ORDER BY _1")
+// checkSparkAnswerAndFallbackReason(
Review Comment:
Oh, this is a good catch: FIRST/LAST should correctly fall back.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]