parthchandra commented on code in PR #3611:
URL: https://github.com/apache/datafusion-comet/pull/3611#discussion_r3030404196


##########
spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala:
##########
@@ -930,6 +930,82 @@ class CometArrayExpressionSuite extends CometTestBase with 
AdaptiveSparkPlanHelp
     }
   }
 
+  test("array_exists - DataFrame API") {

Review Comment:
   Could we add test cases with three-valued logic both enabled and disabled? It's a
legacy feature — `SQLConf.LEGACY_ARRAY_EXISTS_FOLLOWS_THREE_VALUED_LOGIC`
(`spark.sql.legacy.followThreeValuedLogicInArrayExists`) — so I'm not sure whether we
want to spend too much effort on it.



##########
spark/src/main/scala/org/apache/comet/serde/arrays.scala:
##########
@@ -622,6 +622,80 @@ object CometArrayFilter extends 
CometExpressionSerde[ArrayFilter] {
   }
 }
 
+object CometArrayExists extends CometExpressionSerde[ArrayExists] {
+
+  override def getSupportLevel(expr: ArrayExists): SupportLevel = {
+    val elementType = 
expr.argument.dataType.asInstanceOf[ArrayType].elementType
+    elementType match {
+      case BooleanType | ByteType | ShortType | IntegerType | LongType | 
FloatType | DoubleType |
+          _: DecimalType | DateType | TimestampType | TimestampNTZType | 
StringType =>
+        Compatible()
+      case _ => Unsupported(Some(s"element type not supported: $elementType"))
+    }
+  }
+
+  override def convert(
+      expr: ArrayExists,
+      inputs: Seq[Attribute],
+      binding: Boolean): Option[ExprOuterClass.Expr] = {
+    val arrayExprProto = exprToProto(expr.argument, inputs, binding)
+    if (arrayExprProto.isEmpty) {
+      withInfo(expr, expr.argument)
+      return None
+    }
+
+    expr.function match {
+      case LambdaFunction(body, Seq(_: NamedLambdaVariable), _) =>

Review Comment:
   Do we need a fallback for cases where we cannot support a particular lambda? 



##########
spark/src/main/scala/org/apache/comet/serde/arrays.scala:
##########
@@ -622,6 +622,80 @@ object CometArrayFilter extends 
CometExpressionSerde[ArrayFilter] {
   }
 }
 
+object CometArrayExists extends CometExpressionSerde[ArrayExists] {
+
+  override def getSupportLevel(expr: ArrayExists): SupportLevel = {
+    val elementType = 
expr.argument.dataType.asInstanceOf[ArrayType].elementType
+    elementType match {
+      case BooleanType | ByteType | ShortType | IntegerType | LongType | 
FloatType | DoubleType |
+          _: DecimalType | DateType | TimestampType | TimestampNTZType | 
StringType =>
+        Compatible()
+      case _ => Unsupported(Some(s"element type not supported: $elementType"))

Review Comment:
   Do complex types make sense as element type for `array_exists`?



##########
spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala:
##########
@@ -930,6 +930,82 @@ class CometArrayExpressionSuite extends CometTestBase with 
AdaptiveSparkPlanHelp
     }
   }
 
+  test("array_exists - DataFrame API") {
+    val table = "t1"
+    withTable(table) {
+      sql(s"create table $table(arr array<int>, threshold int) using parquet")
+      sql(s"insert into $table values (array(1, 2, 3), 2)")
+      sql(s"insert into $table values (array(1, 2), 5)")
+      sql(s"insert into $table values (array(), 0)")
+      sql(s"insert into $table values (null, 1)")
+      sql(s"insert into $table values (array(1, null, 3), 2)")
+
+      val df = spark.table(table)
+
+      checkSparkAnswerAndOperator(df.select(exists(col("arr"), x => x > 2)))
+      checkSparkAnswerAndOperator(df.select(exists(col("arr"), x => x > 
col("threshold"))))
+      checkSparkAnswerAndOperator(
+        df.select(
+          exists(col("arr"), x => x > 0).as("any_positive"),
+          exists(col("arr"), x => x > 100).as("any_large")))
+    }
+  }
+
+  test("array_exists - DataFrame API with decimal") {
+    val table = "t1"
+    withTable(table) {
+      sql(s"create table $table(arr array<decimal(10,2)>) using parquet")
+      sql(s"insert into $table values (array(1.50, 2.75, 3.25))")
+      sql(s"insert into $table values (array(0.10, 0.20))")
+
+      val df = spark.table(table)
+      checkSparkAnswerAndOperator(df.select(exists(col("arr"), x => x > 2.0)))
+    }
+  }
+
+  test("array_exists - DataFrame API with date") {
+    val table = "t1"
+    withTable(table) {
+      sql(s"create table $table(arr array<date>) using parquet")
+      sql(s"insert into $table values (array(date'2024-01-01', 
date'2024-06-15'))")
+      sql(s"insert into $table values (array(date'2023-01-01'))")
+
+      val df = spark.table(table)
+      checkSparkAnswerAndOperator(
+        df.select(exists(col("arr"), x => x > lit("2024-03-01").cast("date"))))

Review Comment:
   Can we add tests where the lambda is a literal `false` and a literal `null`
predicate (i.e., always false and always null)?



##########
spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala:
##########
@@ -930,6 +930,82 @@ class CometArrayExpressionSuite extends CometTestBase with 
AdaptiveSparkPlanHelp
     }
   }
 
+  test("array_exists - DataFrame API") {
+    val table = "t1"
+    withTable(table) {
+      sql(s"create table $table(arr array<int>, threshold int) using parquet")
+      sql(s"insert into $table values (array(1, 2, 3), 2)")
+      sql(s"insert into $table values (array(1, 2), 5)")
+      sql(s"insert into $table values (array(), 0)")
+      sql(s"insert into $table values (null, 1)")
+      sql(s"insert into $table values (array(1, null, 3), 2)")
+
+      val df = spark.table(table)
+
+      checkSparkAnswerAndOperator(df.select(exists(col("arr"), x => x > 2)))
+      checkSparkAnswerAndOperator(df.select(exists(col("arr"), x => x > 
col("threshold"))))
+      checkSparkAnswerAndOperator(
+        df.select(
+          exists(col("arr"), x => x > 0).as("any_positive"),
+          exists(col("arr"), x => x > 100).as("any_large")))
+    }
+  }
+
+  test("array_exists - DataFrame API with decimal") {
+    val table = "t1"
+    withTable(table) {
+      sql(s"create table $table(arr array<decimal(10,2)>) using parquet")
+      sql(s"insert into $table values (array(1.50, 2.75, 3.25))")
+      sql(s"insert into $table values (array(0.10, 0.20))")
+
+      val df = spark.table(table)
+      checkSparkAnswerAndOperator(df.select(exists(col("arr"), x => x > 2.0)))
+    }
+  }
+
+  test("array_exists - DataFrame API with date") {
+    val table = "t1"
+    withTable(table) {
+      sql(s"create table $table(arr array<date>) using parquet")
+      sql(s"insert into $table values (array(date'2024-01-01', 
date'2024-06-15'))")
+      sql(s"insert into $table values (array(date'2023-01-01'))")
+
+      val df = spark.table(table)
+      checkSparkAnswerAndOperator(
+        df.select(exists(col("arr"), x => x > lit("2024-03-01").cast("date"))))

Review Comment:
   Nice test!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to