hsiang-c commented on code in PR #3986:
URL: https://github.com/apache/datafusion-comet/pull/3986#discussion_r3148735738


##########
spark/src/test/scala/org/apache/comet/exec/CometJoinSuite.scala:
##########
@@ -54,21 +54,119 @@ class CometJoinSuite extends CometTestBase {
         .toSeq)
   }
 
-  test("SortMergeJoin with unsupported key type should fall back to Spark") {
+  test("SortMergeJoin with TimestampType key runs natively") {
     withSQLConf(
       SQLConf.SESSION_LOCAL_TIMEZONE.key -> "Asia/Kathmandu",
       SQLConf.ADAPTIVE_AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
-      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
+      SQLConf.PREFER_SORTMERGEJOIN.key -> "true") {
       withTable("t1", "t2") {
         sql("CREATE TABLE t1(name STRING, time TIMESTAMP) USING PARQUET")
-        sql("INSERT OVERWRITE t1 VALUES('a', timestamp'2019-01-01 11:11:11')")
+        sql(
+          "INSERT OVERWRITE t1 VALUES " +
+            "('a', timestamp'2019-01-01 11:11:11'), " +
+            "('b', timestamp'2020-05-05 05:05:05')")
 
         sql("CREATE TABLE t2(name STRING, time TIMESTAMP) USING PARQUET")
-        sql("INSERT OVERWRITE t2 VALUES('a', timestamp'2019-01-01 11:11:11')")
+        sql(
+          "INSERT OVERWRITE t2 VALUES " +
+            "('a', timestamp'2019-01-01 11:11:11'), " +
+            "('c', timestamp'2021-07-07 07:07:07')")
+
+        checkSparkAnswerAndOperator(
+          sql("SELECT * FROM t1 JOIN t2 ON t1.time = t2.time"),
+          Seq(classOf[CometSortMergeJoinExec]))
+      }
+    }
+  }
+
+  test("SortMergeJoin with TimestampType key supports outer joins") {
+    withSQLConf(
+      SQLConf.SESSION_LOCAL_TIMEZONE.key -> "Asia/Kathmandu",
+      SQLConf.ADAPTIVE_AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
+      SQLConf.PREFER_SORTMERGEJOIN.key -> "true") {
+      withTable("t1", "t2") {
+        sql("CREATE TABLE t1(id INT, time TIMESTAMP) USING PARQUET")
+        sql(
+          "INSERT OVERWRITE t1 VALUES " +
+            "(1, timestamp'2019-01-01 11:11:11'), " +
+            "(2, timestamp'2020-05-05 05:05:05'), " +
+            "(3, timestamp'2021-07-07 07:07:07')")
+
+        sql("CREATE TABLE t2(id INT, time TIMESTAMP) USING PARQUET")
+        sql(
+          "INSERT OVERWRITE t2 VALUES " +
+            "(10, timestamp'2019-01-01 11:11:11'), " +
+            "(20, timestamp'2022-02-02 02:02:02')")
+
+        for (joinType <- Seq("LEFT OUTER", "RIGHT OUTER", "FULL OUTER")) {

Review Comment:
   👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to