huaxingao commented on code in PR #13167:
URL: https://github.com/apache/iceberg/pull/13167#discussion_r2115232169


##########
spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestStoragePartitionedJoins.java:
##########
@@ -549,6 +555,88 @@ public void testJoinsWithMismatchingPartitionKeys() {
         tableName(OTHER_TABLE_NAME));
   }
 
+  @TestTemplate
+  public void testJoinsCompatibleBucketNumbers() {
+    sql(
+        "CREATE TABLE %s (id BIGINT, int_col INT, dep STRING)"
+            + "USING iceberg "
+            + "PARTITIONED BY (bucket(4, id))"
+            + "TBLPROPERTIES (%s)",
+        tableName, tablePropsAsString(TABLE_PROPERTIES));
+
+    sql("INSERT INTO %s VALUES (1L, 100, 'software')", tableName);
+    sql("INSERT INTO %s VALUES (2L, 101, 'hr')", tableName);
+    sql("INSERT INTO %s VALUES (3L, 102, 'operation')", tableName);
+    sql("INSERT INTO %s VALUES (4L, 103, 'sales')", tableName);
+    sql("INSERT INTO %s VALUES (5L, 104, 'marketing')", tableName);
+    sql("INSERT INTO %s VALUES (6L, 105, 'pr')", tableName);
+
+    sql(
+        "CREATE TABLE %s (id BIGINT, int_col INT, dep STRING)"
+            + "USING iceberg "
+            + "PARTITIONED BY (bucket(6, id))"
+            + "TBLPROPERTIES (%s)",
+        tableName(OTHER_TABLE_NAME), tablePropsAsString(TABLE_PROPERTIES));
+
+    sql("INSERT INTO %s VALUES (1L, 100, 'software')", 
tableName(OTHER_TABLE_NAME));
+    sql("INSERT INTO %s VALUES (3L, 300, 'hardware')", 
tableName(OTHER_TABLE_NAME));
+    sql("INSERT INTO %s VALUES (4L, 103, 'sales')", tableName);
+    sql("INSERT INTO %s VALUES (5L, 104, 'marketing')", tableName);
+    sql("INSERT INTO %s VALUES (6L, 105, 'pr')", tableName);
+
+    assertPartitioningAwarePlan(
+        1, /* expected num of shuffles with SPJ */
+        3, /* expected num of shuffles without SPJ */
+        "SELECT * "
+            + "FROM %s t1 "
+            + "INNER JOIN %s t2 "
+            + "ON t1.id = t2.id "
+            + "ORDER BY t1.id, t1.int_col, t1.dep, t2.id, t2.int_col, t2.dep",
+        tableName,
+        tableName(OTHER_TABLE_NAME));
+  }
+
+  @TestTemplate
+  public void testJoinsIncompatibleBucketNumbers() {
+    sql(
+        "CREATE TABLE %s (id BIGINT, int_col INT, dep STRING)"
+            + "USING iceberg "
+            + "PARTITIONED BY (bucket(3, id))"
+            + "TBLPROPERTIES (%s)",
+        tableName, tablePropsAsString(TABLE_PROPERTIES));
+
+    sql("INSERT INTO %s VALUES (1L, 100, 'software')", tableName);
+    sql("INSERT INTO %s VALUES (2L, 101, 'hr')", tableName);
+    sql("INSERT INTO %s VALUES (3L, 102, 'operation')", tableName);
+    sql("INSERT INTO %s VALUES (4L, 103, 'sales')", tableName);
+    sql("INSERT INTO %s VALUES (5L, 104, 'marketing')", tableName);
+    sql("INSERT INTO %s VALUES (6L, 105, 'pr')", tableName);
+
+    sql(
+        "CREATE TABLE %s (id BIGINT, int_col INT, dep STRING)"
+            + "USING iceberg "
+            + "PARTITIONED BY (bucket(5, id))"
+            + "TBLPROPERTIES (%s)",
+        tableName(OTHER_TABLE_NAME), tablePropsAsString(TABLE_PROPERTIES));
+
+    sql("INSERT INTO %s VALUES (1L, 100, 'software')", 
tableName(OTHER_TABLE_NAME));
+    sql("INSERT INTO %s VALUES (3L, 300, 'hardware')", 
tableName(OTHER_TABLE_NAME));
+    sql("INSERT INTO %s VALUES (4L, 103, 'sales')", tableName);

Review Comment:
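   Is this insert meant for the second table? It writes to `tableName`, whereas the two inserts just above it go to `tableName(OTHER_TABLE_NAME)`.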



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to