This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6d36560fd4b8 [SPARK-53422][SPARK-30269][SQL][TEST] Make test case 
robust
6d36560fd4b8 is described below

commit 6d36560fd4b81721fb1903f7e82c06d5faa90742
Author: Cheng Pan <[email protected]>
AuthorDate: Mon Sep 1 10:25:41 2025 +0800

    [SPARK-53422][SPARK-30269][SQL][TEST] Make test case robust
    
    ### What changes were proposed in this pull request?
    
    I saw the test failure when trying to upgrade Parquet to 1.16.0; actually, 
this has occurred many times in previous Parquet version upgrades. We should not 
assume that Parquet files containing the same records have a fixed size, as it 
might vary in each version.
    
    ```
    [info] - SPARK-30269 failed to update partition stats if it's equal to 
table's old stats *** FAILED *** (374 milliseconds)
    [info]   666 did not equal 690 (StatisticsSuite.scala:1623)
    ```
    
    Here we get the `expectedSize` from the table stats.
    
    ### Why are the changes needed?
    
    Make the test robust.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    ```
    $ build/sbt -Phive "hive/testOnly *StatisticsSuite -- -z SPARK-30269"
    [info] StatisticsSuite:
    [info] - SPARK-30269 failed to update partition stats if it's equal to 
table's old stats (9 seconds, 525 milliseconds)
    [info] Run completed in 13 seconds, 519 milliseconds.
    [info] Total number of tests run: 1
    [info] Suites: completed 1, aborted 0
    [info] Tests: succeeded 1, failed 0, canceled 0, ignored 0, pending 0
    [info] All tests passed.
    [success] Total time: 179 s (02:59), completed Aug 29, 2025, 2:58:44 AM
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #52168 from pan3793/SPARK-53422.
    
    Authored-by: Cheng Pan <[email protected]>
    Signed-off-by: Kent Yao <[email protected]>
---
 .../apache/spark/sql/hive/StatisticsSuite.scala    | 70 +++++++++++-----------
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index 2af4d01fcfb8..80a213c9466b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -1602,42 +1602,44 @@ class StatisticsSuite extends 
StatisticsCollectionTestBase with TestHiveSingleto
     val tbl = "SPARK_30269"
     val ext_tbl = "SPARK_30269_external"
     withTempDir { dir =>
-      withTable(tbl, ext_tbl) {
-        sql(s"CREATE TABLE $tbl (key INT, value STRING, ds STRING)" +
-          "USING parquet PARTITIONED BY (ds)")
-        sql(
-          s"""
-             | CREATE TABLE $ext_tbl (key INT, value STRING, ds STRING)
-             | USING PARQUET
-             | PARTITIONED BY (ds)
-             | LOCATION '${dir.toURI}'
+      withSQLConf(SQLConf.AUTO_SIZE_UPDATE_ENABLED.key -> "false") {
+        withTable(tbl, ext_tbl) {
+          sql(s"CREATE TABLE $tbl (key INT, value STRING, ds STRING)" +
+            "USING parquet PARTITIONED BY (ds)")
+          sql(
+            s"""
+               | CREATE TABLE $ext_tbl (key INT, value STRING, ds STRING)
+               | USING PARQUET
+               | PARTITIONED BY (ds)
+               | LOCATION '${dir.toURI}'
            """.stripMargin)
 
-        Seq(tbl, ext_tbl).foreach { tblName =>
-          sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')")
-
-          val expectedSize = 690
-          // analyze table
-          sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN")
-          var tableStats = getTableStats(tblName)
-          assert(tableStats.sizeInBytes == expectedSize)
-          assert(tableStats.rowCount.isEmpty)
-
-          sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS")
-          tableStats = getTableStats(tblName)
-          assert(tableStats.sizeInBytes == expectedSize)
-          assert(tableStats.rowCount.get == 1)
-
-          // analyze a single partition
-          sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE 
STATISTICS NOSCAN")
-          var partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
-          assert(partStats.sizeInBytes == expectedSize)
-          assert(partStats.rowCount.isEmpty)
-
-          sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE 
STATISTICS")
-          partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
-          assert(partStats.sizeInBytes == expectedSize)
-          assert(partStats.rowCount.get == 1)
+          Seq(tbl, ext_tbl).foreach { tblName =>
+            sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')")
+            assert(getCatalogTable(tblName).stats.isEmpty)
+
+            // analyze table
+            sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN")
+            var tableStats = getTableStats(tblName)
+            val expectedSize = tableStats.sizeInBytes
+            assert(tableStats.rowCount.isEmpty)
+
+            sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS")
+            tableStats = getTableStats(tblName)
+            assert(tableStats.sizeInBytes == expectedSize)
+            assert(tableStats.rowCount.get == 1)
+
+            // analyze a single partition
+            sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE 
STATISTICS NOSCAN")
+            var partStats = getPartitionStats(tblName, Map("ds" -> 
"2019-12-13"))
+            assert(partStats.sizeInBytes == expectedSize)
+            assert(partStats.rowCount.isEmpty)
+
+            sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE 
STATISTICS")
+            partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
+            assert(partStats.sizeInBytes == expectedSize)
+            assert(partStats.rowCount.get == 1)
+          }
         }
       }
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to