This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 27a0d27b7acb [SPARK-54029][SQL] Add detailed error message for table 
metadata corruption to ease debugging
27a0d27b7acb is described below

commit 27a0d27b7acbccb9883eaf1660aaf83a5c0eb94a
Author: Ganesha S <[email protected]>
AuthorDate: Thu Oct 30 07:58:43 2025 +0800

    [SPARK-54029][SQL] Add detailed error message for table metadata corruption 
to ease debugging
    
    ### What changes were proposed in this pull request?
    
    Add detailed error message for table metadata corruption to ease debugging
    
    ### Why are the changes needed?
    
    Currently, no meaningful error message appears when the table metadata is 
corrupted due to a mismatch between the partition column names in the table 
schema and the declared partition columns. To facilitate debugging, a detailed 
error message should be shown in this situation.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Added unit test.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #52731 from ganeshashree/SPARK-54029.
    
    Authored-by: Ganesha S <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../spark/sql/catalyst/catalog/interface.scala     | 10 ++-
 .../sql/catalyst/catalog/SessionCatalogSuite.scala | 75 ++++++++++++++++++++++
 2 files changed, 84 insertions(+), 1 deletion(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index d824e090b45d..64d816587f4d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -451,7 +451,15 @@ case class CatalogTable(
    */
   def partitionSchema: StructType = {
     val partitionFields = schema.takeRight(partitionColumnNames.length)
-    assert(partitionFields.map(_.name) == partitionColumnNames)
+    val actualPartitionColumnNames = partitionFields.map(_.name)
+
+    assert(actualPartitionColumnNames == partitionColumnNames,
+      "Corrupted table metadata detected for table " + identifier.quotedString 
+ ". " +
+      "The partition column names in the table schema " +
+      "do not match the declared partition columns. " +
+      "Table schema columns: [" + schema.fieldNames.mkString(", ") + "] " +
+      "Declared partition columns: [" + partitionColumnNames.mkString(", ") + 
"]. " +
+      "This indicates corrupted table metadata that needs to be repaired.")
 
     StructType(partitionFields)
   }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index b9d956c3c2a5..300e5a8653f1 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -1951,4 +1951,79 @@ abstract class SessionCatalogSuite extends AnalysisTest 
with Eventually {
       assert(catalog.getCachedTable(qualifiedName2) != null)
     }
   }
+
+  test("CatalogTable partitionSchema provides detailed error for corrupted 
metadata") {
+    // Test case 1: Partition columns don't match schema (wrong names)
+    val corruptedTable1 = CatalogTable(
+      identifier = TableIdentifier("corrupted_table1", Some("test_db")),
+      tableType = CatalogTableType.MANAGED,
+      storage = CatalogStorageFormat.empty,
+      schema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("name", StringType),
+        StructField("year", IntegerType),
+        StructField("month", IntegerType)
+      )),
+      partitionColumnNames = Seq("year", "day")  // "day" doesn't exist in 
schema
+    )
+
+    val exception1 = intercept[AssertionError] {
+      corruptedTable1.partitionSchema
+    }
+
+    val expectedMessage1 = "assertion failed: Corrupted table metadata 
detected " +
+      "for table `test_db`.`corrupted_table1`. " +
+      "The partition column names in the table schema " +
+      "do not match the declared partition columns. " +
+      "Table schema columns: [id, name, year, month] " +
+      "Declared partition columns: [year, day]. " +
+      "This indicates corrupted table metadata that needs to be repaired."
+    assert(exception1.getMessage === expectedMessage1)
+
+    // Test case 2: Partition columns are not at the end of schema
+    val corruptedTable2 = CatalogTable(
+      identifier = TableIdentifier("corrupted_table2", Some("test_db")),
+      tableType = CatalogTableType.MANAGED,
+      storage = CatalogStorageFormat.empty,
+      schema = StructType(Seq(
+        StructField("year", IntegerType),
+        StructField("id", IntegerType),
+        StructField("name", StringType),
+        StructField("month", IntegerType)
+      )),
+      partitionColumnNames = Seq("year", "month")
+    )
+
+    val exception2 = intercept[AssertionError] {
+      corruptedTable2.partitionSchema
+    }
+
+    val expectedMessage2 = "assertion failed: Corrupted table metadata 
detected " +
+      "for table `test_db`.`corrupted_table2`. " +
+      "The partition column names in the table schema " +
+      "do not match the declared partition columns. " +
+      "Table schema columns: [year, id, name, month] " +
+      "Declared partition columns: [year, month]. " +
+      "This indicates corrupted table metadata that needs to be repaired."
+    assert(exception2.getMessage === expectedMessage2)
+
+    // Test case 3: Valid table should work without error
+    val validTable = CatalogTable(
+      identifier = TableIdentifier("valid_table", Some("test_db")),
+      tableType = CatalogTableType.MANAGED,
+      storage = CatalogStorageFormat.empty,
+      schema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("name", StringType),
+        StructField("year", IntegerType),
+        StructField("month", IntegerType)
+      )),
+      partitionColumnNames = Seq("year", "month")  // Matches schema
+    )
+
+    // This should not throw an exception
+    val partitionSchema = validTable.partitionSchema
+    assert(partitionSchema.fieldNames.toSeq == Seq("year", "month"))
+    assert(partitionSchema.fields.length == 2)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to