This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 70e10de6bdae [SPARK-55626][SQL] Don't load metadata columns on Table 
unless needed in V2TableUtil
70e10de6bdae is described below

commit 70e10de6bdae817d0fb3baba57bef85577b1e10c
Author: Anton Okolnychyi <[email protected]>
AuthorDate: Sun Feb 22 21:41:53 2026 -0800

    [SPARK-55626][SQL] Don't load metadata columns on Table unless needed in 
V2TableUtil
    
    ### What changes were proposed in this pull request?
    
    This PR prevents loading metadata columns on Table unless needed in 
`V2TableUtil`.
    
    ### Why are the changes needed?
    
    These changes are needed to prevent unnecessary loading of metadata columns. 
In some cases, accessing the metadata columns can lead to exceptions on the 
connector side as detected in Iceberg. Spark should only ask for metadata 
columns if they were projected.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    This patch comes with tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #54416 from aokolnychyi/spark-55626.
    
    Authored-by: Anton Okolnychyi <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../spark/sql/connector/catalog/V2TableUtil.scala  |  2 +-
 .../sql/connector/catalog/V2TableUtilSuite.scala   | 30 ++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V2TableUtil.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V2TableUtil.scala
index 42181c6c8389..c7f7b17a5843 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V2TableUtil.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V2TableUtil.scala
@@ -95,7 +95,7 @@ private[sql] object V2TableUtil extends SQLConfHelper {
    */
   def extractMetadataColumns(relation: DataSourceV2Relation): 
Seq[MetadataColumn] = {
     val metaAttrNames = relation.output.filter(_.isMetadataCol).map(_.name)
-    filter(metaAttrNames, metadataColumns(relation.table))
+    if (metaAttrNames.isEmpty) Nil else filter(metaAttrNames, 
metadataColumns(relation.table))
   }
 
   /**
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/V2TableUtilSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/V2TableUtilSuite.scala
index a9e5668d7fef..77167d9a5657 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/V2TableUtilSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/V2TableUtilSuite.scala
@@ -392,6 +392,25 @@ class V2TableUtilSuite extends SparkFunSuite {
     assert(errors.isEmpty)
   }
 
+  test("extractMetadataColumns - doesn't access table metadata unless needed") 
{
+    val dataCols = Array(
+      col("id", LongType, nullable = true),
+      col("name", StringType, nullable = true))
+
+    val throwingTable = TestTableThatThrowsOnMetadataAccess("test", dataCols)
+
+    val dataAttrs = dataCols.map(c => AttributeReference(c.name, c.dataType, 
c.nullable)())
+    val relation = DataSourceV2Relation(
+      throwingTable,
+      dataAttrs.toImmutableArraySeq,
+      None,
+      None,
+      CaseInsensitiveStringMap.empty())
+
+    val extractedMetaColumns = V2TableUtil.extractMetadataColumns(relation)
+    assert(extractedMetaColumns.isEmpty)
+  }
+
   test("validateCapturedColumns - array element type changed") {
     val originCols = Array(
       col("id", LongType, nullable = true),
@@ -627,6 +646,17 @@ class V2TableUtilSuite extends SparkFunSuite {
     override def capabilities: util.Set[TableCapability] = 
util.Set.of(BATCH_READ)
   }
 
+  // table that throws when metadataColumns is accessed
+  private case class TestTableThatThrowsOnMetadataAccess(
+      override val name: String,
+      override val columns: Array[Column])
+      extends Table with SupportsMetadataColumns {
+    override def capabilities: util.Set[TableCapability] = 
util.Set.of(BATCH_READ)
+    override lazy val metadataColumns: Array[MetadataColumn] = {
+      throw new RuntimeException("metadataColumns should not be accessed")
+    }
+  }
+
   private case class TestMetadataColumn(
       override val name: String,
       override val dataType: DataType,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to