This is an automated email from the ASF dual-hosted git repository.

hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 59977a84257e [SPARK-54720][SQL] Add SparkSession.emptyDataFrame with a schema
59977a84257e is described below

commit 59977a84257e3009eff856e06b60e6eb0890b97a
Author: Herman van Hövell <[email protected]>
AuthorDate: Fri Dec 19 10:03:27 2025 -0400

    [SPARK-54720][SQL] Add SparkSession.emptyDataFrame with a schema
    
    ### What changes were proposed in this pull request?
    This PR adds a version of `SparkSession.emptyDataFrame` that takes a schema.
    
    ### Why are the changes needed?
    It makes it easier to create an empty DataFrame in Scala.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, it adds a new API.
    
    ### How was this patch tested?
    I have added a test case.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #53489 from hvanhovell/SPARK-54720.
    
    Authored-by: Herman van Hövell <[email protected]>
    Signed-off-by: Herman van Hövell <[email protected]>
---
 .../src/main/scala/org/apache/spark/sql/SparkSession.scala |  8 +++++++-
 .../SparkSessionBuilderImplementationBindingSuite.scala    | 14 ++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/api/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 8e7ae51e998f..9c6e3bdb9078 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -210,9 +210,15 @@ abstract class SparkSession extends Serializable with Closeable {
    *
    * @since 2.0.0
    */
-  @transient
   def emptyDataFrame: DataFrame
 
+  /**
+   * Returns a `DataFrame` with schema `schema` and no rows.
+   *
+   * @since 4.2.0
+   */
+  def emptyDataFrame(schema: StructType): DataFrame = emptyDataset(Encoders.row(schema))
+
   /**
    * Creates a `DataFrame` from a local Seq of Product.
    *
diff --git a/sql/api/src/test/scala/org/apache/spark/sql/SparkSessionBuilderImplementationBindingSuite.scala b/sql/api/src/test/scala/org/apache/spark/sql/SparkSessionBuilderImplementationBindingSuite.scala
index 57eddd1bc69f..95c82cbdbdb6 100644
--- a/sql/api/src/test/scala/org/apache/spark/sql/SparkSessionBuilderImplementationBindingSuite.scala
+++ b/sql/api/src/test/scala/org/apache/spark/sql/SparkSessionBuilderImplementationBindingSuite.scala
@@ -22,6 +22,7 @@ import org.scalatest.funsuite.AnyFunSuite
 
 import org.apache.spark.SparkContext
 import org.apache.spark.sql.functions.{max, sum}
+import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
 
 /**
  * Test suite for SparkSession implementation binding.
@@ -70,4 +71,17 @@ trait SparkSessionBuilderImplementationBindingSuite
     val df = ctx.createDataset(1 to 11).select(max("value").as[Long])
     assert(df.head() == 11)
   }
+
+  test("emptyDataFrame with Schema") {
+    val session = SparkSession.builder().getOrCreate()
+    val schema =
+      new StructType(Array(StructField("a", IntegerType), StructField("b", StringType)))
+    val df = session.emptyDataFrame(schema)
+    assert(df.schema == schema)
+    assert(df.isEmpty)
+    val derivedSchema = new StructType(Array(StructField("a", IntegerType)))
+    val derivedDf = df.select("a")
+    assert(derivedDf.schema == derivedSchema)
+    assert(derivedDf.isEmpty)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to