This is an automated email from the ASF dual-hosted git repository.

ashrigondekar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d18fe1927f1 [SPARK-53973][AVRO] Classify errors for AvroOptions 
boolean casting failure
3d18fe1927f1 is described below

commit 3d18fe1927f140b7f2429c7b88e5156a6e9155d7
Author: Siying Dong <[email protected]>
AuthorDate: Tue Oct 21 18:07:15 2025 -0700

    [SPARK-53973][AVRO] Classify errors for AvroOptions boolean casting failure
    
    ### What changes were proposed in this pull request?
    When an AvroOptions option requires a boolean value but is given a value
    that cannot be cast to a boolean, classify the error.
    
    ### Why are the changes needed?
    The error should be classified to give users a better experience.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Added a unit test.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #52686 from siying/avro_boolean_option.
    
    Authored-by: Siying Dong <[email protected]>
    Signed-off-by: Anish Shrigondekar <[email protected]>
---
 .../org/apache/spark/sql/avro/AvroSuite.scala      | 20 +++++++++++++++++
 .../org/apache/spark/sql/avro/AvroOptions.scala    | 25 ++++++++++++++++------
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git 
a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala 
b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
index c78bc6212662..0b3823ca1616 100644
--- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -3094,6 +3094,26 @@ abstract class AvroSuite
     assert(AvroOptions.isValidOption("recursiveFieldMaxDepth"))
   }
 
+  test("SPARK-53973: boolean Avro options reject non-boolean values") {
+    Seq(
+      AvroOptions.STABLE_ID_FOR_UNION_TYPE,
+      AvroOptions.POSITIONAL_FIELD_MATCHING,
+      AvroOptions.IGNORE_EXTENSION
+    ).foreach { opt =>
+      val e = intercept[AnalysisException] {
+        AvroOptions(Map(opt -> "not_bool"))
+      }
+      checkError(
+        exception = e,
+        condition = "STDS_INVALID_OPTION_VALUE.WITH_MESSAGE",
+        parameters = Map(
+          "optionName" -> opt,
+          "message" -> "Cannot cast value 'not_bool' to Boolean."
+        )
+      )
+    }
+  }
+
   test("SPARK-46633: read file with empty blocks") {
     for (maxPartitionBytes <- Seq(100, 100000, 100000000)) {
       withSQLConf(SQLConf.FILES_MAX_PARTITION_BYTES.key -> 
s"$maxPartitionBytes") {
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala
index d358f2fc0a76..967a21613791 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala
@@ -41,6 +41,21 @@ private[sql] class AvroOptions(
 
   import AvroOptions._
 
+  private def parseBoolean(optionName: String, value: String): Boolean = {
+    try {
+      value.toBoolean
+    } catch {
+      case _: IllegalArgumentException =>
+        throw QueryCompilationErrors.avroOptionsException(
+          optionName,
+          s"Cannot cast value '$value' to Boolean.")
+    }
+  }
+
+  private def getBoolean(optionName: String, defaultValue: => Boolean): 
Boolean = {
+    parameters.get(optionName).map(v => parseBoolean(optionName, 
v)).getOrElse(defaultValue)
+  }
+
   def this(parameters: Map[String, String], conf: Configuration) = {
     this(CaseInsensitiveMap(parameters), conf)
   }
@@ -78,8 +93,7 @@ private[sql] class AvroOptions(
    * name. This allows for a structurally equivalent Catalyst schema to be 
used with an Avro schema
    * whose field names do not match. Defaults to false.
    */
-  val positionalFieldMatching: Boolean =
-    parameters.get(POSITIONAL_FIELD_MATCHING).exists(_.toBoolean)
+  val positionalFieldMatching: Boolean = getBoolean(POSITIONAL_FIELD_MATCHING, 
defaultValue = false)
 
   /**
    * Top level record name in write result, which is required in Avro spec.
@@ -107,10 +121,7 @@ private[sql] class AvroOptions(
       AvroFileFormat.IgnoreFilesWithoutExtensionProperty,
       ignoreFilesWithoutExtensionByDefault)
 
-    parameters
-      .get(IGNORE_EXTENSION)
-      .map(_.toBoolean)
-      .getOrElse(!ignoreFilesWithoutExtension)
+    getBoolean(IGNORE_EXTENSION, defaultValue = !ignoreFilesWithoutExtension)
   }
 
   /**
@@ -134,7 +145,7 @@ private[sql] class AvroOptions(
     .getOrElse(SQLConf.get.getConf(SQLConf.AVRO_REBASE_MODE_IN_READ).toString)
 
   val useStableIdForUnionType: Boolean =
-    parameters.get(STABLE_ID_FOR_UNION_TYPE).map(_.toBoolean).getOrElse(false)
+    getBoolean(STABLE_ID_FOR_UNION_TYPE, defaultValue = false)
 
   val stableIdPrefixForUnionType: String = parameters
     .getOrElse(STABLE_ID_PREFIX_FOR_UNION_TYPE, "member_")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to