This is an automated email from the ASF dual-hosted git repository.

ashrigondekar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 23c006c3abdb [SPARK-54156][PROTOBUF] Classify errors for ProtobufOptions casting failure
23c006c3abdb is described below

commit 23c006c3abdbefcaed8952304728f298e94d7c2d
Author: nyaapa <[email protected]>
AuthorDate: Wed Nov 5 09:53:05 2025 -0800

    [SPARK-54156][PROTOBUF] Classify errors for ProtobufOptions casting failure
    
    ### What changes were proposed in this pull request?
    When a ProtobufOptions option requires a boolean/int value but the supplied value cannot be cast to boolean/int, classify the resulting error.
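    
    For illustration, a minimal sketch based on the unit test added in this patch (ProtobufOptions is a `private[sql]` helper, so this only compiles inside Spark's own sources/tests; the commented error rendering is approximate, not exact output):
    
    ```scala
    import org.apache.spark.sql.AnalysisException
    import org.apache.spark.sql.protobuf.utils.ProtobufOptions
    
    // A non-integer value for an int-typed option now raises a classified
    // AnalysisException instead of a bare NumberFormatException.
    try {
      ProtobufOptions(Map("recursive.fields.max.depth" -> "not_an_int"))
    } catch {
      case e: AnalysisException =>
        // condition:  STDS_INVALID_OPTION_VALUE.WITH_MESSAGE
        // optionName: recursive.fields.max.depth
        // message:    Cannot cast value 'not_an_int' to Int.
        println(e.getMessage)
    }
    ```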
    
    ### Why are the changes needed?
    The error should be classified to provide a better user experience.
    
    ### Does this PR introduce any user-facing change?
    No.
    
    ### How was this patch tested?
    Added a unit test.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #52862 from nyaapa/SPARK-54156.
    
    Authored-by: nyaapa <[email protected]>
    Signed-off-by: Anish Shrigondekar <[email protected]>
---
 .../spark/sql/protobuf/utils/ProtobufOptions.scala | 50 ++++++++++++++++++----
 .../sql/protobuf/ProtobufFunctionsSuite.scala      | 37 ++++++++++++++++
 2 files changed, 78 insertions(+), 9 deletions(-)

diff --git a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/utils/ProtobufOptions.scala b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/utils/ProtobufOptions.scala
index e85097a272f2..9a54b06a9d7f 100644
--- a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/utils/ProtobufOptions.scala
+++ b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/utils/ProtobufOptions.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.protobuf.utils
 import org.apache.hadoop.conf.Configuration
 
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{AnalysisException, SparkSession}
 import org.apache.spark.sql.catalyst.FileSourceOptions
 import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, FailFastMode, ParseMode}
 
@@ -34,6 +34,39 @@ private[sql] class ProtobufOptions(
 
   import ProtobufOptions._
 
+  private def parseOption[T](
+      optionName: String,
+      value: String,
+      converter: String => T,
+      typeName: String): T = {
+    try {
+      converter(value)
+    } catch {
+      case _: IllegalArgumentException | _: NumberFormatException =>
+        throw new AnalysisException(
+          errorClass = "STDS_INVALID_OPTION_VALUE.WITH_MESSAGE",
+          messageParameters = Map(
+            "optionName" -> optionName,
+            "message" -> s"Cannot cast value '$value' to $typeName."
+          )
+        )
+    }
+  }
+
+  private def getBoolean(optionName: String, defaultValue: => Boolean): Boolean = {
+    parameters
+      .get(optionName)
+      .map(v => parseOption(optionName, v, _.toBoolean, "Boolean"))
+      .getOrElse(defaultValue)
+  }
+
+  private def getInt(optionName: String, defaultValue: => Int): Int = {
+    parameters
+      .get(optionName)
+      .map(v => parseOption(optionName, v, _.toInt, "Int"))
+      .getOrElse(defaultValue)
+  }
+
   def this(parameters: Map[String, String], conf: Configuration) = {
     this(CaseInsensitiveMap(parameters), conf)
   }
@@ -56,7 +89,7 @@ private[sql] class ProtobufOptions(
    *  3:  `struct<name: string, friend: struct<name: string, friend: struct<name: string>>>`
    * and so on.
    */
-  val recursiveFieldMaxDepth: Int = parameters.getOrElse("recursive.fields.max.depth", "-1").toInt
+  val recursiveFieldMaxDepth: Int = getInt("recursive.fields.max.depth", defaultValue = -1)
 
   /**
    * This option ("convert.any.fields.to.json") enables converting Protobuf 'Any' fields to JSON.
@@ -104,7 +137,7 @@ private[sql] class ProtobufOptions(
    * In addition schema safety is also reduced making downstream processing error prone.
    */
   val convertAnyFieldsToJson: Boolean =
-    parameters.getOrElse(CONVERT_ANY_FIELDS_TO_JSON_CONFIG, "false").toBoolean
+    getBoolean(CONVERT_ANY_FIELDS_TO_JSON_CONFIG, defaultValue = false)
 
   // Whether to render fields with zero values when deserializing Protobuf to a Spark struct.
   // When a field is empty in the serialized Protobuf, this library will deserialize them as
@@ -139,8 +172,7 @@ private[sql] class ProtobufOptions(
   //      type-specific defaults.
   // Ref: https://protobuf.dev/programming-guides/field_presence/ for information about
   //      what information is available in a serialized proto.
-  val emitDefaultValues: Boolean =
-    parameters.getOrElse("emit.default.values", false.toString).toBoolean
+  val emitDefaultValues: Boolean = getBoolean("emit.default.values", defaultValue = false)
 
   // Whether to render enum fields as their integer values.
   //
@@ -167,7 +199,7 @@ private[sql] class ProtobufOptions(
   // Please note the output struct type will now contain an int column
   // instead of string, so use caution if changing existing parsing logic.
   val enumsAsInts: Boolean =
-    parameters.getOrElse("enums.as.ints", false.toString).toBoolean
+    getBoolean("enums.as.ints", defaultValue = false)
 
   // Protobuf supports unsigned integer types uint32 and uint64. By default this library
   // will serialize them as the signed IntegerType and LongType respectively. For very
@@ -179,7 +211,7 @@ private[sql] class ProtobufOptions(
   // i.e. LongType for uint32 and Decimal(20, 0) for uint64 so their representation
   // can contain large unsigned values without overflow.
   val upcastUnsignedInts: Boolean =
-    parameters.getOrElse("upcast.unsigned.ints", false.toString).toBoolean
+    getBoolean("upcast.unsigned.ints", defaultValue = false)
 
   // Whether to unwrap the struct representation for well known primitive wrapper types when
   // deserializing. By default, the wrapper types for primitives (i.e. google.protobuf.Int32Value,
@@ -206,7 +238,7 @@ private[sql] class ProtobufOptions(
   // Concretely, the behavior with emit defaults and this option set is:
   //    nil => nil, Int32Value(0) => 0, Int32Value(100) => 100.
   val unwrapWellKnownTypes: Boolean =
-    parameters.getOrElse("unwrap.primitive.wrapper.types", false.toString).toBoolean
+    getBoolean("unwrap.primitive.wrapper.types", defaultValue = false)
 
   // Since Spark doesn't allow writing empty StructType, empty proto message type will be
   // dropped by default. Setting this option to true will insert a dummy column to empty proto
@@ -223,7 +255,7 @@ private[sql] class ProtobufOptions(
   // If retain.empty.message.types=true, field a will be retained by inserting a dummy column.
   // b struct<a: struct<__dummy_field_in_empty_struct: string>, name: string>
   val retainEmptyMessage: Boolean =
-    parameters.getOrElse("retain.empty.message.types", false.toString).toBoolean
+    getBoolean("retain.empty.message.types", defaultValue = false)
 }
 
 private[sql] object ProtobufOptions {
diff --git a/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala b/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala
index c90ca239c1d4..e46fcb1a1735 100644
--- a/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala
+++ b/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala
@@ -2232,6 +2232,43 @@ class ProtobufFunctionsSuite extends QueryTest with SharedSparkSession with Prot
     }
   }
 
+  test("SPARK-54156: boolean Protobuf options reject non-boolean values") {
+    Seq(
+      "emit.default.values",
+      "enums.as.ints",
+      "upcast.unsigned.ints",
+      "unwrap.primitive.wrapper.types",
+      "retain.empty.message.types",
+      "convert.any.fields.to.json"
+    ).foreach { opt =>
+      val e = intercept[AnalysisException] {
+        ProtobufOptions(Map(opt -> "not_a_bool"))
+      }
+      checkError(
+        exception = e,
+        condition = "STDS_INVALID_OPTION_VALUE.WITH_MESSAGE",
+        parameters = Map(
+          "optionName" -> opt,
+          "message" -> "Cannot cast value 'not_a_bool' to Boolean."
+        )
+      )
+    }
+  }
+
+  test("SPARK-54156: integer Protobuf options reject non-integer values") {
+    val e = intercept[AnalysisException] {
+      ProtobufOptions(Map("recursive.fields.max.depth" -> "not_an_int"))
+    }
+    checkError(
+      exception = e,
+      condition = "STDS_INVALID_OPTION_VALUE.WITH_MESSAGE",
+      parameters = Map(
+        "optionName" -> "recursive.fields.max.depth",
+        "message" -> "Cannot cast value 'not_an_int' to Int."
+      )
+    )
+  }
+
   def testFromProtobufWithOptions(
     df: DataFrame,
     expectedDf: DataFrame,


