This is an automated email from the ASF dual-hosted git repository.
ashrigondekar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3d18fe1927f1 [SPARK-53973][AVRO] Classify errors for AvroOptions
boolean casting failure
3d18fe1927f1 is described below
commit 3d18fe1927f140b7f2429c7b88e5156a6e9155d7
Author: Siying Dong <[email protected]>
AuthorDate: Tue Oct 21 18:07:15 2025 -0700
[SPARK-53973][AVRO] Classify errors for AvroOptions boolean casting failure
### What changes were proposed in this pull request?
When an option of AvroOptions requires a boolean value, but a value that
cannot be cast to a boolean is passed in, classify the error.
### Why are the changes needed?
The error should be classified to provide a better user experience.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added a unit test.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #52686 from siying/avro_boolean_option.
Authored-by: Siying Dong <[email protected]>
Signed-off-by: Anish Shrigondekar <[email protected]>
---
.../org/apache/spark/sql/avro/AvroSuite.scala | 20 +++++++++++++++++
.../org/apache/spark/sql/avro/AvroOptions.scala | 25 ++++++++++++++++------
2 files changed, 38 insertions(+), 7 deletions(-)
diff --git
a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
index c78bc6212662..0b3823ca1616 100644
--- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -3094,6 +3094,26 @@ abstract class AvroSuite
assert(AvroOptions.isValidOption("recursiveFieldMaxDepth"))
}
+ test("SPARK-53973: boolean Avro options reject non-boolean values") {
+ Seq(
+ AvroOptions.STABLE_ID_FOR_UNION_TYPE,
+ AvroOptions.POSITIONAL_FIELD_MATCHING,
+ AvroOptions.IGNORE_EXTENSION
+ ).foreach { opt =>
+ val e = intercept[AnalysisException] {
+ AvroOptions(Map(opt -> "not_bool"))
+ }
+ checkError(
+ exception = e,
+ condition = "STDS_INVALID_OPTION_VALUE.WITH_MESSAGE",
+ parameters = Map(
+ "optionName" -> opt,
+ "message" -> "Cannot cast value 'not_bool' to Boolean."
+ )
+ )
+ }
+ }
+
test("SPARK-46633: read file with empty blocks") {
for (maxPartitionBytes <- Seq(100, 100000, 100000000)) {
withSQLConf(SQLConf.FILES_MAX_PARTITION_BYTES.key ->
s"$maxPartitionBytes") {
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala
b/sql/core/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala
index d358f2fc0a76..967a21613791 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala
@@ -41,6 +41,21 @@ private[sql] class AvroOptions(
import AvroOptions._
+ private def parseBoolean(optionName: String, value: String): Boolean = {
+ try {
+ value.toBoolean
+ } catch {
+ case _: IllegalArgumentException =>
+ throw QueryCompilationErrors.avroOptionsException(
+ optionName,
+ s"Cannot cast value '$value' to Boolean.")
+ }
+ }
+
+ private def getBoolean(optionName: String, defaultValue: => Boolean):
Boolean = {
+ parameters.get(optionName).map(v => parseBoolean(optionName,
v)).getOrElse(defaultValue)
+ }
+
def this(parameters: Map[String, String], conf: Configuration) = {
this(CaseInsensitiveMap(parameters), conf)
}
@@ -78,8 +93,7 @@ private[sql] class AvroOptions(
* name. This allows for a structurally equivalent Catalyst schema to be
used with an Avro schema
* whose field names do not match. Defaults to false.
*/
- val positionalFieldMatching: Boolean =
- parameters.get(POSITIONAL_FIELD_MATCHING).exists(_.toBoolean)
+ val positionalFieldMatching: Boolean = getBoolean(POSITIONAL_FIELD_MATCHING,
defaultValue = false)
/**
* Top level record name in write result, which is required in Avro spec.
@@ -107,10 +121,7 @@ private[sql] class AvroOptions(
AvroFileFormat.IgnoreFilesWithoutExtensionProperty,
ignoreFilesWithoutExtensionByDefault)
- parameters
- .get(IGNORE_EXTENSION)
- .map(_.toBoolean)
- .getOrElse(!ignoreFilesWithoutExtension)
+ getBoolean(IGNORE_EXTENSION, defaultValue = !ignoreFilesWithoutExtension)
}
/**
@@ -134,7 +145,7 @@ private[sql] class AvroOptions(
.getOrElse(SQLConf.get.getConf(SQLConf.AVRO_REBASE_MODE_IN_READ).toString)
val useStableIdForUnionType: Boolean =
- parameters.get(STABLE_ID_FOR_UNION_TYPE).map(_.toBoolean).getOrElse(false)
+ getBoolean(STABLE_ID_FOR_UNION_TYPE, defaultValue = false)
val stableIdPrefixForUnionType: String = parameters
.getOrElse(STABLE_ID_PREFIX_FOR_UNION_TYPE, "member_")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]