This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new a7e4318 [SPARK-33089][SQL] make avro format propagate Hadoop config
from DS options to underlying HDFS file system
a7e4318 is described below
commit a7e43185715549f14decc018f7a58e2119c99aae
Author: Yuning Zhang <[email protected]>
AuthorDate: Thu Oct 8 12:18:06 2020 +0900
[SPARK-33089][SQL] make avro format propagate Hadoop config from DS options
to underlying HDFS file system
### What changes were proposed in this pull request?
In `AvroUtils`'s `inferSchema()`, propagate Hadoop config from DS options
to underlying HDFS file system.
### Why are the changes needed?
There is a bug: when running
```scala
spark.read.format("avro").options(conf).load(path)
```
the underlying file system does not receive the `conf` options.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
A unit test was added to `AvroSuite`.
Closes #29971 from yuningzh-db/avro_options.
Authored-by: Yuning Zhang <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
(cherry picked from commit bbc887bf73233b8c65ace05929290c0de4f63de8)
Signed-off-by: HyukjinKwon <[email protected]>
---
.../src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala | 2 +-
.../src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala | 10 ++++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
index 9ff89f6..a9f34bb 100644
--- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
+++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
@@ -42,7 +42,7 @@ private[sql] object AvroUtils extends Logging {
spark: SparkSession,
options: Map[String, String],
files: Seq[FileStatus]): Option[StructType] = {
- val conf = spark.sessionState.newHadoopConf()
+ val conf = spark.sessionState.newHadoopConfWithOptions(options)
val parsedOptions = new AvroOptions(options, conf)
if (parsedOptions.parameters.contains(ignoreExtensionKey)) {
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
index e2ae489..d2f49ae 100644
--- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
+++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -1799,6 +1799,16 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession {
assert(version === SPARK_VERSION_SHORT)
}
}
+
+ test("SPARK-33089: should propagate Hadoop config from DS options to
underlying file system") {
+ withSQLConf(
+ "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName,
+ "fs.file.impl.disable.cache" -> "true") {
+ val conf = Map("ds_option" -> "value")
+ val path = "file:" + testAvro.stripPrefix("file:")
+ spark.read.format("avro").options(conf).load(path)
+ }
+ }
}
class AvroV1Suite extends AvroSuite {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]