This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 60393202ccdb [SPARK-45225][SQL] XML: XSD file URL support
60393202ccdb is described below

commit 60393202ccdb12d5c25c68dfc96a93ab4c897b6b
Author: Sandip Agarwala <[email protected]>
AuthorDate: Wed Sep 20 13:19:06 2023 +0900

    [SPARK-45225][SQL] XML: XSD file URL support
    
    ### What changes were proposed in this pull request?
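    Add support for specifying the XSD file as a URL. The schema is now opened
    through the Hadoop FileSystem API instead of `java.nio.file.Paths`, with a
    fallback to `SparkFiles.get` for files added with `sc.addFile`.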
    
    ### Why are the changes needed?
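    Previously the XSD location was resolved with `java.nio.file.Paths`, i.e. as
    a local file path, so an XSD file referenced by URL was not supported.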
    
    ### Does this PR introduce _any_ user-facing change?
    Yes
    
    ### How was this patch tested?
    Unit test
    Manual test
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #43000 from sandip-db/xml-xsd-url-master.
    
    Authored-by: Sandip Agarwala <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 .../spark/sql/catalyst/xml/ValidatorUtil.scala     | 29 +++++++++++++++-------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala
index 6509842fc6d1..f8b546332c2a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala
@@ -16,32 +16,43 @@
  */
 package org.apache.spark.sql.catalyst.xml
 
-import java.nio.file.Paths
 import javax.xml.XMLConstants
+import javax.xml.transform.stream.StreamSource
 import javax.xml.validation.{Schema, SchemaFactory}
 
 import com.google.common.cache.{CacheBuilder, CacheLoader}
+import org.apache.hadoop.fs.Path
 
 import org.apache.spark.SparkFiles
+import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.util.Utils
 
 /**
  * Utilities for working with XSD validation.
  */
 private[sql] object ValidatorUtil {
-
   // Parsing XSDs may be slow, so cache them by path:
 
   private val cache = CacheBuilder.newBuilder().softValues().build(
     new CacheLoader[String, Schema] {
       override def load(key: String): Schema = {
-        // Handle case where file exists as specified
-        var path = Paths.get(key)
-        if (!path.toFile.exists()) {
-          // Handle case where it was added with sc.addFile
-          path = Paths.get(SparkFiles.get(key))
+        val in = try {
+          // Handle case where file exists as specified
+          val fs = Utils.getHadoopFileSystem(key, SparkHadoopUtil.get.conf)
+          fs.open(new Path(key))
+        } catch {
+          case _: Throwable =>
+            // Handle case where it was added with sc.addFile
+            val addFileUrl = SparkFiles.get(key)
+            val fs = Utils.getHadoopFileSystem(addFileUrl, 
SparkHadoopUtil.get.conf)
+            fs.open(new Path(addFileUrl))
+        }
+        try {
+          val schemaFactory = 
SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
+          schemaFactory.newSchema(new StreamSource(in))
+        } finally {
+          in.close()
         }
-        val schemaFactory = 
SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
-        schemaFactory.newSchema(path.toFile)
       }
     })
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to