This is an automated email from the ASF dual-hosted git repository.

sarutak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 23757f22306f [SPARK-56867][SQL] Respect `spark.jars.ivySettings` when 
downloading Hive metastore jars
23757f22306f is described below

commit 23757f22306f638503fcf0683426adbed53c8cf3
Author: Kousuke Saruta <[email protected]>
AuthorDate: Wed May 20 08:39:38 2026 +0900

    [SPARK-56867][SQL] Respect `spark.jars.ivySettings` when downloading Hive 
metastore jars
    
    ### What changes were proposed in this pull request?
    This PR modifies `IsolatedClientLoader.downloadVersion` to respect the 
`spark.jars.ivySettings` configuration when resolving Hive metastore jars via 
`spark.sql.hive.metastore.jars=maven`.
    
    Previously, `downloadVersion` always called `MavenUtils.buildIvySettings` 
directly, ignoring any custom Ivy settings file. With this change, if 
`spark.jars.ivySettings` is set, `MavenUtils.loadIvySettings` is used instead, 
consistent with how `DependencyUtils.resolveMavenDependencies` handles 
`--packages`.
    
    ### Why are the changes needed?
    When `spark.sql.hive.metastore.jars=maven` is configured, Spark downloads 
Hive metastore jars using Apache Ivy. However, unlike `--packages` / 
`spark.jars.packages`, this code path did not honor `spark.jars.ivySettings`. 
This made it impossible to download Hive metastore jars from authenticated 
private repositories, since credentials can only be configured through an Ivy 
settings file.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. When `spark.jars.ivySettings` is set, the Ivy settings file is now also 
used to resolve Hive metastore jars (`spark.sql.hive.metastore.jars=maven`). 
Previously, the Ivy settings file was only used for `spark.jars.packages`.
    
    ### How was this patch tested?
    Confirmed that it works with a private repository that requires authentication.
    Also added a new test suite, `IsolatedClientLoaderIvySettingsSuite`.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    Kiro CLI / Opus 4.7
    
    Closes #55881 from sarutak/hive-metastore-ivy-settings.
    
    Authored-by: Kousuke Saruta <[email protected]>
    Signed-off-by: Kousuke Saruta <[email protected]>
---
 .../sql/hive/client/IsolatedClientLoader.scala     | 29 ++++++---
 .../IsolatedClientLoaderIvySettingsSuite.scala     | 70 ++++++++++++++++++++++
 2 files changed, 90 insertions(+), 9 deletions(-)

diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index fa318d939209..eb649d196ff6 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -58,9 +58,11 @@ private[hive] object IsolatedClientLoader extends Logging {
       resolvedVersions((resolvedVersion, hadoopVersion))
     } else {
       val remoteRepos = sparkConf.get(SQLConf.ADDITIONAL_REMOTE_REPOSITORIES)
+      val ivySettingsPath = 
sparkConf.getOption(MavenUtils.JAR_IVY_SETTING_PATH_KEY)
       val (downloadedFiles, actualHadoopVersion) =
         try {
-          (downloadVersion(resolvedVersion, hadoopVersion, ivyPath, 
remoteRepos), hadoopVersion)
+          (downloadVersion(resolvedVersion, hadoopVersion, ivyPath, 
ivySettingsPath, remoteRepos),
+            hadoopVersion)
         } catch {
           case e: RuntimeException if e.getMessage.contains("hadoop") =>
             // If the error message contains hadoop, it is probably because 
the hadoop
@@ -73,7 +75,8 @@ private[hive] object IsolatedClientLoader extends Logging {
               log"set jars used by Hive metastore client through 
spark.sql.hive.metastore.jars " +
               log"in the production environment.")
             (downloadVersion(
-              resolvedVersion, fallbackVersion, ivyPath, remoteRepos), 
fallbackVersion)
+              resolvedVersion, fallbackVersion, ivyPath, ivySettingsPath, 
remoteRepos),
+              fallbackVersion)
         }
       resolvedVersions.put((resolvedVersion, actualHadoopVersion), 
downloadedFiles)
       resolvedVersions((resolvedVersion, actualHadoopVersion))
@@ -120,6 +123,7 @@ private[hive] object IsolatedClientLoader extends Logging {
       version: HiveVersion,
       hadoopVersion: String,
       ivyPath: Option[String],
+      ivySettingsPath: Option[String],
       remoteRepos: String): Seq[URL] = {
     val hadoopJarNames = if (supportsHadoopShadedClient(hadoopVersion)) {
       Seq(s"org.apache.hadoop:hadoop-client-api:$hadoopVersion",
@@ -132,16 +136,23 @@ private[hive] object IsolatedClientLoader extends Logging 
{
         .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ 
hadoopJarNames
 
     implicit val printStream: PrintStream = SparkSubmit.printStream
+    val ivySettings = ivySettingsPath match {
+      case Some(path) =>
+        MavenUtils.loadIvySettings(path, Some(remoteRepos), ivyPath)
+      case None =>
+        MavenUtils.buildIvySettings(Some(remoteRepos), ivyPath)
+    }
+    val noCacheIvySettings = ivySettingsPath match {
+      case Some(path) =>
+        Some(MavenUtils.loadIvySettings(path, Some(remoteRepos), ivyPath))
+      case None =>
+        Some(MavenUtils.buildIvySettings(Some(remoteRepos), ivyPath, 
useLocalM2AsCache = false))
+    }
     val classpaths = quietly {
       MavenUtils.resolveMavenCoordinates(
         hiveArtifacts.mkString(","),
-        MavenUtils.buildIvySettings(
-          Some(remoteRepos),
-          ivyPath),
-        Some(MavenUtils.buildIvySettings(
-          Some(remoteRepos),
-          ivyPath,
-          useLocalM2AsCache = false)),
+        ivySettings,
+        noCacheIvySettings,
         transitive = true,
         exclusions = version.exclusions)
     }
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/IsolatedClientLoaderIvySettingsSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/IsolatedClientLoaderIvySettingsSuite.scala
new file mode 100644
index 000000000000..186f15239039
--- /dev/null
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/IsolatedClientLoaderIvySettingsSuite.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.client
+
+import java.io.{File, PrintWriter}
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.util.VersionInfo
+
+import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.util.{MavenUtils, Utils}
+
+class IsolatedClientLoaderIvySettingsSuite extends SparkFunSuite {
+  override protected val enableAutoThreadAudit = false
+
+  test("SPARK-56867: respect spark.jars.ivySettings when downloading Hive 
metastore jars") {
+    val ivyPath = Utils.createTempDir(namePrefix = "ivy-settings-test")
+    val ivySettingsFile = new File(ivyPath, "ivysettings.xml")
+    val writer = new PrintWriter(ivySettingsFile)
+    try {
+      writer.write(
+        s"""<ivysettings>
+           |  <settings defaultResolver="main"/>
+           |  <resolvers>
+           |    <chain name="main">
+           |      <ibiblio name="central" m2compatible="true"
+           |               root="https://repo1.maven.org/maven2/"/>
+           |    </chain>
+           |  </resolvers>
+           |</ivysettings>""".stripMargin)
+    } finally {
+      writer.close()
+    }
+
+    try {
+      val sparkConf = new SparkConf()
+      sparkConf.set(MavenUtils.JAR_IVY_SETTING_PATH_KEY, 
ivySettingsFile.getCanonicalPath)
+      val hadoopConf = new Configuration()
+      hadoopConf.set("datanucleus.schema.autoCreateAll", "true")
+      hadoopConf.set("hive.metastore.schema.verification", "false")
+
+      val loader = IsolatedClientLoader.forVersion(
+        hiveMetastoreVersion = "2.3",
+        hadoopVersion = VersionInfo.getVersion,
+        sparkConf = sparkConf,
+        hadoopConf = hadoopConf,
+        config = HiveClientBuilder.buildConf(Map.empty),
+        ivyPath = Some(ivyPath.getCanonicalPath))
+      // Verify that the client was created successfully using the custom Ivy 
settings
+      assert(loader.createClient() != null)
+    } finally {
+      Utils.deleteRecursively(ivyPath)
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to