This is an automated email from the ASF dual-hosted git repository.
sunchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 20e07bf51a9 [SPARK-43211][HIVE] Remove Hadoop2 support in IsolatedClientLoader
20e07bf51a9 is described below
commit 20e07bf51a9b797be76e7921297ac0d4319a4be8
Author: Cheng Pan <[email protected]>
AuthorDate: Thu Apr 20 13:10:15 2023 -0700
[SPARK-43211][HIVE] Remove Hadoop2 support in IsolatedClientLoader
### What changes were proposed in this pull request?
Remove Hadoop2 support in `IsolatedClientLoader`.
### Why are the changes needed?
Clean up Hadoop2 related code since SPARK-42452 removed support for Hadoop2.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Updated the test cases introduced in SPARK-32256; the GitHub Actions (GA) checks pass.
Closes #40870 from pan3793/SPARK-43211.
Authored-by: Cheng Pan <[email protected]>
Signed-off-by: Chao Sun <[email protected]>
---
.../scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala | 3 ++-
.../org/apache/spark/sql/hive/client/IsolatedClientLoader.scala | 8 +-------
.../apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala | 9 ++++-----
3 files changed, 7 insertions(+), 13 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 90e8f9b9d0e..5cd3b9c3abf 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -62,7 +62,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf,
hadoopConf: Configurat
import CatalogTableType._
// SPARK-32256: Make sure `VersionInfo` is initialized before touching the
isolated classloader.
- // This is to ensure Hive can get the Hadoop version when using the isolated classloader.
+ // This is a workaround for HADOOP-14067, to ensure Hive can get the Hadoop version when using
+ // the isolated classloader.
org.apache.hadoop.util.VersionInfo.getVersion()
/**
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index 7a122f22e3d..64718a9d35c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -55,8 +55,6 @@ private[hive] object IsolatedClientLoader extends Logging {
sharedPrefixes: Seq[String] = Seq.empty,
barrierPrefixes: Seq[String] = Seq.empty): IsolatedClientLoader =
synchronized {
val resolvedVersion = hiveVersion(hiveMetastoreVersion)
- // We will use Hadoop 3.x if we can't resolve the hadoop artifact
- // when builtin hadoop is Hadoop 3. Otherwise we will use Hadoop 2.x.
val files = if (resolvedVersions.contains((resolvedVersion,
hadoopVersion))) {
resolvedVersions((resolvedVersion, hadoopVersion))
} else {
@@ -68,11 +66,7 @@ private[hive] object IsolatedClientLoader extends Logging {
case e: RuntimeException if e.getMessage.contains("hadoop") =>
// If the error message contains hadoop, it is probably because
the hadoop
// version cannot be resolved.
- val fallbackVersion = if (VersionUtils.isHadoop3) {
- "3.3.5"
- } else {
- "2.7.4"
- }
+ val fallbackVersion = "3.3.5"
logWarning(s"Failed to resolve Hadoop artifacts for the version
$hadoopVersion. We " +
s"will change the hadoop version from $hadoopVersion to
$fallbackVersion and try " +
"again. It is recommended to set jars used by Hive metastore
client through " +
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
index 6ada46412bf..2a921c3fd85 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
@@ -46,7 +46,8 @@ class HadoopVersionInfoSuite extends SparkFunSuite {
// Download jars for Hive 2.0
val client = IsolatedClientLoader.forVersion(
hiveMetastoreVersion = "2.0",
- hadoopVersion = "2.7.4",
+ // 3.0.x is chosen because HADOOP-14067 was only fixed in 3.1.0
+ hadoopVersion = "3.0.3",
sparkConf = new SparkConf(),
hadoopConf = hadoopConf,
config = HiveClientBuilder.buildConf(Map.empty),
@@ -81,10 +82,8 @@ class HadoopVersionInfoSuite extends SparkFunSuite {
}
}
- test("SPARK-32212: built-in Hadoop version should support shaded client if it is not hadoop 2") {
+ test("SPARK-32212: built-in Hadoop version should support shaded client") {
val hadoopVersion = VersionInfo.getVersion
- if (!hadoopVersion.startsWith("2")) {
- assert(IsolatedClientLoader.supportsHadoopShadedClient(hadoopVersion))
- }
+ assert(IsolatedClientLoader.supportsHadoopShadedClient(hadoopVersion))
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]