This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 7c96b3e36e3d [SPARK-53061][CORE][SQL] Support `copyFileToDirectory` in `SparkFileUtils`
7c96b3e36e3d is described below

commit 7c96b3e36e3dafad5a025a4f53a3a72e9c089ea3
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Fri Aug 1 10:55:23 2025 -0700

    [SPARK-53061][CORE][SQL] Support `copyFileToDirectory` in `SparkFileUtils`

    ### What changes were proposed in this pull request?

    This PR aims to support `copyFileToDirectory` in `SparkFileUtils`.

    ### Why are the changes needed?

    To provide more features in `SparkFileUtils`.

    ### Does this PR introduce _any_ user-facing change?

    No user-facing behavior change.

    ### How was this patch tested?

    Pass the CIs.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #51770 from dongjoon-hyun/SPARK-53061.

    Authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../main/scala/org/apache/spark/util/SparkFileUtils.scala   | 12 +++++++++++-
 scalastyle-config.xml                                       |  5 +++++
 .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala |  6 ++----
 .../apache/spark/sql/hive/client/IsolatedClientLoader.scala |  3 +--
 4 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala
index d13dba5cea4e..8d023dd0bc07 100644
--- a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala
+++ b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala
@@ -18,7 +18,7 @@ package org.apache.spark.util
 
 import java.io.File
 import java.net.{URI, URISyntaxException}
-import java.nio.file.{Files, Path}
+import java.nio.file.{Files, Path, StandardCopyOption}
 
 import org.apache.spark.internal.{Logging, LogKeys, MDC}
 import org.apache.spark.network.util.JavaUtils
@@ -146,6 +146,16 @@ private[spark] trait SparkFileUtils extends Logging {
       path.resolve(part)
     }.toFile
   }
+
+  /** Copy file to the target directory simply. File attribute times are not copied. */
+  def copyFileToDirectory(file: File, dir: File): Unit = {
+    if (file == null || dir == null || !file.exists() || (dir.exists() && !dir.isDirectory())) {
+      throw new IllegalArgumentException(s"Invalid input file $file or directory $dir")
+    }
+    Files.createDirectories(dir.toPath())
+    val newFile = new File(dir, file.getName())
+    Files.copy(file.toPath(), newFile.toPath(), StandardCopyOption.REPLACE_EXISTING)
+  }
 }
 
 private[spark] object SparkFileUtils extends SparkFileUtils
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index 47fcfc5f0c18..a941569c7ef0 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
@@ -302,6 +302,11 @@ This file is divided into 3 sections:
     <customMessage>Use sizeOf of JavaUtils or Utils instead.</customMessage>
   </check>
 
+  <check customId="copyFileToDirectory" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">FileUtils\.copyFileToDirectory</parameter></parameters>
+    <customMessage>Use copyFileToDirectory of SparkFileUtils or Utils instead.</customMessage>
+  </check>
+
   <check customId="commonslang2" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
     <parameters><parameter name="regex">org\.apache\.commons\.lang\.</parameter></parameters>
     <customMessage>Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index dd4a6535619f..107514edbc87 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -26,8 +26,6 @@ import java.util.concurrent.atomic.AtomicBoolean
 
 import scala.collection.mutable
 
-import org.apache.commons.io.FileUtils
-
 import org.apache.spark.{AccumulatorSuite, SPARK_DOC_ROOT, SparkArithmeticException, SparkDateTimeException, SparkException, SparkNumberFormatException, SparkRuntimeException}
 import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
 import org.apache.spark.sql.catalyst.ExtendedAnalysisException
@@ -57,7 +55,7 @@ import org.apache.spark.sql.test.SQLTestData._
 import org.apache.spark.sql.types._
 import org.apache.spark.tags.ExtendedSQLTest
 import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
-import org.apache.spark.util.ResetSystemProperties
+import org.apache.spark.util.{ResetSystemProperties, Utils}
 
 @ExtendedSQLTest
 class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper
@@ -3874,7 +3872,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
         "/local/org.apache.spark/SPARK-33084/1.0/jars/")
       targetCacheJarDir.mkdir()
       // copy jar to local cache
-      FileUtils.copyFileToDirectory(sourceJar, targetCacheJarDir)
+      Utils.copyFileToDirectory(sourceJar, targetCacheJarDir)
       withTempView("v1") {
         withUserDefinedFunction(
           s"default.$functionName" -> false,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index e2d3d4991840..56c1d402e1b2 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -24,7 +24,6 @@ import java.util
 
 import scala.util.Try
 
-import org.apache.commons.io.FileUtils
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.hive.shims.ShimLoader
@@ -149,7 +148,7 @@ private[hive] object IsolatedClientLoader extends Logging {
 
     // TODO: Remove copy logic.
     val tempDir = Utils.createTempDir(namePrefix = s"hive-${version}")
-    allFiles.foreach(f => FileUtils.copyFileToDirectory(f, tempDir))
+    allFiles.foreach(f => Utils.copyFileToDirectory(f, tempDir))
     logInfo(log"Downloaded metastore jars to ${MDC(PATH, tempDir.getCanonicalPath)}")
     tempDir.listFiles().map(_.toURI.toURL).toImmutableArraySeq
   }
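
For illustration, a minimal, self-contained sketch of the copy logic this patch adds. It mirrors the method body above; the object name and the temp-file paths in `main` are hypothetical, not part of the patch, and callers outside `private[spark]` scope would reach the helper via `org.apache.spark.util.Utils`:

```scala
import java.io.File
import java.nio.file.{Files, StandardCopyOption}

// Standalone sketch of the copy logic added to SparkFileUtils (hypothetical wrapper object).
object CopyFileToDirectorySketch {
  def copyFileToDirectory(file: File, dir: File): Unit = {
    // Reject null arguments, a missing source file, or a target that exists but is not a directory.
    if (file == null || dir == null || !file.exists() || (dir.exists() && !dir.isDirectory())) {
      throw new IllegalArgumentException(s"Invalid input file $file or directory $dir")
    }
    // Create the target directory (and any missing parents) before copying.
    Files.createDirectories(dir.toPath())
    // Copy under the same file name, replacing any existing copy.
    val newFile = new File(dir, file.getName())
    Files.copy(file.toPath(), newFile.toPath(), StandardCopyOption.REPLACE_EXISTING)
  }

  def main(args: Array[String]): Unit = {
    val src = File.createTempFile("example", ".jar")          // hypothetical source file
    val dest = Files.createTempDirectory("jar-cache").toFile  // hypothetical target directory
    copyFileToDirectory(src, dest)
    println(s"Copied ${src.getName} into $dest")
  }
}
```

As the Javadoc notes, file attribute times are not copied: unlike commons-io's `FileUtils.copyFileToDirectory`, which preserves the source's last-modified time, this version leaves the destination with a fresh timestamp.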
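The accompanying scalastyle rule is a plain substring regex, so any literal occurrence of `FileUtils.copyFileToDirectory` in the Scala sources now fails the build with the message above. A small sketch, using ordinary Scala regex matching to show what the pattern from the new check does and does not hit:

```scala
// Demonstrates the new RegexChecker pattern against the two call-site forms in this patch.
object BanRegexSketch {
  def main(args: Array[String]): Unit = {
    // The exact pattern from the new scalastyle check.
    val banned = "FileUtils\\.copyFileToDirectory".r

    val offending  = "FileUtils.copyFileToDirectory(sourceJar, targetCacheJarDir)"
    val sanctioned = "Utils.copyFileToDirectory(sourceJar, targetCacheJarDir)"

    println(banned.findFirstIn(offending))   // Some(FileUtils.copyFileToDirectory) -> build error
    println(banned.findFirstIn(sanctioned))  // None -> passes the check
  }
}
```

One side effect of the substring match: a literal `SparkFileUtils.copyFileToDirectory(...)` call would also trip the rule, since `SparkFileUtils` contains `FileUtils`. The patch's own rewritten call sites in `SQLQuerySuite` and `IsolatedClientLoader` both use `Utils.copyFileToDirectory`, which the pattern does not match.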