This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 7c96b3e36e3d [SPARK-53061][CORE][SQL] Support `copyFileToDirectory` in `SparkFileUtils`
7c96b3e36e3d is described below

commit 7c96b3e36e3dafad5a025a4f53a3a72e9c089ea3
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Fri Aug 1 10:55:23 2025 -0700

    [SPARK-53061][CORE][SQL] Support `copyFileToDirectory` in `SparkFileUtils`
    
    ### What changes were proposed in this pull request?
    
    This PR aims to support `copyFileToDirectory` in `SparkFileUtils`.
    
    ### Why are the changes needed?
    
    To provide more features in `SparkFileUtils`.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No user-facing behavior change.
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #51770 from dongjoon-hyun/SPARK-53061.
    
    Authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../main/scala/org/apache/spark/util/SparkFileUtils.scala    | 12 +++++++++++-
 scalastyle-config.xml                                        |  5 +++++
 .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala  |  6 ++----
 .../apache/spark/sql/hive/client/IsolatedClientLoader.scala  |  3 +--
 4 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala
index d13dba5cea4e..8d023dd0bc07 100644
--- a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala
+++ b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala
@@ -18,7 +18,7 @@ package org.apache.spark.util
 
 import java.io.File
 import java.net.{URI, URISyntaxException}
-import java.nio.file.{Files, Path}
+import java.nio.file.{Files, Path, StandardCopyOption}
 
 import org.apache.spark.internal.{Logging, LogKeys, MDC}
 import org.apache.spark.network.util.JavaUtils
@@ -146,6 +146,16 @@ private[spark] trait SparkFileUtils extends Logging {
       path.resolve(part)
     }.toFile
   }
+
+  /** Copy file to the target directory simply. File attribute times are not copied. */
+  def copyFileToDirectory(file: File, dir: File): Unit = {
+    if (file == null || dir == null || !file.exists() || (dir.exists() && !dir.isDirectory())) {
+      throw new IllegalArgumentException(s"Invalid input file $file or directory $dir")
+    }
+    Files.createDirectories(dir.toPath())
+    val newFile = new File(dir, file.getName())
+    Files.copy(file.toPath(), newFile.toPath(), StandardCopyOption.REPLACE_EXISTING)
+  }
 }
 
 private[spark] object SparkFileUtils extends SparkFileUtils
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index 47fcfc5f0c18..a941569c7ef0 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
@@ -302,6 +302,11 @@ This file is divided into 3 sections:
     <customMessage>Use sizeOf of JavaUtils or Utils instead.</customMessage>
   </check>
 
+  <check customId="copyFileToDirectory" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">FileUtils\.copyFileToDirectory</parameter></parameters>
+    <customMessage>Use copyFileToDirectory of SparkFileUtils or Utils instead.</customMessage>
+  </check>
+
   <check customId="commonslang2" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
     <parameters><parameter name="regex">org\.apache\.commons\.lang\.</parameter></parameters>
     <customMessage>Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index dd4a6535619f..107514edbc87 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -26,8 +26,6 @@ import java.util.concurrent.atomic.AtomicBoolean
 
 import scala.collection.mutable
 
-import org.apache.commons.io.FileUtils
-
 import org.apache.spark.{AccumulatorSuite, SPARK_DOC_ROOT, SparkArithmeticException, SparkDateTimeException, SparkException, SparkNumberFormatException, SparkRuntimeException}
 import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
 import org.apache.spark.sql.catalyst.ExtendedAnalysisException
@@ -57,7 +55,7 @@ import org.apache.spark.sql.test.SQLTestData._
 import org.apache.spark.sql.types._
 import org.apache.spark.tags.ExtendedSQLTest
 import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
-import org.apache.spark.util.ResetSystemProperties
+import org.apache.spark.util.{ResetSystemProperties, Utils}
 
 @ExtendedSQLTest
 class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper
@@ -3874,7 +3872,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
         "/local/org.apache.spark/SPARK-33084/1.0/jars/")
       targetCacheJarDir.mkdir()
       // copy jar to local cache
-      FileUtils.copyFileToDirectory(sourceJar, targetCacheJarDir)
+      Utils.copyFileToDirectory(sourceJar, targetCacheJarDir)
       withTempView("v1") {
         withUserDefinedFunction(
           s"default.$functionName" -> false,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index e2d3d4991840..56c1d402e1b2 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -24,7 +24,6 @@ import java.util
 
 import scala.util.Try
 
-import org.apache.commons.io.FileUtils
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.hive.shims.ShimLoader
 
@@ -149,7 +148,7 @@ private[hive] object IsolatedClientLoader extends Logging {
 
     // TODO: Remove copy logic.
     val tempDir = Utils.createTempDir(namePrefix = s"hive-${version}")
-    allFiles.foreach(f => FileUtils.copyFileToDirectory(f, tempDir))
+    allFiles.foreach(f => Utils.copyFileToDirectory(f, tempDir))
     logInfo(log"Downloaded metastore jars to ${MDC(PATH, tempDir.getCanonicalPath)}")
     tempDir.listFiles().map(_.toURI.toURL).toImmutableArraySeq
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to