This is an automated email from the ASF dual-hosted git repository.

yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 322b3d06d3f8 [SPARK-53075][CORE][TESTS] Use Java 
`Files.readAllLines/write` instead of `FileUtils.(read|write)Lines`
322b3d06d3f8 is described below

commit 322b3d06d3f86c23bbe14d3514c1ffd731c216bb
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sat Aug 2 19:29:16 2025 +0800

    [SPARK-53075][CORE][TESTS] Use Java `Files.readAllLines/write` instead of 
`FileUtils.(read|write)Lines`
    
    ### What changes were proposed in this pull request?
    
    This PR aims to use Java `Files.readAllLines/write` instead of 
`FileUtils.(read|write)Lines`.
    
    In addition,
    - `commons-io` test dependency is removed from the `common/utils` module
    - Two Scalastyle rules are added to prevent a future regression.
    
    ### Why are the changes needed?
    
    Java implementations are faster.
    
    **SAMPLE DATA**
    
    ```scala
    scala> val array = new java.util.ArrayList[String]()
    val array: java.util.ArrayList[String] = []
    
    scala> (1 to 100_000_000).foreach { _ => array.add("a") }
    ```
    
    **BEFORE (WRITE)**
    
    ```scala
    scala> spark.time(org.apache.commons.io.FileUtils.writeLines(new 
java.io.File("/tmp/text"), array))
    Time taken: 5013 ms
    ```
    
    **AFTER (WRITE)**
    
    ```scala
    scala> 
spark.time(java.nio.file.Files.write(java.nio.file.Paths.get("/tmp/text"), 
array))
    Time taken: 1191 ms
    ```
    
    **BEFORE (READ)**
    
    ```scala
    scala> spark.time(org.apache.commons.io.FileUtils.readLines(new 
java.io.File("/tmp/text")))
    Time taken: 2377 ms
    ```
    
    **AFTER (READ)**
    
    ```scala
    scala> 
spark.time(java.nio.file.Files.readAllLines(java.nio.file.Paths.get("/tmp/text")))
    Time taken: 2279 ms
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #51787 from dongjoon-hyun/SPARK-53075.
    
    Authored-by: Dongjoon Hyun <[email protected]>
    Signed-off-by: yangjie01 <[email protected]>
---
 common/utils/pom.xml                                          |  5 -----
 .../src/test/scala/org/apache/spark/util/LogKeySuite.scala    | 11 ++++-------
 scalastyle-config.xml                                         | 10 ++++++++++
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/common/utils/pom.xml b/common/utils/pom.xml
index 6067ec40ecdd..abcadd83fc56 100644
--- a/common/utils/pom.xml
+++ b/common/utils/pom.xml
@@ -51,11 +51,6 @@
       <groupId>com.fasterxml.jackson.module</groupId>
       <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
     </dependency>
-    <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
-      <scope>test</scope>
-    </dependency>
     <dependency>
       <groupId>org.apache.ivy</groupId>
       <artifactId>ivy</artifactId>
diff --git 
a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala 
b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala
index 17e360f510a2..742d4066ffab 100644
--- a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala
+++ b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala
@@ -17,14 +17,12 @@
 
 package org.apache.spark.util
 
-import java.nio.charset.StandardCharsets
 import java.nio.file.{Files, Path}
 import java.util.{ArrayList => JList}
 
 import scala.jdk.CollectionConverters._
 import scala.reflect.runtime.universe._
 
-import org.apache.commons.io.FileUtils
 import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite
 
 import org.apache.spark.internal.{Logging, LogKeys}
@@ -61,9 +59,8 @@ class LogKeySuite
   private def regenerateLogKeyFile(
       originalKeys: Seq[String], sortedKeys: Seq[String]): Unit = {
     if (originalKeys != sortedKeys) {
-      val logKeyFile = logKeyFilePath.toFile
-      logInfo(s"Regenerating the file $logKeyFile")
-      val originalContents = FileUtils.readLines(logKeyFile, 
StandardCharsets.UTF_8)
+      logInfo(s"Regenerating the file $logKeyFilePath")
+      val originalContents = Files.readAllLines(logKeyFilePath)
       val sortedContents = new JList[String]()
       var firstMatch = false
       originalContents.asScala.foreach { line =>
@@ -78,8 +75,8 @@ class LogKeySuite
           sortedContents.add(line)
         }
       }
-      Files.delete(logKeyFile.toPath)
-      FileUtils.writeLines(logKeyFile, StandardCharsets.UTF_8.name(), 
sortedContents)
+      Files.delete(logKeyFilePath)
+      Files.write(logKeyFilePath, sortedContents)
     }
   }
 
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index 7c05199d02b5..740cf48a2f76 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
@@ -282,6 +282,16 @@ This file is divided into 3 sections:
       scala.jdk.CollectionConverters._ and use .asScala / .asJava 
methods</customMessage>
   </check>
 
+  <check customId="readLines" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter 
name="regex">FileUtils\.readLines</parameter></parameters>
+    <customMessage>Use Files.readAllLines instead.</customMessage>
+  </check>
+
+  <check customId="writeLines" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter 
name="regex">FileUtils\.writeLines</parameter></parameters>
+    <customMessage>Use Files.write instead.</customMessage>
+  </check>
+
   <check customId="deleteRecursively" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
     <parameters><parameter 
name="regex">FileUtils\.deleteDirectory</parameter></parameters>
     <customMessage>Use deleteRecursively of SparkFileUtils or 
Utils</customMessage>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to