This is an automated email from the ASF dual-hosted git repository.
yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 322b3d06d3f8 [SPARK-53075][CORE][TESTS] Use Java
`Files.readAllLines/write` instead of `FileUtils.(read|write)Lines`
322b3d06d3f8 is described below
commit 322b3d06d3f86c23bbe14d3514c1ffd731c216bb
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sat Aug 2 19:29:16 2025 +0800
[SPARK-53075][CORE][TESTS] Use Java `Files.readAllLines/write` instead of
`FileUtils.(read|write)Lines`
### What changes were proposed in this pull request?
This PR aims to use Java `Files.readAllLines/write` instead of
`FileUtils.(read|write)Lines`.
In addition,
- `commons-io` test dependency is removed from the `common/utils` module
- Two Scalastyle rules are added to prevent a future regression.
### Why are the changes needed?
Java implementations are faster.
**SAMPLE DATA**
```scala
scala> val array = new java.util.ArrayList[String]()
val array: java.util.ArrayList[String] = []
scala> (1 to 100_000_000).foreach { _ => array.add("a") }
```
**BEFORE (WRITE)**
```scala
scala> spark.time(org.apache.commons.io.FileUtils.writeLines(new
java.io.File("/tmp/text"), array))
Time taken: 5013 ms
```
**AFTER (WRITE)**
```scala
scala>
spark.time(java.nio.file.Files.write(java.nio.file.Paths.get("/tmp/text"),
array))
Time taken: 1191 ms
```
**BEFORE(READ)**
```scala
scala> spark.time(org.apache.commons.io.FileUtils.readLines(new
java.io.File("/tmp/text")))
Time taken: 2377 ms
```
**AFTER(READ)**
```scala
scala>
spark.time(java.nio.file.Files.readAllLines(java.nio.file.Paths.get("/tmp/text")))
Time taken: 2279 ms
```
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Pass the CIs.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #51787 from dongjoon-hyun/SPARK-53075.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
---
common/utils/pom.xml | 5 -----
.../src/test/scala/org/apache/spark/util/LogKeySuite.scala | 11 ++++-------
scalastyle-config.xml | 10 ++++++++++
3 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/common/utils/pom.xml b/common/utils/pom.xml
index 6067ec40ecdd..abcadd83fc56 100644
--- a/common/utils/pom.xml
+++ b/common/utils/pom.xml
@@ -51,11 +51,6 @@
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
</dependency>
- <dependency>
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.apache.ivy</groupId>
<artifactId>ivy</artifactId>
diff --git
a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala
b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala
index 17e360f510a2..742d4066ffab 100644
--- a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala
+++ b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala
@@ -17,14 +17,12 @@
package org.apache.spark.util
-import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path}
import java.util.{ArrayList => JList}
import scala.jdk.CollectionConverters._
import scala.reflect.runtime.universe._
-import org.apache.commons.io.FileUtils
import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite
import org.apache.spark.internal.{Logging, LogKeys}
@@ -61,9 +59,8 @@ class LogKeySuite
private def regenerateLogKeyFile(
originalKeys: Seq[String], sortedKeys: Seq[String]): Unit = {
if (originalKeys != sortedKeys) {
- val logKeyFile = logKeyFilePath.toFile
- logInfo(s"Regenerating the file $logKeyFile")
- val originalContents = FileUtils.readLines(logKeyFile,
StandardCharsets.UTF_8)
+ logInfo(s"Regenerating the file $logKeyFilePath")
+ val originalContents = Files.readAllLines(logKeyFilePath)
val sortedContents = new JList[String]()
var firstMatch = false
originalContents.asScala.foreach { line =>
@@ -78,8 +75,8 @@ class LogKeySuite
sortedContents.add(line)
}
}
- Files.delete(logKeyFile.toPath)
- FileUtils.writeLines(logKeyFile, StandardCharsets.UTF_8.name(),
sortedContents)
+ Files.delete(logKeyFilePath)
+ Files.write(logKeyFilePath, sortedContents)
}
}
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index 7c05199d02b5..740cf48a2f76 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
@@ -282,6 +282,16 @@ This file is divided into 3 sections:
scala.jdk.CollectionConverters._ and use .asScala / .asJava
methods</customMessage>
</check>
+ <check customId="readLines" level="error"
class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter
name="regex">FileUtils\.readLines</parameter></parameters>
+ <customMessage>Use Files.readAllLines instead.</customMessage>
+ </check>
+
+ <check customId="writeLines" level="error"
class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter
name="regex">FileUtils\.writeLines</parameter></parameters>
+ <customMessage>Use Files.write instead.</customMessage>
+ </check>
+
<check customId="deleteRecursively" level="error"
class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter
name="regex">FileUtils\.deleteDirectory</parameter></parameters>
<customMessage>Use deleteRecursively of SparkFileUtils or
Utils</customMessage>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]