This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new cb9cb7cd10c5 [SPARK-53083][CORE][SQL][K8S][SS][TESTS] Use Java `Files.writeString` instead of `FileUtils.write`
cb9cb7cd10c5 is described below
commit cb9cb7cd10c5441009925c0e8f547ce91bf59334
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sun Aug 3 00:30:28 2025 -0700
[SPARK-53083][CORE][SQL][K8S][SS][TESTS] Use Java `Files.writeString` instead of `FileUtils.write`
### What changes were proposed in this pull request?
This PR aims to use Java `Files.writeString` instead of `FileUtils.write`.
### Why are the changes needed?
`Files.writeString` has been available since Java 11 and is about 7x faster than `commons-io`'s `FileUtils.write`.
```scala
scala> val s = "a".repeat(100_000_000)
scala> spark.time(org.apache.commons.io.FileUtils.write(new
java.io.File("/tmp/a"), s, java.nio.charset.StandardCharsets.UTF_8))
Time taken: 254 ms
scala> spark.time(java.nio.file.Files.writeString(new
java.io.File("/tmp/a").toPath(), s))
Time taken: 35 ms
val res1: java.nio.file.Path = /tmp/a
```
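For context, here is a minimal sketch of the migration pattern this patch applies (the object name and temp path below are illustrative, not from the patch). `Files.writeString` defaults to UTF-8 and create-or-truncate mode, so most `FileUtils.write(file, s, UTF_8)` calls translate one-to-one; append-mode calls, such as the one in `KubernetesUtilsSuite` below, must request append semantics explicitly via `StandardOpenOption`s:
```scala
import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.StandardOpenOption.{APPEND, CREATE, WRITE}

object WriteStringMigration {
  def main(args: Array[String]): Unit = {
    // Illustrative path; any writable location works.
    val file = new File(Files.createTempDirectory("demo").toFile, "a.txt")

    // FileUtils.write(file, "content", StandardCharsets.UTF_8) becomes:
    // Files.writeString defaults to UTF-8 and create-or-truncate semantics.
    Files.writeString(file.toPath, "content")

    // FileUtils.write(file, "-more", StandardCharsets.UTF_8, true) (append = true) becomes:
    Files.writeString(file.toPath, "-more", StandardCharsets.UTF_8, CREATE, WRITE, APPEND)

    assert(Files.readString(file.toPath) == "content-more")
  }
}
```
Without the explicit `CREATE, WRITE, APPEND` options, the second `Files.writeString` call would truncate the file instead of appending.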
### Does this PR introduce _any_ user-facing change?
No behavior change.
### How was this patch tested?
Pass the CIs.
### Was this patch authored or co-authored using generative AI tooling?
No.
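A related subtlety this patch accounts for: `FileUtils.write` creates missing parent directories, while `Files.writeString` does not, which is why the `generateFiles` change in `RocksDBSuite` below adds `file.getParentFile().mkdirs()`. A minimal sketch (object name and temp path are illustrative):
```scala
import java.io.File
import java.nio.file.{Files, NoSuchFileException}

object ParentDirCaveat {
  def main(args: Array[String]): Unit = {
    val file = new File(Files.createTempDirectory("demo").toFile, "nested/a.txt")

    // Files.writeString does not create missing parent directories; it throws
    // NoSuchFileException (FileUtils.write would have created them).
    try Files.writeString(file.toPath, "a")
    catch { case _: NoSuchFileException => println("parent directory is missing") }

    // Creating the parents first, as the RocksDBSuite change does, makes it succeed.
    file.getParentFile().mkdirs()
    Files.writeString(file.toPath, "a")
  }
}
```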
Closes #51795 from dongjoon-hyun/SPARK-53083.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala | 9 ++++-----
.../org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala | 7 ++++---
scalastyle-config.xml | 5 +++++
.../spark/sql/execution/streaming/state/RocksDBSuite.scala | 10 +++++-----
.../org/apache/spark/sql/streaming/StreamingQuerySuite.scala | 7 +++----
.../org/apache/spark/streaming/StreamingContextSuite.scala | 5 ++---
6 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index c92cb4eb6be1..0116b2aa781a 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -26,7 +26,6 @@ import scala.collection.mutable.ArrayBuffer
import scala.io.{Codec, Source}
import com.google.common.io.ByteStreams
-import org.apache.commons.io.FileUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, FSDataInputStream, Path}
import org.scalatest.BeforeAndAfterEach
@@ -111,7 +110,7 @@ class SparkSubmitSuite
with TestPrematureExit {
private val emptyIvySettings = File.createTempFile("ivy", ".xml")
- FileUtils.write(emptyIvySettings, "<ivysettings />", StandardCharsets.UTF_8)
+ Files.writeString(emptyIvySettings.toPath, "<ivysettings />")
private val submit = new SparkSubmit()
@@ -1336,7 +1335,7 @@ class SparkSubmitSuite
val jarFile = File.createTempFile("test", ".jar")
jarFile.deleteOnExit()
val content = "hello, world"
- FileUtils.write(jarFile, content, StandardCharsets.UTF_8)
+ Files.writeString(jarFile.toPath, content)
val hadoopConf = new Configuration()
val tmpDir = Files.createTempDirectory("tmp").toFile
updateConfWithFakeS3Fs(hadoopConf)
@@ -1351,7 +1350,7 @@ class SparkSubmitSuite
val jarFile = File.createTempFile("test", ".jar")
jarFile.deleteOnExit()
val content = "hello, world"
- FileUtils.write(jarFile, content, StandardCharsets.UTF_8)
+ Files.writeString(jarFile.toPath, content)
val hadoopConf = new Configuration()
val tmpDir = Files.createTempDirectory("tmp").toFile
updateConfWithFakeS3Fs(hadoopConf)
@@ -1818,7 +1817,7 @@ class SparkSubmitSuite
"spark = SparkSession.builder.getOrCreate();" +
"assert 'connect' in str(type(spark));" +
"assert spark.range(1).first()[0] == 0"
- FileUtils.write(pyFile, content, StandardCharsets.UTF_8)
+ Files.writeString(pyFile.toPath, content)
val args = Seq(
"--name", "testPyApp",
"--remote", "local",
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala
index be10d21aa527..9c0251936df2 100644
--- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala
@@ -19,11 +19,12 @@ package org.apache.spark.deploy.k8s
import java.io.File
import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.StandardOpenOption._
import scala.jdk.CollectionConverters._
import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder,
EnvVarSourceBuilder, PodBuilder}
-import org.apache.commons.io.FileUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.scalatest.PrivateMethodTester
@@ -91,7 +92,7 @@ class KubernetesUtilsSuite extends SparkFunSuite with PrivateMethodTester {
}
def appendFileAndUpload(content: String, delSrc: Boolean, overwrite: Boolean): Unit = {
- FileUtils.write(srcFile, content, StandardCharsets.UTF_8, true)
+ Files.writeString(srcFile.toPath, content, StandardCharsets.UTF_8, CREATE, WRITE, APPEND)
KubernetesUtils.invokePrivate(upload(src, dest, fs, delSrc, overwrite))
}
@@ -121,7 +122,7 @@ class KubernetesUtilsSuite extends SparkFunSuite with PrivateMethodTester {
// Rewrite a new file, upload file with delSrc = true and overwrite = false.
// Upload failed because dest exists, src still exists.
- FileUtils.write(srcFile, "re-init-content", StandardCharsets.UTF_8,
true)
+ Files.writeString(srcFile.toPath, "re-init-content")
checkUploadException(delSrc = true, overwrite = false)
assert(fs.exists(src))
}
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index 9f6fb363830c..dbddba673285 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
@@ -292,6 +292,11 @@ This file is divided into 3 sections:
<customMessage>Use Files.readString instead.</customMessage>
</check>
+ <check customId="write" level="error"
class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter
name="regex">[^k]FileUtils\.write\(</parameter></parameters>
+ <customMessage>Use Files.writeString instead.</customMessage>
+ </check>
+
<check customId="writeLines" level="error"
class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter
name="regex">FileUtils\.writeLines</parameter></parameters>
<customMessage>Use Files.write instead.</customMessage>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala
index ed32b4f33ee6..69373641e850 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.execution.streaming.state
import java.io._
-import java.nio.charset.StandardCharsets
+import java.nio.file.Files
import java.util.UUID
import java.util.concurrent.Executors
@@ -28,7 +28,6 @@ import scala.concurrent.duration._
import scala.language.implicitConversions
import scala.util.Random
-import org.apache.commons.io.FileUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, Path}
import org.rocksdb.CompressionType
@@ -2176,7 +2175,7 @@ class RocksDBSuite extends AlsoTestWithRocksDBFeatures with SharedSparkSession
withTempDir { dir =>
val file2 = new File(dir, "json")
val json2 = """{"sstFiles":[],"numKeys":0}"""
- FileUtils.write(file2, s"v2\n$json2", StandardCharsets.UTF_8)
+ Files.writeString(file2.toPath, s"v2\n$json2")
val e = intercept[SparkException] {
RocksDBCheckpointMetadata.readFromFile(file2)
}
@@ -2194,7 +2193,7 @@ class RocksDBSuite extends AlsoTestWithRocksDBFeatures with SharedSparkSession
assert(metadata.json == json)
withTempDir { dir =>
val file = new File(dir, "json")
- FileUtils.write(file, s"v1\n$json", StandardCharsets.UTF_8)
+ Files.writeString(file.toPath, s"v1\n$json")
assert(metadata == RocksDBCheckpointMetadata.readFromFile(file))
}
}
@@ -3611,7 +3610,8 @@ class RocksDBSuite extends AlsoTestWithRocksDBFeatures with SharedSparkSession
def generateFiles(dir: String, fileToLengths: Seq[(String, Int)]): Unit = {
fileToLengths.foreach { case (fileName, length) =>
val file = new File(dir, fileName)
- FileUtils.write(file, "a".repeat(length), StandardCharsets.UTF_8)
+ file.getParentFile().mkdirs()
+ Files.writeString(file.toPath, "a".repeat(length))
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala
index 1ccd19962831..174aa075be13 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala
@@ -18,7 +18,6 @@
package org.apache.spark.sql.streaming
import java.io.File
-import java.nio.charset.StandardCharsets.UTF_8
import java.nio.file.Files
import java.util.Collections
import java.util.concurrent.CountDownLatch
@@ -1111,9 +1110,9 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi
// migrate to new version. However, in our test, "tempDir" will be different in each run and
// we need to fix the absolute path in the metadata to match "tempDir".
val sparkMetadata = Files.readString(new File(legacySparkMetadataDir, "0").toPath)
- FileUtils.write(
- new File(legacySparkMetadataDir, "0"),
- sparkMetadata.replaceAll("TEMPDIR", dir.getCanonicalPath), UTF_8)
+ Files.writeString(
+ new File(legacySparkMetadataDir, "0").toPath,
+ sparkMetadata.replaceAll("TEMPDIR", dir.getCanonicalPath))
}
test("detect escaped path and report the migration guide") {
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
index b2d060b8e042..564713af88ae 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
@@ -18,7 +18,7 @@
package org.apache.spark.streaming
import java.io.{File, NotSerializableException}
-import java.nio.charset.StandardCharsets
+import java.nio.file.Files
import java.util.Locale
import java.util.concurrent.{CountDownLatch, TimeUnit}
import java.util.concurrent.atomic.AtomicInteger
@@ -26,7 +26,6 @@ import java.util.concurrent.atomic.AtomicInteger
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.Queue
-import org.apache.commons.io.FileUtils
import org.scalatest.{Assertions, PrivateMethodTester}
import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits}
import org.scalatest.concurrent.Eventually._
@@ -910,7 +909,7 @@ class StreamingContextSuite
def createCorruptedCheckpoint(): String = {
val checkpointDirectory = Utils.createTempDir().getAbsolutePath()
val fakeCheckpointFile = Checkpoint.checkpointFile(checkpointDirectory, Time(1000))
- FileUtils.write(new File(fakeCheckpointFile.toString()), "blablabla", StandardCharsets.UTF_8)
+ Files.writeString(new File(fakeCheckpointFile.toString()).toPath, "blablabla")
assert(Checkpoint.getCheckpointFiles(checkpointDirectory).nonEmpty)
checkpointDirectory
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]