This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new cb9cb7cd10c5 [SPARK-53083][CORE][SQL][K8S][SS][TESTS] Use Java `Files.writeString` instead of `FileUtils.write`
cb9cb7cd10c5 is described below

commit cb9cb7cd10c5441009925c0e8f547ce91bf59334
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Sun Aug 3 00:30:28 2025 -0700

[SPARK-53083][CORE][SQL][K8S][SS][TESTS] Use Java `Files.writeString` instead of `FileUtils.write`

### What changes were proposed in this pull request?

This PR aims to use Java `Files.writeString` instead of `FileUtils.write`.

### Why are the changes needed?

Since Java 11, `Files.writeString` is supported and is about 7x faster than the `commons-io` library.

```scala
scala> val s = "a".repeat(100_000_000)

scala> spark.time(org.apache.commons.io.FileUtils.write(new java.io.File("/tmp/a"), s, java.nio.charset.StandardCharsets.UTF_8))
Time taken: 254 ms

scala> spark.time(java.nio.file.Files.writeString(new java.io.File("/tmp/a").toPath(), s))
Time taken: 35 ms
val res1: java.nio.file.Path = /tmp/a
```

### Does this PR introduce _any_ user-facing change?

No behavior change.

### How was this patch tested?

Pass the CIs.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #51795 from dongjoon-hyun/SPARK-53083.

Authored-by: Dongjoon Hyun <dongj...@apache.org>
Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala |  9 ++++-----
 .../org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala    |  7 ++++---
 scalastyle-config.xml                                         |  5 +++++
 .../spark/sql/execution/streaming/state/RocksDBSuite.scala    | 10 +++++-----
 .../org/apache/spark/sql/streaming/StreamingQuerySuite.scala  |  7 +++----
 .../org/apache/spark/streaming/StreamingContextSuite.scala    |  5 ++---
 6 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index c92cb4eb6be1..0116b2aa781a 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -26,7 +26,6 @@ import scala.collection.mutable.ArrayBuffer
 import scala.io.{Codec, Source}
 
 import com.google.common.io.ByteStreams
-import org.apache.commons.io.FileUtils
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileStatus, FSDataInputStream, Path}
 import org.scalatest.BeforeAndAfterEach
@@ -111,7 +110,7 @@ class SparkSubmitSuite
   with TestPrematureExit {
 
   private val emptyIvySettings = File.createTempFile("ivy", ".xml")
-  FileUtils.write(emptyIvySettings, "<ivysettings />", StandardCharsets.UTF_8)
+  Files.writeString(emptyIvySettings.toPath, "<ivysettings />")
 
   private val submit = new SparkSubmit()
 
@@ -1336,7 +1335,7 @@ class SparkSubmitSuite
     val jarFile = File.createTempFile("test", ".jar")
     jarFile.deleteOnExit()
     val content = "hello, world"
-    FileUtils.write(jarFile, content, StandardCharsets.UTF_8)
+    Files.writeString(jarFile.toPath, content)
     val hadoopConf = new Configuration()
     val tmpDir = Files.createTempDirectory("tmp").toFile
     updateConfWithFakeS3Fs(hadoopConf)
@@ -1351,7 +1350,7 @@ class SparkSubmitSuite
     val jarFile = File.createTempFile("test", ".jar")
     jarFile.deleteOnExit()
     val content = "hello, world"
-    FileUtils.write(jarFile, content, StandardCharsets.UTF_8)
+    Files.writeString(jarFile.toPath, content)
     val hadoopConf = new Configuration()
     val tmpDir = Files.createTempDirectory("tmp").toFile
     updateConfWithFakeS3Fs(hadoopConf)
@@ -1818,7 +1817,7 @@ class SparkSubmitSuite
       "spark = SparkSession.builder.getOrCreate();" +
       "assert 'connect' in str(type(spark));" +
       "assert spark.range(1).first()[0] == 0"
-    FileUtils.write(pyFile, content, StandardCharsets.UTF_8)
+    Files.writeString(pyFile.toPath, content)
     val args = Seq(
       "--name", "testPyApp",
       "--remote", "local",
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala
index be10d21aa527..9c0251936df2 100644
--- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala
@@ -19,11 +19,12 @@ package org.apache.spark.deploy.k8s
 import java.io.File
 import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.StandardOpenOption._
 
 import scala.jdk.CollectionConverters._
 
 import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, EnvVarSourceBuilder, PodBuilder}
-import org.apache.commons.io.FileUtils
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
 import org.scalatest.PrivateMethodTester
@@ -91,7 +92,7 @@ class KubernetesUtilsSuite extends SparkFunSuite with PrivateMethodTester {
     }
 
     def appendFileAndUpload(content: String, delSrc: Boolean, overwrite: Boolean): Unit = {
-      FileUtils.write(srcFile, content, StandardCharsets.UTF_8, true)
+      Files.writeString(srcFile.toPath, content, StandardCharsets.UTF_8, CREATE, WRITE, APPEND)
       KubernetesUtils.invokePrivate(upload(src, dest, fs, delSrc, overwrite))
     }
 
@@ -121,7 +122,7 @@ class KubernetesUtilsSuite extends SparkFunSuite with PrivateMethodTester {
 
       // Rewrite a new file, upload file with delSrc = true and overwrite = false.
      // Upload failed because dest exists, src still exists.
-      FileUtils.write(srcFile, "re-init-content", StandardCharsets.UTF_8, true)
+      Files.writeString(srcFile.toPath, "re-init-content")
       checkUploadException(delSrc = true, overwrite = false)
       assert(fs.exists(src))
     }
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index 9f6fb363830c..dbddba673285 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
@@ -292,6 +292,11 @@ This file is divided into 3 sections:
     <customMessage>Use Files.readString instead.</customMessage>
   </check>
 
+  <check customId="write" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">[^k]FileUtils\.write\(</parameter></parameters>
+    <customMessage>Use Files.writeString instead.</customMessage>
+  </check>
+
   <check customId="writeLines" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
     <parameters><parameter name="regex">FileUtils\.writeLines</parameter></parameters>
     <customMessage>Use Files.write instead.</customMessage>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala
index ed32b4f33ee6..69373641e850 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala
@@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.streaming.state
 
 import java.io._
-import java.nio.charset.StandardCharsets
+import java.nio.file.Files
 import java.util.UUID
 import java.util.concurrent.Executors
@@ -28,7 +28,6 @@ import scala.concurrent.duration._
 import scala.language.implicitConversions
 import scala.util.Random
 
-import org.apache.commons.io.FileUtils
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FSDataInputStream, Path}
 import org.rocksdb.CompressionType
@@ -2176,7 +2175,7 @@ class RocksDBSuite extends AlsoTestWithRocksDBFeatures with SharedSparkSession
     withTempDir { dir =>
       val file2 = new File(dir, "json")
       val json2 = """{"sstFiles":[],"numKeys":0}"""
-      FileUtils.write(file2, s"v2\n$json2", StandardCharsets.UTF_8)
+      Files.writeString(file2.toPath, s"v2\n$json2")
       val e = intercept[SparkException] {
         RocksDBCheckpointMetadata.readFromFile(file2)
       }
@@ -2194,7 +2193,7 @@ class RocksDBSuite extends AlsoTestWithRocksDBFeatures with SharedSparkSession
     assert(metadata.json == json)
     withTempDir { dir =>
       val file = new File(dir, "json")
-      FileUtils.write(file, s"v1\n$json", StandardCharsets.UTF_8)
+      Files.writeString(file.toPath, s"v1\n$json")
       assert(metadata == RocksDBCheckpointMetadata.readFromFile(file))
     }
   }
@@ -3611,7 +3610,8 @@ class RocksDBSuite extends AlsoTestWithRocksDBFeatures with SharedSparkSession
   def generateFiles(dir: String, fileToLengths: Seq[(String, Int)]): Unit = {
     fileToLengths.foreach { case (fileName, length) =>
       val file = new File(dir, fileName)
-      FileUtils.write(file, "a".repeat(length), StandardCharsets.UTF_8)
+      file.getParentFile().mkdirs()
+      Files.writeString(file.toPath, "a".repeat(length))
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala
index 1ccd19962831..174aa075be13 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala
@@ -18,7 +18,6 @@ package org.apache.spark.sql.streaming
 import java.io.File
-import java.nio.charset.StandardCharsets.UTF_8
 import java.nio.file.Files
 import java.util.Collections
 import java.util.concurrent.CountDownLatch
@@ -1111,9 +1110,9 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi
     // migrate to new version. However, in our test, "tempDir" will be different in each run and
     // we need to fix the absolute path in the metadata to match "tempDir".
     val sparkMetadata = Files.readString(new File(legacySparkMetadataDir, "0").toPath)
-    FileUtils.write(
-      new File(legacySparkMetadataDir, "0"),
-      sparkMetadata.replaceAll("TEMPDIR", dir.getCanonicalPath), UTF_8)
+    Files.writeString(
+      new File(legacySparkMetadataDir, "0").toPath,
+      sparkMetadata.replaceAll("TEMPDIR", dir.getCanonicalPath))
   }
 
   test("detect escaped path and report the migration guide") {
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
index b2d060b8e042..564713af88ae 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
@@ -18,7 +18,7 @@ package org.apache.spark.streaming
 
 import java.io.{File, NotSerializableException}
-import java.nio.charset.StandardCharsets
+import java.nio.file.Files
 import java.util.Locale
 import java.util.concurrent.{CountDownLatch, TimeUnit}
 import java.util.concurrent.atomic.AtomicInteger
@@ -26,7 +26,6 @@ import java.util.concurrent.atomic.AtomicInteger
 
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.mutable.Queue
 
-import org.apache.commons.io.FileUtils
 import org.scalatest.{Assertions, PrivateMethodTester}
 import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits}
 import org.scalatest.concurrent.Eventually._
@@ -910,7 +909,7 @@ class StreamingContextSuite
   def createCorruptedCheckpoint(): String = {
     val checkpointDirectory = Utils.createTempDir().getAbsolutePath()
     val fakeCheckpointFile = Checkpoint.checkpointFile(checkpointDirectory, Time(1000))
-    FileUtils.write(new File(fakeCheckpointFile.toString()), "blablabla", StandardCharsets.UTF_8)
+    Files.writeString(new File(fakeCheckpointFile.toString()).toPath, "blablabla")
     assert(Checkpoint.getCheckpointFiles(checkpointDirectory).nonEmpty)
     checkpointDirectory
   }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
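For readers skimming the diff above, the whole change reduces to two call shapes: a plain overwrite and an append. The following is a minimal standalone Scala sketch of that migration, not code from the patch itself; the object name and temp file are made up for illustration, and it assumes a Java 11+ runtime where `Files.writeString` is available.

```scala
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path}
import java.nio.file.StandardOpenOption.{APPEND, CREATE, WRITE}

// Hypothetical example object, not part of the patch.
object WriteStringMigrationSketch {
  def main(args: Array[String]): Unit = {
    // Temp file standing in for the test files touched in the diff.
    val path: Path = Files.createTempFile("sketch", ".txt")

    // Before: FileUtils.write(file, "<ivysettings />", StandardCharsets.UTF_8)
    // After:  Files.writeString defaults to UTF-8, so the charset argument can be dropped.
    Files.writeString(path, "<ivysettings />")

    // Before: FileUtils.write(srcFile, content, StandardCharsets.UTF_8, true)  // append = true
    // After:  appending is expressed through OpenOptions rather than a boolean flag,
    //         which is the shape used in the KubernetesUtilsSuite hunk.
    Files.writeString(path, "\nappended line", StandardCharsets.UTF_8, CREATE, WRITE, APPEND)

    // Read back to confirm both writes landed, then clean up.
    println(Files.readString(path))
    Files.deleteIfExists(path)
  }
}
```

Dropping the explicit charset is safe because `Files.writeString` writes UTF-8 by default, matching the `StandardCharsets.UTF_8` that the removed `FileUtils.write` calls passed.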