This is an automated email from the ASF dual-hosted git repository.
sarutak pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.x by this push:
new ec5c0b36f9ba [SPARK-57262][SQL][WEBUI] Job description derived from a
query should respect `spark.sql.redaction.string.regex`
ec5c0b36f9ba is described below
commit ec5c0b36f9ba248702bf32084ffa3661cc289b6a
Author: Kousuke Saruta <[email protected]>
AuthorDate: Sun Jun 7 01:41:27 2026 +0900
[SPARK-57262][SQL][WEBUI] Job description derived from a query should
respect `spark.sql.redaction.string.regex`
### What changes were proposed in this pull request?
This PR changes `SparkSQLDriver.scala` to redact a query before
passing it to `setJobDescription`.
### Why are the changes needed?
In the current implementation, redaction is done in `SQLExecution.scala`, so
the description in the table at the top of the `/SQL/execution` page is redacted.
<img width="1083" height="349" alt="sql-execution-page-top-table"
src="https://github.com/user-attachments/assets/b06fb255-2b46-473d-9046-1b2d578e3bda"
/>
However, the description in the table on the `/jobs` page and the one in the
table at the bottom of the `/SQL/execution` page are not redacted.
<img width="525" height="692" alt="jobs-page-before"
src="https://github.com/user-attachments/assets/0a5a8ce8-e4be-4669-bd7d-a6c62fe316ca"
/>
<img width="515" height="274" alt="sql-execution-page-before"
src="https://github.com/user-attachments/assets/bd0406cc-5b0b-40a0-96c4-9f9fa1aa048a"
/>
### Does this PR introduce _any_ user-facing change?
Yes.
### How was this patch tested?
Added a new test.
Also confirmed that descriptions are redacted in the UI.
```
$ bin/spark-sql --conf spark.sql.redaction.string.regex="secret.*=.*"
spark-sql (default)> CREATE TABLE test1(secret string);
spark-sql (default)> SELECT * FROM test1 WHERE secret=1;
```
<img width="852" height="690" alt="jobs-page-after"
src="https://github.com/user-attachments/assets/8e28e37e-369f-479c-9711-999b431756db"
/>
<img width="598" height="272" alt="sql-execution-page-after"
src="https://github.com/user-attachments/assets/cb734556-619b-45c6-a7f6-d52e60132aff"
/>
### Was this patch authored or co-authored using generative AI tooling?
Kiro CLI / Claude
Closes #56326 from sarutak/fix-redact-sql-description.
Authored-by: Kousuke Saruta <[email protected]>
Signed-off-by: Kousuke Saruta <[email protected]>
(cherry picked from commit 583e5bb0010b52a0201a092985b09b0b2c264a6a)
Signed-off-by: Kousuke Saruta <[email protected]>
---
.../apache/spark/sql/execution/SQLExecution.scala | 4 +-
.../sql/hive/thriftserver/SparkSQLDriver.scala | 4 +-
.../hive/thriftserver/SparkSQLDriverSuite.scala | 51 ++++++++++++++++++++++
3 files changed, 55 insertions(+), 4 deletions(-)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala
index f25e908a9cdb..a5144bfd1195 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala
@@ -170,9 +170,7 @@ object SQLExecution extends Logging {
val desc = Option(sc.getLocalProperty(SPARK_JOB_DESCRIPTION))
.filter(_ => truncateLength > 0)
.map { sqlStr =>
- val redactedStr = Utils
- .redact(sparkSession.sessionState.conf.stringRedactionPattern,
sqlStr)
- redactedStr.substring(0, Math.min(truncateLength,
redactedStr.length))
+ sqlStr.substring(0, Math.min(truncateLength, sqlStr.length))
}.getOrElse(callSite.shortForm)
val globalConfigs = sparkSession.sharedState.conf.getAll.toMap
diff --git
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala
index 2040f8f565a2..f6f88cf8a012 100644
---
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala
+++
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala
@@ -66,7 +66,9 @@ private[hive] class SparkSQLDriver(val sparkSession:
SparkSession = SparkSQLEnv.
val substitutorCommand =
SQLConf.withExistingConf(sparkSession.sessionState.conf) {
new VariableSubstitution().substitute(command)
}
- sparkSession.sparkContext.setJobDescription(substitutorCommand)
+ val redactedCommand =
+ Utils.redact(sparkSession.sessionState.conf.stringRedactionPattern,
substitutorCommand)
+ sparkSession.sparkContext.setJobDescription(redactedCommand)
// Parse with an empty parameter context to enable pre-parsing phase
that scans for
// parameter markers. If any parameter markers (:name or ?) are found in
the SQL,
diff --git
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriverSuite.scala
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriverSuite.scala
new file mode 100644
index 000000000000..a0a36ea1e796
--- /dev/null
+++
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriverSuite.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.thriftserver
+
+import org.apache.spark.SparkContext
+import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.util.Utils.REDACTION_REPLACEMENT_TEXT
+
+class SparkSQLDriverSuite extends SharedSparkSession {
+
+ test("SPARK-57262: job description should be redacted by
spark.sql.redaction.string.regex") {
+ withSQLConf(SQLConf.SQL_STRING_REDACTION_PATTERN.key ->
"password=([^\\s]+)") {
+ var jobDescription: String = null
+ spark.sparkContext.addSparkListener(new SparkListener {
+ override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
+ jobDescription =
+ jobStart.properties.getProperty(SparkContext.SPARK_JOB_DESCRIPTION)
+ }
+ })
+
+ val driver = new SparkSQLDriver(spark)
+ try {
+ driver.run("SELECT 'password=secret123'")
+ } finally {
+ driver.close()
+ }
+
+ spark.sparkContext.listenerBus.waitUntilEmpty()
+ assert(jobDescription != null)
+ assert(!jobDescription.contains("secret123"))
+ assert(jobDescription.contains(REDACTION_REPLACEMENT_TEXT))
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]