This is an automated email from the ASF dual-hosted git repository.

sarutak pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.x by this push:
     new ec5c0b36f9ba [SPARK-57262][SQL][WEBUI] Job description derived from a 
query should respect `spark.sql.redaction.string.regex`
ec5c0b36f9ba is described below

commit ec5c0b36f9ba248702bf32084ffa3661cc289b6a
Author: Kousuke Saruta <[email protected]>
AuthorDate: Sun Jun 7 01:41:27 2026 +0900

    [SPARK-57262][SQL][WEBUI] Job description derived from a query should 
respect `spark.sql.redaction.string.regex`
    
    ### What changes were proposed in this pull request?
    This PR changes `SparkSQLDriver.scala` to redact the query before passing 
it to `setJobDescription`.
    
    ### Why are the changes needed?
    In the current implementation, redaction is done in `SQLExecution.scala`, so 
the description in the table at the top of the `/SQL/execution` page is redacted.
    <img width="1083" height="349" alt="sql-execution-page-top-table" 
src="https://github.com/user-attachments/assets/b06fb255-2b46-473d-9046-1b2d578e3bda"
 />
    
    But the description in the table on the `/jobs` page and the one in the 
table at the bottom of the `/SQL/execution` page are not redacted.
    <img width="525" height="692" alt="jobs-page-before" 
src="https://github.com/user-attachments/assets/0a5a8ce8-e4be-4669-bd7d-a6c62fe316ca"
 />
    <img width="515" height="274" alt="sql-execution-page-before" 
src="https://github.com/user-attachments/assets/bd0406cc-5b0b-40a0-96c4-9f9fa1aa048a"
 />
    
    ### Does this PR introduce _any_ user-facing change?
    Yes.
    
    ### How was this patch tested?
    Added a new test.
    Also confirmed that the descriptions are redacted in the UI.
    ```
    $ bin/spark-sql --conf spark.sql.redaction.string.regex="secret.*=.*"
    spark-sql (default)>  CREATE TABLE test1(secret string);
    spark-sql (default)> SELECT * FROM test1 WHERE secret=1;
    ```
    <img width="852" height="690" alt="jobs-page-after" 
src="https://github.com/user-attachments/assets/8e28e37e-369f-479c-9711-999b431756db"
 />
    <img width="598" height="272" alt="sql-execution-page-after" 
src="https://github.com/user-attachments/assets/cb734556-619b-45c6-a7f6-d52e60132aff"
 />
    
    ### Was this patch authored or co-authored using generative AI tooling?
    Kiro CLI / Claude
    
    Closes #56326 from sarutak/fix-redact-sql-description.
    
    Authored-by: Kousuke Saruta <[email protected]>
    Signed-off-by: Kousuke Saruta <[email protected]>
    (cherry picked from commit 583e5bb0010b52a0201a092985b09b0b2c264a6a)
    Signed-off-by: Kousuke Saruta <[email protected]>
---
 .../apache/spark/sql/execution/SQLExecution.scala  |  4 +-
 .../sql/hive/thriftserver/SparkSQLDriver.scala     |  4 +-
 .../hive/thriftserver/SparkSQLDriverSuite.scala    | 51 ++++++++++++++++++++++
 3 files changed, 55 insertions(+), 4 deletions(-)

diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala
index f25e908a9cdb..a5144bfd1195 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala
@@ -170,9 +170,7 @@ object SQLExecution extends Logging {
       val desc = Option(sc.getLocalProperty(SPARK_JOB_DESCRIPTION))
         .filter(_ => truncateLength > 0)
         .map { sqlStr =>
-          val redactedStr = Utils
-            .redact(sparkSession.sessionState.conf.stringRedactionPattern, 
sqlStr)
-          redactedStr.substring(0, Math.min(truncateLength, 
redactedStr.length))
+          sqlStr.substring(0, Math.min(truncateLength, sqlStr.length))
         }.getOrElse(callSite.shortForm)
 
       val globalConfigs = sparkSession.sharedState.conf.getAll.toMap
diff --git 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala
 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala
index 2040f8f565a2..f6f88cf8a012 100644
--- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala
+++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala
@@ -66,7 +66,9 @@ private[hive] class SparkSQLDriver(val sparkSession: 
SparkSession = SparkSQLEnv.
       val substitutorCommand = 
SQLConf.withExistingConf(sparkSession.sessionState.conf) {
         new VariableSubstitution().substitute(command)
       }
-      sparkSession.sparkContext.setJobDescription(substitutorCommand)
+      val redactedCommand =
+        Utils.redact(sparkSession.sessionState.conf.stringRedactionPattern, 
substitutorCommand)
+      sparkSession.sparkContext.setJobDescription(redactedCommand)
 
       // Parse with an empty parameter context to enable pre-parsing phase 
that scans for
       // parameter markers. If any parameter markers (:name or ?) are found in 
the SQL,
diff --git 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriverSuite.scala
 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriverSuite.scala
new file mode 100644
index 000000000000..a0a36ea1e796
--- /dev/null
+++ 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriverSuite.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.thriftserver
+
+import org.apache.spark.SparkContext
+import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.util.Utils.REDACTION_REPLACEMENT_TEXT
+
+class SparkSQLDriverSuite extends SharedSparkSession {
+
+  test("SPARK-57262: job description should be redacted by 
spark.sql.redaction.string.regex") {
+    withSQLConf(SQLConf.SQL_STRING_REDACTION_PATTERN.key -> 
"password=([^\\s]+)") {
+      var jobDescription: String = null
+      spark.sparkContext.addSparkListener(new SparkListener {
+        override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
+          jobDescription =
+            jobStart.properties.getProperty(SparkContext.SPARK_JOB_DESCRIPTION)
+        }
+      })
+
+      val driver = new SparkSQLDriver(spark)
+      try {
+        driver.run("SELECT 'password=secret123'")
+      } finally {
+        driver.close()
+      }
+
+      spark.sparkContext.listenerBus.waitUntilEmpty()
+      assert(jobDescription != null)
+      assert(!jobDescription.contains("secret123"))
+      assert(jobDescription.contains(REDACTION_REPLACEMENT_TEXT))
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to