This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1f449817af93 [SPARK-52554][PS] Avoid multiple roundtrips for config check in Spark Connect
1f449817af93 is described below
commit 1f449817af93978141aefe1c387b10c3c97930f1
Author: Takuya Ueshin <[email protected]>
AuthorDate: Tue Jun 24 09:21:04 2025 +0900
[SPARK-52554][PS] Avoid multiple roundtrips for config check in Spark Connect
### What changes were proposed in this pull request?
Avoids multiple roundtrips for config checks in Spark Connect.
### Why are the changes needed?
Some APIs in the pandas API on Spark now need to check server configs, and in Spark Connect each config lookup is a separate roundtrip to the server, which could cause a performance issue.
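For context, here is a minimal sketch of the cost being avoided, assuming a Spark Connect session (the connection URL is illustrative, and the roundtrip counts are a reading of the code, not measurements):

```python
import pyspark.pandas as ps
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

# Before this change, an ANSI-mode check issued two separate Connect
# RPCs, one per config key:
support = ps.get_option("compute.ansi_mode_support", spark_session=spark)  # roundtrip 1
enabled = spark.conf.get("spark.sql.ansi.enabled") == "true"               # roundtrip 2
print(support and enabled)
```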
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Manually, and the existing tests should pass.
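For instance, a hypothetical manual check under Spark Connect (this snippet is not from the patch; the connection URL is illustrative, and the `True` case also assumes `compute.ansi_mode_support` is enabled):

```python
from pyspark.sql import SparkSession
from pyspark.pandas.utils import is_ansi_mode_enabled

spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

# Toggle the server config and confirm the helper tracks it.
spark.conf.set("spark.sql.ansi.enabled", "true")
print(is_ansi_mode_enabled(spark))   # True when compute.ansi_mode_support is on

spark.conf.set("spark.sql.ansi.enabled", "false")
print(is_ansi_mode_enabled(spark))   # False
```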
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #51252 from ueshin/issues/SPARK-52554/is_ansi_mode_enabled.
Authored-by: Takuya Ueshin <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/pandas/utils.py | 27 +++++++++++++++++++++++----
1 file changed, 23 insertions(+), 4 deletions(-)
diff --git a/python/pyspark/pandas/utils.py b/python/pyspark/pandas/utils.py
index 23350c06a147..b91f7011851c 100644
--- a/python/pyspark/pandas/utils.py
+++ b/python/pyspark/pandas/utils.py
@@ -20,6 +20,7 @@ Commonly used utils in pandas-on-Spark.
 """
 import functools
 from contextlib import contextmanager
+import json
 import os
 from typing import (
     Any,
@@ -1071,10 +1072,28 @@ def xor(df1: PySparkDataFrame, df2: PySparkDataFrame) -> PySparkDataFrame:
 def is_ansi_mode_enabled(spark: SparkSession) -> bool:
-    return (
-        ps.get_option("compute.ansi_mode_support", spark_session=spark)
-        and spark.conf.get("spark.sql.ansi.enabled") == "true"
-    )
+    if is_remote():
+        from pyspark.sql.connect.session import SparkSession as ConnectSession
+        from pyspark.pandas.config import _key_format, _options_dict
+
+        client = cast(ConnectSession, spark).client
+        (ansi_mode_support, ansi_enabled) = client.get_config_with_defaults(
+            (
+                _key_format("compute.ansi_mode_support"),
+                json.dumps(_options_dict["compute.ansi_mode_support"].default),
+            ),
+            ("spark.sql.ansi.enabled", None),
+        )
+        if ansi_enabled is None:
+            ansi_enabled = spark.conf.get("spark.sql.ansi.enabled")
+            # Explicitly set the value to reduce the roundtrips next time.
+            spark.conf.set("spark.sql.ansi.enabled", ansi_enabled)
+        return json.loads(ansi_mode_support) and ansi_enabled.lower() == "true"
+    else:
+        return (
+            ps.get_option("compute.ansi_mode_support", spark_session=spark)
+            and spark.conf.get("spark.sql.ansi.enabled").lower() == "true"
+        )
 def _test() -> None:
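A note on the new Connect branch, as I read the patch: `get_config_with_defaults` returns the supplied default for a key that has no explicitly set value, so `("spark.sql.ansi.enabled", None)` comes back as `None` until the config is set. The code then resolves the effective value once with `spark.conf.get` and pins it with `spark.conf.set`, so subsequent calls are answered by the single batched RPC. A usage sketch under that assumption (the connection URL is illustrative):

```python
from pyspark.sql import SparkSession
from pyspark.pandas.utils import is_ansi_mode_enabled

spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

# First call: one batched config fetch, plus a one-time get/set to pin
# spark.sql.ansi.enabled if it was never set explicitly.
is_ansi_mode_enabled(spark)

# Later calls: the single batched RPC resolves both keys.
is_ansi_mode_enabled(spark)
```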
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]