This is an automated email from the ASF dual-hosted git repository.
hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1fbb94b87c0 [SPARK-44284][CONNECT] Create simple conf system for sql/api
1fbb94b87c0 is described below
commit 1fbb94b87c0b10b6f2ded93f0c7eb253d086887e
Author: Herman van Hovell <[email protected]>
AuthorDate: Thu Jul 6 08:22:18 2023 -0400
[SPARK-44284][CONNECT] Create simple conf system for sql/api
### What changes were proposed in this pull request?
This PR introduces a configuration system for classes that are in sql/api.
### Why are the changes needed?
We are moving a number of components into sql/api that rely on confs being set when they are used with sql/core. The conf system added here gives us the flexibility to do so.
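As a rough illustration (the `FieldTruncation` object below is hypothetical; `SqlApiConf`, `setConfGetter`, `maxToStringFields`, and `DefaultSqlApiConf` come from the diff in this commit), code living in sql/api reads confs through the new facade, and sql/core binds that facade to the active `SQLConf`:
```scala
package org.apache.spark.sql

// Hypothetical sql/api code: read configuration through the SqlApiConf facade
// instead of depending directly on sql/catalyst's SQLConf.
private[sql] object FieldTruncation {
  def truncate(fieldNames: Seq[String]): Seq[String] =
    fieldNames.take(SqlApiConf.get.maxToStringFields)
}

// When sql/core is on the classpath, SQLConf installs itself as the getter
// (see the diff below), so SqlApiConf.get returns the session's SQLConf:
//   SqlApiConf.setConfGetter(() => SQLConf.get)
// Without sql/core (e.g. in the Spark Connect client), the hardcoded
// DefaultSqlApiConf values are returned instead.
```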
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing tests.
Closes #41838 from hvanhovell/SPARK-44284.
Lead-authored-by: Herman van Hovell <[email protected]>
Co-authored-by: Herman van Hovell <[email protected]>
Signed-off-by: Herman van Hovell <[email protected]>
---
.../scala/org/apache/spark/sql/SqlApiConf.scala | 63 ++++++++++++++++++++++
.../spark/sql/catalyst/types/DataTypeUtils.scala | 4 +-
.../org/apache/spark/sql/internal/SQLConf.scala | 14 +++--
.../org/apache/spark/sql/types/StructType.scala | 4 +-
4 files changed, 77 insertions(+), 8 deletions(-)
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/SqlApiConf.scala b/sql/api/src/main/scala/org/apache/spark/sql/SqlApiConf.scala
new file mode 100644
index 00000000000..07943683ab6
--- /dev/null
+++ b/sql/api/src/main/scala/org/apache/spark/sql/SqlApiConf.scala
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql
+
+import java.util.concurrent.atomic.AtomicReference
+
+import scala.util.Try
+
+import org.apache.spark.util.SparkClassUtils
+
+/**
+ * Configuration for all objects that are placed in the `sql/api` project. The normal way of
+ * accessing this class is through `SqlApiConf.get`. If this code is being used with sql/core
+ * then its values are bound to the currently set SQLConf. With Spark Connect, it will default to
+ * hardcoded values.
+ */
+private[sql] trait SqlApiConf {
+ def ansiEnabled: Boolean
+ def caseSensitiveAnalysis: Boolean
+ def maxToStringFields: Int
+}
+
+private[sql] object SqlApiConf {
+ /**
+ * Defines a getter that returns the [[SqlApiConf]] within scope.
+ */
+ private val confGetter = new AtomicReference[() => SqlApiConf](() => DefaultSqlApiConf)
+
+ /**
+ * Sets the active config getter.
+ */
+ private[sql] def setConfGetter(getter: () => SqlApiConf): Unit = {
+ confGetter.set(getter)
+ }
+
+ def get: SqlApiConf = confGetter.get()()
+
+ // Force load SQLConf. This will trigger the installation of a confGetter that points to SQLConf.
+ Try(SparkClassUtils.classForName("org.apache.spark.sql.internal.SQLConf$"))
+}
+
+/**
+ * Default configurations used when no other [[SqlApiConf]] getter is set.
+ */
+private[sql] object DefaultSqlApiConf extends SqlApiConf {
+ override def ansiEnabled: Boolean = false
+ override def caseSensitiveAnalysis: Boolean = false
+ override def maxToStringFields: Int = 50
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/DataTypeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/DataTypeUtils.scala
index 0d2d6c0262c..da0607e0920 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/DataTypeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/DataTypeUtils.scala
@@ -16,9 +16,9 @@
*/
package org.apache.spark.sql.catalyst.types
+import org.apache.spark.sql.SqlApiConf
import org.apache.spark.sql.catalyst.analysis.Resolver
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Cast, Literal}
-import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy
import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy.{ANSI, STRICT}
import org.apache.spark.sql.types.{ArrayType, AtomicType, DataType, Decimal, DecimalType, MapType, NullType, StructField, StructType}
@@ -30,7 +30,7 @@ object DataTypeUtils {
* (`StructField.nullable`, `ArrayType.containsNull`, and `MapType.valueContainsNull`).
*/
def sameType(left: DataType, right: DataType): Boolean =
- if (SQLConf.get.caseSensitiveAnalysis) {
+ if (SqlApiConf.get.caseSensitiveAnalysis) {
equalsIgnoreNullability(left, right)
} else {
equalsIgnoreCaseAndNullability(left, right)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index ecff6bef8ae..4d7322c6e53 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -36,6 +36,7 @@ import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._
import org.apache.spark.internal.config.{IGNORE_MISSING_FILES => SPARK_IGNORE_MISSING_FILES}
import org.apache.spark.network.util.ByteUnit
+import org.apache.spark.sql.SqlApiConf
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.analysis.{HintErrorLogger, Resolver}
import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode
@@ -176,9 +177,14 @@ object SQLConf {
* See [[get]] for more information.
*/
def setSQLConfGetter(getter: () => SQLConf): Unit = {
+ SqlApiConf.setConfGetter(getter)
confGetter.set(getter)
}
+ // Make sure SqlApiConf is always in sync with SQLConf. SqlApiConf will always try to
+ // load SqlConf to make sure both classes are in sync from the get go.
+ SqlApiConf.setConfGetter(() => SQLConf.get)
+
/**
* Returns the active config object within the current scope. If there is an active SparkSession,
* the proper SQLConf associated with the thread's active session is used. If it's called from
@@ -4439,7 +4445,7 @@ object SQLConf {
*
* SQLConf is thread-safe (internally synchronized, so safe to be used in multiple threads).
*/
-class SQLConf extends Serializable with Logging {
+class SQLConf extends Serializable with Logging with SqlApiConf {
import SQLConf._
/** Only low degree of contention is expected for conf, thus NOT using ConcurrentHashMap. */
@@ -4685,7 +4691,7 @@ class SQLConf extends Serializable with Logging {
def subqueryReuseEnabled: Boolean = getConf(SUBQUERY_REUSE_ENABLED)
- def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE)
+ override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE)
def constraintPropagationEnabled: Boolean = getConf(CONSTRAINT_PROPAGATION_ENABLED)
@@ -5001,7 +5007,7 @@ class SQLConf extends Serializable with Logging {
def storeAssignmentPolicy: StoreAssignmentPolicy.Value =
StoreAssignmentPolicy.withName(getConf(STORE_ASSIGNMENT_POLICY))
- def ansiEnabled: Boolean = getConf(ANSI_ENABLED)
+ override def ansiEnabled: Boolean = getConf(ANSI_ENABLED)
def enableDefaultColumns: Boolean = getConf(SQLConf.ENABLE_DEFAULT_COLUMNS)
@@ -5071,7 +5077,7 @@ class SQLConf extends Serializable with Logging {
def nameNonStructGroupingKeyAsValue: Boolean =
getConf(SQLConf.NAME_NON_STRUCT_GROUPING_KEY_AS_VALUE)
- def maxToStringFields: Int = getConf(SQLConf.MAX_TO_STRING_FIELDS)
+ override def maxToStringFields: Int = getConf(SQLConf.MAX_TO_STRING_FIELDS)
def maxPlanStringLength: Int = getConf(SQLConf.MAX_PLAN_STRING_LENGTH).toInt
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index 013d416bc97..56e1356aec7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -24,6 +24,7 @@ import scala.util.control.NonFatal
import org.json4s.JsonDSL._
import org.apache.spark.annotation.Stable
+import org.apache.spark.sql.SqlApiConf
import org.apache.spark.sql.catalyst.analysis.Resolver
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.parser.{DataTypeParser, LegacyTypeStringParser}
@@ -32,7 +33,6 @@ import org.apache.spark.sql.catalyst.types.DataTypeUtils
import org.apache.spark.sql.catalyst.util.{SparkStringUtils, StringConcat}
import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
-import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.util.collection.Utils
/**
@@ -425,7 +425,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
SparkStringUtils.truncatedString(
fieldTypes,
"struct<", ",", ">",
- SQLConf.get.maxToStringFields)
+ SqlApiConf.get.maxToStringFields)
}
override def catalogString: String = {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]