This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1005cd5576ef [SPARK-46447][SQL] Remove the legacy datetime rebasing SQL configs
1005cd5576ef is described below
commit 1005cd5576ef073afee243848bcad5e5f4a9d309
Author: Max Gekk <[email protected]>
AuthorDate: Wed Dec 20 20:22:09 2023 +0300
[SPARK-46447][SQL] Remove the legacy datetime rebasing SQL configs
### What changes were proposed in this pull request?
In the PR, I propose to remove the already deprecated SQL configs (alternatives to other configs):
- spark.sql.legacy.parquet.int96RebaseModeInWrite
- spark.sql.legacy.parquet.datetimeRebaseModeInWrite
- spark.sql.legacy.parquet.int96RebaseModeInRead
- spark.sql.legacy.avro.datetimeRebaseModeInWrite
- spark.sql.legacy.avro.datetimeRebaseModeInRead
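To make the switch from the legacy keys above concrete, here is a minimal migration sketch (assuming an active `SparkSession` named `spark`; the value `CORRECTED` is only an illustrative choice among the rebase modes):

```scala
// Minimal migration sketch, assuming an active SparkSession named `spark`.
// The value "CORRECTED" is only one illustrative choice of rebase mode.
// Before (Spark 3.x, via the legacy alternative key):
//   spark.conf.set("spark.sql.legacy.parquet.int96RebaseModeInWrite", "CORRECTED")
// Since Spark 4.0, only the primary key works:
spark.conf.set("spark.sql.parquet.int96RebaseModeInWrite", "CORRECTED")
```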
### Why are the changes needed?
To improve code maintenance.
### Does this PR introduce _any_ user-facing change?
Should not.
### How was this patch tested?
By existing test suites.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44402 from MaxGekk/remove-legacy-rebase-confs-2.
Authored-by: Max Gekk <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
docs/sql-migration-guide.md | 6 ++++
.../org/apache/spark/sql/internal/SQLConf.scala | 36 +++++++++++-----------
2 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 4e8e2422d7e0..30a37d97042a 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -30,6 +30,12 @@ license: |
- Since Spark 4.0, `spark.sql.parquet.compression.codec` drops support for the codec name `lz4raw`; please use `lz4_raw` instead.
- Since Spark 4.0, when overflowing during casting timestamp to byte/short/int under non-ansi mode, Spark will return null instead of a wrapping value.
- Since Spark 4.0, the `encode()` and `decode()` functions support only the following charsets: 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'. To restore the previous behavior, where the functions accept charsets of the current JDK used by Spark, set `spark.sql.legacy.javaCharsets` to `true`.
+- Since Spark 4.0, the legacy datetime rebasing SQL configs with the prefix `spark.sql.legacy` are removed. To restore the previous behavior, use the following configs:
+  - `spark.sql.parquet.int96RebaseModeInWrite` instead of `spark.sql.legacy.parquet.int96RebaseModeInWrite`
+  - `spark.sql.parquet.datetimeRebaseModeInWrite` instead of `spark.sql.legacy.parquet.datetimeRebaseModeInWrite`
+  - `spark.sql.parquet.int96RebaseModeInRead` instead of `spark.sql.legacy.parquet.int96RebaseModeInRead`
+  - `spark.sql.avro.datetimeRebaseModeInWrite` instead of `spark.sql.legacy.avro.datetimeRebaseModeInWrite`
+  - `spark.sql.avro.datetimeRebaseModeInRead` instead of `spark.sql.legacy.avro.datetimeRebaseModeInRead`
## Upgrading from Spark SQL 3.4 to 3.5
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 6404779f30ac..d54cb3756638 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -4081,7 +4081,6 @@ object SQLConf {
"When EXCEPTION, which is the default, Spark will fail the writing if
it sees ancient " +
"timestamps that are ambiguous between the two calendars.")
.version("3.1.0")
- .withAlternative("spark.sql.legacy.parquet.int96RebaseModeInWrite")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(LegacyBehaviorPolicy.values.map(_.toString))
@@ -4099,7 +4098,6 @@ object SQLConf {
"TIMESTAMP_MILLIS, TIMESTAMP_MICROS. The INT96 type has the separate
config: " +
s"${PARQUET_INT96_REBASE_MODE_IN_WRITE.key}.")
.version("3.0.0")
- .withAlternative("spark.sql.legacy.parquet.datetimeRebaseModeInWrite")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(LegacyBehaviorPolicy.values.map(_.toString))
@@ -4115,7 +4113,6 @@ object SQLConf {
"timestamps that are ambiguous between the two calendars. This config
is only effective " +
"if the writer info (like Spark, Hive) of the Parquet files is
unknown.")
.version("3.1.0")
- .withAlternative("spark.sql.legacy.parquet.int96RebaseModeInRead")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(LegacyBehaviorPolicy.values.map(_.toString))
@@ -4149,7 +4146,6 @@ object SQLConf {
"When EXCEPTION, which is the default, Spark will fail the writing if
it sees " +
"ancient dates/timestamps that are ambiguous between the two
calendars.")
.version("3.0.0")
- .withAlternative("spark.sql.legacy.avro.datetimeRebaseModeInWrite")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(LegacyBehaviorPolicy.values.map(_.toString))
@@ -4165,7 +4161,6 @@ object SQLConf {
"ancient dates/timestamps that are ambiguous between the two
calendars. This config is " +
"only effective if the writer info (like Spark, Hive) of the Avro
files is unknown.")
.version("3.0.0")
- .withAlternative("spark.sql.legacy.avro.datetimeRebaseModeInRead")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(LegacyBehaviorPolicy.values.map(_.toString))
@@ -4657,22 +4652,12 @@ object SQLConf {
s"Set '${LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key}' to false instead."),
DeprecatedConfig("spark.sql.sources.schemaStringLengthThreshold", "3.2",
s"Use '${HIVE_TABLE_PROPERTY_LENGTH_THRESHOLD.key}' instead."),
- DeprecatedConfig(PARQUET_INT96_REBASE_MODE_IN_WRITE.alternatives.head, "3.2",
- s"Use '${PARQUET_INT96_REBASE_MODE_IN_WRITE.key}' instead."),
- DeprecatedConfig(PARQUET_INT96_REBASE_MODE_IN_READ.alternatives.head, "3.2",
- s"Use '${PARQUET_INT96_REBASE_MODE_IN_READ.key}' instead."),
- DeprecatedConfig(PARQUET_REBASE_MODE_IN_WRITE.alternatives.head, "3.2",
- s"Use '${PARQUET_REBASE_MODE_IN_WRITE.key}' instead."),
- DeprecatedConfig(PARQUET_REBASE_MODE_IN_READ.alternatives.head, "3.2",
- s"Use '${PARQUET_REBASE_MODE_IN_READ.key}' instead."),
- DeprecatedConfig(AVRO_REBASE_MODE_IN_WRITE.alternatives.head, "3.2",
- s"Use '${AVRO_REBASE_MODE_IN_WRITE.key}' instead."),
- DeprecatedConfig(AVRO_REBASE_MODE_IN_READ.alternatives.head, "3.2",
- s"Use '${AVRO_REBASE_MODE_IN_READ.key}' instead."),
DeprecatedConfig(LEGACY_REPLACE_DATABRICKS_SPARK_AVRO_ENABLED.key, "3.2",
"""Use `.format("avro")` in `DataFrameWriter` or `DataFrameReader`
instead."""),
DeprecatedConfig(COALESCE_PARTITIONS_MIN_PARTITION_NUM.key, "3.2",
s"Use '${COALESCE_PARTITIONS_MIN_PARTITION_SIZE.key}' instead."),
+ DeprecatedConfig(PARQUET_REBASE_MODE_IN_READ.alternatives.head, "3.2",
+ s"Use '${PARQUET_REBASE_MODE_IN_READ.key}' instead."),
DeprecatedConfig(ESCAPED_STRING_LITERALS.key, "4.0",
"Use raw string literals with the `r` prefix instead. "),
DeprecatedConfig("spark.connect.copyFromLocalToFs.allowDestLocal", "4.0",
@@ -4735,7 +4720,22 @@ object SQLConf {
RemovedConfig("spark.sql.hive.verifyPartitionPath", "4.0.0", "false",
s"This config was replaced by '${IGNORE_MISSING_FILES.key}'."),
RemovedConfig("spark.sql.optimizer.runtimeFilter.semiJoinReduction.enabled",
"4.0.0", "false",
- "This optimizer config is useless as runtime filter cannot be an IN
subquery now.")
+ "This optimizer config is useless as runtime filter cannot be an IN
subquery now."),
+ RemovedConfig("spark.sql.legacy.parquet.int96RebaseModeInWrite", "4.0.0",
+ LegacyBehaviorPolicy.CORRECTED.toString,
+ s"Use '${PARQUET_INT96_REBASE_MODE_IN_WRITE.key}' instead."),
+ RemovedConfig("spark.sql.legacy.parquet.int96RebaseModeInRead", "4.0.0",
+ LegacyBehaviorPolicy.CORRECTED.toString,
+ s"Use '${PARQUET_INT96_REBASE_MODE_IN_READ.key}' instead."),
+ RemovedConfig("spark.sql.legacy.parquet.datetimeRebaseModeInWrite",
"4.0.0",
+ LegacyBehaviorPolicy.CORRECTED.toString,
+ s"Use '${PARQUET_REBASE_MODE_IN_WRITE.key}' instead."),
+ RemovedConfig("spark.sql.legacy.avro.datetimeRebaseModeInWrite", "4.0.0",
+ LegacyBehaviorPolicy.CORRECTED.toString,
+ s"Use '${AVRO_REBASE_MODE_IN_WRITE.key}' instead."),
+ RemovedConfig("spark.sql.legacy.avro.datetimeRebaseModeInRead", "4.0.0",
+ LegacyBehaviorPolicy.CORRECTED.toString,
+ s"Use '${AVRO_REBASE_MODE_IN_READ.key}' instead.")
)
Map(configs.map { cfg => cfg.key -> cfg } : _*)
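For readers unfamiliar with the registry above: each `RemovedConfig` entry records the key, the removal version, the last default value, and a migration hint. Below is a rough sketch of how such a registry is typically enforced; this is an illustration under those assumptions, not Spark's actual implementation:

```scala
// Illustrative sketch only, not Spark's actual code: a removed config may
// still be set to its recorded last value (a no-op), but any other value is
// rejected with the migration hint.
case class RemovedConfig(key: String, version: String, defaultValue: String, comment: String)

def checkRemovedConf(removed: Map[String, RemovedConfig], key: String, value: String): Unit = {
  removed.get(key).foreach { cfg =>
    if (cfg.defaultValue != value) {
      throw new IllegalArgumentException(
        s"The SQL config '$key' was removed in the version ${cfg.version}. ${cfg.comment}")
    }
  }
}
```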
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]