This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6d25af4a4c8 [MINOR][SQL] Remove unimplemented instances in XML Data
Source
6d25af4a4c8 is described below
commit 6d25af4a4c819bc3a05c2fe9b8bf92e0a5629dcd
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Fri Nov 17 14:44:01 2023 +0900
[MINOR][SQL] Remove unimplemented instances in XML Data Source
### What changes were proposed in this pull request?
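This PR removes unused instances from the XML Data Source (for example
`timestampNTZFormatter`, `decimalParser`, the XML `factory`, and the `filters`
parameter of `StaxXmlParser`). They were presumably copied from the JSON/CSV
Data Sources, but the functionality they would support is not implemented for
XML yet.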
### Why are the changes needed?
They are dead code: nothing in the XML Data Source uses them.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
The existing CI run for this PR should cover the change.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43857 from HyukjinKwon/xml-cleanup.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala | 7 -------
.../org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala | 13 +------------
.../org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala | 10 ----------
.../spark/sql/execution/datasources/xml/XmlFileFormat.scala | 5 +----
4 files changed, 2 insertions(+), 33 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
index ae3a64d865c..c8333758229 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
@@ -48,13 +48,6 @@ class StaxXmlGenerator(
legacyFormat = FAST_DATE_FORMAT,
isParsing = false)
- private val timestampNTZFormatter = TimestampFormatter(
- options.timestampNTZFormatInWrite,
- options.zoneId,
- legacyFormat = FAST_DATE_FORMAT,
- isParsing = false,
- forTimestampNTZ = true)
-
private val dateFormatter = DateFormatter(
options.dateFormatInWrite,
options.locale,
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala
index b3174b70441..b39b2e63526 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala
@@ -40,16 +40,12 @@ import
org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
import org.apache.spark.sql.catalyst.xml.StaxXmlParser.convertStream
import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
class StaxXmlParser(
schema: StructType,
- val options: XmlOptions,
- filters: Seq[Filter] = Seq.empty) extends Logging {
-
- private val factory = options.buildXmlFactory()
+ val options: XmlOptions) extends Logging {
private lazy val timestampFormatter = TimestampFormatter(
options.timestampFormatInRead,
@@ -58,13 +54,6 @@ class StaxXmlParser(
legacyFormat = FAST_DATE_FORMAT,
isParsing = true)
- private lazy val timestampNTZFormatter = TimestampFormatter(
- options.timestampNTZFormatInRead,
- options.zoneId,
- legacyFormat = FAST_DATE_FORMAT,
- isParsing = true,
- forTimestampNTZ = true)
-
private lazy val dateFormatter = DateFormatter(
options.dateFormatInRead,
options.locale,
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala
index eeb5a9de4ed..53439879772 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala
@@ -31,7 +31,6 @@ import scala.util.control.NonFatal
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.catalyst.expressions.ExprUtils
import org.apache.spark.sql.catalyst.util.{DateFormatter, PermissiveMode,
TimestampFormatter}
import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
import org.apache.spark.sql.types._
@@ -40,8 +39,6 @@ class XmlInferSchema(options: XmlOptions, caseSensitive:
Boolean)
extends Serializable
with Logging {
- private val decimalParser = ExprUtils.getDecimalParser(options.locale)
-
private val timestampFormatter = TimestampFormatter(
options.timestampFormatInRead,
options.zoneId,
@@ -49,13 +46,6 @@ class XmlInferSchema(options: XmlOptions, caseSensitive:
Boolean)
legacyFormat = FAST_DATE_FORMAT,
isParsing = true)
- private val timestampNTZFormatter = TimestampFormatter(
- options.timestampNTZFormatInRead,
- options.zoneId,
- legacyFormat = FAST_DATE_FORMAT,
- isParsing = true,
- forTimestampNTZ = true)
-
private lazy val dateFormatter = DateFormatter(
options.dateFormatInRead,
options.locale,
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlFileFormat.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlFileFormat.scala
index 300c0f50042..a7661d8dbf8 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlFileFormat.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlFileFormat.scala
@@ -111,8 +111,6 @@ class XmlFileFormat extends TextBasedFileFormat with
DataSourceRegister {
ExprUtils.verifyColumnNameOfCorruptRecord(dataSchema,
columnNameOfCorruptRecord)
// Don't push any filter which refers to the "virtual" column which cannot
present in the input.
// Such filters will be applied later on the upper layer.
- val actualFilters =
- filters.filterNot(_.references.contains(columnNameOfCorruptRecord))
val actualRequiredSchema = StructType(
requiredSchema.filterNot(_.name == columnNameOfCorruptRecord))
if (requiredSchema.length == 1 &&
@@ -123,8 +121,7 @@ class XmlFileFormat extends TextBasedFileFormat with
DataSourceRegister {
(file: PartitionedFile) => {
val parser = new StaxXmlParser(
actualRequiredSchema,
- xmlOptions,
- actualFilters)
+ xmlOptions)
XmlDataSource(xmlOptions).readFile(
broadcastedHadoopConf.value.value,
file,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]