(spark) branch master updated: [MINOR][SQL] Remove unimplemented instances in XML Data Source

gurwls223 Thu, 16 Nov 2023 21:46:17 -0800

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 6d25af4a4c8 [MINOR][SQL] Remove unimplemented instances in XML Data Source
6d25af4a4c8 is described below

commit 6d25af4a4c819bc3a05c2fe9b8bf92e0a5629dcd
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Fri Nov 17 14:44:01 2023 +0900

    [MINOR][SQL] Remove unimplemented instances in XML Data Source
    
    ### What changes were proposed in this pull request?
    
    This PR removes unimplemented instances in the XML Data Source. They were
    presumably copied from the JSON/CSV Data Sources, but they are not implemented yet.
    
    ### Why are the changes needed?
    
    They are dead code: nothing in the XML Data Source uses them.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing CI in this PR should test them out.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #43857 from HyukjinKwon/xml-cleanup.
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 .../apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala    |  7 -------
 .../org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala   | 13 +------------
 .../org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala  | 10 ----------
 .../spark/sql/execution/datasources/xml/XmlFileFormat.scala |  5 +----
 4 files changed, 2 insertions(+), 33 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
index ae3a64d865c..c8333758229 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
@@ -48,13 +48,6 @@ class StaxXmlGenerator(
     legacyFormat = FAST_DATE_FORMAT,
     isParsing = false)
 
-  private val timestampNTZFormatter = TimestampFormatter(
-    options.timestampNTZFormatInWrite,
-    options.zoneId,
-    legacyFormat = FAST_DATE_FORMAT,
-    isParsing = false,
-    forTimestampNTZ = true)
-
   private val dateFormatter = DateFormatter(
     options.dateFormatInWrite,
     options.locale,
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala
index b3174b70441..b39b2e63526 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala
@@ -40,16 +40,12 @@ import 
org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
 import org.apache.spark.sql.catalyst.xml.StaxXmlParser.convertStream
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.sources.Filter
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
 
 class StaxXmlParser(
     schema: StructType,
-    val options: XmlOptions,
-    filters: Seq[Filter] = Seq.empty) extends Logging {
-
-  private val factory = options.buildXmlFactory()
+    val options: XmlOptions) extends Logging {
 
   private lazy val timestampFormatter = TimestampFormatter(
     options.timestampFormatInRead,
@@ -58,13 +54,6 @@ class StaxXmlParser(
     legacyFormat = FAST_DATE_FORMAT,
     isParsing = true)
 
-  private lazy val timestampNTZFormatter = TimestampFormatter(
-    options.timestampNTZFormatInRead,
-    options.zoneId,
-    legacyFormat = FAST_DATE_FORMAT,
-    isParsing = true,
-    forTimestampNTZ = true)
-
   private lazy val dateFormatter = DateFormatter(
     options.dateFormatInRead,
     options.locale,
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala
index eeb5a9de4ed..53439879772 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala
@@ -31,7 +31,6 @@ import scala.util.control.NonFatal
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.catalyst.expressions.ExprUtils
 import org.apache.spark.sql.catalyst.util.{DateFormatter, PermissiveMode, 
TimestampFormatter}
 import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
 import org.apache.spark.sql.types._
@@ -40,8 +39,6 @@ class XmlInferSchema(options: XmlOptions, caseSensitive: 
Boolean)
     extends Serializable
     with Logging {
 
-  private val decimalParser = ExprUtils.getDecimalParser(options.locale)
-
   private val timestampFormatter = TimestampFormatter(
     options.timestampFormatInRead,
     options.zoneId,
@@ -49,13 +46,6 @@ class XmlInferSchema(options: XmlOptions, caseSensitive: 
Boolean)
     legacyFormat = FAST_DATE_FORMAT,
     isParsing = true)
 
-  private val timestampNTZFormatter = TimestampFormatter(
-    options.timestampNTZFormatInRead,
-    options.zoneId,
-    legacyFormat = FAST_DATE_FORMAT,
-    isParsing = true,
-    forTimestampNTZ = true)
-
   private lazy val dateFormatter = DateFormatter(
     options.dateFormatInRead,
     options.locale,
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlFileFormat.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlFileFormat.scala
index 300c0f50042..a7661d8dbf8 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlFileFormat.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlFileFormat.scala
@@ -111,8 +111,6 @@ class XmlFileFormat extends TextBasedFileFormat with 
DataSourceRegister {
     ExprUtils.verifyColumnNameOfCorruptRecord(dataSchema, 
columnNameOfCorruptRecord)
     // Don't push any filter which refers to the "virtual" column which cannot 
present in the input.
     // Such filters will be applied later on the upper layer.
-    val actualFilters =
-      filters.filterNot(_.references.contains(columnNameOfCorruptRecord))
     val actualRequiredSchema = StructType(
       requiredSchema.filterNot(_.name == columnNameOfCorruptRecord))
     if (requiredSchema.length == 1 &&
@@ -123,8 +121,7 @@ class XmlFileFormat extends TextBasedFileFormat with 
DataSourceRegister {
     (file: PartitionedFile) => {
       val parser = new StaxXmlParser(
         actualRequiredSchema,
-        xmlOptions,
-        actualFilters)
+        xmlOptions)
       XmlDataSource(xmlOptions).readFile(
         broadcastedHadoopConf.value.value,
         file,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(spark) branch master updated: [MINOR][SQL] Remove unimplemented instances in XML Data Source

Reply via email to