Repository: spark
Updated Branches:
  refs/heads/branch-2.0 8cf33fb8a -> 4c7b208ab


[SPARK-15585][SQL] Add doc for turning off quotations

## What changes were proposed in this pull request?
This PR adds documentation on how to turn off quotations, because this behavior is
different from `com.databricks.spark.csv`.

## How was this patch tested?
Checked the behavior when an empty string is set as the `quote` CSV option.

Author: Takeshi YAMAMURO <[email protected]>

Closes #13616 from maropu/SPARK-15585-2.

(cherry picked from commit cb5d933d86ac4afd947874f1f1c31c7154cb8249)
Signed-off-by: Reynold Xin <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4c7b208a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4c7b208a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4c7b208a

Branch: refs/heads/branch-2.0
Commit: 4c7b208ab6a6ae17fa137627c90256d757ad433f
Parents: 8cf33fb
Author: Takeshi YAMAMURO <[email protected]>
Authored: Sat Jun 11 15:12:21 2016 -0700
Committer: Reynold Xin <[email protected]>
Committed: Sat Jun 11 15:12:27 2016 -0700

----------------------------------------------------------------------
 python/pyspark/sql/readwriter.py                          |  6 ++++--
 .../main/scala/org/apache/spark/sql/DataFrameReader.scala |  4 +++-
 .../spark/sql/execution/datasources/csv/CSVSuite.scala    | 10 ++++++++++
 3 files changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4c7b208a/python/pyspark/sql/readwriter.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 9208a52..7d1f186 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -320,7 +320,8 @@ class DataFrameReader(object):
                          it uses the default value, ``UTF-8``.
         :param quote: sets the single character used for escaping quoted 
values where the
                       separator can be part of the value. If None is set, it 
uses the default
-                      value, ``"``.
+                      value, ``"``. If you would like to turn off quotations, 
you need to set an
+                      empty string.
         :param escape: sets the single character used for escaping quotes 
inside an already
                        quoted value. If None is set, it uses the default 
value, ``\``.
         :param comment: sets the single character used for skipping lines 
beginning with this
@@ -804,7 +805,8 @@ class DataFrameWriter(object):
                     set, it uses the default value, ``,``.
         :param quote: sets the single character used for escaping quoted 
values where the
                       separator can be part of the value. If None is set, it 
uses the default
-                      value, ``"``.
+                      value, ``"``. If you would like to turn off quotations, 
you need to set an
+                      empty string.
         :param escape: sets the single character used for escaping quotes 
inside an already
                        quoted value. If None is set, it uses the default 
value, ``\``
         :param escapeQuotes: A flag indicating whether values containing 
quotes should always

http://git-wip-us.apache.org/repos/asf/spark/blob/4c7b208a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index b248583..bb5fa2b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -370,7 +370,9 @@ class DataFrameReader private[sql](sparkSession: 
SparkSession) extends Logging {
    * <li>`encoding` (default `UTF-8`): decodes the CSV files by the given 
encoding
    * type.</li>
    * <li>`quote` (default `"`): sets the single character used for escaping 
quoted values where
-   * the separator can be part of the value.</li>
+   * the separator can be part of the value. If you would like to turn off 
quotations, you need to
+   * set not `null` but an empty string. This behaviour is different from
+   * `com.databricks.spark.csv`.</li>
    * <li>`escape` (default `\`): sets the single character used for escaping 
quotes inside
    * an already quoted value.</li>
    * <li>`comment` (default empty string): sets the single character used for 
skipping lines

http://git-wip-us.apache.org/repos/asf/spark/blob/4c7b208a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index bc95446..f170065 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -655,4 +655,14 @@ class CSVSuite extends QueryTest with SharedSQLContext 
with SQLTestUtils {
       assert(msg.contains("CSV data source does not support array<string> data 
type"))
     }
   }
+
+  test("SPARK-15585 turn off quotations") {
+    val cars = spark.read
+      .format("csv")
+      .option("header", "true")
+      .option("quote", "")
+      .load(testFile(carsUnbalancedQuotesFile))
+
+    verifyCars(cars, withHeader = true, checkValues = false)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to