Repository: spark Updated Branches: refs/heads/master ad102af16 -> cb5d933d8
[SPARK-15585][SQL] Add doc for turning off quotations ## What changes were proposed in this pull request? This pr is to add doc for turning off quotations because this behavior is different from `com.databricks.spark.csv`. ## How was this patch tested? Check behavior to put an empty string in csv options. Author: Takeshi YAMAMURO <[email protected]> Closes #13616 from maropu/SPARK-15585-2. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cb5d933d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cb5d933d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cb5d933d Branch: refs/heads/master Commit: cb5d933d86ac4afd947874f1f1c31c7154cb8249 Parents: ad102af Author: Takeshi YAMAMURO <[email protected]> Authored: Sat Jun 11 15:12:21 2016 -0700 Committer: Reynold Xin <[email protected]> Committed: Sat Jun 11 15:12:21 2016 -0700 ---------------------------------------------------------------------- python/pyspark/sql/readwriter.py | 6 ++++-- .../main/scala/org/apache/spark/sql/DataFrameReader.scala | 4 +++- .../spark/sql/execution/datasources/csv/CSVSuite.scala | 10 ++++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/cb5d933d/python/pyspark/sql/readwriter.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 9208a52..7d1f186 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -320,7 +320,8 @@ class DataFrameReader(object): it uses the default value, ``UTF-8``. :param quote: sets the single character used for escaping quoted values where the separator can be part of the value. If None is set, it uses the default - value, ``"``. + value, ``"``. If you would like to turn off quotations, you need to set an + empty string. 
:param escape: sets the single character used for escaping quotes inside an already quoted value. If None is set, it uses the default value, ``\``. :param comment: sets the single character used for skipping lines beginning with this @@ -804,7 +805,8 @@ class DataFrameWriter(object): set, it uses the default value, ``,``. :param quote: sets the single character used for escaping quoted values where the separator can be part of the value. If None is set, it uses the default - value, ``"``. + value, ``"``. If you would like to turn off quotations, you need to set an + empty string. :param escape: sets the single character used for escaping quotes inside an already quoted value. If None is set, it uses the default value, ``\`` :param escapeQuotes: A flag indicating whether values containing quotes should always http://git-wip-us.apache.org/repos/asf/spark/blob/cb5d933d/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index b248583..bb5fa2b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -370,7 +370,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * <li>`encoding` (default `UTF-8`): decodes the CSV files by the given encoding * type.</li> * <li>`quote` (default `"`): sets the single character used for escaping quoted values where - * the separator can be part of the value.</li> + * the separator can be part of the value. If you would like to turn off quotations, you need to + * set not `null` but an empty string. 
This behaviour is different from + * `com.databricks.spark.csv`.</li> * <li>`escape` (default `\`): sets the single character used for escaping quotes inside * an already quoted value.</li> * <li>`comment` (default empty string): sets the single character used for skipping lines http://git-wip-us.apache.org/repos/asf/spark/blob/cb5d933d/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index bc95446..f170065 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -655,4 +655,14 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { assert(msg.contains("CSV data source does not support array<string> data type")) } } + + test("SPARK-15585 turn off quotations") { + val cars = spark.read + .format("csv") + .option("header", "true") + .option("quote", "") + .load(testFile(carsUnbalancedQuotesFile)) + + verifyCars(cars, withHeader = true, checkValues = false) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
