This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 980e6bc [SPARK-26246][SQL][FOLLOWUP] Inferring TimestampType from JSON 980e6bc is described below commit 980e6bcd1c016139c6918d788fb4806a60740fcf Author: Maxim Gekk <maxim.g...@databricks.com> AuthorDate: Sat Jan 5 21:50:27 2019 +0800 [SPARK-26246][SQL][FOLLOWUP] Inferring TimestampType from JSON ## What changes were proposed in this pull request? Added new JSON option `inferTimestamp` (`true` by default) to control inferring of `TimestampType` from string values. ## How was this patch tested? Add new UT to `JsonInferSchemaSuite`. Closes #23455 from MaxGekk/json-infer-time-followup. Authored-by: Maxim Gekk <maxim.g...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- docs/sql-migration-guide-upgrade.md | 3 +++ .../main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala | 6 ++++++ .../scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala | 3 ++- .../org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala | 6 ++++++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index c4d2157..7e6a0c0 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -40,6 +40,9 @@ displayTitle: Spark SQL Upgrading Guide - In Spark version 2.4 and earlier, JSON datasource and JSON functions like `from_json` convert a bad JSON record to a row with all `null`s in the PERMISSIVE mode when specified schema is `StructType`. Since Spark 3.0, the returned row can contain non-`null` fields if some of JSON column values were parsed and converted to desired types successfully. - Since Spark 3.0, the `unix_timestamp`, `date_format`, `to_unix_timestamp`, `from_unixtime`, `to_date`, `to_timestamp` functions use java.time API for parsing and formatting dates/timestamps from/to strings by using ISO chronology (https://docs.oracle.com/javase/8/docs/api/java/time/chrono/IsoChronology.html) based on Proleptic Gregorian calendar. In Spark version 2.4 and earlier, java.text.SimpleDateFormat and java.util.GregorianCalendar (hybrid calendar that supports both the Julian [...] + + - Since Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they matches to the pattern defined by the JSON option `timestampFormat`. Set JSON option `inferTimestamp` to `false` to disable such type inferring. + ## Upgrading From Spark SQL 2.3 to 2.4 - In Spark version 2.3 and earlier, the second parameter to array_contains function is implicitly promoted to the element type of first array type parameter. This type promotion can be lossy and may cause `array_contains` function to return wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. This can cause some change in behavior and are illustrated in the table below. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala index eaff3fa..1ec9d50 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala @@ -117,6 +117,12 @@ private[sql] class JSONOptions( */ val pretty: Boolean = parameters.get("pretty").map(_.toBoolean).getOrElse(false) + /** + * Enables inferring of TimestampType from strings matched to the timestamp pattern + * defined by the timestampFormat option. + */ + val inferTimestamp: Boolean = parameters.get("inferTimestamp").map(_.toBoolean).getOrElse(true) + /** Sets config options on a Jackson [[JsonFactory]]. */ def setJacksonOptions(factory: JsonFactory): Unit = { factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index 3203e62..0bf3f03c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -128,7 +128,8 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { } if (options.prefersDecimal && decimalTry.isDefined) { decimalTry.get - } else if ((allCatch opt timestampFormatter.parse(field)).isDefined) { + } else if (options.inferTimestamp && + (allCatch opt timestampFormatter.parse(field)).isDefined) { TimestampType } else { StringType diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala index 9307f9b..9a6f4f5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala @@ -99,4 +99,10 @@ class JsonInferSchemaSuite extends SparkFunSuite with SQLHelper { } } } + + test("disable timestamp inferring") { + val json = """{"a": "2019-01-04T21:11:10.123Z"}""" + checkType(Map("inferTimestamp" -> "true"), json, TimestampType) + checkType(Map("inferTimestamp" -> "false"), json, StringType) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org