This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 216b53304613 [SPARK-50795][SQL] Display all DESCRIBE AS JSON dates in ISO-8601 format and types as `dataType.simpleString`
216b53304613 is described below
commit 216b533046139405c673646379cf4d3b0710836e
Author: Amanda Liu <[email protected]>
AuthorDate: Tue Jan 14 21:19:20 2025 +0800
[SPARK-50795][SQL] Display all DESCRIBE AS JSON dates in ISO-8601 format and types as `dataType.simpleString`
### What changes were proposed in this pull request?
The PR makes two updates for consistency:
1. Display all `DESCRIBE AS JSON` dates in ISO-8601 format, and add regex tests in `DescribeTableSuite.scala` to ensure dates adhere to the format (see the sketch after this list).
2. Display data type names using `dataType.simpleString`.
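For illustration only, a minimal standalone sketch of the target date format using plain java.time; the patch itself wires in Spark's internal `Iso8601TimestampFormatter` (see the interface.scala hunk below), and the epoch value here is made up:

    import java.time.{Instant, ZoneOffset}
    import java.time.format.DateTimeFormatter

    // Approximates the pattern added in interface.scala below;
    // the actual change uses Spark's Iso8601TimestampFormatter.
    val fmt = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")
      .withZone(ZoneOffset.UTC)
    println(fmt.format(Instant.ofEpochMilli(1736860760000L)))
    // prints: 2025-01-14T13:19:20Z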
### Why are the changes needed?
Ensures a uniform date and data type format in `DESCRIBE AS JSON` output.
### Does this PR introduce _any_ user-facing change?
Yes, it affects the date and data type formats produced by `DESCRIBE AS JSON`. For example, a column type previously rendered as `{"name":"integer"}` is now `{"name":"int"}`, and timestamps such as `created_time` now follow `yyyy-MM-dd'T'HH:mm:ss'Z'` (e.g. `2025-01-14T13:19:20Z`).
### How was this patch tested?
Added tests in `DescribeTableSuite.scala`
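For reference, a minimal sketch of what the new assertions check, mirroring the `iso8601Regex` added to the suite below (sample values are made up):

    // Accepts ISO-8601 UTC timestamps, rejects Date.toString-style output.
    val iso8601Regex = raw"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z$$".r
    assert(iso8601Regex.matches("2025-01-14T13:19:20Z"))
    assert(!iso8601Regex.matches("Tue Jan 14 21:19:20 CST 2025"))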
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #49455 from asl3/asl3/describeasjson-date.
Authored-by: Amanda Liu <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
docs/sql-ref-syntax-aux-describe-table.md | 4 +--
.../spark/sql/catalyst/catalog/interface.scala | 21 ++++++++----
.../spark/sql/execution/command/tables.scala | 2 +-
.../resources/sql-tests/results/describe.sql.out | 4 +--
.../execution/command/v1/DescribeTableSuite.scala | 39 +++++++++++++++-------
5 files changed, 47 insertions(+), 23 deletions(-)
diff --git a/docs/sql-ref-syntax-aux-describe-table.md b/docs/sql-ref-syntax-aux-describe-table.md
index 6a14da1e4380..5f5fd27c865e 100644
--- a/docs/sql-ref-syntax-aux-describe-table.md
+++ b/docs/sql-ref-syntax-aux-describe-table.md
@@ -118,9 +118,9 @@ to return the metadata pertaining to a partition or column respectively.
"num_buckets": <num_buckets>,
"bucket_columns": ["<col_name>"],
"sort_columns": ["<col_name>"],
- "created_time": "<timestamp_ISO-8601>",
+ "created_time": "<yyyy-MM-dd'T'HH:mm:ss'Z'>",
"created_by": "<created_by>",
- "last_access": "<timestamp_ISO-8601>",
+ "last_access": "<yyyy-MM-dd'T'HH:mm:ss'Z'>",
"partition_provider": "<partition_provider>"
}
```
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index de828dfd3e28..7836e533c8b5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.catalog
import java.net.URI
import java.time.{ZoneId, ZoneOffset}
-import java.util.Date
import scala.collection.mutable
import scala.util.control.NonFatal
@@ -87,6 +86,14 @@ trait MetadataMapSupport {
}
map
}
+
+  val timestampFormatter = new Iso8601TimestampFormatter(
+    pattern = "yyyy-MM-dd'T'HH:mm:ss'Z'",
+    zoneId = ZoneId.of("UTC"),
+    locale = DateFormatter.defaultLocale,
+    legacyFormat = LegacyDateFormats.LENIENT_SIMPLE_DATE_FORMAT,
+    isParsing = true
+  )
}
@@ -184,10 +191,12 @@ case class CatalogTablePartition(
map += ("Partition Parameters" -> paramsJson)
}
- map += ("Created Time" -> JString(new Date(createTime).toString))
+ map += ("Created Time" -> JString(
+ timestampFormatter.format(DateTimeUtils.millisToMicros(createTime))))
     val lastAccess = if (lastAccessTime <= 0) JString("UNKNOWN")
-      else JString(new Date(lastAccessTime).toString)
+      else JString(
+        timestampFormatter.format(DateTimeUtils.millisToMicros(lastAccessTime)))
     map += ("Last Access" -> lastAccess)
     stats.foreach(s => map += ("Partition Statistics" -> JString(s.simpleString)))
@@ -595,8 +604,8 @@ case class CatalogTable(
else JNull
val lastAccess: JValue =
-      if (lastAccessTime <= 0) JString("UNKNOWN") else JString(
-        DateTimeUtils.microsToInstant(DateTimeUtils.millisToMicros(lastAccessTime)).toString)
+      if (lastAccessTime <= 0) JString("UNKNOWN")
+      else JString(timestampFormatter.format(DateTimeUtils.millisToMicros(lastAccessTime)))
val viewQueryOutputColumns: JValue =
if (viewQueryColumnNames.nonEmpty)
JArray(viewQueryColumnNames.map(JString).toList)
@@ -609,7 +618,7 @@ case class CatalogTable(
map += "Table" -> JString(identifier.table)
if (Option(owner).exists(_.nonEmpty)) map += "Owner" -> JString(owner)
map += "Created Time" ->
-
JString(DateTimeUtils.microsToInstant(DateTimeUtils.millisToMicros(createTime)).toString)
+
JString(timestampFormatter.format(DateTimeUtils.millisToMicros(createTime)))
if (lastAccess != JNull) map += "Last Access" -> lastAccess
map += "Created By" -> JString(s"Spark $createVersion")
map += "Type" -> JString(tableType.name)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index e69e05ba7dec..73aaed062794 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -907,7 +907,7 @@ case class DescribeTableJsonCommand(
)
case _ =>
- JObject("name" -> JString(dataType.typeName))
+ JObject("name" -> JString(dataType.simpleString))
}
}
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index 015b0ceff335..870ad02e7141 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -76,7 +76,7 @@ DESCRIBE EXTENDED t AS JSON
-- !query schema
struct<json_metadata:string>
-- !query output
-{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string"},"nullable":true},{"name":"b","type":{"name":"integer"},"nullable":true},{"name":"c","type":{"name":"string"},"nullable":true},{"name":"d","type":{"name":"string"},"nullable":true}],"num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"location":"file:[not
included in
comparison]/{warehouse_dir}/t","storage_properties":{"a":"1","b":"2","pa [...]
+{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string"},"nullable":true},{"name":"b","type":{"name":"int"},"nullable":true},{"name":"c","type":{"name":"string"},"nullable":true},{"name":"d","type":{"name":"string"},"nullable":true}],"num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"location":"file:[not
included in
comparison]/{warehouse_dir}/t","storage_properties":{"a":"1","b":"2","passwo
[...]
-- !query
@@ -303,7 +303,7 @@ DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON
-- !query schema
struct<json_metadata:string>
-- !query output
-{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string"},"nullable":true},{"name":"b","type":{"name":"integer"},"nullable":true},{"name":"c","type":{"name":"string"},"nullable":true},{"name":"d","type":{"name":"string"},"nullable":true}],"partition_values":{"c":"Us","d":"1"},"location":"file:[not
included in
comparison]/{warehouse_dir}/t/c=Us/d=1","storage_properties":{"a":"1","b":"2","password":"****
[...]
+{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string"},"nullable":true},{"name":"b","type":{"name":"int"},"nullable":true},{"name":"c","type":{"name":"string"},"nullable":true},{"name":"d","type":{"name":"string"},"nullable":true}],"partition_values":{"c":"Us","d":"1"},"location":"file:[not
included in
comparison]/{warehouse_dir}/t/c=Us/d=1","storage_properties":{"a":"1","b":"2","password":"********
[...]
-- !query
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
index d5dd96f55c11..cae56754ba46 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
@@ -44,6 +44,8 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
def getProvider(): String =
defaultUsing.stripPrefix("USING").trim.toLowerCase(Locale.ROOT)
+ val iso8601Regex = raw"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z$$".r
+
test("Describing of a non-existent partition") {
withNamespaceAndTable("ns", "table") { tbl =>
spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing " +
@@ -339,13 +341,11 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
namespace = Some(List("ns")),
schema_name = Some("ns"),
columns = Some(List(
- TableColumn("employee_id", Type("integer"), true),
+ TableColumn("employee_id", Type("int"), true),
TableColumn("employee_name", Type("string"), true),
TableColumn("department", Type("string"), true),
TableColumn("hire_date", Type("date"), true)
)),
- owner = Some(""),
- created_time = Some(""),
last_access = Some("UNKNOWN"),
created_by = Some(s"Spark $SPARK_VERSION"),
`type` = Some("MANAGED"),
@@ -369,12 +369,15 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
)
if (getProvider() == "hive") {
-      assert(expectedOutput == parsedOutput.copy(owner = Some(""),
-        created_time = Some(""),
+      assert(expectedOutput == parsedOutput.copy(owner = None,
+        created_time = None,
         location = Some("")))
} else {
       assert(expectedOutput.copy(inputformat = None, outputformat = None,
         serde_library = None)
-        == parsedOutput.copy(owner = Some(""), created_time = Some(""), location = Some("")))
+        == parsedOutput.copy(owner = None, created_time = None, location = Some("")))
+ }
+ parsedOutput.created_time.foreach { createdTime =>
+ assert(iso8601Regex.matches(createdTime))
}
}
}
@@ -408,7 +411,7 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
namespace = Some(List("ns")),
schema_name = Some("ns"),
columns = Some(List(
- TableColumn("id", Type("integer"), true),
+ TableColumn("id", Type("int"), true),
TableColumn("name", Type("string"), true),
TableColumn("region", Type("string"), true),
TableColumn("category", Type("string"), true)
@@ -447,6 +450,9 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
         == parsedOutput.copy(location = None, created_time = None, owner = None,
           storage_properties = filteredParsedStorageProperties))
}
+ parsedOutput.created_time.foreach { createdTime =>
+ assert(iso8601Regex.matches(createdTime))
+ }
}
}
@@ -475,7 +481,7 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
namespace = Some(List("ns")),
schema_name = Some("ns"),
columns = Some(List(
- TableColumn("id", Type("integer"), default = Some("1")),
+ TableColumn("id", Type("int"), default = Some("1")),
TableColumn("name", Type("string"), default = Some("'unknown'")),
TableColumn("created_at", Type("timestamp_ltz"), default =
Some("CURRENT_TIMESTAMP")),
TableColumn("is_active", Type("boolean"), default = Some("true"))
@@ -504,6 +510,9 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
         parsedOutput.copy(location = None, created_time = None, owner = None)
       )
}
+ parsedOutput.created_time.foreach { createdTime =>
+ assert(iso8601Regex.matches(createdTime))
+ }
}
}
@@ -528,7 +537,7 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
val expectedOutput = DescribeTableJson(
columns = Some(List(
- TableColumn("id", Type("integer")),
+ TableColumn("id", Type("int")),
TableColumn("name", Type("string")),
TableColumn("created_at", Type("timestamp_ltz"))
))
@@ -564,7 +573,7 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
namespace = Some(List("default")),
schema_name = Some("default"),
columns = Some(List(
- TableColumn("id", Type("integer")),
+ TableColumn("id", Type("int")),
TableColumn("name", Type("string")),
TableColumn("created_at", Type("timestamp_ltz"))
)),
@@ -590,6 +599,9 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
         outputformat = None, serde_library = None, storage_properties = None)
         == parsedOutput.copy(table_properties = None, created_time = None, owner = None))
}
+ parsedOutput.created_time.foreach { createdTime =>
+ assert(iso8601Regex.matches(createdTime))
+ }
}
}
}
@@ -673,7 +685,7 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
),
Field(
name = "age",
- `type` = Type("integer")
+ `type` = Type("int")
),
Field(
name = "contact",
@@ -709,7 +721,7 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
),
Field(
name = "zip",
- `type` = Type("integer")
+ `type` = Type("int")
)
))
)),
@@ -770,6 +782,9 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
       assert(expectedOutput.copy(inputformat = None, outputformat = None,
         serde_library = None)
         == parsedOutput.copy(location = None, created_time = None, owner = None))
}
+ parsedOutput.created_time.foreach { createdTime =>
+ assert(iso8601Regex.matches(createdTime))
+ }
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]