Repository: spark Updated Branches: refs/heads/branch-2.0 73beb9fb3 -> d5e60748b
[SPARK-15655][SQL] Fix Wrong Partition Column Order when Fetching Partitioned Tables #### What changes were proposed in this pull request? When fetching from a partitioned table, the output contains wrong results. The order of the partition key values does not match the order of the partition key columns in the output schema. For example, ```SQL CREATE TABLE table_with_partition(c1 string) PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string) INSERT OVERWRITE TABLE table_with_partition PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e') SELECT 'blarr' SELECT p1, p2, p3, p4, p5, c1 FROM table_with_partition ``` ``` +---+---+---+---+---+-----+ | p1| p2| p3| p4| p5| c1| +---+---+---+---+---+-----+ | d| e| c| b| a|blarr| +---+---+---+---+---+-----+ ``` The expected result should be ``` +---+---+---+---+---+-----+ | p1| p2| p3| p4| p5| c1| +---+---+---+---+---+-----+ | a| b| c| d| e|blarr| +---+---+---+---+---+-----+ ``` This PR fixes this by enforcing that the order of the partition key values matches the table's partition definition. #### How was this patch tested? Added a test case to `SQLQuerySuite`. Author: gatorsmile <[email protected]> Closes #13400 from gatorsmile/partitionedTableFetch. 
(cherry picked from commit bc02d011294fcd1ab07b9baf1011c3f2bdf749d9) Signed-off-by: Wenchen Fan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d5e60748 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d5e60748 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d5e60748 Branch: refs/heads/branch-2.0 Commit: d5e60748b2a871ca9a23957f4b62b4c07010c409 Parents: 73beb9f Author: gatorsmile <[email protected]> Authored: Tue Jun 14 09:58:06 2016 -0700 Committer: Wenchen Fan <[email protected]> Committed: Tue Jun 14 09:58:15 2016 -0700 ---------------------------------------------------------------------- .../spark/sql/hive/MetastoreRelation.scala | 2 +- .../sql/hive/execution/SQLQuerySuite.scala | 32 ++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/d5e60748/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala index 9c82014..5596a44 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala @@ -160,7 +160,7 @@ private[hive] case class MetastoreRelation( val tPartition = new org.apache.hadoop.hive.metastore.api.Partition tPartition.setDbName(databaseName) tPartition.setTableName(tableName) - tPartition.setValues(p.spec.values.toList.asJava) + tPartition.setValues(partitionKeys.map(a => p.spec(a.name)).asJava) val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor() tPartition.setSd(sd) 
http://git-wip-us.apache.org/repos/asf/spark/blob/d5e60748/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 1a0eaa6..9c1f218 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1610,6 +1610,38 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { assert(fs.exists(path), "This is an external table, so the data should not have been dropped") } + test("select partitioned table") { + val table = "table_with_partition" + withTable(table) { + sql( + s""" + |CREATE TABLE $table(c1 string) + |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string) + """.stripMargin) + sql( + s""" + |INSERT OVERWRITE TABLE $table + |PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e') + |SELECT 'blarr' + """.stripMargin) + + // project list is the same order of paritioning columns in table definition + checkAnswer( + sql(s"SELECT p1, p2, p3, p4, p5, c1 FROM $table"), + Row("a", "b", "c", "d", "e", "blarr") :: Nil) + + // project list does not have the same order of paritioning columns in table definition + checkAnswer( + sql(s"SELECT p2, p3, p4, p1, p5, c1 FROM $table"), + Row("b", "c", "d", "a", "e", "blarr") :: Nil) + + // project list contains partial partition columns in table definition + checkAnswer( + sql(s"SELECT p2, p1, p5, c1 FROM $table"), + Row("b", "a", "e", "blarr") :: Nil) + } + } + test("SPARK-14981: DESC not supported for sorting columns") { withTable("t") { val cause = intercept[ParseException] { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, 
e-mail: [email protected]
