Repository: spark
Updated Branches:
refs/heads/master bf7e81a51 -> d7a37bcaf
[SPARK-7345][SQL] Spark cannot detect renamed columns using JDBC connector
The issue appears when one tries to create a DataFrame using a
sqlContext.load("jdbc"...) statement where "dbtable" contains a query with
renamed columns.
If the original column is used in the SQL query once, the resulting DataFrame
will contain the non-renamed column.
If the original column is used in the SQL query several times with different
aliases, sqlContext.load will fail.
The original implementation of JDBCRDD.resolveTable uses getColumnName to
detect column names in the RDD schema.
The suggested implementation uses getColumnLabel, which is aware of the SQL
"AS" clause, to handle column renames in the SQL statement.
Readings:
http://stackoverflow.com/questions/4271152/getcolumnlabel-vs-getcolumnname
http://stackoverflow.com/questions/12259829/jdbc-getcolumnname-getcolumnlabel-db2
The official documentation is unfortunately a bit misleading in its definition
of the "suggested title" purpose; however, it clearly defines the behavior of
the AS keyword in a SQL statement.
http://docs.oracle.com/javase/7/docs/api/java/sql/ResultSetMetaData.html
getColumnLabel - Gets the designated column's suggested title for use in
printouts and displays. The suggested title is usually specified by the SQL AS
clause. If a SQL AS is not specified, the value returned from getColumnLabel
will be the same as the value returned by the getColumnName method.
Author: Oleg Sidorkin <[email protected]>
Closes #6032 from osidorkin/master and squashes the following commits:
10fc44b [Oleg Sidorkin] [SPARK-7345][SQL] Regression test for JDBCSuite
(resolved scala style test error)
2aaf6f7 [Oleg Sidorkin] [SPARK-7345][SQL] Regression test for JDBCSuite
(renamed fields in JDBC query)
b7d5b22 [Oleg Sidorkin] [SPARK-7345][SQL] Regression test for JDBCSuite
09559a0 [Oleg Sidorkin] [SPARK-7345][SQL] Spark cannot detect renamed columns
using JDBC connector
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d7a37bca
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d7a37bca
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d7a37bca
Branch: refs/heads/master
Commit: d7a37bcaf123389fb0828eefb92659c6d9cb3460
Parents: bf7e81a
Author: Oleg Sidorkin <[email protected]>
Authored: Sun May 10 01:31:34 2015 -0700
Committer: Reynold Xin <[email protected]>
Committed: Sun May 10 01:31:34 2015 -0700
----------------------------------------------------------------------
.../scala/org/apache/spark/sql/jdbc/JDBCRDD.scala | 2 +-
.../scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 16 ++++++++++++++++
2 files changed, 17 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/d7a37bca/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala
b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala
index 1a5083d..a03ade3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala
@@ -109,7 +109,7 @@ private[sql] object JDBCRDD extends Logging {
val fields = new Array[StructField](ncols)
var i = 0
while (i < ncols) {
- val columnName = rsmd.getColumnName(i + 1)
+ val columnName = rsmd.getColumnLabel(i + 1)
val dataType = rsmd.getColumnType(i + 1)
val typeName = rsmd.getColumnTypeName(i + 1)
val fieldSize = rsmd.getPrecision(i + 1)
http://git-wip-us.apache.org/repos/asf/spark/blob/d7a37bca/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index 021affa..2abfe7f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -204,6 +204,22 @@ class JDBCSuite extends FunSuite with BeforeAndAfter {
assert(ids(2) === 3)
}
+ test("Register JDBC query with renamed fields") {
+ // Regression test for bug SPARK-7345
+ sql(
+ s"""
+ |CREATE TEMPORARY TABLE renamed
+ |USING org.apache.spark.sql.jdbc
+ |OPTIONS (url '$url', dbtable '(select NAME as NAME1, NAME as NAME2 from TEST.PEOPLE)',
+ |user 'testUser', password 'testPass')
+ """.stripMargin.replaceAll("\n", " "))
+
+ val df = sql("SELECT * FROM renamed")
+ assert(df.schema.fields.size == 2)
+ assert(df.schema.fields(0).name == "NAME1")
+ assert(df.schema.fields(1).name == "NAME2")
+ }
+
test("Basic API") {
assert(TestSQLContext.jdbc(urlWithUserAndPass,
"TEST.PEOPLE").collect().size === 3)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]