spark git commit: [SPARK-13947][SQL] The error message from using an invalid column reference is not clear

lixiao Tue, 24 Oct 2017 23:02:35 -0700

Repository: spark
Updated Branches:
  refs/heads/master 524abb996 -> 427359f07



[SPARK-13947][SQL] The error message from using an invalid column reference is 
not clear

## What changes were proposed in this pull request?

Rewrote the error message for clarity. Added extra information for the case 
of an attribute name collision, hinting that the user should double-check 
whether attributes from two different tables are being referenced.

## How was this patch tested?

There are no functional changes; only the final error message has changed. It 
has been tested manually against the situation described in the JIRA ticket. 
The automated tests in the repository pass.

This PR is original work from me and I license this work to the Spark project

Author: Ruben Berenguel Montoro <[email protected]>
Author: Ruben Berenguel Montoro <[email protected]>
Author: Ruben Berenguel <[email protected]>

Closes #17100 from rberenguel/SPARK-13947-error-message.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/427359f0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/427359f0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/427359f0

Branch: refs/heads/master
Commit: 427359f077ad469d78c97972d021535f30a1e418
Parents: 524abb9
Author: Ruben Berenguel Montoro <[email protected]>
Authored: Tue Oct 24 23:02:11 2017 -0700
Committer: gatorsmile <[email protected]>
Committed: Tue Oct 24 23:02:11 2017 -0700

----------------------------------------------------------------------
 .../sql/catalyst/analysis/CheckAnalysis.scala   | 19 +++++++++++++---
 .../catalyst/analysis/AnalysisErrorSuite.scala  | 23 ++++++++++++++------
 .../negative-cases/invalid-correlation.sql.out  |  2 +-
 3 files changed, 33 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/427359f0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index d9906bb..b5e8bdd 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -272,10 +272,23 @@ trait CheckAnalysis extends PredicateHelper {
           case o if o.children.nonEmpty && o.missingInput.nonEmpty =>
             val missingAttributes = o.missingInput.mkString(",")
             val input = o.inputSet.mkString(",")
+            val msgForMissingAttributes = s"Resolved attribute(s) 
$missingAttributes missing " +
+              s"from $input in operator ${operator.simpleString}."
 
-            failAnalysis(
-              s"resolved attribute(s) $missingAttributes missing from $input " 
+
-                s"in operator ${operator.simpleString}")
+            val resolver = plan.conf.resolver
+            val attrsWithSameName = o.missingInput.filter { missing =>
+              o.inputSet.exists(input => resolver(missing.name, input.name))
+            }
+
+            val msg = if (attrsWithSameName.nonEmpty) {
+              val sameNames = attrsWithSameName.map(_.name).mkString(",")
+              s"$msgForMissingAttributes Attribute(s) with the same name 
appear in the " +
+                s"operation: $sameNames. Please check if the right 
attribute(s) are used."
+            } else {
+              msgForMissingAttributes
+            }
+
+            failAnalysis(msg)
 
           case p @ Project(exprs, _) if containsMultipleGenerators(exprs) =>
             failAnalysis(

http://git-wip-us.apache.org/repos/asf/spark/blob/427359f0/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 884e113..5d2f8e7 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -408,16 +408,25 @@ class AnalysisErrorSuite extends AnalysisTest {
     // CheckAnalysis should throw AnalysisException when Aggregate contains 
missing attribute(s)
     // Since we manually construct the logical plan at here and Sum only accept
     // LongType, DoubleType, and DecimalType. We use LongType as the type of a.
-    val plan =
-      Aggregate(
-        Nil,
-        Alias(sum(AttributeReference("a", LongType)(exprId = ExprId(1))), 
"b")() :: Nil,
-        LocalRelation(
-          AttributeReference("a", LongType)(exprId = ExprId(2))))
+    val attrA = AttributeReference("a", LongType)(exprId = ExprId(1))
+    val otherA = AttributeReference("a", LongType)(exprId = ExprId(2))
+    val attrC = AttributeReference("c", LongType)(exprId = ExprId(3))
+    val aliases = Alias(sum(attrA), "b")() :: Alias(sum(attrC), "d")() :: Nil
+    val plan = Aggregate(
+      Nil,
+      aliases,
+      LocalRelation(otherA))
 
     assert(plan.resolved)
 
-    assertAnalysisError(plan, "resolved attribute(s) a#1L missing from a#2L" 
:: Nil)
+    val resolved = s"${attrA.toString},${attrC.toString}"
+
+    val errorMsg = s"Resolved attribute(s) $resolved missing from 
${otherA.toString} " +
+                     s"in operator !Aggregate [${aliases.mkString(", ")}]. " +
+                     s"Attribute(s) with the same name appear in the 
operation: a. " +
+                     "Please check if the right attribute(s) are used."
+
+    assertAnalysisError(plan, errorMsg :: Nil)
   }
 
   test("error test for self-join") {

http://git-wip-us.apache.org/repos/asf/spark/blob/427359f0/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
index e4b1a2d..2586f26 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
@@ -63,7 +63,7 @@ WHERE  t1a IN (SELECT   min(t2a)
 struct<>
 -- !query 4 output
 org.apache.spark.sql.AnalysisException
-resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter 
t2c#x IN (list#x [t2b#x]);
+Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter 
t2c#x IN (list#x [t2b#x]).;
 
 
 -- !query 5


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-13947][SQL] The error message from using an invalid column reference is not clear

Reply via email to