Repository: spark
Updated Branches:
  refs/heads/branch-2.0 2a6850c70 -> f441b9ac6


[SPARK-17703][SQL][BACKPORT-2.0] Add unnamed version of addReferenceObj for 
minor objects.

## What changes were proposed in this pull request?

This is a backport of #15276.

Many minor objects in `references` are extracted into fields of the 
generated class, e.g. `errMsg` in `GetExternalRowField` or 
`ValidateExternalType`, but the number of fields in a class is limited, so we 
should reduce the number of such fields.
This PR adds an unnamed version of `addReferenceObj` for these minor objects: 
instead of storing the object in a field, the generated code refers to it 
through the `references` field at the time of use.

## How was this patch tested?

Existing tests.

Author: Takuya UESHIN <[email protected]>

Closes #15808 from ueshin/issues/SPARK-17703_2.0.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f441b9ac
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f441b9ac
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f441b9ac

Branch: refs/heads/branch-2.0
Commit: f441b9ac6595e7a2b0eb4420304d46db85fbe8e7
Parents: 2a6850c
Author: Takuya UESHIN <[email protected]>
Authored: Tue Nov 8 10:23:50 2016 -0800
Committer: Reynold Xin <[email protected]>
Committed: Tue Nov 8 10:23:50 2016 -0800

----------------------------------------------------------------------
 .../catalyst/expressions/codegen/CodeGenerator.scala | 15 +++++++++++++++
 .../apache/spark/sql/catalyst/expressions/misc.scala |  5 ++++-
 .../sql/catalyst/expressions/objects/objects.scala   | 13 ++++++++++---
 3 files changed, 29 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f441b9ac/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 929f2da..751d1c5 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -85,6 +85,21 @@ class CodegenContext {
   val references: mutable.ArrayBuffer[Any] = new mutable.ArrayBuffer[Any]()
 
   /**
+   * Add an object to `references`.
+   *
+   * Returns the code to access it.
+   *
+   * This is for minor objects not to store the object into field but refer it 
from the references
+   * field at the time of use because number of fields in class is limited so 
we should reduce it.
+   */
+  def addReferenceObj(obj: Any): String = {
+    val idx = references.length
+    references += obj
+    val clsName = obj.getClass.getName
+    s"(($clsName) references[$idx])"
+  }
+
+  /**
    * Add an object to `references`, create a class member to access it.
    *
    * Returns the name of class member.

http://git-wip-us.apache.org/repos/asf/spark/blob/f441b9ac/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index 5c4436f..f7ff8a6 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -517,7 +517,10 @@ case class AssertTrue(child: Expression) extends 
UnaryExpression with ImplicitCa
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val eval = child.genCode(ctx)
-    val errMsgField = ctx.addReferenceObj("errMsg", errMsg)
+
+    // Use unnamed reference that doesn't create a local field here to reduce 
the number of fields
+    // because errMsgField is used only when the value is null or false.
+    val errMsgField = ctx.addReferenceObj(errMsg)
     ExprCode(code = s"""${eval.code}
        |if (${eval.isNull} || !${eval.value}) {
        |  throw new RuntimeException($errMsgField);

http://git-wip-us.apache.org/repos/asf/spark/blob/f441b9ac/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index 691edd5..d9c29b3 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -731,7 +731,10 @@ case class AssertNotNull(child: Expression, 
walkedTypePath: Seq[String])
       "If the schema is inferred from a Scala tuple/case class, or a Java 
bean, " +
       "please try to use scala.Option[_] or other nullable types " +
       "(e.g. java.lang.Integer instead of int/scala.Int)."
-    val errMsgField = ctx.addReferenceObj("errMsg", errMsg)
+
+    // Use unnamed reference that doesn't create a local field here to reduce 
the number of fields
+    // because errMsgField is used only when the value is null.
+    val errMsgField = ctx.addReferenceObj(errMsg)
 
     val code = s"""
       ${childGen.code}
@@ -766,7 +769,9 @@ case class GetExternalRowField(
   private val errMsg = s"The ${index}th field '$fieldName' of input row cannot 
be null."
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    val errMsgField = ctx.addReferenceObj("errMsg", errMsg)
+    // Use unnamed reference that doesn't create a local field here to reduce 
the number of fields
+    // because errMsgField is used only when the field is null.
+    val errMsgField = ctx.addReferenceObj(errMsg)
     val row = child.genCode(ctx)
     val code = s"""
       ${row.code}
@@ -804,7 +809,9 @@ case class ValidateExternalType(child: Expression, 
expected: DataType)
   private val errMsg = s" is not a valid external type for schema of 
${expected.simpleString}"
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    val errMsgField = ctx.addReferenceObj("errMsg", errMsg)
+    // Use unnamed reference that doesn't create a local field here to reduce 
the number of fields
+    // because errMsgField is used only when the type doesn't match.
+    val errMsgField = ctx.addReferenceObj(errMsg)
     val input = child.genCode(ctx)
     val obj = input.value
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to