Repository: spark Updated Branches: refs/heads/master f327e1686 -> 81455a9cd
[SPARK-17703][SQL] Add unnamed version of addReferenceObj for minor objects. ## What changes were proposed in this pull request? There are many minor objects in `references` that are extracted into fields of the generated class, e.g. `errMsg` in `GetExternalRowField` or `ValidateExternalType`, but the number of fields in a class is limited, so we should reduce it. This PR adds an unnamed version of `addReferenceObj` for these minor objects, which does not store the object in a field but instead refers to it through the `references` field at the time of use. ## How was this patch tested? Existing tests. Author: Takuya UESHIN <[email protected]> Closes #15276 from ueshin/issues/SPARK-17703. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/81455a9c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/81455a9c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/81455a9c Branch: refs/heads/master Commit: 81455a9cd963098613bad10182e3fafc83a6e352 Parents: f327e16 Author: Takuya UESHIN <[email protected]> Authored: Fri Sep 30 17:31:59 2016 -0700 Committer: Reynold Xin <[email protected]> Committed: Fri Sep 30 17:31:59 2016 -0700 ---------------------------------------------------------------------- .../catalyst/expressions/codegen/CodeGenerator.scala | 15 +++++++++++++++ .../apache/spark/sql/catalyst/expressions/misc.scala | 5 ++++- .../sql/catalyst/expressions/objects/objects.scala | 12 +++++++++--- 3 files changed, 28 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/81455a9c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 33b9b80..cb808e3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -85,6 +85,21 @@ class CodegenContext { val references: mutable.ArrayBuffer[Any] = new mutable.ArrayBuffer[Any]() /** + * Add an object to `references`. + * + * Returns the code to access it. + * + * This is for minor objects not to store the object into field but refer it from the references + * field at the time of use because number of fields in class is limited so we should reduce it. + */ + def addReferenceObj(obj: Any): String = { + val idx = references.length + references += obj + val clsName = obj.getClass.getName + s"(($clsName) references[$idx])" + } + + /** * Add an object to `references`, create a class member to access it. * * Returns the name of class member. 
http://git-wip-us.apache.org/repos/asf/spark/blob/81455a9c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 92f8fb8..dbb52a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -517,7 +517,10 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) - val errMsgField = ctx.addReferenceObj("errMsg", errMsg) + + // Use unnamed reference that doesn't create a local field here to reduce the number of fields + // because errMsgField is used only when the value is null or false. 
+ val errMsgField = ctx.addReferenceObj(errMsg) ExprCode(code = s"""${eval.code} |if (${eval.isNull} || !${eval.value}) { | throw new RuntimeException($errMsgField); http://git-wip-us.apache.org/repos/asf/spark/blob/81455a9c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index faf8fec..50e2ac3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -906,7 +906,9 @@ case class AssertNotNull(child: Expression, walkedTypePath: Seq[String]) override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val childGen = child.genCode(ctx) - val errMsgField = ctx.addReferenceObj("errMsg", errMsg) + // Use unnamed reference that doesn't create a local field here to reduce the number of fields + // because errMsgField is used only when the value is null. + val errMsgField = ctx.addReferenceObj(errMsg) val code = s""" ${childGen.code} @@ -941,7 +943,9 @@ case class GetExternalRowField( private val errMsg = s"The ${index}th field '$fieldName' of input row cannot be null." override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val errMsgField = ctx.addReferenceObj("errMsg", errMsg) + // Use unnamed reference that doesn't create a local field here to reduce the number of fields + // because errMsgField is used only when the field is null. 
+ val errMsgField = ctx.addReferenceObj(errMsg) val row = child.genCode(ctx) val code = s""" ${row.code} @@ -979,7 +983,9 @@ case class ValidateExternalType(child: Expression, expected: DataType) private val errMsg = s" is not a valid external type for schema of ${expected.simpleString}" override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val errMsgField = ctx.addReferenceObj("errMsg", errMsg) + // Use unnamed reference that doesn't create a local field here to reduce the number of fields + // because errMsgField is used only when the type doesn't match. + val errMsgField = ctx.addReferenceObj(errMsg) val input = child.genCode(ctx) val obj = input.value --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
