http://git-wip-us.apache.org/repos/asf/spark/blob/d7d0cad0/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala deleted file mode 100644 index 944962b..0000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala +++ /dev/null @@ -1,492 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution - -import org.apache.spark.{broadcast, TaskContext} -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.plans.physical.Partitioning -import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.catalyst.util.toCommentSafeString -import org.apache.spark.sql.execution.aggregate.TungstenAggregate -import org.apache.spark.sql.execution.joins.{BroadcastHashJoin, SortMergeJoin} -import org.apache.spark.sql.execution.metric.{LongSQLMetricValue, SQLMetrics} -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types._ - -/** - * An interface for those physical operators that support codegen. - */ -trait CodegenSupport extends SparkPlan { - - /** Prefix used in the current operator's variable names. */ - private def variablePrefix: String = this match { - case _: TungstenAggregate => "agg" - case _: BroadcastHashJoin => "bhj" - case _: SortMergeJoin => "smj" - case _: PhysicalRDD => "rdd" - case _: DataSourceScan => "scan" - case _ => nodeName.toLowerCase - } - - /** - * Creates a metric using the specified name. - * - * @return name of the variable representing the metric - */ - def metricTerm(ctx: CodegenContext, name: String): String = { - val metric = ctx.addReferenceObj(name, longMetric(name)) - val value = ctx.freshName("metricValue") - val cls = classOf[LongSQLMetricValue].getName - ctx.addMutableState(cls, value, s"$value = ($cls) $metric.localValue();") - value - } - - /** - * Whether this SparkPlan support whole stage codegen or not. - */ - def supportCodegen: Boolean = true - - /** - * Which SparkPlan is calling produce() of this one. It's itself for the first SparkPlan. - */ - protected var parent: CodegenSupport = null - - /** - * Returns all the RDDs of InternalRow which generates the input rows. - * - * Note: right now we support up to two RDDs. - */ - def inputRDDs(): Seq[RDD[InternalRow]] - - /** - * Returns Java source code to process the rows from input RDD. 
- */ - final def produce(ctx: CodegenContext, parent: CodegenSupport): String = executeQuery { - this.parent = parent - ctx.freshNamePrefix = variablePrefix - s""" - |/*** PRODUCE: ${toCommentSafeString(this.simpleString)} */ - |${doProduce(ctx)} - """.stripMargin - } - - /** - * Generate the Java source code to process, should be overridden by subclass to support codegen. - * - * doProduce() usually generate the framework, for example, aggregation could generate this: - * - * if (!initialized) { - * # create a hash map, then build the aggregation hash map - * # call child.produce() - * initialized = true; - * } - * while (hashmap.hasNext()) { - * row = hashmap.next(); - * # build the aggregation results - * # create variables for results - * # call consume(), which will call parent.doConsume() - * if (shouldStop()) return; - * } - */ - protected def doProduce(ctx: CodegenContext): String - - /** - * Consume the generated columns or row from current SparkPlan, call it's parent's doConsume(). - */ - final def consume(ctx: CodegenContext, outputVars: Seq[ExprCode], row: String = null): String = { - val inputVars = - if (row != null) { - ctx.currentVars = null - ctx.INPUT_ROW = row - output.zipWithIndex.map { case (attr, i) => - BoundReference(i, attr.dataType, attr.nullable).genCode(ctx) - } - } else { - assert(outputVars != null) - assert(outputVars.length == output.length) - // outputVars will be used to generate the code for UnsafeRow, so we should copy them - outputVars.map(_.copy()) - } - - val rowVar = if (row != null) { - ExprCode("", "false", row) - } else { - if (outputVars.nonEmpty) { - val colExprs = output.zipWithIndex.map { case (attr, i) => - BoundReference(i, attr.dataType, attr.nullable) - } - val evaluateInputs = evaluateVariables(outputVars) - // generate the code to create a UnsafeRow - ctx.currentVars = outputVars - val ev = GenerateUnsafeProjection.createCode(ctx, colExprs, false) - val code = s""" - |$evaluateInputs - |${ev.code.trim} - """.stripMargin.trim - ExprCode(code, "false", ev.value) - } else { - // There is no columns - ExprCode("", "false", "unsafeRow") - } - } - - ctx.freshNamePrefix = parent.variablePrefix - val evaluated = evaluateRequiredVariables(output, inputVars, parent.usedInputs) - s""" - | - |/*** CONSUME: ${toCommentSafeString(parent.simpleString)} */ - |$evaluated - |${parent.doConsume(ctx, inputVars, rowVar)} - """.stripMargin - } - - /** - * Returns source code to evaluate all the variables, and clear the code of them, to prevent - * them to be evaluated twice. - */ - protected def evaluateVariables(variables: Seq[ExprCode]): String = { - val evaluate = variables.filter(_.code != "").map(_.code.trim).mkString("\n") - variables.foreach(_.code = "") - evaluate - } - - /** - * Returns source code to evaluate the variables for required attributes, and clear the code - * of evaluated variables, to prevent them to be evaluated twice. - */ - protected def evaluateRequiredVariables( - attributes: Seq[Attribute], - variables: Seq[ExprCode], - required: AttributeSet): String = { - val evaluateVars = new StringBuilder - variables.zipWithIndex.foreach { case (ev, i) => - if (ev.code != "" && required.contains(attributes(i))) { - evaluateVars.append(ev.code.trim + "\n") - ev.code = "" - } - } - evaluateVars.toString() - } - - /** - * The subset of inputSet those should be evaluated before this plan. - * - * We will use this to insert some code to access those columns that are actually used by current - * plan before calling doConsume(). 
- */ - def usedInputs: AttributeSet = references - - /** - * Generate the Java source code to process the rows from child SparkPlan. - * - * This should be override by subclass to support codegen. - * - * For example, Filter will generate the code like this: - * - * # code to evaluate the predicate expression, result is isNull1 and value2 - * if (isNull1 || !value2) continue; - * # call consume(), which will call parent.doConsume() - * - * Note: A plan can either consume the rows as UnsafeRow (row), or a list of variables (input). - */ - def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { - throw new UnsupportedOperationException - } -} - - -/** - * InputAdapter is used to hide a SparkPlan from a subtree that support codegen. - * - * This is the leaf node of a tree with WholeStageCodegen, is used to generate code that consumes - * an RDD iterator of InternalRow. - */ -case class InputAdapter(child: SparkPlan) extends UnaryNode with CodegenSupport { - - override def output: Seq[Attribute] = child.output - override def outputPartitioning: Partitioning = child.outputPartitioning - override def outputOrdering: Seq[SortOrder] = child.outputOrdering - - override def doExecute(): RDD[InternalRow] = { - child.execute() - } - - override def doExecuteBroadcast[T](): broadcast.Broadcast[T] = { - child.doExecuteBroadcast() - } - - override def inputRDDs(): Seq[RDD[InternalRow]] = { - child.execute() :: Nil - } - - override def doProduce(ctx: CodegenContext): String = { - val input = ctx.freshName("input") - // Right now, InputAdapter is only used when there is one input RDD. - ctx.addMutableState("scala.collection.Iterator", input, s"$input = inputs[0];") - val row = ctx.freshName("row") - s""" - | while ($input.hasNext()) { - | InternalRow $row = (InternalRow) $input.next(); - | ${consume(ctx, null, row).trim} - | if (shouldStop()) return; - | } - """.stripMargin - } - - override def simpleString: String = "INPUT" - - override def treeChildren: Seq[SparkPlan] = Nil -} - -object WholeStageCodegen { - val PIPELINE_DURATION_METRIC = "duration" -} - -/** - * WholeStageCodegen compile a subtree of plans that support codegen together into single Java - * function. - * - * Here is the call graph of to generate Java source (plan A support codegen, but plan B does not): - * - * WholeStageCodegen Plan A FakeInput Plan B - * ========================================================================= - * - * -> execute() - * | - * doExecute() ---------> inputRDDs() -------> inputRDDs() ------> execute() - * | - * +-----------------> produce() - * | - * doProduce() -------> produce() - * | - * doProduce() - * | - * doConsume() <--------- consume() - * | - * doConsume() <-------- consume() - * - * SparkPlan A should override doProduce() and doConsume(). - * - * doCodeGen() will create a CodeGenContext, which will hold a list of variables for input, - * used to generated code for BoundReference. - */ -case class WholeStageCodegen(child: SparkPlan) extends UnaryNode with CodegenSupport { - - override def output: Seq[Attribute] = child.output - override def outputPartitioning: Partitioning = child.outputPartitioning - override def outputOrdering: Seq[SortOrder] = child.outputOrdering - - override private[sql] lazy val metrics = Map( - "pipelineTime" -> SQLMetrics.createTimingMetric(sparkContext, - WholeStageCodegen.PIPELINE_DURATION_METRIC)) - - /** - * Generates code for this subtree. - * - * @return the tuple of the codegen context and the actual generated source. 
- */ - def doCodeGen(): (CodegenContext, String) = { - val ctx = new CodegenContext - val code = child.asInstanceOf[CodegenSupport].produce(ctx, this) - val source = s""" - public Object generate(Object[] references) { - return new GeneratedIterator(references); - } - - /** Codegened pipeline for: - * ${toCommentSafeString(child.treeString.trim)} - */ - final class GeneratedIterator extends org.apache.spark.sql.execution.BufferedRowIterator { - - private Object[] references; - ${ctx.declareMutableStates()} - - public GeneratedIterator(Object[] references) { - this.references = references; - } - - public void init(int index, scala.collection.Iterator inputs[]) { - partitionIndex = index; - ${ctx.initMutableStates()} - } - - ${ctx.declareAddedFunctions()} - - protected void processNext() throws java.io.IOException { - ${code.trim} - } - } - """.trim - - // try to compile, helpful for debug - val cleanedSource = CodeFormatter.stripExtraNewLines(source) - logDebug(s"\n${CodeFormatter.format(cleanedSource)}") - CodeGenerator.compile(cleanedSource) - (ctx, cleanedSource) - } - - override def doExecute(): RDD[InternalRow] = { - val (ctx, cleanedSource) = doCodeGen() - val references = ctx.references.toArray - - val durationMs = longMetric("pipelineTime") - - val rdds = child.asInstanceOf[CodegenSupport].inputRDDs() - assert(rdds.size <= 2, "Up to two input RDDs can be supported") - if (rdds.length == 1) { - rdds.head.mapPartitionsWithIndex { (index, iter) => - val clazz = CodeGenerator.compile(cleanedSource) - val buffer = clazz.generate(references).asInstanceOf[BufferedRowIterator] - buffer.init(index, Array(iter)) - new Iterator[InternalRow] { - override def hasNext: Boolean = { - val v = buffer.hasNext - if (!v) durationMs += buffer.durationMs() - v - } - override def next: InternalRow = buffer.next() - } - } - } else { - // Right now, we support up to two input RDDs. - rdds.head.zipPartitions(rdds(1)) { (leftIter, rightIter) => - val partitionIndex = TaskContext.getPartitionId() - val clazz = CodeGenerator.compile(cleanedSource) - val buffer = clazz.generate(references).asInstanceOf[BufferedRowIterator] - buffer.init(partitionIndex, Array(leftIter, rightIter)) - new Iterator[InternalRow] { - override def hasNext: Boolean = { - val v = buffer.hasNext - if (!v) durationMs += buffer.durationMs() - v - } - override def next: InternalRow = buffer.next() - } - } - } - } - - override def inputRDDs(): Seq[RDD[InternalRow]] = { - throw new UnsupportedOperationException - } - - override def doProduce(ctx: CodegenContext): String = { - throw new UnsupportedOperationException - } - - override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { - val doCopy = if (ctx.copyResult) { - ".copy()" - } else { - "" - } - s""" - |${row.code} - |append(${row.value}$doCopy); - """.stripMargin.trim - } - - override def innerChildren: Seq[SparkPlan] = { - child :: Nil - } - - private def collectInputs(plan: SparkPlan): Seq[SparkPlan] = plan match { - case InputAdapter(c) => c :: Nil - case other => other.children.flatMap(collectInputs) - } - - override def treeChildren: Seq[SparkPlan] = { - collectInputs(child) - } - - override def simpleString: String = "WholeStageCodegen" -} - - -/** - * Find the chained plans that support codegen, collapse them together as WholeStageCodegen. 
- */ -case class CollapseCodegenStages(conf: SQLConf) extends Rule[SparkPlan] { - - private def supportCodegen(e: Expression): Boolean = e match { - case e: LeafExpression => true - // CodegenFallback requires the input to be an InternalRow - case e: CodegenFallback => false - case _ => true - } - - private def numOfNestedFields(dataType: DataType): Int = dataType match { - case dt: StructType => dt.fields.map(f => numOfNestedFields(f.dataType)).sum - case m: MapType => numOfNestedFields(m.keyType) + numOfNestedFields(m.valueType) - case a: ArrayType => numOfNestedFields(a.elementType) - case u: UserDefinedType[_] => numOfNestedFields(u.sqlType) - case _ => 1 - } - - private def supportCodegen(plan: SparkPlan): Boolean = plan match { - case plan: CodegenSupport if plan.supportCodegen => - val willFallback = plan.expressions.exists(_.find(e => !supportCodegen(e)).isDefined) - // the generated code will be huge if there are too many columns - val hasTooManyOutputFields = - numOfNestedFields(plan.schema) > conf.wholeStageMaxNumFields - val hasTooManyInputFields = - plan.children.map(p => numOfNestedFields(p.schema)).exists(_ > conf.wholeStageMaxNumFields) - !willFallback && !hasTooManyOutputFields && !hasTooManyInputFields - case _ => false - } - - /** - * Inserts a InputAdapter on top of those that do not support codegen. - */ - private def insertInputAdapter(plan: SparkPlan): SparkPlan = plan match { - case j @ SortMergeJoin(_, _, _, _, left, right) if j.supportCodegen => - // The children of SortMergeJoin should do codegen separately. - j.copy(left = InputAdapter(insertWholeStageCodegen(left)), - right = InputAdapter(insertWholeStageCodegen(right))) - case p if !supportCodegen(p) => - // collapse them recursively - InputAdapter(insertWholeStageCodegen(p)) - case p => - p.withNewChildren(p.children.map(insertInputAdapter)) - } - - /** - * Inserts a WholeStageCodegen on top of those that support codegen. - */ - private def insertWholeStageCodegen(plan: SparkPlan): SparkPlan = plan match { - // For operators that will output domain object, do not insert WholeStageCodegen for it as - // domain object can not be written into unsafe row. - case plan if plan.output.length == 1 && plan.output.head.dataType.isInstanceOf[ObjectType] => - plan.withNewChildren(plan.children.map(insertWholeStageCodegen)) - case plan: CodegenSupport if supportCodegen(plan) => - WholeStageCodegen(insertInputAdapter(plan)) - case other => - other.withNewChildren(other.children.map(insertWholeStageCodegen)) - } - - def apply(plan: SparkPlan): SparkPlan = { - if (conf.wholeStageEnabled) { - insertWholeStageCodegen(plan) - } else { - plan - } - } -}
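Editor's note: the CodegenSupport trait in the file above documents the produce/consume protocol that whole-stage codegen uses to fuse a chain of operators into one generated loop. The standalone sketch below is a simplified illustration of that protocol, not the Spark API: every name in it (MiniCodegenNode, MiniScan, MiniFilter, MiniSink) is invented for this example. It only shows the shape of the interaction — the leaf's doProduce() emits the driving loop, and each parent's doConsume() is spliced into that loop body through consume().

// Minimal, self-contained model of the produce/consume fusion pattern.
// All names are hypothetical; this is an illustrative sketch, not Spark code.
object ProduceConsumeDemo {

  trait MiniCodegenNode {
    // The operator that consumes this node's output rows (set by produce()).
    protected var parent: MiniCodegenNode = null

    // Called by the parent: generate the driving loop, or delegate to the child.
    final def produce(parent: MiniCodegenNode): String = {
      this.parent = parent
      doProduce()
    }

    // Called by a child once a row variable is ready: emit the parent's code
    // inline, so it ends up inside the child's loop body.
    final def consume(row: String): String = parent.doConsume(row)

    protected def doProduce(): String
    protected def doConsume(row: String): String =
      throw new UnsupportedOperationException
  }

  // Leaf operator: owns the loop over the input iterator.
  class MiniScan extends MiniCodegenNode {
    override protected def doProduce(): String =
      s"""|while (input.hasNext()) {
          |  InternalRow row = (InternalRow) input.next();
          |${consume("row")}
          |  if (shouldStop()) return;
          |}""".stripMargin
  }

  // Intermediate operator: injects a predicate check, then calls consume()
  // so the parent's statements land inside the same loop body.
  class MiniFilter(child: MiniCodegenNode, predicate: String) extends MiniCodegenNode {
    override protected def doProduce(): String = child.produce(this)
    override protected def doConsume(row: String): String =
      s"""|  // FILTER: keep only rows satisfying the predicate
          |  if (!($predicate)) continue;
          |${consume(row)}""".stripMargin
  }

  // Root operator: appends surviving rows to the output buffer.
  class MiniSink(child: MiniCodegenNode) extends MiniCodegenNode {
    def generate(): String = child.produce(this)
    override protected def doProduce(): String =
      throw new UnsupportedOperationException("the sink never drives the loop")
    override protected def doConsume(row: String): String =
      s"  append($row.copy());"
  }

  def main(args: Array[String]): Unit = {
    val plan = new MiniSink(new MiniFilter(new MiniScan, "row.getInt(0) > 5"))
    println(plan.generate())
  }
}

Running the sketch prints a single fused while-loop in which the filter's check and the sink's append() sit inside the scan's iteration — the same overall shape as the processNext() body that the real WholeStageCodegen operator compiles for a supported subtree.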
http://git-wip-us.apache.org/repos/asf/spark/blob/d7d0cad0/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala new file mode 100644 index 0000000..6a03bd0 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -0,0 +1,492 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.{broadcast, TaskContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen._ +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.util.toCommentSafeString +import org.apache.spark.sql.execution.aggregate.TungstenAggregate +import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec} +import org.apache.spark.sql.execution.metric.{LongSQLMetricValue, SQLMetrics} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +/** + * An interface for those physical operators that support codegen. + */ +trait CodegenSupport extends SparkPlan { + + /** Prefix used in the current operator's variable names. */ + private def variablePrefix: String = this match { + case _: TungstenAggregate => "agg" + case _: BroadcastHashJoinExec => "bhj" + case _: SortMergeJoinExec => "smj" + case _: RDDScanExec => "rdd" + case _: DataSourceScanExec => "scan" + case _ => nodeName.toLowerCase + } + + /** + * Creates a metric using the specified name. + * + * @return name of the variable representing the metric + */ + def metricTerm(ctx: CodegenContext, name: String): String = { + val metric = ctx.addReferenceObj(name, longMetric(name)) + val value = ctx.freshName("metricValue") + val cls = classOf[LongSQLMetricValue].getName + ctx.addMutableState(cls, value, s"$value = ($cls) $metric.localValue();") + value + } + + /** + * Whether this SparkPlan support whole stage codegen or not. + */ + def supportCodegen: Boolean = true + + /** + * Which SparkPlan is calling produce() of this one. It's itself for the first SparkPlan. + */ + protected var parent: CodegenSupport = null + + /** + * Returns all the RDDs of InternalRow which generates the input rows. + * + * Note: right now we support up to two RDDs. + */ + def inputRDDs(): Seq[RDD[InternalRow]] + + /** + * Returns Java source code to process the rows from input RDD. 
+ */ + final def produce(ctx: CodegenContext, parent: CodegenSupport): String = executeQuery { + this.parent = parent + ctx.freshNamePrefix = variablePrefix + s""" + |/*** PRODUCE: ${toCommentSafeString(this.simpleString)} */ + |${doProduce(ctx)} + """.stripMargin + } + + /** + * Generate the Java source code to process, should be overridden by subclass to support codegen. + * + * doProduce() usually generate the framework, for example, aggregation could generate this: + * + * if (!initialized) { + * # create a hash map, then build the aggregation hash map + * # call child.produce() + * initialized = true; + * } + * while (hashmap.hasNext()) { + * row = hashmap.next(); + * # build the aggregation results + * # create variables for results + * # call consume(), which will call parent.doConsume() + * if (shouldStop()) return; + * } + */ + protected def doProduce(ctx: CodegenContext): String + + /** + * Consume the generated columns or row from current SparkPlan, call it's parent's doConsume(). + */ + final def consume(ctx: CodegenContext, outputVars: Seq[ExprCode], row: String = null): String = { + val inputVars = + if (row != null) { + ctx.currentVars = null + ctx.INPUT_ROW = row + output.zipWithIndex.map { case (attr, i) => + BoundReference(i, attr.dataType, attr.nullable).genCode(ctx) + } + } else { + assert(outputVars != null) + assert(outputVars.length == output.length) + // outputVars will be used to generate the code for UnsafeRow, so we should copy them + outputVars.map(_.copy()) + } + + val rowVar = if (row != null) { + ExprCode("", "false", row) + } else { + if (outputVars.nonEmpty) { + val colExprs = output.zipWithIndex.map { case (attr, i) => + BoundReference(i, attr.dataType, attr.nullable) + } + val evaluateInputs = evaluateVariables(outputVars) + // generate the code to create a UnsafeRow + ctx.currentVars = outputVars + val ev = GenerateUnsafeProjection.createCode(ctx, colExprs, false) + val code = s""" + |$evaluateInputs + |${ev.code.trim} + """.stripMargin.trim + ExprCode(code, "false", ev.value) + } else { + // There is no columns + ExprCode("", "false", "unsafeRow") + } + } + + ctx.freshNamePrefix = parent.variablePrefix + val evaluated = evaluateRequiredVariables(output, inputVars, parent.usedInputs) + s""" + | + |/*** CONSUME: ${toCommentSafeString(parent.simpleString)} */ + |$evaluated + |${parent.doConsume(ctx, inputVars, rowVar)} + """.stripMargin + } + + /** + * Returns source code to evaluate all the variables, and clear the code of them, to prevent + * them to be evaluated twice. + */ + protected def evaluateVariables(variables: Seq[ExprCode]): String = { + val evaluate = variables.filter(_.code != "").map(_.code.trim).mkString("\n") + variables.foreach(_.code = "") + evaluate + } + + /** + * Returns source code to evaluate the variables for required attributes, and clear the code + * of evaluated variables, to prevent them to be evaluated twice. + */ + protected def evaluateRequiredVariables( + attributes: Seq[Attribute], + variables: Seq[ExprCode], + required: AttributeSet): String = { + val evaluateVars = new StringBuilder + variables.zipWithIndex.foreach { case (ev, i) => + if (ev.code != "" && required.contains(attributes(i))) { + evaluateVars.append(ev.code.trim + "\n") + ev.code = "" + } + } + evaluateVars.toString() + } + + /** + * The subset of inputSet those should be evaluated before this plan. + * + * We will use this to insert some code to access those columns that are actually used by current + * plan before calling doConsume(). 
+ */ + def usedInputs: AttributeSet = references + + /** + * Generate the Java source code to process the rows from child SparkPlan. + * + * This should be override by subclass to support codegen. + * + * For example, Filter will generate the code like this: + * + * # code to evaluate the predicate expression, result is isNull1 and value2 + * if (isNull1 || !value2) continue; + * # call consume(), which will call parent.doConsume() + * + * Note: A plan can either consume the rows as UnsafeRow (row), or a list of variables (input). + */ + def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { + throw new UnsupportedOperationException + } +} + + +/** + * InputAdapter is used to hide a SparkPlan from a subtree that support codegen. + * + * This is the leaf node of a tree with WholeStageCodegen, is used to generate code that consumes + * an RDD iterator of InternalRow. + */ +case class InputAdapter(child: SparkPlan) extends UnaryExecNode with CodegenSupport { + + override def output: Seq[Attribute] = child.output + override def outputPartitioning: Partitioning = child.outputPartitioning + override def outputOrdering: Seq[SortOrder] = child.outputOrdering + + override def doExecute(): RDD[InternalRow] = { + child.execute() + } + + override def doExecuteBroadcast[T](): broadcast.Broadcast[T] = { + child.doExecuteBroadcast() + } + + override def inputRDDs(): Seq[RDD[InternalRow]] = { + child.execute() :: Nil + } + + override def doProduce(ctx: CodegenContext): String = { + val input = ctx.freshName("input") + // Right now, InputAdapter is only used when there is one input RDD. + ctx.addMutableState("scala.collection.Iterator", input, s"$input = inputs[0];") + val row = ctx.freshName("row") + s""" + | while ($input.hasNext()) { + | InternalRow $row = (InternalRow) $input.next(); + | ${consume(ctx, null, row).trim} + | if (shouldStop()) return; + | } + """.stripMargin + } + + override def simpleString: String = "INPUT" + + override def treeChildren: Seq[SparkPlan] = Nil +} + +object WholeStageCodegenExec { + val PIPELINE_DURATION_METRIC = "duration" +} + +/** + * WholeStageCodegen compile a subtree of plans that support codegen together into single Java + * function. + * + * Here is the call graph of to generate Java source (plan A support codegen, but plan B does not): + * + * WholeStageCodegen Plan A FakeInput Plan B + * ========================================================================= + * + * -> execute() + * | + * doExecute() ---------> inputRDDs() -------> inputRDDs() ------> execute() + * | + * +-----------------> produce() + * | + * doProduce() -------> produce() + * | + * doProduce() + * | + * doConsume() <--------- consume() + * | + * doConsume() <-------- consume() + * + * SparkPlan A should override doProduce() and doConsume(). + * + * doCodeGen() will create a CodeGenContext, which will hold a list of variables for input, + * used to generated code for BoundReference. + */ +case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with CodegenSupport { + + override def output: Seq[Attribute] = child.output + override def outputPartitioning: Partitioning = child.outputPartitioning + override def outputOrdering: Seq[SortOrder] = child.outputOrdering + + override private[sql] lazy val metrics = Map( + "pipelineTime" -> SQLMetrics.createTimingMetric(sparkContext, + WholeStageCodegenExec.PIPELINE_DURATION_METRIC)) + + /** + * Generates code for this subtree. + * + * @return the tuple of the codegen context and the actual generated source. 
+ */ + def doCodeGen(): (CodegenContext, String) = { + val ctx = new CodegenContext + val code = child.asInstanceOf[CodegenSupport].produce(ctx, this) + val source = s""" + public Object generate(Object[] references) { + return new GeneratedIterator(references); + } + + /** Codegened pipeline for: + * ${toCommentSafeString(child.treeString.trim)} + */ + final class GeneratedIterator extends org.apache.spark.sql.execution.BufferedRowIterator { + + private Object[] references; + ${ctx.declareMutableStates()} + + public GeneratedIterator(Object[] references) { + this.references = references; + } + + public void init(int index, scala.collection.Iterator inputs[]) { + partitionIndex = index; + ${ctx.initMutableStates()} + } + + ${ctx.declareAddedFunctions()} + + protected void processNext() throws java.io.IOException { + ${code.trim} + } + } + """.trim + + // try to compile, helpful for debug + val cleanedSource = CodeFormatter.stripExtraNewLines(source) + logDebug(s"\n${CodeFormatter.format(cleanedSource)}") + CodeGenerator.compile(cleanedSource) + (ctx, cleanedSource) + } + + override def doExecute(): RDD[InternalRow] = { + val (ctx, cleanedSource) = doCodeGen() + val references = ctx.references.toArray + + val durationMs = longMetric("pipelineTime") + + val rdds = child.asInstanceOf[CodegenSupport].inputRDDs() + assert(rdds.size <= 2, "Up to two input RDDs can be supported") + if (rdds.length == 1) { + rdds.head.mapPartitionsWithIndex { (index, iter) => + val clazz = CodeGenerator.compile(cleanedSource) + val buffer = clazz.generate(references).asInstanceOf[BufferedRowIterator] + buffer.init(index, Array(iter)) + new Iterator[InternalRow] { + override def hasNext: Boolean = { + val v = buffer.hasNext + if (!v) durationMs += buffer.durationMs() + v + } + override def next: InternalRow = buffer.next() + } + } + } else { + // Right now, we support up to two input RDDs. + rdds.head.zipPartitions(rdds(1)) { (leftIter, rightIter) => + val partitionIndex = TaskContext.getPartitionId() + val clazz = CodeGenerator.compile(cleanedSource) + val buffer = clazz.generate(references).asInstanceOf[BufferedRowIterator] + buffer.init(partitionIndex, Array(leftIter, rightIter)) + new Iterator[InternalRow] { + override def hasNext: Boolean = { + val v = buffer.hasNext + if (!v) durationMs += buffer.durationMs() + v + } + override def next: InternalRow = buffer.next() + } + } + } + } + + override def inputRDDs(): Seq[RDD[InternalRow]] = { + throw new UnsupportedOperationException + } + + override def doProduce(ctx: CodegenContext): String = { + throw new UnsupportedOperationException + } + + override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { + val doCopy = if (ctx.copyResult) { + ".copy()" + } else { + "" + } + s""" + |${row.code} + |append(${row.value}$doCopy); + """.stripMargin.trim + } + + override def innerChildren: Seq[SparkPlan] = { + child :: Nil + } + + private def collectInputs(plan: SparkPlan): Seq[SparkPlan] = plan match { + case InputAdapter(c) => c :: Nil + case other => other.children.flatMap(collectInputs) + } + + override def treeChildren: Seq[SparkPlan] = { + collectInputs(child) + } + + override def simpleString: String = "WholeStageCodegen" +} + + +/** + * Find the chained plans that support codegen, collapse them together as WholeStageCodegen. 
+ */ +case class CollapseCodegenStages(conf: SQLConf) extends Rule[SparkPlan] { + + private def supportCodegen(e: Expression): Boolean = e match { + case e: LeafExpression => true + // CodegenFallback requires the input to be an InternalRow + case e: CodegenFallback => false + case _ => true + } + + private def numOfNestedFields(dataType: DataType): Int = dataType match { + case dt: StructType => dt.fields.map(f => numOfNestedFields(f.dataType)).sum + case m: MapType => numOfNestedFields(m.keyType) + numOfNestedFields(m.valueType) + case a: ArrayType => numOfNestedFields(a.elementType) + case u: UserDefinedType[_] => numOfNestedFields(u.sqlType) + case _ => 1 + } + + private def supportCodegen(plan: SparkPlan): Boolean = plan match { + case plan: CodegenSupport if plan.supportCodegen => + val willFallback = plan.expressions.exists(_.find(e => !supportCodegen(e)).isDefined) + // the generated code will be huge if there are too many columns + val hasTooManyOutputFields = + numOfNestedFields(plan.schema) > conf.wholeStageMaxNumFields + val hasTooManyInputFields = + plan.children.map(p => numOfNestedFields(p.schema)).exists(_ > conf.wholeStageMaxNumFields) + !willFallback && !hasTooManyOutputFields && !hasTooManyInputFields + case _ => false + } + + /** + * Inserts a InputAdapter on top of those that do not support codegen. + */ + private def insertInputAdapter(plan: SparkPlan): SparkPlan = plan match { + case j @ SortMergeJoinExec(_, _, _, _, left, right) if j.supportCodegen => + // The children of SortMergeJoin should do codegen separately. + j.copy(left = InputAdapter(insertWholeStageCodegen(left)), + right = InputAdapter(insertWholeStageCodegen(right))) + case p if !supportCodegen(p) => + // collapse them recursively + InputAdapter(insertWholeStageCodegen(p)) + case p => + p.withNewChildren(p.children.map(insertInputAdapter)) + } + + /** + * Inserts a WholeStageCodegen on top of those that support codegen. + */ + private def insertWholeStageCodegen(plan: SparkPlan): SparkPlan = plan match { + // For operators that will output domain object, do not insert WholeStageCodegen for it as + // domain object can not be written into unsafe row. + case plan if plan.output.length == 1 && plan.output.head.dataType.isInstanceOf[ObjectType] => + plan.withNewChildren(plan.children.map(insertWholeStageCodegen)) + case plan: CodegenSupport if supportCodegen(plan) => + WholeStageCodegenExec(insertInputAdapter(plan)) + case other => + other.withNewChildren(other.children.map(insertWholeStageCodegen)) + } + + def apply(plan: SparkPlan): SparkPlan = { + if (conf.wholeStageEnabled) { + insertWholeStageCodegen(plan) + } else { + plan + } + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/d7d0cad0/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala deleted file mode 100644 index a46d0e0..0000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala +++ /dev/null @@ -1,1008 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution - -import java.util - -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.{SparkEnv, TaskContext} -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate._ -import org.apache.spark.sql.catalyst.plans.physical._ -import org.apache.spark.sql.types.IntegerType -import org.apache.spark.util.collection.unsafe.sort.{UnsafeExternalSorter, UnsafeSorterIterator} - -/** - * This class calculates and outputs (windowed) aggregates over the rows in a single (sorted) - * partition. The aggregates are calculated for each row in the group. Special processing - * instructions, frames, are used to calculate these aggregates. Frames are processed in the order - * specified in the window specification (the ORDER BY ... clause). There are four different frame - * types: - * - Entire partition: The frame is the entire partition, i.e. - * UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING. For this case, window function will take all - * rows as inputs and be evaluated once. - * - Growing frame: We only add new rows into the frame, i.e. UNBOUNDED PRECEDING AND .... - * Every time we move to a new row to process, we add some rows to the frame. We do not remove - * rows from this frame. - * - Shrinking frame: We only remove rows from the frame, i.e. ... AND UNBOUNDED FOLLOWING. - * Every time we move to a new row to process, we remove some rows from the frame. We do not add - * rows to this frame. - * - Moving frame: Every time we move to a new row to process, we remove some rows from the frame - * and we add some rows to the frame. Examples are: - * 1 PRECEDING AND CURRENT ROW and 1 FOLLOWING AND 2 FOLLOWING. - * - Offset frame: The frame consist of one row, which is an offset number of rows away from the - * current row. Only [[OffsetWindowFunction]]s can be processed in an offset frame. - * - * Different frame boundaries can be used in Growing, Shrinking and Moving frames. A frame - * boundary can be either Row or Range based: - * - Row Based: A row based boundary is based on the position of the row within the partition. - * An offset indicates the number of rows above or below the current row, the frame for the - * current row starts or ends. For instance, given a row based sliding frame with a lower bound - * offset of -1 and a upper bound offset of +2. The frame for row with index 5 would range from - * index 4 to index 6. - * - Range based: A range based boundary is based on the actual value of the ORDER BY - * expression(s). An offset is used to alter the value of the ORDER BY expression, for - * instance if the current order by expression has a value of 10 and the lower bound offset - * is -3, the resulting lower bound for the current row will be 10 - 3 = 7. 
This however puts a - * number of constraints on the ORDER BY expressions: there can be only one expression and this - * expression must have a numerical data type. An exception can be made when the offset is 0, - * because no value modification is needed, in this case multiple and non-numeric ORDER BY - * expression are allowed. - * - * This is quite an expensive operator because every row for a single group must be in the same - * partition and partitions must be sorted according to the grouping and sort order. The operator - * requires the planner to take care of the partitioning and sorting. - * - * The operator is semi-blocking. The window functions and aggregates are calculated one group at - * a time, the result will only be made available after the processing for the entire group has - * finished. The operator is able to process different frame configurations at the same time. This - * is done by delegating the actual frame processing (i.e. calculation of the window functions) to - * specialized classes, see [[WindowFunctionFrame]], which take care of their own frame type: - * Entire Partition, Sliding, Growing & Shrinking. Boundary evaluation is also delegated to a pair - * of specialized classes: [[RowBoundOrdering]] & [[RangeBoundOrdering]]. - */ -case class Window( - windowExpression: Seq[NamedExpression], - partitionSpec: Seq[Expression], - orderSpec: Seq[SortOrder], - child: SparkPlan) - extends UnaryNode { - - override def output: Seq[Attribute] = - child.output ++ windowExpression.map(_.toAttribute) - - override def requiredChildDistribution: Seq[Distribution] = { - if (partitionSpec.isEmpty) { - // Only show warning when the number of bytes is larger than 100 MB? - logWarning("No Partition Defined for Window operation! Moving all data to a single " - + "partition, this can cause serious performance degradation.") - AllTuples :: Nil - } else ClusteredDistribution(partitionSpec) :: Nil - } - - override def requiredChildOrdering: Seq[Seq[SortOrder]] = - Seq(partitionSpec.map(SortOrder(_, Ascending)) ++ orderSpec) - - override def outputOrdering: Seq[SortOrder] = child.outputOrdering - - /** - * Create a bound ordering object for a given frame type and offset. A bound ordering object is - * used to determine which input row lies within the frame boundaries of an output row. - * - * This method uses Code Generation. It can only be used on the executor side. - * - * @param frameType to evaluate. This can either be Row or Range based. - * @param offset with respect to the row. - * @return a bound ordering object. - */ - private[this] def createBoundOrdering(frameType: FrameType, offset: Int): BoundOrdering = { - frameType match { - case RangeFrame => - val (exprs, current, bound) = if (offset == 0) { - // Use the entire order expression when the offset is 0. - val exprs = orderSpec.map(_.child) - val buildProjection = () => newMutableProjection(exprs, child.output) - (orderSpec, buildProjection(), buildProjection()) - } else if (orderSpec.size == 1) { - // Use only the first order expression when the offset is non-null. - val sortExpr = orderSpec.head - val expr = sortExpr.child - // Create the projection which returns the current 'value'. - val current = newMutableProjection(expr :: Nil, child.output) - // Flip the sign of the offset when processing the order is descending - val boundOffset = sortExpr.direction match { - case Descending => -offset - case Ascending => offset - } - // Create the projection which returns the current 'value' modified by adding the offset. 
- val boundExpr = Add(expr, Cast(Literal.create(boundOffset, IntegerType), expr.dataType)) - val bound = newMutableProjection(boundExpr :: Nil, child.output) - (sortExpr :: Nil, current, bound) - } else { - sys.error("Non-Zero range offsets are not supported for windows " + - "with multiple order expressions.") - } - // Construct the ordering. This is used to compare the result of current value projection - // to the result of bound value projection. This is done manually because we want to use - // Code Generation (if it is enabled). - val sortExprs = exprs.zipWithIndex.map { case (e, i) => - SortOrder(BoundReference(i, e.dataType, e.nullable), e.direction) - } - val ordering = newOrdering(sortExprs, Nil) - RangeBoundOrdering(ordering, current, bound) - case RowFrame => RowBoundOrdering(offset) - } - } - - /** - * Collection containing an entry for each window frame to process. Each entry contains a frames' - * WindowExpressions and factory function for the WindowFrameFunction. - */ - private[this] lazy val windowFrameExpressionFactoryPairs = { - type FrameKey = (String, FrameType, Option[Int], Option[Int]) - type ExpressionBuffer = mutable.Buffer[Expression] - val framedFunctions = mutable.Map.empty[FrameKey, (ExpressionBuffer, ExpressionBuffer)] - - // Add a function and its function to the map for a given frame. - def collect(tpe: String, fr: SpecifiedWindowFrame, e: Expression, fn: Expression): Unit = { - val key = (tpe, fr.frameType, FrameBoundary(fr.frameStart), FrameBoundary(fr.frameEnd)) - val (es, fns) = framedFunctions.getOrElseUpdate( - key, (ArrayBuffer.empty[Expression], ArrayBuffer.empty[Expression])) - es.append(e) - fns.append(fn) - } - - // Collect all valid window functions and group them by their frame. - windowExpression.foreach { x => - x.foreach { - case e @ WindowExpression(function, spec) => - val frame = spec.frameSpecification.asInstanceOf[SpecifiedWindowFrame] - function match { - case AggregateExpression(f, _, _, _) => collect("AGGREGATE", frame, e, f) - case f: AggregateWindowFunction => collect("AGGREGATE", frame, e, f) - case f: OffsetWindowFunction => collect("OFFSET", frame, e, f) - case f => sys.error(s"Unsupported window function: $f") - } - case _ => - } - } - - // Map the groups to a (unbound) expression and frame factory pair. - var numExpressions = 0 - framedFunctions.toSeq.map { - case (key, (expressions, functionSeq)) => - val ordinal = numExpressions - val functions = functionSeq.toArray - - // Construct an aggregate processor if we need one. - def processor = AggregateProcessor( - functions, - ordinal, - child.output, - (expressions, schema) => - newMutableProjection(expressions, schema, subexpressionEliminationEnabled)) - - // Create the factory - val factory = key match { - // Offset Frame - case ("OFFSET", RowFrame, Some(offset), Some(h)) if offset == h => - target: MutableRow => - new OffsetWindowFunctionFrame( - target, - ordinal, - functions, - child.output, - (expressions, schema) => - newMutableProjection(expressions, schema, subexpressionEliminationEnabled), - offset) - - // Growing Frame. - case ("AGGREGATE", frameType, None, Some(high)) => - target: MutableRow => { - new UnboundedPrecedingWindowFunctionFrame( - target, - processor, - createBoundOrdering(frameType, high)) - } - - // Shrinking Frame. - case ("AGGREGATE", frameType, Some(low), None) => - target: MutableRow => { - new UnboundedFollowingWindowFunctionFrame( - target, - processor, - createBoundOrdering(frameType, low)) - } - - // Moving Frame. 
- case ("AGGREGATE", frameType, Some(low), Some(high)) => - target: MutableRow => { - new SlidingWindowFunctionFrame( - target, - processor, - createBoundOrdering(frameType, low), - createBoundOrdering(frameType, high)) - } - - // Entire Partition Frame. - case ("AGGREGATE", frameType, None, None) => - target: MutableRow => { - new UnboundedWindowFunctionFrame(target, processor) - } - } - - // Keep track of the number of expressions. This is a side-effect in a map... - numExpressions += expressions.size - - // Create the Frame Expression - Factory pair. - (expressions, factory) - } - } - - /** - * Create the resulting projection. - * - * This method uses Code Generation. It can only be used on the executor side. - * - * @param expressions unbound ordered function expressions. - * @return the final resulting projection. - */ - private[this] def createResultProjection( - expressions: Seq[Expression]): UnsafeProjection = { - val references = expressions.zipWithIndex.map{ case (e, i) => - // Results of window expressions will be on the right side of child's output - BoundReference(child.output.size + i, e.dataType, e.nullable) - } - val unboundToRefMap = expressions.zip(references).toMap - val patchedWindowExpression = windowExpression.map(_.transform(unboundToRefMap)) - UnsafeProjection.create( - child.output ++ patchedWindowExpression, - child.output) - } - - protected override def doExecute(): RDD[InternalRow] = { - // Unwrap the expressions and factories from the map. - val expressions = windowFrameExpressionFactoryPairs.flatMap(_._1) - val factories = windowFrameExpressionFactoryPairs.map(_._2).toArray - - // Start processing. - child.execute().mapPartitions { stream => - new Iterator[InternalRow] { - - // Get all relevant projections. - val result = createResultProjection(expressions) - val grouping = UnsafeProjection.create(partitionSpec, child.output) - - // Manage the stream and the grouping. - var nextRow: UnsafeRow = null - var nextGroup: UnsafeRow = null - var nextRowAvailable: Boolean = false - private[this] def fetchNextRow() { - nextRowAvailable = stream.hasNext - if (nextRowAvailable) { - nextRow = stream.next().asInstanceOf[UnsafeRow] - nextGroup = grouping(nextRow) - } else { - nextRow = null - nextGroup = null - } - } - fetchNextRow() - - // Manage the current partition. - val rows = ArrayBuffer.empty[UnsafeRow] - val inputFields = child.output.length - var sorter: UnsafeExternalSorter = null - var rowBuffer: RowBuffer = null - val windowFunctionResult = new SpecificMutableRow(expressions.map(_.dataType)) - val frames = factories.map(_(windowFunctionResult)) - val numFrames = frames.length - private[this] def fetchNextPartition() { - // Collect all the rows in the current partition. - // Before we start to fetch new input rows, make a copy of nextGroup. - val currentGroup = nextGroup.copy() - - // clear last partition - if (sorter != null) { - // the last sorter of this task will be cleaned up via task completion listener - sorter.cleanupResources() - sorter = null - } else { - rows.clear() - } - - while (nextRowAvailable && nextGroup == currentGroup) { - if (sorter == null) { - rows += nextRow.copy() - - if (rows.length >= 4096) { - // We will not sort the rows, so prefixComparator and recordComparator are null. 
- sorter = UnsafeExternalSorter.create( - TaskContext.get().taskMemoryManager(), - SparkEnv.get.blockManager, - SparkEnv.get.serializerManager, - TaskContext.get(), - null, - null, - 1024, - SparkEnv.get.memoryManager.pageSizeBytes, - false) - rows.foreach { r => - sorter.insertRecord(r.getBaseObject, r.getBaseOffset, r.getSizeInBytes, 0) - } - rows.clear() - } - } else { - sorter.insertRecord(nextRow.getBaseObject, nextRow.getBaseOffset, - nextRow.getSizeInBytes, 0) - } - fetchNextRow() - } - if (sorter != null) { - rowBuffer = new ExternalRowBuffer(sorter, inputFields) - } else { - rowBuffer = new ArrayRowBuffer(rows) - } - - // Setup the frames. - var i = 0 - while (i < numFrames) { - frames(i).prepare(rowBuffer.copy()) - i += 1 - } - - // Setup iteration - rowIndex = 0 - rowsSize = rowBuffer.size() - } - - // Iteration - var rowIndex = 0 - var rowsSize = 0L - - override final def hasNext: Boolean = rowIndex < rowsSize || nextRowAvailable - - val join = new JoinedRow - override final def next(): InternalRow = { - // Load the next partition if we need to. - if (rowIndex >= rowsSize && nextRowAvailable) { - fetchNextPartition() - } - - if (rowIndex < rowsSize) { - // Get the results for the window frames. - var i = 0 - val current = rowBuffer.next() - while (i < numFrames) { - frames(i).write(rowIndex, current) - i += 1 - } - - // 'Merge' the input row with the window function result - join(current, windowFunctionResult) - rowIndex += 1 - - // Return the projection. - result(join) - } else throw new NoSuchElementException - } - } - } - } -} - -/** - * Function for comparing boundary values. - */ -private[execution] abstract class BoundOrdering { - def compare(inputRow: InternalRow, inputIndex: Int, outputRow: InternalRow, outputIndex: Int): Int -} - -/** - * Compare the input index to the bound of the output index. - */ -private[execution] final case class RowBoundOrdering(offset: Int) extends BoundOrdering { - override def compare( - inputRow: InternalRow, - inputIndex: Int, - outputRow: InternalRow, - outputIndex: Int): Int = - inputIndex - (outputIndex + offset) -} - -/** - * Compare the value of the input index to the value bound of the output index. - */ -private[execution] final case class RangeBoundOrdering( - ordering: Ordering[InternalRow], - current: Projection, - bound: Projection) extends BoundOrdering { - override def compare( - inputRow: InternalRow, - inputIndex: Int, - outputRow: InternalRow, - outputIndex: Int): Int = - ordering.compare(current(inputRow), bound(outputRow)) -} - -/** - * The interface of row buffer for a partition - */ -private[execution] abstract class RowBuffer { - - /** Number of rows. */ - def size(): Int - - /** Return next row in the buffer, null if no more left. */ - def next(): InternalRow - - /** Skip the next `n` rows. */ - def skip(n: Int): Unit - - /** Return a new RowBuffer that has the same rows. */ - def copy(): RowBuffer -} - -/** - * A row buffer based on ArrayBuffer (the number of rows is limited) - */ -private[execution] class ArrayRowBuffer(buffer: ArrayBuffer[UnsafeRow]) extends RowBuffer { - - private[this] var cursor: Int = -1 - - /** Number of rows. */ - def size(): Int = buffer.length - - /** Return next row in the buffer, null if no more left. */ - def next(): InternalRow = { - cursor += 1 - if (cursor < buffer.length) { - buffer(cursor) - } else { - null - } - } - - /** Skip the next `n` rows. */ - def skip(n: Int): Unit = { - cursor += n - } - - /** Return a new RowBuffer that has the same rows. 
*/ - def copy(): RowBuffer = { - new ArrayRowBuffer(buffer) - } -} - -/** - * An external buffer of rows based on UnsafeExternalSorter - */ -private[execution] class ExternalRowBuffer(sorter: UnsafeExternalSorter, numFields: Int) - extends RowBuffer { - - private[this] val iter: UnsafeSorterIterator = sorter.getIterator - - private[this] val currentRow = new UnsafeRow(numFields) - - /** Number of rows. */ - def size(): Int = iter.getNumRecords() - - /** Return next row in the buffer, null if no more left. */ - def next(): InternalRow = { - if (iter.hasNext) { - iter.loadNext() - currentRow.pointTo(iter.getBaseObject, iter.getBaseOffset, iter.getRecordLength) - currentRow - } else { - null - } - } - - /** Skip the next `n` rows. */ - def skip(n: Int): Unit = { - var i = 0 - while (i < n && iter.hasNext) { - iter.loadNext() - i += 1 - } - } - - /** Return a new RowBuffer that has the same rows. */ - def copy(): RowBuffer = { - new ExternalRowBuffer(sorter, numFields) - } -} - -/** - * A window function calculates the results of a number of window functions for a window frame. - * Before use a frame must be prepared by passing it all the rows in the current partition. After - * preparation the update method can be called to fill the output rows. - */ -private[execution] abstract class WindowFunctionFrame { - /** - * Prepare the frame for calculating the results for a partition. - * - * @param rows to calculate the frame results for. - */ - def prepare(rows: RowBuffer): Unit - - /** - * Write the current results to the target row. - */ - def write(index: Int, current: InternalRow): Unit -} - -/** - * The offset window frame calculates frames containing LEAD/LAG statements. - * - * @param target to write results to. - * @param expressions to shift a number of rows. - * @param inputSchema required for creating a projection. - * @param newMutableProjection function used to create the projection. - * @param offset by which rows get moved within a partition. - */ -private[execution] final class OffsetWindowFunctionFrame( - target: MutableRow, - ordinal: Int, - expressions: Array[Expression], - inputSchema: Seq[Attribute], - newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, - offset: Int) extends WindowFunctionFrame { - - /** Rows of the partition currently being processed. */ - private[this] var input: RowBuffer = null - - /** Index of the input row currently used for output. */ - private[this] var inputIndex = 0 - - /** Row used when there is no valid input. */ - private[this] val emptyRow = new GenericInternalRow(inputSchema.size) - - /** Row used to combine the offset and the current row. */ - private[this] val join = new JoinedRow - - /** Create the projection. */ - private[this] val projection = { - // Collect the expressions and bind them. - val inputAttrs = inputSchema.map(_.withNullability(true)) - val numInputAttributes = inputAttrs.size - val boundExpressions = Seq.fill(ordinal)(NoOp) ++ expressions.toSeq.map { - case e: OffsetWindowFunction => - val input = BindReferences.bindReference(e.input, inputAttrs) - if (e.default == null || e.default.foldable && e.default.eval() == null) { - // Without default value. - input - } else { - // With default value. - val default = BindReferences.bindReference(e.default, inputAttrs).transform { - // Shift the input reference to its default version. 
- case BoundReference(o, dataType, nullable) => - BoundReference(o + numInputAttributes, dataType, nullable) - } - org.apache.spark.sql.catalyst.expressions.Coalesce(input :: default :: Nil) - } - case e => - BindReferences.bindReference(e, inputAttrs) - } - - // Create the projection. - newMutableProjection(boundExpressions, Nil).target(target) - } - - override def prepare(rows: RowBuffer): Unit = { - input = rows - // drain the first few rows if offset is larger than zero - inputIndex = 0 - while (inputIndex < offset) { - input.next() - inputIndex += 1 - } - inputIndex = offset - } - - override def write(index: Int, current: InternalRow): Unit = { - if (inputIndex >= 0 && inputIndex < input.size) { - val r = input.next() - join(r, current) - } else { - join(emptyRow, current) - } - projection(join) - inputIndex += 1 - } -} - -/** - * The sliding window frame calculates frames with the following SQL form: - * ... BETWEEN 1 PRECEDING AND 1 FOLLOWING - * - * @param target to write results to. - * @param processor to calculate the row values with. - * @param lbound comparator used to identify the lower bound of an output row. - * @param ubound comparator used to identify the upper bound of an output row. - */ -private[execution] final class SlidingWindowFunctionFrame( - target: MutableRow, - processor: AggregateProcessor, - lbound: BoundOrdering, - ubound: BoundOrdering) extends WindowFunctionFrame { - - /** Rows of the partition currently being processed. */ - private[this] var input: RowBuffer = null - - /** The next row from `input`. */ - private[this] var nextRow: InternalRow = null - - /** The rows within current sliding window. */ - private[this] val buffer = new util.ArrayDeque[InternalRow]() - - /** - * Index of the first input row with a value greater than the upper bound of the current - * output row. - */ - private[this] var inputHighIndex = 0 - - /** - * Index of the first input row with a value equal to or greater than the lower bound of the - * current output row. - */ - private[this] var inputLowIndex = 0 - - /** Prepare the frame for calculating a new partition. Reset all variables. */ - override def prepare(rows: RowBuffer): Unit = { - input = rows - nextRow = rows.next() - inputHighIndex = 0 - inputLowIndex = 0 - buffer.clear() - } - - /** Write the frame columns for the current row to the given target row. */ - override def write(index: Int, current: InternalRow): Unit = { - var bufferUpdated = index == 0 - - // Add all rows to the buffer for which the input row value is equal to or less than - // the output row upper bound. - while (nextRow != null && ubound.compare(nextRow, inputHighIndex, current, index) <= 0) { - buffer.add(nextRow.copy()) - nextRow = input.next() - inputHighIndex += 1 - bufferUpdated = true - } - - // Drop all rows from the buffer for which the input row value is smaller than - // the output row lower bound. - while (!buffer.isEmpty && lbound.compare(buffer.peek(), inputLowIndex, current, index) < 0) { - buffer.remove() - inputLowIndex += 1 - bufferUpdated = true - } - - // Only recalculate and update when the buffer changes. - if (bufferUpdated) { - processor.initialize(input.size) - val iter = buffer.iterator() - while (iter.hasNext) { - processor.update(iter.next()) - } - processor.evaluate(target) - } - } -} - -/** - * The unbounded window frame calculates frames with the following SQL forms: - * ... (No Frame Definition) - * ... 
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - * - * Its results are the same for each and every row in the partition. This class can be seen as a - * special case of a sliding window, but is optimized for the unbounded case. - * - * @param target to write results to. - * @param processor to calculate the row values with. - */ -private[execution] final class UnboundedWindowFunctionFrame( - target: MutableRow, - processor: AggregateProcessor) extends WindowFunctionFrame { - - /** Prepare the frame for calculating a new partition. Process all rows eagerly. */ - override def prepare(rows: RowBuffer): Unit = { - val size = rows.size() - processor.initialize(size) - var i = 0 - while (i < size) { - processor.update(rows.next()) - i += 1 - } - } - - /** Write the frame columns for the current row to the given target row. */ - override def write(index: Int, current: InternalRow): Unit = { - // Unfortunately we cannot assume that evaluation is deterministic. So we need to re-evaluate - // for each row. - processor.evaluate(target) - } -} - -/** - * The UnboundedPreceding window frame calculates frames with the following SQL form: - * ... BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW - * - * There is only an upper bound. Very common use cases are, for instance, running sums or counts - * (row_number). Technically this is a special case of a sliding window. However, a sliding window - * has to maintain a buffer, and it must do a full evaluation every time the buffer changes. This - * is not the case when there is no lower bound: given the additive nature of most aggregates, - * streaming updates and partial evaluation suffice and no buffering is needed. - * - * @param target to write results to. - * @param processor to calculate the row values with. - * @param ubound comparator used to identify the upper bound of an output row. - */ -private[execution] final class UnboundedPrecedingWindowFunctionFrame( - target: MutableRow, - processor: AggregateProcessor, - ubound: BoundOrdering) extends WindowFunctionFrame { - - /** Rows of the partition currently being processed. */ - private[this] var input: RowBuffer = null - - /** The next row from `input`. */ - private[this] var nextRow: InternalRow = null - - /** - * Index of the first input row with a value greater than the upper bound of the current - * output row. - */ - private[this] var inputIndex = 0 - - /** Prepare the frame for calculating a new partition. */ - override def prepare(rows: RowBuffer): Unit = { - input = rows - nextRow = rows.next() - inputIndex = 0 - processor.initialize(input.size) - } - - /** Write the frame columns for the current row to the given target row. */ - override def write(index: Int, current: InternalRow): Unit = { - var bufferUpdated = index == 0 - - // Add all rows to the aggregates for which the input row value is equal to or less than - // the output row upper bound. - while (nextRow != null && ubound.compare(nextRow, inputIndex, current, index) <= 0) { - processor.update(nextRow) - nextRow = input.next() - inputIndex += 1 - bufferUpdated = true - } - - // Only recalculate and update when the buffer changes. - if (bufferUpdated) { - processor.evaluate(target) - } - } -} - -/** - * The UnboundedFollowing window frame calculates frames with the following SQL form: - * ... BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - * - * There is only a lower bound. This is a slightly modified version of the sliding window.
The - * sliding window operator has to check if both the upper and the lower bound change when a new row - * gets processed, whereas the unbounded following only has to check the lower bound. - * - * This is a very expensive operator to use, O(n * (n - 1) / 2), because we need to maintain a - * buffer and must do a full recalculation after each row. Reverse iteration would be possible if - * the commutativity of the used window functions could be guaranteed. - * - * @param target to write results to. - * @param processor to calculate the row values with. - * @param lbound comparator used to identify the lower bound of an output row. - */ -private[execution] final class UnboundedFollowingWindowFunctionFrame( - target: MutableRow, - processor: AggregateProcessor, - lbound: BoundOrdering) extends WindowFunctionFrame { - - /** Rows of the partition currently being processed. */ - private[this] var input: RowBuffer = null - - /** - * Index of the first input row with a value equal to or greater than the lower bound of the - * current output row. - */ - private[this] var inputIndex = 0 - - /** Prepare the frame for calculating a new partition. */ - override def prepare(rows: RowBuffer): Unit = { - input = rows - inputIndex = 0 - } - - /** Write the frame columns for the current row to the given target row. */ - override def write(index: Int, current: InternalRow): Unit = { - var bufferUpdated = index == 0 - - // Duplicate the input to have a new iterator. - val tmp = input.copy() - - // Drop all rows from the buffer for which the input row value is smaller than - // the output row lower bound. - tmp.skip(inputIndex) - var nextRow = tmp.next() - while (nextRow != null && lbound.compare(nextRow, inputIndex, current, index) < 0) { - nextRow = tmp.next() - inputIndex += 1 - bufferUpdated = true - } - - // Only recalculate and update when the buffer changes. - if (bufferUpdated) { - processor.initialize(input.size) - while (nextRow != null) { - processor.update(nextRow) - nextRow = tmp.next() - } - processor.evaluate(target) - } - } -} - -/** - * This class prepares and manages the processing of a number of [[AggregateFunction]]s within a - * single frame. The [[WindowFunctionFrame]] takes care of processing the frame in the correct way; - * this reduces the processing of a [[AggregateWindowFunction]] to processing the underlying - * [[AggregateFunction]]. All [[AggregateFunction]]s are processed in [[Complete]] mode. - * - * [[SizeBasedWindowFunction]]s are initialized in a slightly different way. These functions - * require the size of the partition being processed; this value is exposed to them when the processor is - * constructed. - * - * Processing of distinct aggregates is currently not supported. - * - * The implementation is split into an object, which takes care of construction, and the actual - * processor class. - */ -private[execution] object AggregateProcessor { - def apply( - functions: Array[Expression], - ordinal: Int, - inputAttributes: Seq[Attribute], - newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection): - AggregateProcessor = { - val aggBufferAttributes = mutable.Buffer.empty[AttributeReference] - val initialValues = mutable.Buffer.empty[Expression] - val updateExpressions = mutable.Buffer.empty[Expression] - val evaluateExpressions = mutable.Buffer.fill[Expression](ordinal)(NoOp) - val imperatives = mutable.Buffer.empty[ImperativeAggregate] - - // SPARK-14244: `SizeBasedWindowFunction`s are first created on the driver side and then - // serialized to the executor side.
These functions all reference a global singleton window - // partition size attribute reference, i.e., `SizeBasedWindowFunction.n`. Here we must collect - // the singleton instance created on the driver side instead of using the executor-side - // `SizeBasedWindowFunction.n`, to avoid binding failures caused by mismatched expression IDs. - val partitionSize: Option[AttributeReference] = { - val aggs = functions.flatMap(_.collectFirst { case f: SizeBasedWindowFunction => f }) - aggs.headOption.map(_.n) - } - - // Check if there are any SizeBasedWindowFunctions. If there are, we add the partition size to - // the aggregation buffer. Note that the ordinal of the partition size value will always be 0. - partitionSize.foreach { n => - aggBufferAttributes += n - initialValues += NoOp - updateExpressions += NoOp - } - - // Add each AggregateFunction to the AggregateProcessor. - functions.foreach { - case agg: DeclarativeAggregate => - aggBufferAttributes ++= agg.aggBufferAttributes - initialValues ++= agg.initialValues - updateExpressions ++= agg.updateExpressions - evaluateExpressions += agg.evaluateExpression - case agg: ImperativeAggregate => - val offset = aggBufferAttributes.size - val imperative = BindReferences.bindReference(agg - .withNewInputAggBufferOffset(offset) - .withNewMutableAggBufferOffset(offset), - inputAttributes) - imperatives += imperative - aggBufferAttributes ++= imperative.aggBufferAttributes - val noOps = Seq.fill(imperative.aggBufferAttributes.size)(NoOp) - initialValues ++= noOps - updateExpressions ++= noOps - evaluateExpressions += imperative - case other => - sys.error(s"Unsupported Aggregate Function: $other") - } - - // Create the projections. - val initialProjection = newMutableProjection( - initialValues, - partitionSize.toSeq) - val updateProjection = newMutableProjection( - updateExpressions, - aggBufferAttributes ++ inputAttributes) - val evaluateProjection = newMutableProjection( - evaluateExpressions, - aggBufferAttributes) - - // Create the processor. - new AggregateProcessor( - aggBufferAttributes.toArray, - initialProjection, - updateProjection, - evaluateProjection, - imperatives.toArray, - partitionSize.isDefined) - } -} - -/** - * This class manages the processing of a number of aggregate functions. See the documentation of - * the object for more information. - */ -private[execution] final class AggregateProcessor( - private[this] val bufferSchema: Array[AttributeReference], - private[this] val initialProjection: MutableProjection, - private[this] val updateProjection: MutableProjection, - private[this] val evaluateProjection: MutableProjection, - private[this] val imperatives: Array[ImperativeAggregate], - private[this] val trackPartitionSize: Boolean) { - - private[this] val join = new JoinedRow - private[this] val numImperatives = imperatives.length - private[this] val buffer = new SpecificMutableRow(bufferSchema.toSeq.map(_.dataType)) - initialProjection.target(buffer) - updateProjection.target(buffer) - - /** Create the initial state. */ - def initialize(size: Int): Unit = { - // Some initialization expressions are dependent on the partition size, so we have to - // initialize the size before initializing all other fields, and we have to pass the buffer to - // the initialization projection. - if (trackPartitionSize) { - buffer.setInt(0, size) - } - initialProjection(buffer) - var i = 0 - while (i < numImperatives) { - imperatives(i).initialize(buffer) - i += 1 - } - } - - /** Update the buffer.
*/ - def update(input: InternalRow): Unit = { - updateProjection(join(buffer, input)) - var i = 0 - while (i < numImperatives) { - imperatives(i).update(buffer, input) - i += 1 - } - } - - /** Evaluate buffer. */ - def evaluate(target: MutableRow): Unit = - evaluateProjection.target(target)(buffer) -}
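Every frame class in this diff follows the same two-phase contract: prepare() is called once per partition with a buffer holding all of that partition's rows, and write() is then called once per output row, in partition order, to fill the frame's slice of the target row. The following sketch is not part of the deleted file; it illustrates that contract with a self-contained running-sum frame, using simplified stand-in types (Seq and Array[Any] instead of RowBuffer, InternalRow and MutableRow), and every name apart from prepare and write is hypothetical.

object FrameContractSketch {
  type Row = Array[Any]

  // Simplified stand-in for the WindowFunctionFrame contract shown in the diff above.
  trait SimpleFrame {
    def prepare(rows: Seq[Row]): Unit          // called once per partition
    def write(index: Int, current: Row): Unit  // called once per output row, in order
  }

  // Running-sum frame: analogous in spirit to a frame with an unbounded preceding
  // lower bound, where streaming updates suffice and no buffering is needed.
  final class RunningSumFrame(target: Row, ordinal: Int, column: Int) extends SimpleFrame {
    private var input: Seq[Row] = Nil
    private var inputIndex = 0
    private var sum = 0L

    override def prepare(rows: Seq[Row]): Unit = {
      input = rows
      inputIndex = 0
      sum = 0L
    }

    override def write(index: Int, current: Row): Unit = {
      // Fold in every input row up to and including the current output row,
      // then publish the running total into the target row at `ordinal`.
      while (inputIndex <= index && inputIndex < input.size) {
        sum += input(inputIndex)(column).asInstanceOf[Int]
        inputIndex += 1
      }
      target(ordinal) = sum
    }
  }

  def main(args: Array[String]): Unit = {
    val partition: Seq[Row] = Seq(Array[Any](1), Array[Any](2), Array[Any](3))
    val target: Row = new Array[Any](1)
    val frame = new RunningSumFrame(target, ordinal = 0, column = 0)
    frame.prepare(partition)
    partition.zipWithIndex.foreach { case (row, i) =>
      frame.write(i, row)
      println(s"row $i -> running sum = ${target(0)}")  // prints 1, 3, 6
    }
  }
}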
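For orientation, each frame class above corresponds to a SQL frame specification. The snippet below is likewise an illustration rather than part of this commit: it shows how those specifications are typically expressed through the public DataFrame API of this Spark generation, where WindowSpec.rowsBetween accepts Long.MinValue and Long.MaxValue as the unbounded markers. The DataFrame df and the columns k, t and x are hypothetical.

import org.apache.spark.sql.functions._
import org.apache.spark.sql.expressions.Window

val byKey = Window.partitionBy("k").orderBy("t")

// LEAD/LAG                                                  -> OffsetWindowFunctionFrame
val lagged = lag(col("x"), 1).over(byKey)

// ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING                  -> SlidingWindowFunctionFrame
val sliding = avg(col("x")).over(byKey.rowsBetween(-1, 1))

// ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING  -> UnboundedWindowFunctionFrame
val total = sum(col("x")).over(byKey.rowsBetween(Long.MinValue, Long.MaxValue))

// ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW          -> UnboundedPrecedingWindowFunctionFrame
val running = sum(col("x")).over(byKey.rowsBetween(Long.MinValue, 0))

// ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING          -> UnboundedFollowingWindowFunctionFrame
val remaining = sum(col("x")).over(byKey.rowsBetween(0, Long.MaxValue))

// df.select(col("k"), col("t"), lagged, sliding, total, running, remaining)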
