Repository: spark
Updated Branches:
  refs/heads/master 6e9c3ff1e -> d46f8e5d4
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala
index 3240079..01df189 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala
@@ -26,11 +26,13 @@ import scala.collection.mutable.ArrayBuffer
 import com.google.common.io.Files
 import org.apache.hadoop.fs.Path
 
+import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.Literal
 import org.apache.spark.sql.sources.PartitioningUtils._
 import org.apache.spark.sql.sources.{LogicalRelation, Partition, PartitionSpec}
 import org.apache.spark.sql.types._
-import org.apache.spark.sql.{Column, QueryTest, Row, SQLContext}
+import org.apache.spark.sql._
+import org.apache.spark.unsafe.types.UTF8String
 
 // The data where the partitioning key exists only in the directory structure.
 case class ParquetData(intField: Int, stringField: String)
@@ -114,7 +116,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
       StructType(Seq(
         StructField("a", IntegerType),
         StructField("b", StringType))),
-      Seq(Partition(Row(10, "hello"), "hdfs://host:9000/path/a=10/b=hello"))))
+      Seq(Partition(InternalRow(10, UTF8String.fromString("hello")),
+        "hdfs://host:9000/path/a=10/b=hello"))))
 
     check(Seq(
       "hdfs://host:9000/path/a=10/b=20",
@@ -124,8 +127,10 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
         StructField("a", DoubleType),
         StructField("b", StringType))),
       Seq(
-        Partition(Row(10, "20"), "hdfs://host:9000/path/a=10/b=20"),
-        Partition(Row(10.5, "hello"), "hdfs://host:9000/path/a=10.5/b=hello"))))
+        Partition(InternalRow(10, UTF8String.fromString("20")),
+          "hdfs://host:9000/path/a=10/b=20"),
+        Partition(InternalRow(10.5, UTF8String.fromString("hello")),
+          "hdfs://host:9000/path/a=10.5/b=hello"))))
 
     check(Seq(
       "hdfs://host:9000/path/_temporary",
@@ -143,8 +148,10 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
         StructField("a", DoubleType),
         StructField("b", StringType))),
       Seq(
-        Partition(Row(10, "20"), "hdfs://host:9000/path/a=10/b=20"),
-        Partition(Row(10.5, "hello"), "hdfs://host:9000/path/a=10.5/b=hello"))))
+        Partition(InternalRow(10, UTF8String.fromString("20")),
+          "hdfs://host:9000/path/a=10/b=20"),
+        Partition(InternalRow(10.5, UTF8String.fromString("hello")),
+          "hdfs://host:9000/path/a=10.5/b=hello"))))
 
     check(Seq(
       s"hdfs://host:9000/path/a=10/b=20",
@@ -154,8 +161,10 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
         StructField("a", IntegerType),
         StructField("b", StringType))),
       Seq(
-        Partition(Row(10, "20"), s"hdfs://host:9000/path/a=10/b=20"),
-        Partition(Row(null, "hello"), s"hdfs://host:9000/path/a=$defaultPartitionName/b=hello"))))
+        Partition(InternalRow(10, UTF8String.fromString("20")),
+          s"hdfs://host:9000/path/a=10/b=20"),
+        Partition(InternalRow(null, UTF8String.fromString("hello")),
+          s"hdfs://host:9000/path/a=$defaultPartitionName/b=hello"))))
 
     check(Seq(
       s"hdfs://host:9000/path/a=10/b=$defaultPartitionName",
@@ -165,8 +174,9 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
         StructField("a", DoubleType),
         StructField("b", StringType))),
       Seq(
-        Partition(Row(10, null), s"hdfs://host:9000/path/a=10/b=$defaultPartitionName"),
-        Partition(Row(10.5, null), s"hdfs://host:9000/path/a=10.5/b=$defaultPartitionName"))))
+        Partition(InternalRow(10, null), s"hdfs://host:9000/path/a=10/b=$defaultPartitionName"),
+        Partition(InternalRow(10.5, null),
+          s"hdfs://host:9000/path/a=10.5/b=$defaultPartitionName"))))
 
     check(Seq(
       s"hdfs://host:9000/path1",
@@ -185,7 +195,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
       StructType(Seq(
         StructField("a", StringType),
         StructField("b", StringType))),
-      Seq(Partition(Row("10", "hello"), "hdfs://host:9000/path/a=10/b=hello"))))
+      Seq(Partition(InternalRow(UTF8String.fromString("10"), UTF8String.fromString("hello")),
+        "hdfs://host:9000/path/a=10/b=hello"))))
 
     check(Seq(
       "hdfs://host:9000/path/a=10/b=20",
@@ -195,8 +206,10 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
         StructField("a", StringType),
         StructField("b", StringType))),
       Seq(
-        Partition(Row("10", "20"), "hdfs://host:9000/path/a=10/b=20"),
-        Partition(Row("10.5", "hello"), "hdfs://host:9000/path/a=10.5/b=hello"))))
+        Partition(InternalRow(UTF8String.fromString("10"), UTF8String.fromString("20")),
+          "hdfs://host:9000/path/a=10/b=20"),
+        Partition(InternalRow(UTF8String.fromString("10.5"), UTF8String.fromString("hello")),
+          "hdfs://host:9000/path/a=10.5/b=hello"))))
 
     check(Seq(
       "hdfs://host:9000/path/_temporary",
@@ -214,8 +227,10 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
         StructField("a", StringType),
         StructField("b", StringType))),
       Seq(
-        Partition(Row("10", "20"), "hdfs://host:9000/path/a=10/b=20"),
-        Partition(Row("10.5", "hello"), "hdfs://host:9000/path/a=10.5/b=hello"))))
+        Partition(InternalRow(UTF8String.fromString("10"), UTF8String.fromString("20")),
+          "hdfs://host:9000/path/a=10/b=20"),
+        Partition(InternalRow(UTF8String.fromString("10.5"), UTF8String.fromString("hello")),
+          "hdfs://host:9000/path/a=10.5/b=hello"))))
 
     check(Seq(
       s"hdfs://host:9000/path/a=10/b=20",
@@ -225,8 +240,10 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
         StructField("a", StringType),
         StructField("b", StringType))),
       Seq(
-        Partition(Row("10", "20"), s"hdfs://host:9000/path/a=10/b=20"),
-        Partition(Row(null, "hello"), s"hdfs://host:9000/path/a=$defaultPartitionName/b=hello"))))
+        Partition(InternalRow(UTF8String.fromString("10"), UTF8String.fromString("20")),
+          s"hdfs://host:9000/path/a=10/b=20"),
+        Partition(InternalRow(null, UTF8String.fromString("hello")),
+          s"hdfs://host:9000/path/a=$defaultPartitionName/b=hello"))))
 
     check(Seq(
       s"hdfs://host:9000/path/a=10/b=$defaultPartitionName",
@@ -236,8 +253,10 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
         StructField("a", StringType),
         StructField("b", StringType))),
       Seq(
-        Partition(Row("10", null), s"hdfs://host:9000/path/a=10/b=$defaultPartitionName"),
-        Partition(Row("10.5", null), s"hdfs://host:9000/path/a=10.5/b=$defaultPartitionName"))))
+        Partition(InternalRow(UTF8String.fromString("10"), null),
+          s"hdfs://host:9000/path/a=10/b=$defaultPartitionName"),
+        Partition(InternalRow(UTF8String.fromString("10.5"), null),
+          s"hdfs://host:9000/path/a=10.5/b=$defaultPartitionName"))))
 
     check(Seq(
       s"hdfs://host:9000/path1",
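A note on the pattern repeated throughout this suite: expected partition values move from the external Row type to catalyst's InternalRow, whose string fields are UTF8String rather than java.lang.String. A minimal sketch of the two representations side by side (values are illustrative, assuming the 1.5-era catalyst API shown in this diff):

    import org.apache.spark.sql.Row                   // external: JVM-native values
    import org.apache.spark.sql.catalyst.InternalRow  // internal: catalyst values
    import org.apache.spark.unsafe.types.UTF8String

    // External row, as user code sees it: strings stay java.lang.String.
    val external: Row = Row(10, "hello")

    // Internal row, as the planner and partition discovery now see it:
    // strings are stored as UTF8String.
    val internal: InternalRow = InternalRow(10, UTF8String.fromString("hello"))
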
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
index de0107a..be3b34d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
@@ -20,15 +20,13 @@ package org.apache.spark.sql.parquet
 import org.scalatest.BeforeAndAfterAll
 
 import org.apache.spark.sql.types._
-import org.apache.spark.sql.{SQLConf, QueryTest}
-import org.apache.spark.sql.catalyst.expressions.Row
+import org.apache.spark.sql.{QueryTest, Row, SQLConf}
 
 /**
  * A test suite that tests various Parquet queries.
  */
 class ParquetQuerySuiteBase extends QueryTest with ParquetTest {
   lazy val sqlContext = org.apache.spark.sql.test.TestSQLContext
-  import sqlContext.implicits._
   import sqlContext.sql
 
   test("simple select queries") {

http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
index 51d22b6..5fc53f7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
@@ -19,7 +19,9 @@ package org.apache.spark.sql.sources
 
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql._
+import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
 
 class DDLScanSource extends RelationProvider {
   override def createRelation(
@@ -56,9 +58,12 @@ case class SimpleDDLScan(from: Int, to: Int, table: String)(@transient val sqlCo
       )
     ))
 
+  override def needConversion: Boolean = false
+
   override def buildScan(): RDD[Row] = {
-    sqlContext.sparkContext.parallelize(from to to).map(e => Row(s"people$e", e * 2))
+    sqlContext.sparkContext.parallelize(from to to).map { e =>
+      InternalRow(UTF8String.fromString(s"people$e"), e * 2)
+    }
   }
 }
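The `needConversion` override is what makes the InternalRow-returning buildScan legal: when a BaseRelation reports false, Spark skips the external-to-internal conversion pass and treats the scan's rows as already being in the internal format (the RDD[Row] signature is kept only for source compatibility; at this stage of the refactoring InternalRow is still a subtype of Row, which is why the code above compiles). A hedged sketch of a relation using this contract, modeled on SimpleDDLScan (class and field names are hypothetical):

    import org.apache.spark.rdd.RDD
    import org.apache.spark.sql.{Row, SQLContext}
    import org.apache.spark.sql.catalyst.InternalRow
    import org.apache.spark.sql.sources.{BaseRelation, TableScan}
    import org.apache.spark.sql.types._
    import org.apache.spark.unsafe.types.UTF8String

    case class ExampleInternalScan(sqlContext: SQLContext) extends BaseRelation with TableScan {
      override def schema: StructType =
        StructType(Seq(StructField("name", StringType), StructField("age", IntegerType)))

      // Tell Spark the rows below are already internal, so no
      // Row -> InternalRow conversion step is inserted by the planner.
      override def needConversion: Boolean = false

      override def buildScan(): RDD[Row] = {
        sqlContext.sparkContext.parallelize(1 to 3).map { i =>
          // Typed RDD[Row] for API compatibility, but each element is an InternalRow.
          InternalRow(UTF8String.fromString(s"name$i"), i)
        }
      }
    }
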
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala
index 5d4ecd8..4a4d8d2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala
@@ -19,9 +19,13 @@ package org.apache.spark.sql.sources
 
 import java.sql.{Timestamp, Date}
 
+
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql._
+import org.apache.spark.sql.catalyst.util.DateUtils
+import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
 
 class DefaultSource extends SimpleScanSource
 
@@ -60,10 +64,12 @@ case class AllDataTypesScan(
 
   override def schema: StructType = userSpecifiedSchema
 
+  override def needConversion: Boolean = false
+
   override def buildScan(): RDD[Row] = {
     sqlContext.sparkContext.parallelize(from to to).map { i =>
-      Row(
-        s"str_$i",
+      InternalRow(
+        UTF8String.fromString(s"str_$i"),
         s"str_$i".getBytes(),
         i % 2 == 0,
         i.toByte,
@@ -72,17 +78,18 @@ case class AllDataTypesScan(
         i.toLong,
         i.toFloat,
         i.toDouble,
-        new java.math.BigDecimal(i),
-        new java.math.BigDecimal(i),
-        new Date(1970, 1, 1),
-        new Timestamp(20000 + i),
-        s"varchar_$i",
+        Decimal(new java.math.BigDecimal(i)),
+        Decimal(new java.math.BigDecimal(i)),
+        DateUtils.fromJavaDate(new Date(1970, 1, 1)),
+        DateUtils.fromJavaTimestamp(new Timestamp(20000 + i)),
+        UTF8String.fromString(s"varchar_$i"),
         Seq(i, i + 1),
-        Seq(Map(s"str_$i" -> Row(i.toLong))),
+        Seq(Map(UTF8String.fromString(s"str_$i") -> InternalRow(i.toLong))),
         Map(i -> i.toString),
-        Map(Map(s"str_$i" -> i.toFloat) -> Row(i.toLong)),
+        Map(Map(UTF8String.fromString(s"str_$i") -> i.toFloat) -> InternalRow(i.toLong)),
         Row(i, i.toString),
-        Row(Seq(s"str_$i", s"str_${i + 1}"), Row(Seq(new Date(1970, 1, i + 1)))))
+        Row(Seq(UTF8String.fromString(s"str_$i"), UTF8String.fromString(s"str_${i + 1}")),
+          InternalRow(Seq(DateUtils.fromJavaDate(new Date(1970, 1, i + 1))))))
     }
   }
 }
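The rewritten scan doubles as a compact map of the external-to-internal value encodings: java.lang.String becomes UTF8String, java.math.BigDecimal becomes Decimal, java.sql.Date becomes an epoch-based day count, and java.sql.Timestamp becomes a primitive long. A few of the conversions in isolation (a sketch, assuming the 1.5-era DateUtils shown above; the deprecated Date constructor is reused from the diff itself):

    import java.sql.{Date, Timestamp}
    import org.apache.spark.sql.catalyst.util.DateUtils
    import org.apache.spark.sql.types.Decimal
    import org.apache.spark.unsafe.types.UTF8String

    // External (user-facing) value               internal (catalyst) value
    val s = UTF8String.fromString("str_1")            // String     -> UTF8String
    val d = Decimal(new java.math.BigDecimal(1))      // BigDecimal -> Decimal
    val day = DateUtils.fromJavaDate(new Date(1970, 1, 1))       // Date -> day count (Int)
    val ts = DateUtils.fromJavaTimestamp(new Timestamp(20001))   // Timestamp -> primitive long
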
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index fd01a87..d4f1ae8 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -366,7 +366,7 @@ private[hive] trait HiveInspectors {
     (o: Any) => {
       if (o != null) {
         val struct = soi.create()
-        (soi.getAllStructFieldRefs, wrappers, o.asInstanceOf[Row].toSeq).zipped.foreach {
+        (soi.getAllStructFieldRefs, wrappers, o.asInstanceOf[InternalRow].toSeq).zipped.foreach {
           (field, wrapper, data) => soi.setStructFieldData(struct, field, wrapper(data))
         }
         struct
@@ -474,7 +474,7 @@ private[hive] trait HiveInspectors {
       }
     case x: SettableStructObjectInspector =>
       val fieldRefs = x.getAllStructFieldRefs
-      val row = a.asInstanceOf[Row]
+      val row = a.asInstanceOf[InternalRow]
       // 1. create the pojo (most likely) object
       val result = x.create()
       var i = 0
@@ -490,7 +490,7 @@ private[hive] trait HiveInspectors {
       result
     case x: StructObjectInspector =>
       val fieldRefs = x.getAllStructFieldRefs
-      val row = a.asInstanceOf[Row]
+      val row = a.asInstanceOf[InternalRow]
       val result = new java.util.ArrayList[AnyRef](fieldRefs.length)
       var i = 0
       while (i < fieldRefs.length) {
@@ -517,7 +517,7 @@ private[hive] trait HiveInspectors {
   }
 
   def wrap(
-      row: Row,
+      row: InternalRow,
       inspectors: Seq[ObjectInspector],
       cache: Array[AnyRef]): Array[AnyRef] = {
     var i = 0

http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 5a4651a..619ef63 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -302,7 +302,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
     val partitionColumnDataTypes = partitionSchema.map(_.dataType)
     val partitions = metastoreRelation.hiveQlPartitions.map { p =>
       val location = p.getLocation
-      val values = Row.fromSeq(p.getValues.zip(partitionColumnDataTypes).map {
+      val values = InternalRow.fromSeq(p.getValues.zip(partitionColumnDataTypes).map {
        case (rawValue, dataType) => Cast(Literal(rawValue), dataType).eval(null)
      })
      ParquetPartition(values, location)
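Since metastore partition values arrive as raw strings, the catalog folds them straight into an InternalRow by evaluating a Cast with no input row. The idiom, exactly as used above (the raw value here is a hypothetical stand-in):

    import org.apache.spark.sql.catalyst.InternalRow
    import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
    import org.apache.spark.sql.types.IntegerType

    // eval(null) is safe because the expression tree contains no
    // attribute references, only a literal under a cast.
    val rawValue = "42"
    val casted = Cast(Literal(rawValue), IntegerType).eval(null)
    val values = InternalRow.fromSeq(Seq(casted))
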
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
index c6b6510..452b7f0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.CatalystTypeConverters
 import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
 import org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate
-import org.apache.spark.sql.catalyst.expressions.{Row, _}
+import org.apache.spark.sql.catalyst.expressions.{InternalRow, _}
 import org.apache.spark.sql.catalyst.planning._
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
@@ -137,7 +137,7 @@ private[hive] trait HiveStrategies {
           val partitionLocations = partitions.map(_.getLocation)
 
           if (partitionLocations.isEmpty) {
-            PhysicalRDD(plan.output, sparkContext.emptyRDD[Row]) :: Nil
+            PhysicalRDD(plan.output, sparkContext.emptyRDD[InternalRow]) :: Nil
           } else {
             hiveContext
               .read.parquet(partitionLocations: _*)
@@ -165,7 +165,7 @@ private[hive] trait HiveStrategies {
             // TODO: Remove this hack for Spark 1.3.
             case iae: java.lang.IllegalArgumentException
                 if iae.getMessage.contains("Can not create a Path from an empty string") =>
-              PhysicalRDD(plan.output, sparkContext.emptyRDD[Row]) :: Nil
+              PhysicalRDD(plan.output, sparkContext.emptyRDD[InternalRow]) :: Nil
           }
         case _ => Nil
       }
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index d3c82d8..4858103 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -41,9 +41,9 @@ import org.apache.spark.util.Utils
  * A trait for subclasses that handle table scans.
  */
 private[hive] sealed trait TableReader {
-  def makeRDDForTable(hiveTable: HiveTable): RDD[Row]
+  def makeRDDForTable(hiveTable: HiveTable): RDD[InternalRow]
 
-  def makeRDDForPartitionedTable(partitions: Seq[HivePartition]): RDD[Row]
+  def makeRDDForPartitionedTable(partitions: Seq[HivePartition]): RDD[InternalRow]
 }
 
@@ -74,7 +74,7 @@ class HadoopTableReader(
   private val _broadcastedHiveConf =
     sc.sparkContext.broadcast(new SerializableWritable(hiveExtraConf))
 
-  override def makeRDDForTable(hiveTable: HiveTable): RDD[Row] =
+  override def makeRDDForTable(hiveTable: HiveTable): RDD[InternalRow] =
     makeRDDForTable(
       hiveTable,
       Class.forName(
@@ -94,7 +94,7 @@ class HadoopTableReader(
   def makeRDDForTable(
       hiveTable: HiveTable,
       deserializerClass: Class[_ <: Deserializer],
-      filterOpt: Option[PathFilter]): RDD[Row] = {
+      filterOpt: Option[PathFilter]): RDD[InternalRow] = {
 
     assert(!hiveTable.isPartitioned, """makeRDDForTable() cannot be called on a partitioned table,
       since input formats may differ across partitions. Use makeRDDForTablePartitions() instead.""")
@@ -125,7 +125,7 @@ class HadoopTableReader(
     deserializedHadoopRDD
   }
 
-  override def makeRDDForPartitionedTable(partitions: Seq[HivePartition]): RDD[Row] = {
+  override def makeRDDForPartitionedTable(partitions: Seq[HivePartition]): RDD[InternalRow] = {
     val partitionToDeserializer = partitions.map(part =>
       (part, part.getDeserializer.getClass.asInstanceOf[Class[Deserializer]])).toMap
     makeRDDForPartitionedTable(partitionToDeserializer, filterOpt = None)
@@ -144,7 +144,7 @@ class HadoopTableReader(
   def makeRDDForPartitionedTable(
       partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]],
-      filterOpt: Option[PathFilter]): RDD[Row] = {
+      filterOpt: Option[PathFilter]): RDD[InternalRow] = {
 
     // SPARK-5068:get FileStatus and do the filtering locally when the path is not exists
     def verifyPartitionPath(
@@ -243,7 +243,7 @@ class HadoopTableReader(
 
     // Even if we don't use any partitions, we still need an empty RDD
     if (hivePartitionRDDs.size == 0) {
-      new EmptyRDD[Row](sc.sparkContext)
+      new EmptyRDD[InternalRow](sc.sparkContext)
     } else {
       new UnionRDD(hivePartitionRDDs(0).context, hivePartitionRDDs)
     }
@@ -319,7 +319,7 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging {
       rawDeser: Deserializer,
       nonPartitionKeyAttrs: Seq[(Attribute, Int)],
       mutableRow: MutableRow,
-      tableDeser: Deserializer): Iterator[Row] = {
+      tableDeser: Deserializer): Iterator[InternalRow] = {
 
     val soi = if (rawDeser.getObjectInspector.equals(tableDeser.getObjectInspector)) {
       rawDeser.getObjectInspector.asInstanceOf[StructObjectInspector]
@@ -391,7 +391,7 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging {
         i += 1
       }
 
-      mutableRow: Row
+      mutableRow: InternalRow
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
index 87c36a8..0e4a242 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.sql.{AnalysisException, SQLContext}
-import org.apache.spark.sql.catalyst.expressions.Row
+import org.apache.spark.sql.catalyst.expressions.InternalRow
 import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan}
 import org.apache.spark.sql.execution.RunnableCommand
 import org.apache.spark.sql.hive.client.{HiveTable, HiveColumn}
@@ -42,7 +42,7 @@ case class CreateTableAsSelect(
   def database: String = tableDesc.database
   def tableName: String = tableDesc.name
 
-  override def run(sqlContext: SQLContext): Seq[Row] = {
+  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
     lazy val metastoreRelation: MetastoreRelation = {
       import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
@@ -89,7 +89,7 @@ case class CreateTableAsSelect(
       hiveContext.executePlan(InsertIntoTable(metastoreRelation, Map(), query, true, false)).toRdd
     }
 
-    Seq.empty[Row]
+    Seq.empty[InternalRow]
   }
 
   override def argString: String = {
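The `mutableRow: InternalRow` type ascription at the end of the fill loop above is what fixes the iterator's element type: a single mutable row is mutated in place and repeatedly returned as an InternalRow. A condensed sketch of that reuse pattern (field-setting logic elided to one column; names are hypothetical):

    import org.apache.spark.sql.catalyst.InternalRow
    import org.apache.spark.sql.catalyst.expressions.SpecificMutableRow
    import org.apache.spark.sql.types.IntegerType

    def fillRows(ints: Iterator[Int]): Iterator[InternalRow] = {
      // One mutable buffer row shared across the whole iterator.
      val mutableRow = new SpecificMutableRow(Seq(IntegerType))
      ints.map { v =>
        mutableRow.setInt(0, v)
        // The ascription upcasts to InternalRow; callers must not cache
        // returned rows, since the buffer is reused on the next element.
        mutableRow: InternalRow
      }
    }
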
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala
index 6fce69b..a893810 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala
@@ -21,12 +21,10 @@ import scala.collection.JavaConversions._
 
 import org.apache.hadoop.hive.metastore.api.FieldSchema
 
-import org.apache.spark.annotation.DeveloperApi
-import org.apache.spark.sql.catalyst.expressions.{Attribute, Row}
-import org.apache.spark.sql.execution.{SparkPlan, RunnableCommand}
-import org.apache.spark.sql.hive.{HiveContext, MetastoreRelation}
-import org.apache.spark.sql.hive.HiveShim
 import org.apache.spark.sql.SQLContext
+import org.apache.spark.sql.catalyst.expressions.{Attribute, InternalRow}
+import org.apache.spark.sql.execution.RunnableCommand
+import org.apache.spark.sql.hive.MetastoreRelation
 
 /**
  * Implementation for "describe [extended] table".
@@ -37,7 +35,7 @@ case class DescribeHiveTableCommand(
     override val output: Seq[Attribute],
     isExtended: Boolean) extends RunnableCommand {
 
-  override def run(sqlContext: SQLContext): Seq[Row] = {
+  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
     // Trying to mimic the format of Hive's output. But not exactly the same.
     var results: Seq[(String, String, String)] = Nil
 
@@ -59,7 +57,7 @@ case class DescribeHiveTableCommand(
     }
 
     results.map { case (name, dataType, comment) =>
-      Row(name, dataType, comment)
+      InternalRow(name, dataType, comment)
     }
   }
 }
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala
index 60a9bb6..87f8e3f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala
@@ -1,34 +1,34 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.hive.execution
-
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Row}
-import org.apache.spark.sql.execution.RunnableCommand
-import org.apache.spark.sql.hive.HiveContext
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.sql.types.StringType
-
-private[hive]
-case class HiveNativeCommand(sql: String) extends RunnableCommand {
-
-  override def output: Seq[AttributeReference] =
-    Seq(AttributeReference("result", StringType, nullable = false)())
-
-  override def run(sqlContext: SQLContext): Seq[Row] =
-    sqlContext.asInstanceOf[HiveContext].runSqlHive(sql).map(Row(_))
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.execution
+
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, InternalRow}
+import org.apache.spark.sql.execution.RunnableCommand
+import org.apache.spark.sql.hive.HiveContext
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.sql.types.StringType
+
+private[hive]
+case class HiveNativeCommand(sql: String) extends RunnableCommand {
+
+  override def output: Seq[AttributeReference] =
+    Seq(AttributeReference("result", StringType, nullable = false)())
+
+  override def run(sqlContext: SQLContext): Seq[InternalRow] =
+    sqlContext.asInstanceOf[HiveContext].runSqlHive(sql).map(InternalRow(_))
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
index 11ee550..1f5e4af 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
@@ -129,7 +129,7 @@ case class HiveTableScan(
     }
   }
 
-  protected override def doExecute(): RDD[Row] = if (!relation.hiveQlTable.isPartitioned) {
+  protected override def doExecute(): RDD[InternalRow] = if (!relation.hiveQlTable.isPartitioned) {
     hadoopReader.makeRDDForTable(relation.hiveQlTable)
   } else {
     hadoopReader.makeRDDForPartitionedTable(prunePartitions(relation.hiveQlPartitions))

http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
index eeb4726..1d306c5 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
@@ -30,7 +30,8 @@ import org.apache.hadoop.hive.serde2.objectinspector._
 import org.apache.hadoop.mapred.{FileOutputFormat, JobConf}
 
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.catalyst.expressions.{Attribute, Row}
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.catalyst.expressions.{Attribute, InternalRow}
 import org.apache.spark.sql.execution.{UnaryNode, SparkPlan}
 import org.apache.spark.sql.hive.HiveShim.{ShimFileSinkDesc => FileSinkDesc}
 import org.apache.spark.sql.hive._
@@ -60,7 +61,7 @@ case class InsertIntoHiveTable(
   def output: Seq[Attribute] = child.output
 
   def saveAsHiveFile(
-      rdd: RDD[Row],
+      rdd: RDD[InternalRow],
       valueClass: Class[_],
       fileSinkConf: FileSinkDesc,
       conf: SerializableWritable[JobConf],
@@ -82,7 +83,7 @@ case class InsertIntoHiveTable(
     writerContainer.commitJob()
 
     // Note that this function is executed on executor side
-    def writeToFile(context: TaskContext, iterator: Iterator[Row]): Unit = {
+    def writeToFile(context: TaskContext, iterator: Iterator[InternalRow]): Unit = {
       val serializer = newSerializer(fileSinkConf.getTableInfo)
       val standardOI = ObjectInspectorUtils
         .getStandardObjectInspector(
@@ -119,7 +120,7 @@ case class InsertIntoHiveTable(
    *
    * Note: this is run once and then kept to avoid double insertions.
    */
-  protected[sql] lazy val sideEffectResult: Seq[Row] = {
+  protected[sql] lazy val sideEffectResult: Seq[InternalRow] = {
     // Have to pass the TableDesc object to RDD.mapPartitions and then instantiate new serializer
     // instances within the closure, since Serializer is not serializable while TableDesc is.
     val tableDesc = table.tableDesc
@@ -250,12 +251,13 @@ case class InsertIntoHiveTable(
     // however for now we return an empty list to simplify compatibility checks with hive, which
     // does not return anything for insert operations.
     // TODO: implement hive compatibility as rules.
-    Seq.empty[Row]
+    Seq.empty[InternalRow]
   }
 
-  override def executeCollect(): Array[Row] = sideEffectResult.toArray
+  override def executeCollect(): Array[Row] =
+    sideEffectResult.toArray
 
-  protected override def doExecute(): RDD[Row] = {
+  protected override def doExecute(): RDD[InternalRow] = {
     sqlContext.sparkContext.parallelize(sideEffectResult, 1)
   }
 }
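Note the asymmetry introduced here: sideEffectResult and doExecute now traffic in InternalRow, while executeCollect keeps the public Array[Row] signature. At this stage of the refactoring InternalRow still extends Row, so the toArray upcast compiles; the general way to cross the boundary is a converter such as CatalystTypeConverters, which this commit imports in HiveStrategies. A hedged sketch, assuming the 1.5-era converter API:

    import org.apache.spark.sql.Row
    import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
    import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
    import org.apache.spark.unsafe.types.UTF8String

    val schema = StructType(Seq(StructField("a", IntegerType), StructField("b", StringType)))
    val internal = InternalRow(1, UTF8String.fromString("x"))

    // Convert an internal row back to an external Row for user-facing results.
    val external = CatalystTypeConverters.convertToScala(internal, schema).asInstanceOf[Row]
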
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
index 28792db..9d8872a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
@@ -55,7 +55,7 @@ case class ScriptTransformation(
 
   override def otherCopyArgs: Seq[HiveContext] = sc :: Nil
 
-  protected override def doExecute(): RDD[Row] = {
+  protected override def doExecute(): RDD[InternalRow] = {
     child.execute().mapPartitions { iter =>
       val cmd = List("/bin/bash", "-c", script)
       val builder = new ProcessBuilder(cmd)
@@ -72,8 +72,8 @@ case class ScriptTransformation(
 
       val (outputSerde, outputSoi) = ioschema.initOutputSerDe(output)
 
-      val iterator: Iterator[Row] = new Iterator[Row] with HiveInspectors {
-        var cacheRow: Row = null
+      val iterator: Iterator[InternalRow] = new Iterator[InternalRow] with HiveInspectors {
+        var cacheRow: InternalRow = null
         var curLine: String = null
         var eof: Boolean = false
 
@@ -90,7 +90,7 @@ case class ScriptTransformation(
           }
         }
 
-        def deserialize(): Row = {
+        def deserialize(): InternalRow = {
           if (cacheRow != null) return cacheRow
 
           val mutableRow = new SpecificMutableRow(output.map(_.dataType))
@@ -120,7 +120,7 @@ case class ScriptTransformation(
           }
         }
 
-        override def next(): Row = {
+        override def next(): InternalRow = {
           if (!hasNext) {
             throw new NoSuchElementException
           }
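The transformed-output iterator above follows a look-ahead pattern: deserialize() parks one row in cacheRow so hasNext can answer without losing data, and next() hands it out. A stripped-down version of that control flow over plain strings (no SerDe; names are illustrative):

    import org.apache.spark.sql.catalyst.InternalRow
    import org.apache.spark.unsafe.types.UTF8String

    def rowsFrom(lines: Iterator[String]): Iterator[InternalRow] = new Iterator[InternalRow] {
      // The look-ahead slot, analogous to cacheRow above.
      private var cached: InternalRow = null

      override def hasNext: Boolean = cached != null || {
        if (lines.hasNext) { cached = InternalRow(UTF8String.fromString(lines.next())); true }
        else false
      }

      override def next(): InternalRow = {
        if (!hasNext) throw new NoSuchElementException
        val row = cached
        cached = null
        row
      }
    }
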
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
index 0ba94d7..195e575 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
@@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
 import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.sources._
 import org.apache.spark.sql.{SaveMode, DataFrame, SQLContext}
-import org.apache.spark.sql.catalyst.expressions.{Attribute, Row}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, InternalRow}
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.RunnableCommand
 import org.apache.spark.sql.hive.HiveContext
@@ -39,9 +39,9 @@ import org.apache.spark.util.Utils
 private[hive]
 case class AnalyzeTable(tableName: String) extends RunnableCommand {
 
-  override def run(sqlContext: SQLContext): Seq[Row] = {
+  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
     sqlContext.asInstanceOf[HiveContext].analyze(tableName)
-    Seq.empty[Row]
+    Seq.empty[InternalRow]
   }
 }
 
@@ -53,7 +53,7 @@ case class DropTable(
     tableName: String,
     ifExists: Boolean) extends RunnableCommand {
 
-  override def run(sqlContext: SQLContext): Seq[Row] = {
+  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
     val ifExistsClause = if (ifExists) "IF EXISTS " else ""
     try {
@@ -70,7 +70,7 @@ case class DropTable(
     hiveContext.invalidateTable(tableName)
     hiveContext.runSqlHive(s"DROP TABLE $ifExistsClause$tableName")
     hiveContext.catalog.unregisterTable(Seq(tableName))
-    Seq.empty[Row]
+    Seq.empty[InternalRow]
   }
 }
 
@@ -83,7 +83,7 @@ case class AddJar(path: String) extends RunnableCommand {
     schema.toAttributes
   }
 
-  override def run(sqlContext: SQLContext): Seq[Row] = {
+  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
     val currentClassLoader = Utils.getContextOrSparkClassLoader
 
@@ -99,18 +99,18 @@ case class AddJar(path: String) extends RunnableCommand {
     // Add jar to executors
     hiveContext.sparkContext.addJar(path)
 
-    Seq(Row(0))
+    Seq(InternalRow(0))
   }
 }
 
 private[hive]
 case class AddFile(path: String) extends RunnableCommand {
 
-  override def run(sqlContext: SQLContext): Seq[Row] = {
+  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
     hiveContext.runSqlHive(s"ADD FILE $path")
     hiveContext.sparkContext.addFile(path)
-    Seq.empty[Row]
+    Seq.empty[InternalRow]
   }
 }
 
@@ -123,12 +123,12 @@ case class CreateMetastoreDataSource(
     allowExisting: Boolean,
     managedIfNoPath: Boolean) extends RunnableCommand {
 
-  override def run(sqlContext: SQLContext): Seq[Row] = {
+  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
 
     if (hiveContext.catalog.tableExists(tableName :: Nil)) {
       if (allowExisting) {
-        return Seq.empty[Row]
+        return Seq.empty[InternalRow]
       } else {
         throw new AnalysisException(s"Table $tableName already exists.")
       }
@@ -151,7 +151,7 @@ case class CreateMetastoreDataSource(
       optionsWithPath,
       isExternal)
 
-    Seq.empty[Row]
+    Seq.empty[InternalRow]
   }
 }
 
@@ -164,7 +164,7 @@ case class CreateMetastoreDataSourceAsSelect(
     options: Map[String, String],
     query: LogicalPlan) extends RunnableCommand {
 
-  override def run(sqlContext: SQLContext): Seq[Row] = {
+  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
     var createMetastoreTable = false
     var isExternal = true
@@ -188,7 +188,7 @@ case class CreateMetastoreDataSourceAsSelect(
           s"Or, if you are using SQL CREATE TABLE, you need to drop $tableName first.")
       case SaveMode.Ignore =>
         // Since the table already exists and the save mode is Ignore, we will just return.
-        return Seq.empty[Row]
+        return Seq.empty[InternalRow]
       case SaveMode.Append =>
         // Check if the specified data source match the data source of the existing table.
         val resolved = ResolvedDataSource(
@@ -230,7 +230,7 @@ case class CreateMetastoreDataSourceAsSelect(
     val data = DataFrame(hiveContext, query)
     val df = existingSchema match {
       // If we are inserting into an existing table, just use the existing schema.
-      case Some(schema) => sqlContext.createDataFrame(data.queryExecution.toRdd, schema)
+      case Some(schema) => sqlContext.internalCreateDataFrame(data.queryExecution.toRdd, schema)
       case None => data
     }
 
@@ -253,6 +253,6 @@ case class CreateMetastoreDataSourceAsSelect(
     // Refresh the cache of the table in the catalog.
     hiveContext.refreshTable(tableName)
 
-    Seq.empty[Row]
+    Seq.empty[InternalRow]
   }
 }
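Two things change in commands.scala: every RunnableCommand.run now returns Seq[InternalRow], and re-wrapping an already-internal RDD (queryExecution.toRdd) uses internalCreateDataFrame rather than createDataFrame, which would apply the external-to-internal conversion a second time. A minimal command in the new style, modeled on AnalyzeTable above (the command itself is hypothetical):

    import org.apache.spark.sql.SQLContext
    import org.apache.spark.sql.catalyst.expressions.{Attribute, InternalRow}
    import org.apache.spark.sql.execution.RunnableCommand

    // A no-output command under the post-refactoring contract:
    // run() produces internal rows (here, none at all).
    case class NoopCommand() extends RunnableCommand {
      override def output: Seq[Attribute] = Seq.empty

      override def run(sqlContext: SQLContext): Seq[InternalRow] = {
        // ... side effect would go here ...
        Seq.empty[InternalRow]
      }
    }
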
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index a46ee9d..c40dd4e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -121,7 +121,7 @@ private[hive] case class HiveSimpleUdf(funcWrapper: HiveFunctionWrapper, childre
   protected lazy val cached: Array[AnyRef] = new Array[AnyRef](children.length)
 
   // TODO: Finish input output types.
-  override def eval(input: Row): Any = {
+  override def eval(input: InternalRow): Any = {
     unwrap(
       FunctionRegistry.invoke(method, function, conversionHelper
         .convertIfNecessary(wrap(children.map(c => c.eval(input)), arguments, cached): _*): _*),
@@ -178,7 +178,7 @@ private[hive] case class HiveGenericUdf(funcWrapper: HiveFunctionWrapper, childr
 
   lazy val dataType: DataType = inspectorToDataType(returnInspector)
 
-  override def eval(input: Row): Any = {
+  override def eval(input: InternalRow): Any = {
     returnInspector // Make sure initialized.
 
     var i = 0
@@ -345,7 +345,7 @@ private[hive] case class HiveWindowFunction(
 
   def nullable: Boolean = true
 
-  override def eval(input: Row): Any =
+  override def eval(input: InternalRow): Any =
     throw new TreeNodeException(this, s"No function to evaluate expression. type: ${this.nodeName}")
 
   @transient
@@ -369,7 +369,7 @@ private[hive] case class HiveWindowFunction(
     evaluator.reset(hiveEvaluatorBuffer)
   }
 
-  override def prepareInputParameters(input: Row): AnyRef = {
+  override def prepareInputParameters(input: InternalRow): AnyRef = {
     wrap(inputProjection(input), inputInspectors, new Array[AnyRef](children.length))
   }
   // Add input parameters for a single row.
@@ -512,7 +512,7 @@ private[hive] case class HiveGenericUdtf(
     field => (inspectorToDataType(field.getFieldObjectInspector), true)
   }
 
-  override def eval(input: Row): TraversableOnce[Row] = {
+  override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
     outputInspector // Make sure initialized.
 
     val inputProjection = new InterpretedProjection(children)
@@ -522,23 +522,23 @@ private[hive] case class HiveGenericUdtf(
   }
 
   protected class UDTFCollector extends Collector {
-    var collected = new ArrayBuffer[Row]
+    var collected = new ArrayBuffer[InternalRow]
 
     override def collect(input: java.lang.Object) {
       // We need to clone the input here because implementations of
       // GenericUDTF reuse the same object. Luckily they are always an array, so
       // it is easy to clone.
-      collected += unwrap(input, outputInspector).asInstanceOf[Row]
+      collected += unwrap(input, outputInspector).asInstanceOf[InternalRow]
     }
 
-    def collectRows(): Seq[Row] = {
+    def collectRows(): Seq[InternalRow] = {
       val toCollect = collected
-      collected = new ArrayBuffer[Row]
+      collected = new ArrayBuffer[InternalRow]
       toCollect
     }
   }
 
-  override def terminate(): TraversableOnce[Row] = {
+  override def terminate(): TraversableOnce[InternalRow] = {
     outputInspector // Make sure initialized.
     function.close()
     collector.collectRows()
@@ -578,7 +578,7 @@ private[hive] case class HiveUdafFunction(
 
   private val buffer = function.getNewAggregationBuffer
 
-  override def eval(input: Row): Any = unwrap(function.evaluate(buffer), returnInspector)
+  override def eval(input: InternalRow): Any = unwrap(function.evaluate(buffer), returnInspector)
 
   @transient
   val inputProjection = new InterpretedProjection(exprs)
 
@@ -586,7 +586,7 @@ private[hive] case class HiveUdafFunction(
   @transient
   protected lazy val cached = new Array[AnyRef](exprs.length)
 
-  def update(input: Row): Unit = {
+  def update(input: InternalRow): Unit = {
     val inputs = inputProjection(input)
     function.iterate(buffer, wrap(inputs, inspectors, cached))
   }
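All the Hive UDF adapters now evaluate against InternalRow, matching Expression.eval after the rename. The children of a Hive UDF are ordinary catalyst expressions, each evaluated against the incoming internal row exactly as in eval() above; in isolation (a sketch, not the real HiveSimpleUdf machinery):

    import org.apache.spark.sql.catalyst.InternalRow
    import org.apache.spark.sql.catalyst.expressions.{BoundReference, Expression}
    import org.apache.spark.sql.types.IntegerType
    import org.apache.spark.unsafe.types.UTF8String

    // Evaluate child expressions against an InternalRow, as the UDF
    // adapters do before handing arguments to Hive.
    val input: InternalRow = InternalRow(7, UTF8String.fromString("seven"))
    val children: Seq[Expression] = Seq(BoundReference(0, IntegerType, nullable = false))
    val args: Seq[Any] = children.map(c => c.eval(input))  // Seq(7)
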
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
index df137e7..aff0456 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
@@ -28,8 +28,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 import org.apache.hadoop.io.LongWritable
 
 import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.catalyst.expressions.{Literal, Row}
+import org.apache.spark.sql.catalyst.expressions.{Literal, InternalRow}
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.Row
 
 class HiveInspectorSuite extends SparkFunSuite with HiveInspectors {
   test("Test wrap SettableStructObjectInspector") {
@@ -45,7 +46,7 @@ class HiveInspectorSuite extends SparkFunSuite with HiveInspectors {
       classOf[UDAFPercentile.State],
       ObjectInspectorOptions.JAVA).asInstanceOf[StructObjectInspector]
 
-    val a = unwrap(state, soi).asInstanceOf[Row]
+    val a = unwrap(state, soi).asInstanceOf[InternalRow]
     val b = wrap(a, soi).asInstanceOf[UDAFPercentile.State]
 
     val sfCounts = soi.getStructFieldRef("counts")
@@ -127,7 +128,7 @@ class HiveInspectorSuite extends SparkFunSuite with HiveInspectors {
     }
   }
 
-  def checkValues(row1: Seq[Any], row2: Row): Unit = {
+  def checkValues(row1: Seq[Any], row2: InternalRow): Unit = {
     row1.zip(row2.toSeq).foreach { case (r1, r2) =>
       checkValue(r1, r2)
     }
@@ -203,7 +204,7 @@ class HiveInspectorSuite extends SparkFunSuite with HiveInspectors {
     })
 
     checkValues(row,
-      unwrap(wrap(Row.fromSeq(row), toInspector(dt)), toInspector(dt)).asInstanceOf[Row])
+      unwrap(wrap(Row.fromSeq(row), toInspector(dt)), toInspector(dt)).asInstanceOf[InternalRow])
     checkValue(null, unwrap(wrap(null, toInspector(dt)), toInspector(dt)))
   }
http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
index 5a5ea10..a0d80dc 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
@@ -17,10 +17,9 @@
 
 package org.apache.spark.sql.hive
 
-import org.apache.spark.sql.catalyst.expressions.Row
 import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.parquet.ParquetTest
-import org.apache.spark.sql.{QueryTest, SQLConf}
+import org.apache.spark.sql.{QueryTest, Row, SQLConf}
 
 case class Cases(lower: String, UPPER: String)

http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala
index 0e63d84..8707f9f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala
@@ -21,7 +21,7 @@ import java.io.File
 
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.expressions.Row
+import org.apache.spark.sql.catalyst.expressions.InternalRow
 import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.hive.test.TestHive.implicits._

http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
index b384fb3..267d22c 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
@@ -25,7 +25,7 @@ import org.scalatest.BeforeAndAfterAll
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.expressions.Row
+import org.apache.spark.sql.catalyst.expressions.InternalRow
 import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.hive.test.TestHive.implicits._

http://git-wip-us.apache.org/repos/asf/spark/blob/d46f8e5d/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
index e62ac90..3864349 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
@@ -21,8 +21,6 @@ import java.io.File
 
 import org.scalatest.BeforeAndAfterAll
 
-import org.apache.spark.sql.catalyst.expressions.Row
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.{ExecutedCommand, PhysicalRDD}
 import org.apache.spark.sql.hive.execution.HiveTableScan
 import org.apache.spark.sql.hive.test.TestHive._
@@ -30,7 +28,7 @@ import org.apache.spark.sql.hive.test.TestHive.implicits._
 import org.apache.spark.sql.parquet.{ParquetRelation2, ParquetTableScan}
 import org.apache.spark.sql.sources.{InsertIntoDataSource, InsertIntoHadoopFsRelation, LogicalRelation}
 import org.apache.spark.sql.types._
-import org.apache.spark.sql.{DataFrame, QueryTest, SQLConf, SaveMode}
+import org.apache.spark.sql.{DataFrame, QueryTest, Row, SQLConf, SaveMode}
 import org.apache.spark.util.Utils
 
 // The data where the partitioning key exists only in the directory structure.
