This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 11be22b [SPARK-25713][SQL] implementing copy for ColumnArray 11be22b is described below commit 11be22bb5e4784578c2f9ec1b80b30b2cf0ac3c7 Author: ayudovin <a.yudovin6...@gmail.com> AuthorDate: Thu Jan 24 10:35:44 2019 +0800 [SPARK-25713][SQL] implementing copy for ColumnArray ## What changes were proposed in this pull request? Implement copy() for ColumnarArray ## How was this patch tested? Updating test case to existing tests in ColumnVectorSuite Closes #23569 from ayudovin/copy-for-columnArray. Authored-by: ayudovin <a.yudovin6...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../apache/spark/sql/vectorized/ColumnarArray.java | 22 +++++++++++++++++++++- .../execution/vectorized/ColumnVectorSuite.scala | 18 ++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java index dd2bd78..1471627 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java +++ b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java @@ -17,7 +17,9 @@ package org.apache.spark.sql.vectorized; import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData; import org.apache.spark.sql.catalyst.util.ArrayData; +import org.apache.spark.sql.catalyst.util.GenericArrayData; import org.apache.spark.sql.types.*; import org.apache.spark.unsafe.types.CalendarInterval; import org.apache.spark.unsafe.types.UTF8String; @@ -46,7 +48,25 @@ public final class ColumnarArray extends ArrayData { @Override public ArrayData copy() { - throw new UnsupportedOperationException(); + DataType dt = data.dataType(); + + if (dt instanceof BooleanType) { + return UnsafeArrayData.fromPrimitiveArray(toBooleanArray()); + } else if (dt instanceof ByteType) { + return UnsafeArrayData.fromPrimitiveArray(toByteArray()); + } else if (dt instanceof ShortType) { + return UnsafeArrayData.fromPrimitiveArray(toShortArray()); + } else if (dt instanceof IntegerType) { + return UnsafeArrayData.fromPrimitiveArray(toIntArray()); + } else if (dt instanceof LongType) { + return UnsafeArrayData.fromPrimitiveArray(toLongArray()); + } else if (dt instanceof FloatType) { + return UnsafeArrayData.fromPrimitiveArray(toFloatArray()); + } else if (dt instanceof DoubleType) { + return UnsafeArrayData.fromPrimitiveArray(toDoubleArray()); + } else { + return new GenericArrayData(toObjectArray(dt)); + } } @Override diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala index 2d1ad4b..866fcb1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala @@ -58,9 +58,11 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } val array = new ColumnarArray(testVector, 0, 10) + val arrayCopy = array.copy() (0 until 10).foreach { i => assert(array.get(i, BooleanType) === (i % 2 == 0)) + assert(arrayCopy.get(i, BooleanType) === (i % 2 == 0)) } } @@ -70,9 +72,11 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } val array = new ColumnarArray(testVector, 0, 10) + val arrayCopy = array.copy() (0 until 10).foreach { i => assert(array.get(i, ByteType) === i.toByte) + assert(arrayCopy.get(i, ByteType) === i.toByte) } } @@ -82,9 +86,11 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } val array = new ColumnarArray(testVector, 0, 10) + val arrayCopy = array.copy() (0 until 10).foreach { i => assert(array.get(i, ShortType) === i.toShort) + assert(arrayCopy.get(i, ShortType) === i.toShort) } } @@ -94,9 +100,11 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } val array = new ColumnarArray(testVector, 0, 10) + val arrayCopy = array.copy() (0 until 10).foreach { i => assert(array.get(i, IntegerType) === i) + assert(arrayCopy.get(i, IntegerType) === i) } } @@ -106,9 +114,11 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } val array = new ColumnarArray(testVector, 0, 10) + val arrayCopy = array.copy() (0 until 10).foreach { i => assert(array.get(i, LongType) === i) + assert(arrayCopy.get(i, LongType) === i) } } @@ -118,9 +128,11 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } val array = new ColumnarArray(testVector, 0, 10) + val arrayCopy = array.copy() (0 until 10).foreach { i => assert(array.get(i, FloatType) === i.toFloat) + assert(arrayCopy.get(i, FloatType) === i.toFloat) } } @@ -130,9 +142,11 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } val array = new ColumnarArray(testVector, 0, 10) + val arrayCopy = array.copy() (0 until 10).foreach { i => assert(array.get(i, DoubleType) === i.toDouble) + assert(arrayCopy.get(i, DoubleType) === i.toDouble) } } @@ -143,9 +157,11 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } val array = new ColumnarArray(testVector, 0, 10) + val arrayCopy = array.copy() (0 until 10).foreach { i => assert(array.get(i, StringType) === UTF8String.fromString(s"str$i")) + assert(arrayCopy.get(i, StringType) === UTF8String.fromString(s"str$i")) } } @@ -156,10 +172,12 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } val array = new ColumnarArray(testVector, 0, 10) + val arrayCopy = array.copy() (0 until 10).foreach { i => val utf8 = s"str$i".getBytes("utf8") assert(array.get(i, BinaryType) === utf8) + assert(arrayCopy.get(i, BinaryType) === utf8) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org