Repository: spark Updated Branches: refs/heads/branch-1.4 8109c9e10 -> c68d0e235
[SPARK-7294][SQL] ADD BETWEEN Author: 云峤 <[email protected]> Author: kaka1992 <[email protected]> Closes #5839 from kaka1992/master and squashes the following commits: b15360d [kaka1992] Fix python unit test in sql/test. =_= I forget to commit this file last time. f928816 [kaka1992] Fix python style in sql/test. d2e7f72 [kaka1992] Fix python style in sql/test. c54d904 [kaka1992] Fix empty map bug. 7e64d1e [云峤] Update 7b9b858 [云峤] undo f080f8d [云峤] update pep8 76f0c51 [云峤] Merge remote-tracking branch 'remotes/upstream/master' 7d62368 [云峤] [SPARK-7294] ADD BETWEEN baf839b [云峤] [SPARK-7294] ADD BETWEEN d11d5b9 [云峤] [SPARK-7294] ADD BETWEEN (cherry picked from commit 735bc3d042b1e3e12de57b66f166af14254ad314) Signed-off-by: Reynold Xin <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c68d0e23 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c68d0e23 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c68d0e23 Branch: refs/heads/branch-1.4 Commit: c68d0e235251dffeb684761fb2f94da249cde74d Parents: 8109c9e Author: 云峤 <[email protected]> Authored: Tue May 5 13:23:53 2015 -0700 Committer: Reynold Xin <[email protected]> Committed: Tue May 5 13:24:01 2015 -0700 ---------------------------------------------------------------------- python/pyspark/sql/dataframe.py | 7 +++++++ python/pyspark/sql/tests.py | 8 ++++++++ .../src/main/scala/org/apache/spark/sql/Column.scala | 9 +++++++++ .../org/apache/spark/sql/ColumnExpressionSuite.scala | 14 ++++++++++++++ 4 files changed, 38 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/c68d0e23/python/pyspark/sql/dataframe.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 17448b3..24f3705 100644 --- 
a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1405,6 +1405,13 @@ class Column(object): raise TypeError("unexpected type: %s" % type(dataType)) return Column(jc) + @ignore_unicode_prefix + def between(self, lowerBound, upperBound): + """ A boolean expression that is evaluated to true if the value of this + expression is between the given columns. + """ + return (self >= lowerBound) & (self <= upperBound) + def __repr__(self): return 'Column<%s>' % self._jc.toString().encode('utf8') http://git-wip-us.apache.org/repos/asf/spark/blob/c68d0e23/python/pyspark/sql/tests.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 7ea6656..46c4c88 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -453,6 +453,14 @@ class SQLTests(ReusedPySparkTestCase): for row in rndn: assert row[1] >= -4.0 and row[1] <= 4.0, "got: %s" % row[1] + def test_between_function(self): + df = self.sc.parallelize([ + Row(a=1, b=2, c=3), + Row(a=2, b=1, c=3), + Row(a=4, b=1, c=4)]).toDF() + self.assertEqual([Row(a=2, b=1, c=3), Row(a=4, b=1, c=4)], + df.filter(df.a.between(df.b, df.c)).collect()) + def test_save_and_load(self): df = self.df tmpPath = tempfile.mkdtemp() http://git-wip-us.apache.org/repos/asf/spark/blob/c68d0e23/sql/core/src/main/scala/org/apache/spark/sql/Column.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 33f9d0b..c0503bf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -296,6 +296,15 @@ class Column(protected[sql] val expr: Expression) extends Logging { def eqNullSafe(other: Any): Column = this <=> other /** + * True if the current column is between the lower bound and upper bound, 
inclusive. + * + * @group java_expr_ops + */ + def between(lowerBound: Any, upperBound: Any): Column = { + (this >= lowerBound) && (this <= upperBound) + } + + /** * True if the current expression is null. * * @group expr_ops http://git-wip-us.apache.org/repos/asf/spark/blob/c68d0e23/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 6322faf..3c1ad65 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -208,6 +208,20 @@ class ColumnExpressionSuite extends QueryTest { testData2.collect().toSeq.filter(r => r.getInt(0) <= r.getInt(1))) } + test("between") { + val testData = TestSQLContext.sparkContext.parallelize( + (0, 1, 2) :: + (1, 2, 3) :: + (2, 1, 0) :: + (2, 2, 4) :: + (3, 1, 6) :: + (3, 2, 0) :: Nil).toDF("a", "b", "c") + val expectAnswer = testData.collect().toSeq. + filter(r => r.getInt(0) >= r.getInt(1) && r.getInt(0) <= r.getInt(2)) + + checkAnswer(testData.filter($"a".between($"b", $"c")), expectAnswer) + } + val booleanData = TestSQLContext.createDataFrame(TestSQLContext.sparkContext.parallelize( Row(false, false) :: Row(false, true) :: --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
