Repository: spark
Updated Branches:
refs/heads/branch-1.3 9edb34fc3 -> 0ba759985
[SPARK-6536] [PySpark] Column.inSet() in Python
```
>>> df[df.name.inSet("Bob", "Mike")].collect()
[Row(age=5, name=u'Bob')]
>>> df[df.age.inSet([1, 2, 3])].collect()
[Row(age=2, name=u'Alice')]
```
Author: Davies Liu <[email protected]>
Closes #5190 from davies/in and squashes the following commits:
6b73a47 [Davies Liu] Column.inSet() in Python
(cherry picked from commit f535802977c5a3ce45894d89fdf59f8723f023c8)
Signed-off-by: Reynold Xin <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ba75998
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ba75998
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ba75998
Branch: refs/heads/branch-1.3
Commit: 0ba759985288f5df6940c37f5f401bc31de53a1c
Parents: 9edb34f
Author: Davies Liu <[email protected]>
Authored: Thu Mar 26 00:01:24 2015 -0700
Committer: Reynold Xin <[email protected]>
Committed: Thu Mar 26 00:01:32 2015 -0700
----------------------------------------------------------------------
python/pyspark/sql/dataframe.py | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/0ba75998/python/pyspark/sql/dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 5cb89da..bf7c47b 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -985,6 +985,23 @@ class Column(object):
 
     __getslice__ = substr
 
+    def inSet(self, *cols):
+        """ A boolean expression that is evaluated to true if the value of this
+        expression is contained by the evaluated values of the arguments.
+
+        >>> df[df.name.inSet("Bob", "Mike")].collect()
+        [Row(age=5, name=u'Bob')]
+        >>> df[df.age.inSet([1, 2, 3])].collect()
+        [Row(age=2, name=u'Alice')]
+        """
+        if len(cols) == 1 and isinstance(cols[0], (list, set)):
+            cols = cols[0]
+        cols = [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols]
+        sc = SparkContext._active_spark_context
+        jcols = ListConverter().convert(cols, sc._gateway._gateway_client)
+        jc = getattr(self._jc, "in")(sc._jvm.PythonUtils.toSeq(jcols))
+        return Column(jc)
+
     # order
     asc = _unary_op("asc", "Returns a sort expression based on the"
                     " ascending order of the given column name.")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]