Repository: spark
Updated Branches:
refs/heads/master 276ef1c3c -> f53580297
[SPARK-6536] [PySpark] Column.inSet() in Python
```
>>> df[df.name.inSet("Bob", "Mike")].collect()
[Row(age=5, name=u'Bob')]
>>> df[df.age.inSet([1, 2, 3])].collect()
[Row(age=2, name=u'Alice')]
```
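For comparison (a usage sketch, assuming the same two-row doctest DataFrame with `age` and `name` columns used above), the equivalent predicate can also be written as a SQL expression string passed to `DataFrame.filter()`:
```
>>> df.filter("age in (1, 2, 3)").collect()
[Row(age=2, name=u'Alice')]
```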
Author: Davies Liu <[email protected]>
Closes #5190 from davies/in and squashes the following commits:
6b73a47 [Davies Liu] Column.inSet() in Python
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f5358029
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f5358029
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f5358029
Branch: refs/heads/master
Commit: f535802977c5a3ce45894d89fdf59f8723f023c8
Parents: 276ef1c
Author: Davies Liu <[email protected]>
Authored: Thu Mar 26 00:01:24 2015 -0700
Committer: Reynold Xin <[email protected]>
Committed: Thu Mar 26 00:01:24 2015 -0700
----------------------------------------------------------------------
python/pyspark/sql/dataframe.py | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/f5358029/python/pyspark/sql/dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 5cb89da..bf7c47b 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -985,6 +985,23 @@ class Column(object):
 
     __getslice__ = substr
 
+    def inSet(self, *cols):
+        """ A boolean expression that is evaluated to true if the value of this
+        expression is contained by the evaluated values of the arguments.
+
+        >>> df[df.name.inSet("Bob", "Mike")].collect()
+        [Row(age=5, name=u'Bob')]
+        >>> df[df.age.inSet([1, 2, 3])].collect()
+        [Row(age=2, name=u'Alice')]
+        """
+        if len(cols) == 1 and isinstance(cols[0], (list, set)):
+            cols = cols[0]
+        cols = [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols]
+        sc = SparkContext._active_spark_context
+        jcols = ListConverter().convert(cols, sc._gateway._gateway_client)
+        jc = getattr(self._jc, "in")(sc._jvm.PythonUtils.toSeq(jcols))
+        return Column(jc)
+
     # order
     asc = _unary_op("asc", "Returns a sort expression based on the"
                     " ascending order of the given column name.")