Repository: spark Updated Branches: refs/heads/branch-2.0 2285de734 -> 20efb7969
[SPARK-16324][SQL] regexp_extract should doc that it returns empty string when match fails ## What changes were proposed in this pull request? Doc that regexp_extract returns empty string when regex or group does not match ## How was this patch tested? Jenkins test, with a few new test cases Author: Sean Owen <[email protected]> Closes #14525 from srowen/SPARK-16324. (cherry picked from commit 0578ff9681edbaab4ae68f67272dc3d4d890d53b) Signed-off-by: Sean Owen <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/20efb796 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/20efb796 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/20efb796 Branch: refs/heads/branch-2.0 Commit: 20efb7969ac8b313cd0895b57789e47d657453a4 Parents: 2285de7 Author: Sean Owen <[email protected]> Authored: Wed Aug 10 10:14:43 2016 +0100 Committer: Sean Owen <[email protected]> Committed: Wed Aug 10 10:14:51 2016 +0100 ---------------------------------------------------------------------- python/pyspark/sql/functions.py | 6 +++++- sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 3 ++- .../test/scala/org/apache/spark/sql/StringFunctionsSuite.scala | 4 ++++ 3 files changed, 11 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/20efb796/python/pyspark/sql/functions.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 8a01805..4ea83e2 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1440,11 +1440,15 @@ def split(str, pattern): @ignore_unicode_prefix @since(1.5) def regexp_extract(str, pattern, idx): - """Extract a specific(idx) group identified by a java regex, from the specified string column. + """Extract a specific group matched by a Java regex, from the specified string column. + If the regex did not match, or the specified group did not match, an empty string is returned. >>> df = spark.createDataFrame([('100-200',)], ['str']) >>> df.select(regexp_extract('str', '(\d+)-(\d+)', 1).alias('d')).collect() [Row(d=u'100')] + >>> df = spark.createDataFrame([('foo',)], ['str']) + >>> df.select(regexp_extract('str', '(\d+)', 1).alias('d')).collect() + [Row(d=u'')] >>> df = spark.createDataFrame([('aaaac',)], ['str']) >>> df.select(regexp_extract('str', '(a+)(b)?(c)', 2).alias('d')).collect() [Row(d=u'')] http://git-wip-us.apache.org/repos/asf/spark/blob/20efb796/sql/core/src/main/scala/org/apache/spark/sql/functions.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index ab09ef7..4e185b8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2175,7 +2175,8 @@ object functions { def ltrim(e: Column): Column = withExpr {StringTrimLeft(e.expr) } /** - * Extract a specific(idx) group identified by a java regex, from the specified string column. + * Extract a specific group matched by a Java regex, from the specified string column. + * If the regex did not match, or the specified group did not match, an empty string is returned. * * @group string_funcs * @since 1.5.0 http://git-wip-us.apache.org/repos/asf/spark/blob/20efb796/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 6edd7b0..9be2de9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -95,6 +95,10 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { test("non-matching optional group") { val df = Seq(Tuple1("aaaac")).toDF("s") checkAnswer( + df.select(regexp_extract($"s", "(foo)", 1)), + Row("") + ) + checkAnswer( df.select(regexp_extract($"s", "(a+)(b)?(c)", 2)), Row("") ) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
