Repository: spark
Updated Branches:
  refs/heads/branch-2.0 2285de734 -> 20efb7969


[SPARK-16324][SQL] regexp_extract should doc that it returns empty string when 
match fails

## What changes were proposed in this pull request?

Document that regexp_extract returns an empty string when the regex or the specified group does not match.

## How was this patch tested?

Jenkins test, with a few new test cases

Author: Sean Owen <[email protected]>

Closes #14525 from srowen/SPARK-16324.

(cherry picked from commit 0578ff9681edbaab4ae68f67272dc3d4d890d53b)
Signed-off-by: Sean Owen <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/20efb796
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/20efb796
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/20efb796

Branch: refs/heads/branch-2.0
Commit: 20efb7969ac8b313cd0895b57789e47d657453a4
Parents: 2285de7
Author: Sean Owen <[email protected]>
Authored: Wed Aug 10 10:14:43 2016 +0100
Committer: Sean Owen <[email protected]>
Committed: Wed Aug 10 10:14:51 2016 +0100

----------------------------------------------------------------------
 python/pyspark/sql/functions.py                                | 6 +++++-
 sql/core/src/main/scala/org/apache/spark/sql/functions.scala   | 3 ++-
 .../test/scala/org/apache/spark/sql/StringFunctionsSuite.scala | 4 ++++
 3 files changed, 11 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/20efb796/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 8a01805..4ea83e2 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1440,11 +1440,15 @@ def split(str, pattern):
 @ignore_unicode_prefix
 @since(1.5)
 def regexp_extract(str, pattern, idx):
-    """Extract a specific(idx) group identified by a java regex, from the specified string column.
+    """Extract a specific group matched by a Java regex, from the specified string column.
+    If the regex did not match, or the specified group did not match, an empty string is returned.
 
     >>> df = spark.createDataFrame([('100-200',)], ['str'])
     >>> df.select(regexp_extract('str', '(\d+)-(\d+)', 1).alias('d')).collect()
     [Row(d=u'100')]
+    >>> df = spark.createDataFrame([('foo',)], ['str'])
+    >>> df.select(regexp_extract('str', '(\d+)', 1).alias('d')).collect()
+    [Row(d=u'')]
     >>> df = spark.createDataFrame([('aaaac',)], ['str'])
     >>> df.select(regexp_extract('str', '(a+)(b)?(c)', 2).alias('d')).collect()
     [Row(d=u'')]

http://git-wip-us.apache.org/repos/asf/spark/blob/20efb796/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index ab09ef7..4e185b8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2175,7 +2175,8 @@ object functions {
   def ltrim(e: Column): Column = withExpr {StringTrimLeft(e.expr) }
 
   /**
-   * Extract a specific(idx) group identified by a java regex, from the specified string column.
+   * Extract a specific group matched by a Java regex, from the specified string column.
+   * If the regex did not match, or the specified group did not match, an empty string is returned.
    *
    * @group string_funcs
    * @since 1.5.0

http://git-wip-us.apache.org/repos/asf/spark/blob/20efb796/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index 6edd7b0..9be2de9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -95,6 +95,10 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext {
   test("non-matching optional group") {
     val df = Seq(Tuple1("aaaac")).toDF("s")
     checkAnswer(
+      df.select(regexp_extract($"s", "(foo)", 1)),
+      Row("")
+    )
+    checkAnswer(
       df.select(regexp_extract($"s", "(a+)(b)?(c)", 2)),
       Row("")
     )


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to