Repository: spark Updated Branches: refs/heads/master ec18cd0af -> ec79183ac
[SPARK-16340][SQL] Support column arguments for `regexp_replace` Dataset operation ## What changes were proposed in this pull request? Currently, the `regexp_replace` function supports `Column` arguments in a query. This PR supports that in a `Dataset` operation, too. ## How was this patch tested? Pass the Jenkins tests with an updated testcase. Author: Dongjoon Hyun <[email protected]> Closes #14060 from dongjoon-hyun/SPARK-16340. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ec79183a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ec79183a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ec79183a Branch: refs/heads/master Commit: ec79183ac5b842e49baa40ea1c9a72ce8f108fe5 Parents: ec18cd0 Author: Dongjoon Hyun <[email protected]> Authored: Tue Jul 5 22:11:40 2016 -0700 Committer: Reynold Xin <[email protected]> Committed: Tue Jul 5 22:11:40 2016 -0700 ---------------------------------------------------------------------- .../src/main/scala/org/apache/spark/sql/functions.scala | 10 ++++++++++ .../scala/org/apache/spark/sql/StringFunctionsSuite.scala | 4 +++- 2 files changed, 13 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ec79183a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index c8782df..45d5d05 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2194,6 +2194,16 @@ object functions { } /** + * Replace all substrings of the specified string value that match regexp with rep. 
+ * + * @group string_funcs + * @since 2.1.0 + */ + def regexp_replace(e: Column, pattern: Column, replacement: Column): Column = withExpr { + RegExpReplace(e.expr, pattern.expr, replacement.expr) + } + + /** * Decodes a BASE64 encoded string column and returns it as a binary column. * This is the reverse of base64. * http://git-wip-us.apache.org/repos/asf/spark/blob/ec79183a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index dff4226..3edd988 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -77,8 +77,10 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { checkAnswer( df.select( regexp_replace($"a", "(\\d+)", "num"), + regexp_replace($"a", $"b", $"c"), regexp_extract($"a", "(\\d+)-(\\d+)", 1)), - Row("num-num", "100") :: Row("num-num", "100") :: Row("num-num", "100") :: Nil) + Row("num-num", "300", "100") :: Row("num-num", "400", "100") :: + Row("num-num", "400-400", "100") :: Nil) // for testing the mutable state of the expression in code gen. // This is a hack way to enable the codegen, thus the codegen is enable by default, --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
