This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new dedf15c6df5 [fix](function) add function regexp_extract_or_null 
(#38296)
dedf15c6df5 is described below

commit dedf15c6df57a20b86b2772054449074ae4b1a64
Author: 苏小刚 <suxiaogang...@icloud.com>
AuthorDate: Thu Aug 15 10:24:12 2024 +0800

    [fix](function) add function regexp_extract_or_null (#38296)
    
    ## Proposed changes
    
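    Add the function regexp_extract_or_null for compatibility with Presto.
    It behaves the same as regexp_extract, except that it returns NULL
    instead of an empty string when no match is found (or when the group
    index is negative or exceeds the number of capturing groups).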
---
 be/src/vec/functions/function_regexp.cpp           | 18 ++++--
 be/test/vec/function/function_like_test.cpp        | 39 ++++++++++++
 .../doris/catalog/BuiltinScalarFunctions.java      |  2 +
 .../functions/scalar/RegexpExtractOrNull.java      | 73 ++++++++++++++++++++++
 .../expressions/visitor/ScalarFunctionVisitor.java |  5 ++
 .../test_string_function_regexp.out                | 12 ++++
 .../test_string_function_regexp.groovy             |  5 ++
 7 files changed, 148 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/functions/function_regexp.cpp 
b/be/src/vec/functions/function_regexp.cpp
index 4da133a6f51..525d99b6cc7 100644
--- a/be/src/vec/functions/function_regexp.cpp
+++ b/be/src/vec/functions/function_regexp.cpp
@@ -184,8 +184,9 @@ struct RegexpReplaceOneImpl {
     }
 };
 
+template <bool ReturnNull>
 struct RegexpExtractImpl {
-    static constexpr auto name = "regexp_extract";
+    static constexpr auto name = ReturnNull ? "regexp_extract_or_null" : 
"regexp_extract";
     // 3 args
     static void execute_impl(FunctionContext* context, ColumnPtr 
argument_columns[],
                              size_t input_rows_count, ColumnString::Chars& 
result_data,
@@ -201,7 +202,8 @@ struct RegexpExtractImpl {
             }
             const auto& index_data = index_col->get_int(i);
             if (index_data < 0) {
-                StringOP::push_empty_string(i, result_data, result_offset);
+                ReturnNull ? StringOP::push_null_string(i, result_data, 
result_offset, null_map)
+                           : StringOP::push_empty_string(i, result_data, 
result_offset);
                 continue;
             }
             _execute_inner_loop<false>(context, str_col, pattern_col, 
index_data, result_data,
@@ -220,7 +222,8 @@ struct RegexpExtractImpl {
         const auto& index_data = index_col->get_int(0);
         if (index_data < 0) {
             for (size_t i = 0; i < input_rows_count; ++i) {
-                StringOP::push_empty_string(i, result_data, result_offset);
+                ReturnNull ? StringOP::push_null_string(i, result_data, 
result_offset, null_map)
+                           : StringOP::push_empty_string(i, result_data, 
result_offset);
             }
             return;
         }
@@ -260,7 +263,8 @@ struct RegexpExtractImpl {
 
         int max_matches = 1 + re->NumberOfCapturingGroups();
         if (index_data >= max_matches) {
-            StringOP::push_empty_string(index_now, result_data, result_offset);
+            ReturnNull ? StringOP::push_null_string(index_now, result_data, 
result_offset, null_map)
+                       : StringOP::push_empty_string(index_now, result_data, 
result_offset);
             return;
         }
 
@@ -268,7 +272,8 @@ struct RegexpExtractImpl {
         bool success =
                 re->Match(str_sp, 0, str.size, re2::RE2::UNANCHORED, 
&matches[0], max_matches);
         if (!success) {
-            StringOP::push_empty_string(index_now, result_data, result_offset);
+            ReturnNull ? StringOP::push_null_string(index_now, result_data, 
result_offset, null_map)
+                       : StringOP::push_empty_string(index_now, result_data, 
result_offset);
             return;
         }
         const re2::StringPiece& match = matches[index_data];
@@ -486,7 +491,8 @@ public:
 
 void register_function_regexp_extract(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionRegexp<RegexpReplaceImpl>>();
-    factory.register_function<FunctionRegexp<RegexpExtractImpl>>();
+    factory.register_function<FunctionRegexp<RegexpExtractImpl<true>>>();
+    factory.register_function<FunctionRegexp<RegexpExtractImpl<false>>>();
     factory.register_function<FunctionRegexp<RegexpReplaceOneImpl>>();
     factory.register_function<FunctionRegexp<RegexpExtractAllImpl>>();
 }
diff --git a/be/test/vec/function/function_like_test.cpp 
b/be/test/vec/function/function_like_test.cpp
index 794bc8c5eae..e39b2cf43b0 100644
--- a/be/test/vec/function/function_like_test.cpp
+++ b/be/test/vec/function/function_like_test.cpp
@@ -155,6 +155,45 @@ TEST(FunctionLikeTest, regexp_extract) {
     }
 }
 
+TEST(FunctionLikeTest, regexp_extract_or_null) {
+    std::string func_name = "regexp_extract_or_null";
+
+    DataSet data_set = {{{std::string("x=a3&x=18abc&x=2&y=3&x=4"),
+                          std::string("x=([0-9]+)([a-z]+)"), (int64_t)0},
+                         std::string("x=18abc")},
+                        {{std::string("x=a3&x=18abc&x=2&y=3&x=4"),
+                          std::string("^x=([a-z]+)([0-9]+)"), (int64_t)0},
+                         std::string("x=a3")},
+                        {{std::string("x=a3&x=18abc&x=2&y=3&x=4"),
+                          std::string("^x=([a-z]+)([0-9]+)"), (int64_t)1},
+                         std::string("a")},
+                        {{std::string("http://a.m.baidu.com/i41915173660.htm"),
+                          std::string("i([0-9]+)"), (int64_t)0},
+                         std::string("i41915173660")},
+                        {{std::string("http://a.m.baidu.com/i41915173660.htm"),
+                          std::string("i([0-9]+)"), (int64_t)1},
+                         std::string("41915173660")},
+
+                        {{std::string("hitdecisiondlist"), 
std::string("(i)(.*?)(e)"), (int64_t)0},
+                         std::string("itde")},
+                        {{std::string("hitdecisiondlist"), 
std::string("(i)(.*?)(e)"), (int64_t)1},
+                         std::string("i")},
+                        {{std::string("hitdecisiondlist"), 
std::string("(i)(.*?)(e)"), (int64_t)2},
+                         std::string("td")},
+                        // null
+                        {{std::string("abc"), Null(), (int64_t)0}, Null()},
+                        {{Null(), std::string("i([0-9]+)"), (int64_t)0}, 
Null()}};
+
+    // pattern is constant value
+    InputTypeSet const_pattern_input_types = {TypeIndex::String, Consted 
{TypeIndex::String},
+                                              TypeIndex::Int64};
+    for (const auto& line : data_set) {
+        DataSet const_pattern_dataset = {line};
+        static_cast<void>(check_function<DataTypeString, true>(func_name, 
const_pattern_input_types,
+                                                               
const_pattern_dataset));
+    }
+}
+
 TEST(FunctionLikeTest, regexp_extract_all) {
     std::string func_name = "regexp_extract_all";
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index 76c6f066301..cddacdefc53 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -341,6 +341,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Random;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.RandomBytes;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpExtract;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpExtractAll;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpExtractOrNull;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpReplace;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpReplaceOne;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Repeat;
@@ -811,6 +812,7 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(Regexp.class, "regexp"),
             scalar(RegexpExtract.class, "regexp_extract"),
             scalar(RegexpExtractAll.class, "regexp_extract_all"),
+            scalar(RegexpExtractOrNull.class, "regexp_extract_or_null"),
             scalar(RegexpReplace.class, "regexp_replace"),
             scalar(RegexpReplaceOne.class, "regexp_replace_one"),
             scalar(Repeat.class, "repeat"),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpExtractOrNull.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpExtractOrNull.java
new file mode 100644
index 00000000000..78f94db338d
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpExtractOrNull.java
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.shape.TernaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BigIntType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'regexp_extract_or_null'. This class is generated by 
GenerateFunction.
+ */
+public class RegexpExtractOrNull extends ScalarFunction
+        implements TernaryExpression, ExplicitlyCastableSignature, 
AlwaysNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT)
+                    .args(VarcharType.SYSTEM_DEFAULT, 
VarcharType.SYSTEM_DEFAULT, BigIntType.INSTANCE),
+            FunctionSignature.ret(StringType.INSTANCE)
+                    .args(StringType.INSTANCE, StringType.INSTANCE, 
BigIntType.INSTANCE)
+    );
+
+    /**
+     * constructor with 3 arguments.
+     */
+    public RegexpExtractOrNull(Expression arg0, Expression arg1, Expression 
arg2) {
+        super("regexp_extract_or_null", arg0, arg1, arg2);
+    }
+
+    /**
+     * withChildren.
+     */
+    @Override
+    public RegexpExtractOrNull withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 3);
+        return new RegexpExtractOrNull(children.get(0), children.get(1), 
children.get(2));
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitRegexpExtractOrNull(this, context);
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index 78d8ca0f701..093bf9f1acf 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -340,6 +340,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Random;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.RandomBytes;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpExtract;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpExtractAll;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpExtractOrNull;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpReplace;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpReplaceOne;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Repeat;
@@ -1726,6 +1727,10 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(regexpExtractAll, context);
     }
 
+    default R visitRegexpExtractOrNull(RegexpExtractOrNull 
regexpExtractOrNull, C context) {
+        return visitScalarFunction(regexpExtractOrNull, context);
+    }
+
     default R visitRegexpReplace(RegexpReplace regexpReplace, C context) {
         return visitScalarFunction(regexpReplace, context);
     }
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
index cfe2fd3eaf7..60719fded1a 100644
--- 
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
+++ 
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
@@ -49,6 +49,18 @@ b
 -- !sql --
 d
 
+-- !sql --
+
+
+-- !sql --
+b
+
+-- !sql --
+d
+
+-- !sql --
+\N
+
 -- !sql --
 ['18','17']
 
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
index 5926492ac4d..2066452b0d6 100644
--- 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
@@ -51,6 +51,11 @@ suite("test_string_function_regexp") {
 
     qt_sql "SELECT regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 
1);"
     qt_sql "SELECT regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 
2);"
+    qt_sql "SELECT regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 
3);"
+
+    qt_sql "SELECT regexp_extract_or_null('AbCdE', 
'([[:lower:]]+)C([[:lower:]]+)', 1);"
+    qt_sql "SELECT regexp_extract_or_null('AbCdE', 
'([[:lower:]]+)C([[:lower:]]+)', 2);"
+    qt_sql "SELECT regexp_extract_or_null('AbCdE', 
'([[:lower:]]+)C([[:lower:]]+)', 3);"
 
     qt_sql "SELECT regexp_extract_all('x=a3&x=18abc&x=2&y=3&x=4&x=17bcd', 
'x=([0-9]+)([a-z]+)');"
     qt_sql "SELECT regexp_extract_all('http://a.m.baidu.com/i41915i73660.htm', 
'i([0-9]+)');"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to