This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-c108335-hive-sql
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-c108335-hive-sql by this push:
     new 3d8a79efc7b Impl substr from zero (#49196)
3d8a79efc7b is described below

commit 3d8a79efc7b9e5ef2074db5d03b5594384811a66
Author: Socrates <suyit...@selectdb.com>
AuthorDate: Tue Mar 18 15:55:58 2025 +0800

    Impl substr from zero (#49196)
---
 be/src/vec/exec/scan/file_scanner.cpp              |   2 +-
 be/src/vec/functions/function_string.cpp           |   9 +-
 be/src/vec/functions/function_string.h             |  35 +++++--
 .../sink/writer/iceberg/partition_transformers.h   |   2 +-
 .../doris/catalog/BuiltinScalarFunctions.java      |   8 +-
 .../functions/scalar/SubstringForZero.java         | 113 +++++++++++++++++++++
 .../expressions/visitor/ScalarFunctionVisitor.java |   5 +
 7 files changed, 158 insertions(+), 16 deletions(-)

diff --git a/be/src/vec/exec/scan/file_scanner.cpp 
b/be/src/vec/exec/scan/file_scanner.cpp
index 62e03e3dc5d..21e0ea2f195 100644
--- a/be/src/vec/exec/scan/file_scanner.cpp
+++ b/be/src/vec/exec/scan/file_scanner.cpp
@@ -845,7 +845,7 @@ void FileScanner::_truncate_char_or_varchar_column(Block* 
block, int idx, int le
     temp_arguments[2] = num_columns_without_result + 1; // len
     size_t result_column_id = num_columns_without_result + 2;
 
-    SubstringUtil::substring_execute(*block, temp_arguments, result_column_id, 
block->rows());
+    SubstringUtil<>::substring_execute(*block, temp_arguments, 
result_column_id, block->rows());
     auto res = 
ColumnNullable::create(block->get_by_position(result_column_id).column,
                                       null_map_column_ptr);
     block->replace_by_position(idx, std::move(res));
diff --git a/be/src/vec/functions/function_string.cpp 
b/be/src/vec/functions/function_string.cpp
index d87eaaa3dde..be166baa5f1 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -1195,8 +1195,10 @@ void register_function_string(SimpleFunctionFactory& 
factory) {
     factory.register_function<FunctionTrim<Trim2Impl<true, false, 
NameLTrimIn>>>();
     factory.register_function<FunctionTrim<Trim2Impl<false, true, 
NameRTrimIn>>>();
     factory.register_function<FunctionConvertTo>();
-    factory.register_function<FunctionSubstring<Substr3Impl>>();
-    factory.register_function<FunctionSubstring<Substr2Impl>>();
+    factory.register_function<FunctionSubstring<Substr3Impl<false>>>();
+    factory.register_function<FunctionSubstring<Substr2Impl<false>>>();
+    factory.register_function<FunctionSubstring<Substr3Impl<true>>>();
+    factory.register_function<FunctionSubstring<Substr2Impl<true>>>();
     factory.register_function<FunctionLeft>();
     factory.register_function<FunctionRight>();
     factory.register_function<FunctionNullOrEmpty>();
@@ -1242,7 +1244,8 @@ void register_function_string(SimpleFunctionFactory& 
factory) {
 
     factory.register_alias(FunctionLeft::name, "strleft");
     factory.register_alias(FunctionRight::name, "strright");
-    factory.register_alias(SubstringUtil::name, "substr");
+    factory.register_alias(SubstringUtil<>::name, "substr");
+    factory.register_alias(SubstringUtil<true>::name, "substr_for_zero");
     factory.register_alias(FunctionToLower::name, "lcase");
     factory.register_alias(FunctionToUpper::name, "ucase");
     factory.register_alias(FunctionStringDigestOneArg<MD5Sum>::name, "md5");
diff --git a/be/src/vec/functions/function_string.h 
b/be/src/vec/functions/function_string.h
index 3b909f4a8d5..b38ca7c50d9 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -162,8 +162,9 @@ struct StringOP {
     }
 };
 
+template <bool is_for_zero = false>
 struct SubstringUtil {
-    static constexpr auto name = "substring";
+    static constexpr auto name = is_for_zero ? "substring_for_zero" : 
"substring";
 
     static void substring_execute(Block& block, const ColumnNumbers& 
arguments, uint32_t result,
                                   size_t input_rows_count) {
@@ -220,7 +221,7 @@ private:
         PMR::vector<size_t> index {&pool};
 
         if constexpr (is_const) {
-            if (start[0] == 0 || len[0] <= 0) {
+            if ((!is_for_zero && start[0] == 0) || len[0] <= 0) {
                 for (size_t i = 0; i < size; ++i) {
                     StringOP::push_empty_string(i, res_chars, res_offsets);
                 }
@@ -237,11 +238,17 @@ private:
             int char_len = simd::VStringFunctions::get_char_len(str_data, 
str_size);
             // return empty string if start > src.length
             // Here, start_value is compared against the length of the 
character.
-            if (start_value > char_len || str_size == 0 || start_value == 0 || 
len_value <= 0) {
+            if (start_value > char_len || str_size == 0 || (!is_for_zero && 
start_value == 0) ||
+                len_value <= 0) {
                 StringOP::push_empty_string(i, res_chars, res_offsets);
                 continue;
             }
 
+            // Handle Hive compatibility mode - treat start=0 as start=1
+            if (is_for_zero && start_value == 0) {
+                start_value = 1;
+            }
+
             size_t byte_pos = 0;
             index.clear();
             for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
@@ -287,7 +294,7 @@ private:
         res_offsets.resize(size);
 
         if constexpr (is_const) {
-            if (start[0] == 0 || len[0] <= 0) {
+            if ((!is_for_zero && start[0] == 0) || len[0] <= 0) {
                 for (size_t i = 0; i < size; ++i) {
                     StringOP::push_empty_string(i, res_chars, res_offsets);
                 }
@@ -305,6 +312,11 @@ private:
             int start_value = is_const ? start[0] : start[i];
             int len_value = is_const ? len[0] : len[i];
 
+            // Handle Hive compatibility mode - treat start=0 as start=1
+            if (is_for_zero && start_value == 0) {
+                start_value = 1;
+            }
+
             if (start_value > str_size || start_value < -str_size || str_size 
== 0 ||
                 len_value <= 0) {
                 StringOP::push_empty_string(i, res_chars, res_offsets);
@@ -616,7 +628,7 @@ private:
 template <typename Impl>
 class FunctionSubstring : public IFunction {
 public:
-    static constexpr auto name = SubstringUtil::name;
+    static constexpr auto name = Impl::name;
     String get_name() const override { return name; }
     static FunctionPtr create() { return 
std::make_shared<FunctionSubstring<Impl>>(); }
 
@@ -636,7 +648,9 @@ public:
     }
 };
 
+template <bool is_for_zero = false>
 struct Substr3Impl {
+    static constexpr auto name = SubstringUtil<is_for_zero>::name;
     static DataTypes get_variadic_argument_types() {
         return {std::make_shared<DataTypeString>(), 
std::make_shared<DataTypeInt32>(),
                 std::make_shared<DataTypeInt32>()};
@@ -645,12 +659,14 @@ struct Substr3Impl {
     static Status execute_impl(FunctionContext* context, Block& block,
                                const ColumnNumbers& arguments, uint32_t result,
                                size_t input_rows_count) {
-        SubstringUtil::substring_execute(block, arguments, result, 
input_rows_count);
+        SubstringUtil<is_for_zero>::substring_execute(block, arguments, 
result, input_rows_count);
         return Status::OK();
     }
 };
 
+template <bool is_for_zero = false>
 struct Substr2Impl {
+    static constexpr auto name = SubstringUtil<is_for_zero>::name;
     static DataTypes get_variadic_argument_types() {
         return {std::make_shared<DataTypeString>(), 
std::make_shared<DataTypeInt32>()};
     }
@@ -679,7 +695,8 @@ struct Substr2Impl {
         block.insert({std::move(col_len), std::make_shared<DataTypeInt32>(), 
"strlen"});
         ColumnNumbers temp_arguments = {arguments[0], arguments[1], 
block.columns() - 1};
 
-        SubstringUtil::substring_execute(block, temp_arguments, result, 
input_rows_count);
+        SubstringUtil<is_for_zero>::substring_execute(block, temp_arguments, 
result,
+                                                      input_rows_count);
         return Status::OK();
     }
 };
@@ -891,7 +908,7 @@ public:
         temp_arguments[1] = num_columns_without_result;
         temp_arguments[2] = arguments[1];
 
-        SubstringUtil::substring_execute(block, temp_arguments, result, 
input_rows_count);
+        SubstringUtil<false>::substring_execute(block, temp_arguments, result, 
input_rows_count);
         return Status::OK();
     }
 };
@@ -940,7 +957,7 @@ public:
         temp_arguments[0] = arguments[0];
         temp_arguments[1] = num_columns_without_result;
         temp_arguments[2] = num_columns_without_result + 1;
-        SubstringUtil::substring_execute(block, temp_arguments, result, 
input_rows_count);
+        SubstringUtil<>::substring_execute(block, temp_arguments, result, 
input_rows_count);
         return Status::OK();
     }
 };
diff --git a/be/src/vec/sink/writer/iceberg/partition_transformers.h 
b/be/src/vec/sink/writer/iceberg/partition_transformers.h
index 0b18ce24952..cd1cfa6f94c 100644
--- a/be/src/vec/sink/writer/iceberg/partition_transformers.h
+++ b/be/src/vec/sink/writer/iceberg/partition_transformers.h
@@ -177,7 +177,7 @@ public:
         temp_arguments[2] = 2; // width
         uint32_t result_column_id = 3;
 
-        SubstringUtil::substring_execute(temp_block, temp_arguments, 
result_column_id,
+        SubstringUtil<>::substring_execute(temp_block, temp_arguments, 
result_column_id,
                                          temp_block.rows());
         if (is_nullable) {
             auto res_column = ColumnNullable::create(
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index c0409ae3489..0445cc3155e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -426,6 +426,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement
 import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBitmap;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.SubReplace;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Substring;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.SubstringForZero;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.SubstringIndex;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Tan;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Tanh;
@@ -493,7 +494,8 @@ import java.util.List;
 /**
  * Builtin scalar functions.
  * <p>
- * Note: Please ensure that this class only has some lists and no procedural 
code.
+ * Note: Please ensure that this class only has some lists and no procedural
+ * code.
  * It helps to be clear and concise.
  */
 public class BuiltinScalarFunctions implements FunctionHelper {
@@ -923,6 +925,7 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(SubBitmap.class, "sub_bitmap"),
             scalar(SubReplace.class, "sub_replace"),
             scalar(Substring.class, "substr", "substring"),
+            scalar(SubstringForZero.class, "substr_for_zero", 
"substring_for_zero"),
             scalar(SubstringIndex.class, "substring_index"),
             scalar(Tan.class, "tan"),
             scalar(Tanh.class, "tanh"),
@@ -991,5 +994,6 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
     public static final BuiltinScalarFunctions INSTANCE = new 
BuiltinScalarFunctions();
 
     // Note: Do not add any code here!
-    private BuiltinScalarFunctions() {}
+    private BuiltinScalarFunctions() {
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SubstringForZero.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SubstringForZero.java
new file mode 100644
index 00000000000..6c4f2994837
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SubstringForZero.java
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * ScalarFunction 'substring_for_zero'. For compatibility with Hive.
+ */
+public class SubstringForZero extends ScalarFunction
+        implements ExplicitlyCastableSignature, PropagateNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT,
 IntegerType.INSTANCE),
+            
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, 
IntegerType.INSTANCE),
+            FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT)
+                    .args(VarcharType.SYSTEM_DEFAULT, IntegerType.INSTANCE, 
IntegerType.INSTANCE),
+            FunctionSignature.ret(StringType.INSTANCE)
+                    .args(StringType.INSTANCE, IntegerType.INSTANCE, 
IntegerType.INSTANCE));
+
+    /**
+     * constructor with 2 arguments.
+     */
+    public SubstringForZero(Expression arg0, Expression arg1) {
+        super("substring_for_zero", arg0, arg1, Literal.of(Integer.MAX_VALUE));
+    }
+
+    /**
+     * constructor with 3 arguments.
+     */
+    public SubstringForZero(Expression arg0, Expression arg1, Expression arg2) 
{
+        super("substring_for_zero", arg0, arg1, arg2);
+    }
+
+    @Override
+    public FunctionSignature computeSignature(FunctionSignature signature) {
+        Optional<Expression> length = arity() == 3
+                ? Optional.of(getArgument(2))
+                : Optional.empty();
+        DataType returnType = VarcharType.SYSTEM_DEFAULT;
+        if (length.isPresent() && length.get() instanceof IntegerLiteral) {
+            returnType = VarcharType.createVarcharType(((IntegerLiteral) 
length.get()).getValue());
+        }
+        return signature.withReturnType(returnType);
+    }
+
+    public Expression getSource() {
+        return child(0);
+    }
+
+    public Expression getPosition() {
+        return child(1);
+    }
+
+    public Optional<Expression> getLength() {
+        return arity() == 3 ? Optional.of(child(2)) : Optional.empty();
+    }
+
+    /**
+     * withChildren.
+     */
+    @Override
+    public SubstringForZero withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 2
+                || children.size() == 3);
+        if (children.size() == 2) {
+            return new SubstringForZero(children.get(0), children.get(1));
+        } else {
+            return new SubstringForZero(children.get(0), children.get(1), 
children.get(2));
+        }
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitSubstringForZero(this, context);
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index af8740047e6..02aa54e17b2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -424,6 +424,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement
 import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBitmap;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.SubReplace;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Substring;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.SubstringForZero;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.SubstringIndex;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Tan;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Tanh;
@@ -2071,6 +2072,10 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(substring, context);
     }
 
+    default R visitSubstringForZero(SubstringForZero substringForZero, C 
context) {
+        return visitScalarFunction(substringForZero, context);
+    }
+
     default R visitSubstringIndex(SubstringIndex substringIndex, C context) {
         return visitScalarFunction(substringIndex, context);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to