HappenLee commented on code in PR #19257:
URL: https://github.com/apache/doris/pull/19257#discussion_r1186959431


##########
be/src/vec/functions/function_string.h:
##########
@@ -170,71 +170,126 @@ struct SubstringUtil {
                         const PaddedPODArray<Int32>& start, const 
PaddedPODArray<Int32>& len,
                         NullMap& null_map, ColumnString::Chars& res_chars,
                         ColumnString::Offsets& res_offsets) {
-        int size = offsets.size();
+        size_t size = offsets.size();
         res_offsets.resize(size);
         res_chars.reserve(chars.size());
 
         std::array<std::byte, 128 * 1024> buf;
         PMR::monotonic_buffer_resource pool {buf.data(), buf.size()};
         PMR::vector<size_t> index {&pool};
 
-        PMR::vector<std::pair<const unsigned char*, int>> strs(&pool);
-        strs.resize(size);
         auto* __restrict data_ptr = chars.data();
         auto* __restrict offset_ptr = offsets.data();
-        for (int i = 0; i < size; ++i) {
-            strs[i].first = data_ptr + offset_ptr[i - 1];
-            strs[i].second = offset_ptr[i] - offset_ptr[i - 1];
-        }
-
-        for (int i = 0; i < size; ++i) {
-            auto [raw_str, str_size] = strs[i];
-            const auto& start_value = start[index_check_const(i, Const)];
-            const auto& len_value = len[index_check_const(i, Const)];
 
-            // return empty string if start > src.length
-            if (start_value > str_size || str_size == 0 || start_value == 0 || 
len_value <= 0) {
-                StringOP::push_empty_string(i, res_chars, res_offsets);
-                continue;
-            }
-            // reference to string_function.cpp: substring
-            size_t byte_pos = 0;
-            index.clear();
-            for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
-                char_size = UTF8_BYTE_LENGTH[(unsigned char)(raw_str)[j]];
-                index.push_back(j);
-                if (start_value > 0 && index.size() > start_value + len_value) 
{
-                    break;
+        if constexpr (Const) {
+            const auto start_value = start[0];
+            const auto len_value = len[0];
+            if (start_value == 0 || len_value <= 0) {
+                for (size_t i = 0; i < size; ++i) {
+                    StringOP::push_empty_string(i, res_chars, res_offsets);
                 }
-            }
+            } else {
+                for (size_t i = 0; i < size; ++i) {
+                    const int str_size = offset_ptr[i] - offset_ptr[i - 1];
+                    const uint8_t* raw_str = data_ptr + offset_ptr[i - 1];
+                    // return empty string if start > src.length
+                    if (start_value > str_size || start_value < -str_size || 
str_size == 0) {
+                        StringOP::push_empty_string(i, res_chars, res_offsets);
+                        continue;
+                    }
+                    // reference to string_function.cpp: substring
+                    size_t byte_pos = 0;
+                    index.clear();
+                    for (size_t j = 0, char_size = 0;
+                         j < str_size &&
+                         (start_value <= 0 || index.size() <= start_value + 
len_value);
+                         j += char_size) {
+                        char_size = UTF8_BYTE_LENGTH[(unsigned 
char)(raw_str)[j]];
+                        index.push_back(j);
+                    }
 
-            int fixed_pos = start_value;
-            if (fixed_pos < -(int)index.size()) {
-                StringOP::push_empty_string(i, res_chars, res_offsets);
-                continue;
-            }
-            if (fixed_pos < 0) {
-                fixed_pos = index.size() + fixed_pos + 1;
+                    int fixed_pos = start_value;
+                    if (fixed_pos < 0) {
+                        fixed_pos = str_size + fixed_pos + 1;
+                    } else if (fixed_pos > index.size()) {
+                        StringOP::push_null_string(i, res_chars, res_offsets, 
null_map);
+                        continue;
+                    }
+
+                    byte_pos = index[fixed_pos - 1];
+                    int fixed_len = str_size - byte_pos;
+                    if (fixed_pos + len_value <= index.size()) {
+                        fixed_len = index[fixed_pos + len_value - 1] - 
byte_pos;
+                    }
+
+                    if (byte_pos <= str_size && fixed_len > 0) {
+                        // return StringRef(str.data + byte_pos, fixed_len);
+                        StringOP::push_value_string(
+                                std::string_view {reinterpret_cast<const 
char*>(raw_str + byte_pos),
+                                                  (size_t)fixed_len},
+                                i, res_chars, res_offsets);
+                    } else {
+                        StringOP::push_empty_string(i, res_chars, res_offsets);
+                    }
+                }
             }
-            if (fixed_pos > index.size()) {
-                StringOP::push_null_string(i, res_chars, res_offsets, 
null_map);
-                continue;
+        } else {
+            PMR::vector<std::pair<const unsigned char*, int>> strs(&pool);
+            strs.resize(size);
+            for (int i = 0; i < size; ++i) {
+                strs[i].first = data_ptr + offset_ptr[i - 1];
+                strs[i].second = offset_ptr[i] - offset_ptr[i - 1];
             }
 
-            byte_pos = index[fixed_pos - 1];
-            int fixed_len = str_size - byte_pos;
-            if (fixed_pos + len_value <= index.size()) {
-                fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
-            }
+            for (size_t i = 0; i < size; ++i) {
+                auto [raw_str, str_size] = strs[i];
+                const auto& start_value = start[index_check_const(i, Const)];

Review Comment:
   do not need call `index_check_const` ?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to