yangzhg commented on a change in pull request #3638:
URL: https://github.com/apache/incubator-doris/pull/3638#discussion_r428406815



##########
File path: be/src/exprs/string_functions.cpp
##########
@@ -196,28 +196,56 @@ StringVal StringFunctions::lpad(
     if (str.is_null || len.is_null || pad.is_null || len.val < 0) {
         return StringVal::null();
     }
+
+    size_t str_char_size = 0;
+    size_t pad_char_size = 0;
+    size_t byte_pos = 0;
+    std::vector<size_t> str_index;
+    std::vector<size_t> pad_index;
+    for (size_t i = 0, char_size = 0; i < str.len; i += char_size) {
+        char_size = get_utf8_byte_length((unsigned)(str.ptr)[i]);
+        str_index.push_back(byte_pos);
+        byte_pos += char_size;
+        ++str_char_size;
+    }
+    byte_pos = 0;
+    for (size_t i = 0, char_size = 0; i < pad.len; i += char_size) {
+        char_size = get_utf8_byte_length((unsigned)(pad.ptr)[i]);
+        pad_index.push_back(byte_pos);
+        byte_pos += char_size;
+        ++pad_char_size;
+    }
+    
     // Corner cases: Shrink the original string, or leave it alone.
     // TODO: Hive seems to go into an infinite loop if pad.len == 0,
     // so we should pay attention to Hive's future solution to be compatible.
-    if (len.val <= str.len || pad.len == 0) {
-        return StringVal(str.ptr, len.val);
+    if (len.val <= str_char_size || pad.len == 0) {
+        if (len.val >= str_index.size()) {
+            return StringVal::null();
+        }
+        return StringVal(str.ptr, str_index.at(len.val));
     }
 
     // TODO pengyubing
     // StringVal result = StringVal::create_temp_string_val(context, len.val);
-    StringVal result(context, len.val);
+    int32_t pad_byte_len = 0;
+    int32_t pad_times = (len.val - str_char_size) / pad_char_size;
+    int32_t pad_remainder = (len.val - str_char_size) % pad_char_size;
+    pad_byte_len = pad_times * pad.len;
+    pad_byte_len += pad_index.at(pad_remainder);

Review comment:
       operator[] isn't bounds-checked: if the requested position is out of
range, it will hide the problem and make it difficult to track down when
the code is changed later.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to