HappenLee commented on code in PR #40162: URL: https://github.com/apache/doris/pull/40162#discussion_r1740317261
########## be/src/vec/functions/function_string.h: ########## @@ -1556,86 +1556,106 @@ class FunctionStringPad : public IFunction { const auto* padcol = assert_cast<const ColumnString*>(col[2].get()); const auto& padcol_offsets = padcol->get_offsets(); const auto& padcol_chars = padcol->get_chars(); + std::visit( + [&](auto str_const, auto len_const, auto pad_const) { + execute_utf8<str_const, len_const, pad_const>( + strcol_offsets, strcol_chars, col_len_data, padcol_offsets, + padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); + }, + vectorized::make_bool_variant(col_const[0]), + vectorized::make_bool_variant(col_const[1]), + vectorized::make_bool_variant(col_const[2])); - std::vector<size_t> str_index; + block.get_by_position(result).column = + ColumnNullable::create(std::move(res), std::move(null_map)); + return Status::OK(); + } + + template <bool str_const, bool len_const, bool pad_const> + void execute_utf8(const ColumnString::Offsets& strcol_offsets, + const ColumnString::Chars& strcol_chars, + const ColumnInt32::Container& col_len_data, + const ColumnString::Offsets& padcol_offsets, + const ColumnString::Chars& padcol_chars, ColumnString::Offsets& res_offsets, + ColumnString::Chars& res_chars, ColumnUInt8::Container& null_map_data, + size_t input_rows_count) const { std::vector<size_t> pad_index; + size_t const_pad_char_size = 0; + if constexpr (pad_const) { + const_pad_char_size = simd::VStringFunctions::get_char_len( + (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); + } fmt::memory_buffer buffer; - const bool str_const = col_const[0]; - const bool len_const = col_const[1]; - const bool pad_const = col_const[2]; + buffer.reserve(strcol_chars.size()); + size_t buffer_len = 0; + for (size_t i = 0; i < input_rows_count; ++i) { - str_index.clear(); - pad_index.clear(); + if constexpr (!pad_const) { + pad_index.clear(); + } buffer.clear(); - const auto len = col_len_data[index_check_const(i, len_const)]; + const auto len = 
col_len_data[index_check_const<len_const>(i)]; if (len < 0) { // return NULL when input length is invalid number null_map_data[i] = true; - StringOP::push_empty_string(i, res_chars, res_offsets); + res_offsets[i] = buffer_len; } else { - const auto str_idx = index_check_const(i, str_const); + const auto str_idx = index_check_const<str_const>(i); const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; - const auto pad_idx = index_check_const(i, pad_const); + const auto pad_idx = index_check_const<pad_const>(i); const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; - // get utf8 len - size_t str_char_size = simd::VStringFunctions::get_char_len((const char*)str_data, - str_len, str_index); - size_t pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, - pad_len, pad_index); - - if (len <= str_char_size) { - // truncate the input string - if (len < str_char_size) { - buffer.append(str_data, str_data + str_index[len]); - } else { - buffer.append(str_data, str_data + str_len); - } - StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, - res_chars, res_offsets); + auto [real_len, skip_chars] = simd::VStringFunctions::skip_leading_utf8( + (const char*)str_data, (const char*)str_data + str_len, len); + if (len <= skip_chars) { Review Comment: This seems weird. `skip_chars` seems to always be `<= len` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org