zhiqiang-hhhh commented on code in PR #50149:
URL: https://github.com/apache/doris/pull/50149#discussion_r2052289517
##########
be/src/vec/functions/function_string.h:
##########
@@ -1804,24 +1804,34 @@ class FunctionSubstringIndex : public IFunction {
const auto* str_col = assert_cast<const
ColumnString*>(content_column.get());
- [[maybe_unused]] const auto& [delimiter_col, delimiter_const] =
+ // Handle both constant and non-constant delimiter parameters
+ ColumnPtr delimiter_column_ptr;
+ bool delimiter_const = false;
+ std::tie(delimiter_column_ptr, delimiter_const) =
unpack_if_const(block.get_by_position(arguments[1]).column);
- auto delimiter = delimiter_col->get_data_at(0);
- int32_t delimiter_size = delimiter.size;
+ const auto* delimiter_col = assert_cast<const
ColumnString*>(delimiter_column_ptr.get());
- [[maybe_unused]] const auto& [part_num_col, part_const] =
+ ColumnPtr part_num_column_ptr;
+ bool part_num_const = false;
+ std::tie(part_num_column_ptr, part_num_const) =
unpack_if_const(block.get_by_position(arguments[2]).column);
- auto part_number = *((int*)part_num_col->get_data_at(0).data);
+ const auto* part_num_col = part_num_column_ptr.get();
Review Comment:
const ColumnVector<int32>* part_num_col = assert_cast<const
ColumnVector<int32>*>(part_num_column_ptr.get());
##########
be/src/vec/functions/function_string.h:
##########
@@ -1847,12 +1857,10 @@ class FunctionSubstringIndex : public IFunction {
StringOP::push_value_string(std::string_view(str.data,
str.size), i,
res_chars, res_offsets);
}
- }
- } else {
- StringRef delimiter_ref(delimiter);
- StringSearch search(&delimiter_ref);
- for (size_t i = 0; i < input_rows_count; ++i) {
- auto str = str_col->get_data_at(i);
+ } else {
+ // For multi-character delimiters
+ StringRef delimiter_ref(delimiter);
+ StringSearch search(&delimiter_ref);
Review Comment:
maybe performance regression when second arg is a Literal
##########
be/src/vec/functions/function_string.h:
##########
@@ -1804,24 +1804,34 @@ class FunctionSubstringIndex : public IFunction {
const auto* str_col = assert_cast<const
ColumnString*>(content_column.get());
- [[maybe_unused]] const auto& [delimiter_col, delimiter_const] =
+ // Handle both constant and non-constant delimiter parameters
+ ColumnPtr delimiter_column_ptr;
+ bool delimiter_const = false;
+ std::tie(delimiter_column_ptr, delimiter_const) =
unpack_if_const(block.get_by_position(arguments[1]).column);
- auto delimiter = delimiter_col->get_data_at(0);
- int32_t delimiter_size = delimiter.size;
+ const auto* delimiter_col = assert_cast<const
ColumnString*>(delimiter_column_ptr.get());
- [[maybe_unused]] const auto& [part_num_col, part_const] =
+ ColumnPtr part_num_column_ptr;
+ bool part_num_const = false;
+ std::tie(part_num_column_ptr, part_num_const) =
unpack_if_const(block.get_by_position(arguments[2]).column);
- auto part_number = *((int*)part_num_col->get_data_at(0).data);
+ const auto* part_num_col = part_num_column_ptr.get();
- if (part_number == 0 || delimiter_size == 0) {
- for (size_t i = 0; i < input_rows_count; ++i) {
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ auto str = str_col->get_data_at(i);
+ auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 :
i);
+ int32_t delimiter_size = delimiter.size;
+
+ const auto* part_num_data =
part_num_col->get_data_at(part_num_const ? 0 : i).data;
+ auto part_number = *reinterpret_cast<const int*>(part_num_data);
Review Comment:
do not use reinterpret_cast
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]