github-actions[bot] commented on code in PR #38954: URL: https://github.com/apache/doris/pull/38954#discussion_r1708775995
########## be/src/vec/aggregate_functions/aggregate_function_window_funnel.h: ########## @@ -72,138 +75,238 @@ } } -template <typename DateValueType, typename NativeType> +template <TypeIndex TYPE_INDEX, typename NativeType> struct WindowFunnelState { - std::vector<std::pair<DateValueType, int>> events; - int max_event_level; - bool sorted; + using DateValueType = std::conditional_t<TYPE_INDEX == TypeIndex::DateTimeV2, + DateV2Value<DateTimeV2ValueType>, VecDateTimeValue>; + int event_count = 0; int64_t window; - WindowFunnelMode window_funnel_mode; bool enable_mode; + WindowFunnelMode window_funnel_mode; + mutable MutableColumnPtr timestamp_column; + mutable MutableColumns event_columns; + ColumnVector<NativeType>::Container* timestamp_column_data; + std::vector<ColumnVector<UInt8>::Container*> event_columns_datas; + Block block; + SortDescription sort_description {1}; + bool sorted; + bool is_merge; WindowFunnelState() { - sorted = true; - max_event_level = 0; + event_count = 0; window = 0; window_funnel_mode = WindowFunnelMode::INVALID; + + sort_description[0].column_number = 0; + sort_description[0].direction = 1; + sort_description[0].nulls_direction = -1; + sorted = false; + is_merge = false; + } + WindowFunnelState(int arg_event_count) : WindowFunnelState() { + timestamp_column = ColumnVector<NativeType>::create(); + timestamp_column_data = + &assert_cast<ColumnVector<NativeType>&>(*timestamp_column).get_data(); + event_count = arg_event_count; + event_columns.resize(event_count); + for (int i = 0; i < event_count; i++) { + event_columns[i] = ColumnVector<UInt8>::create(); + event_columns_datas.emplace_back( + &assert_cast<ColumnVector<UInt8>&>(*event_columns[i]).get_data()); + } } void reset() { - sorted = true; - max_event_level = 0; window = 0; - events.shrink_to_fit(); + timestamp_column->clear(); + for (auto& column : event_columns) { + column->clear(); + } + block.clear_column_data(); + sorted = false; + is_merge = false; } - void add(const DateValueType& timestamp, int event_idx, int event_num, int64_t win, - WindowFunnelMode mode) { + void add(const IColumn** arg_columns, ssize_t row_num, int64_t win, WindowFunnelMode mode) { window = win; - max_event_level = event_num; window_funnel_mode = enable_mode ? mode : WindowFunnelMode::DEFAULT; - if (sorted && events.size() > 0) { - if (events.back().first == timestamp) { - sorted = events.back().second <= event_idx; - } else { - sorted = events.back().first < timestamp; - } + timestamp_column_data->push_back( + assert_cast<const ColumnVector<NativeType>&>(*arg_columns[2]).get_data()[row_num]); + for (int i = 0; i < event_count; i++) { + event_columns_datas[i]->push_back( + assert_cast<const ColumnVector<UInt8>&>(*arg_columns[3 + i]) + .get_data()[row_num]); } - events.emplace_back(timestamp, event_idx); } void sort() { if (sorted) { return; } - std::stable_sort(events.begin(), events.end()); - } + if (!is_merge) { + Block tmp_block; + tmp_block.insert({std::move(timestamp_column), + DataTypeFactory::instance().create_data_type(TYPE_INDEX), + "timestamp"}); + for (int i = 0; i < event_count; i++) { + tmp_block.insert({std::move(event_columns[i]), + DataTypeFactory::instance().create_data_type(TypeIndex::UInt8), + "event_" + std::to_string(i)}); + } - int get() const { - if (max_event_level == 0) { - return 0; + block = tmp_block.clone_without_columns(); + sort_block(tmp_block, block, sort_description, 0); + } else { + auto tmp_block = block.clone_without_columns(); + sort_block(block, tmp_block, sort_description, 0); + block = std::move(tmp_block); } - std::vector<std::optional<std::pair<DateValueType, DateValueType>>> events_timestamp( - max_event_level); - bool is_first_set = false; - for (int64_t i = 0; i < events.size(); i++) { - const int& event_idx = events[i].second; - const DateValueType& timestamp = events[i].first; - if (event_idx == 0) { - events_timestamp[0] = {timestamp, timestamp}; - is_first_set = true; - continue; - } - if (window_funnel_mode == WindowFunnelMode::DEDUPLICATION && - events_timestamp[event_idx].has_value()) { - break; - } - if (events_timestamp[event_idx - 1].has_value()) { - const DateValueType& first_timestamp = - events_timestamp[event_idx - 1].value().first; - DateValueType last_timestamp = first_timestamp; - TimeInterval interval(SECOND, window, false); - last_timestamp.template date_add_interval<SECOND>(interval); - - if (window_funnel_mode != WindowFunnelMode::INCREASE) { - if (timestamp <= last_timestamp) { - events_timestamp[event_idx] = {first_timestamp, timestamp}; - if (event_idx + 1 == max_event_level) { - // Usually, max event level is small. - return max_event_level; + sorted = true; + } + + template <WindowFunnelMode WINDOW_FUNNEL_MODE> + int _match_event_list(size_t& start_row, size_t row_count, Review Comment: warning: function '_match_event_list' exceeds recommended size/complexity thresholds [readability-function-size] ```cpp int _match_event_list(size_t& start_row, size_t row_count, ^ ``` <details> <summary>Additional context</summary> **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:168:** 86 lines including whitespace and comments (threshold 80) ```cpp int _match_event_list(size_t& start_row, size_t row_count, ^ ``` </details> ########## be/src/vec/aggregate_functions/aggregate_function_window_funnel.h: ########## @@ -72,138 +75,238 @@ WindowFunnelMode string_to_window_funnel_mode(const String& string) { } } -template <typename DateValueType, typename NativeType> +template <TypeIndex TYPE_INDEX, typename NativeType> struct WindowFunnelState { - std::vector<std::pair<DateValueType, int>> events; - int max_event_level; - bool sorted; + using DateValueType = std::conditional_t<TYPE_INDEX == TypeIndex::DateTimeV2, + DateV2Value<DateTimeV2ValueType>, VecDateTimeValue>; + int event_count = 0; int64_t window; - WindowFunnelMode window_funnel_mode; bool enable_mode; + WindowFunnelMode window_funnel_mode; + mutable MutableColumnPtr timestamp_column; + mutable MutableColumns event_columns; + ColumnVector<NativeType>::Container* timestamp_column_data; + std::vector<ColumnVector<UInt8>::Container*> event_columns_datas; + Block block; + SortDescription sort_description {1}; + bool sorted; + bool is_merge; WindowFunnelState() { - sorted = true; - max_event_level = 0; + event_count = 0; window = 0; window_funnel_mode = WindowFunnelMode::INVALID; + + sort_description[0].column_number = 0; + sort_description[0].direction = 1; + sort_description[0].nulls_direction = -1; + sorted = false; + is_merge = false; + } + WindowFunnelState(int arg_event_count) : WindowFunnelState() { + timestamp_column = ColumnVector<NativeType>::create(); + timestamp_column_data = + &assert_cast<ColumnVector<NativeType>&>(*timestamp_column).get_data(); + event_count = arg_event_count; + event_columns.resize(event_count); + for (int i = 0; i < event_count; i++) { + event_columns[i] = ColumnVector<UInt8>::create(); + event_columns_datas.emplace_back( + &assert_cast<ColumnVector<UInt8>&>(*event_columns[i]).get_data()); + } } void reset() { - sorted = true; - max_event_level = 0; window = 0; - events.shrink_to_fit(); + timestamp_column->clear(); + for (auto& column : event_columns) { + column->clear(); + } + block.clear_column_data(); + sorted = false; + is_merge = false; } - void add(const DateValueType& timestamp, int event_idx, int event_num, int64_t win, - WindowFunnelMode mode) { + void add(const IColumn** arg_columns, ssize_t row_num, int64_t win, WindowFunnelMode mode) { window = win; - max_event_level = event_num; window_funnel_mode = enable_mode ? mode : WindowFunnelMode::DEFAULT; - if (sorted && events.size() > 0) { - if (events.back().first == timestamp) { - sorted = events.back().second <= event_idx; - } else { - sorted = events.back().first < timestamp; - } + timestamp_column_data->push_back( + assert_cast<const ColumnVector<NativeType>&>(*arg_columns[2]).get_data()[row_num]); + for (int i = 0; i < event_count; i++) { + event_columns_datas[i]->push_back( + assert_cast<const ColumnVector<UInt8>&>(*arg_columns[3 + i]) + .get_data()[row_num]); } - events.emplace_back(timestamp, event_idx); } void sort() { if (sorted) { return; } - std::stable_sort(events.begin(), events.end()); - } + if (!is_merge) { + Block tmp_block; + tmp_block.insert({std::move(timestamp_column), + DataTypeFactory::instance().create_data_type(TYPE_INDEX), + "timestamp"}); + for (int i = 0; i < event_count; i++) { + tmp_block.insert({std::move(event_columns[i]), + DataTypeFactory::instance().create_data_type(TypeIndex::UInt8), + "event_" + std::to_string(i)}); + } - int get() const { - if (max_event_level == 0) { - return 0; + block = tmp_block.clone_without_columns(); + sort_block(tmp_block, block, sort_description, 0); + } else { + auto tmp_block = block.clone_without_columns(); + sort_block(block, tmp_block, sort_description, 0); + block = std::move(tmp_block); } - std::vector<std::optional<std::pair<DateValueType, DateValueType>>> events_timestamp( - max_event_level); - bool is_first_set = false; - for (int64_t i = 0; i < events.size(); i++) { - const int& event_idx = events[i].second; - const DateValueType& timestamp = events[i].first; - if (event_idx == 0) { - events_timestamp[0] = {timestamp, timestamp}; - is_first_set = true; - continue; - } - if (window_funnel_mode == WindowFunnelMode::DEDUPLICATION && - events_timestamp[event_idx].has_value()) { - break; - } - if (events_timestamp[event_idx - 1].has_value()) { - const DateValueType& first_timestamp = - events_timestamp[event_idx - 1].value().first; - DateValueType last_timestamp = first_timestamp; - TimeInterval interval(SECOND, window, false); - last_timestamp.template date_add_interval<SECOND>(interval); - - if (window_funnel_mode != WindowFunnelMode::INCREASE) { - if (timestamp <= last_timestamp) { - events_timestamp[event_idx] = {first_timestamp, timestamp}; - if (event_idx + 1 == max_event_level) { - // Usually, max event level is small. - return max_event_level; + sorted = true; + } + + template <WindowFunnelMode WINDOW_FUNNEL_MODE> + int _match_event_list(size_t& start_row, size_t row_count, Review Comment: warning: function '_match_event_list' has cognitive complexity of 63 (threshold 50) [readability-function-cognitive-complexity] ```cpp int _match_event_list(size_t& start_row, size_t row_count, ^ ``` <details> <summary>Additional context</summary> **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:181:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (match_row < row_count) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:190:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp for (; column_idx < event_count + 1; column_idx++) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:194:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::FIXED) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:196:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if (event_data[match_row] == 1) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:199:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp if (current_timestamp <= end_timestamp) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:207:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if (match_row < row_count) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:211:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if (is_matched) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:212:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::INCREASE) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:216:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if (!is_matched) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:219:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::INCREASE) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:223:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::DEDUPLICATION) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:225:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp if (match_row != last_match_row + 1) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:227:** +6, including nesting penalty of 5, nesting level increased to 6 ```cpp for (int tmp_column_idx = 1; tmp_column_idx < column_idx; ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:237:** +7, including nesting penalty of 6, nesting level increased to 7 ```cpp if (dup_match_row >= rows_to_match) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:243:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp if (is_dup) { ^ ``` **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:249:** +1, nesting level increased to 3 ```cpp } else { ^ ``` </details> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org