github-actions[bot] commented on code in PR #38954:
URL: https://github.com/apache/doris/pull/38954#discussion_r1708775995


##########
be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:
##########
@@ -72,138 +75,238 @@
     }
 }
 
-template <typename DateValueType, typename NativeType>
+template <TypeIndex TYPE_INDEX, typename NativeType>
 struct WindowFunnelState {
-    std::vector<std::pair<DateValueType, int>> events;
-    int max_event_level;
-    bool sorted;
+    using DateValueType = std::conditional_t<TYPE_INDEX == 
TypeIndex::DateTimeV2,
+                                             DateV2Value<DateTimeV2ValueType>, 
VecDateTimeValue>;
+    int event_count = 0;
     int64_t window;
-    WindowFunnelMode window_funnel_mode;
     bool enable_mode;
+    WindowFunnelMode window_funnel_mode;
+    mutable MutableColumnPtr timestamp_column;
+    mutable MutableColumns event_columns;
+    ColumnVector<NativeType>::Container* timestamp_column_data;
+    std::vector<ColumnVector<UInt8>::Container*> event_columns_datas;
+    Block block;
+    SortDescription sort_description {1};
+    bool sorted;
+    bool is_merge;
 
     WindowFunnelState() {
-        sorted = true;
-        max_event_level = 0;
+        event_count = 0;
         window = 0;
         window_funnel_mode = WindowFunnelMode::INVALID;
+
+        sort_description[0].column_number = 0;
+        sort_description[0].direction = 1;
+        sort_description[0].nulls_direction = -1;
+        sorted = false;
+        is_merge = false;
+    }
+    WindowFunnelState(int arg_event_count) : WindowFunnelState() {
+        timestamp_column = ColumnVector<NativeType>::create();
+        timestamp_column_data =
+                
&assert_cast<ColumnVector<NativeType>&>(*timestamp_column).get_data();
+        event_count = arg_event_count;
+        event_columns.resize(event_count);
+        for (int i = 0; i < event_count; i++) {
+            event_columns[i] = ColumnVector<UInt8>::create();
+            event_columns_datas.emplace_back(
+                    
&assert_cast<ColumnVector<UInt8>&>(*event_columns[i]).get_data());
+        }
     }
 
     void reset() {
-        sorted = true;
-        max_event_level = 0;
         window = 0;
-        events.shrink_to_fit();
+        timestamp_column->clear();
+        for (auto& column : event_columns) {
+            column->clear();
+        }
+        block.clear_column_data();
+        sorted = false;
+        is_merge = false;
     }
 
-    void add(const DateValueType& timestamp, int event_idx, int event_num, 
int64_t win,
-             WindowFunnelMode mode) {
+    void add(const IColumn** arg_columns, ssize_t row_num, int64_t win, 
WindowFunnelMode mode) {
         window = win;
-        max_event_level = event_num;
         window_funnel_mode = enable_mode ? mode : WindowFunnelMode::DEFAULT;
 
-        if (sorted && events.size() > 0) {
-            if (events.back().first == timestamp) {
-                sorted = events.back().second <= event_idx;
-            } else {
-                sorted = events.back().first < timestamp;
-            }
+        timestamp_column_data->push_back(
+                assert_cast<const 
ColumnVector<NativeType>&>(*arg_columns[2]).get_data()[row_num]);
+        for (int i = 0; i < event_count; i++) {
+            event_columns_datas[i]->push_back(
+                    assert_cast<const ColumnVector<UInt8>&>(*arg_columns[3 + 
i])
+                            .get_data()[row_num]);
         }
-        events.emplace_back(timestamp, event_idx);
     }
 
     void sort() {
         if (sorted) {
             return;
         }
-        std::stable_sort(events.begin(), events.end());
-    }
+        if (!is_merge) {
+            Block tmp_block;
+            tmp_block.insert({std::move(timestamp_column),
+                              
DataTypeFactory::instance().create_data_type(TYPE_INDEX),
+                              "timestamp"});
+            for (int i = 0; i < event_count; i++) {
+                tmp_block.insert({std::move(event_columns[i]),
+                                  
DataTypeFactory::instance().create_data_type(TypeIndex::UInt8),
+                                  "event_" + std::to_string(i)});
+            }
 
-    int get() const {
-        if (max_event_level == 0) {
-            return 0;
+            block = tmp_block.clone_without_columns();
+            sort_block(tmp_block, block, sort_description, 0);
+        } else {
+            auto tmp_block = block.clone_without_columns();
+            sort_block(block, tmp_block, sort_description, 0);
+            block = std::move(tmp_block);
         }
-        std::vector<std::optional<std::pair<DateValueType, DateValueType>>> 
events_timestamp(
-                max_event_level);
-        bool is_first_set = false;
-        for (int64_t i = 0; i < events.size(); i++) {
-            const int& event_idx = events[i].second;
-            const DateValueType& timestamp = events[i].first;
-            if (event_idx == 0) {
-                events_timestamp[0] = {timestamp, timestamp};
-                is_first_set = true;
-                continue;
-            }
-            if (window_funnel_mode == WindowFunnelMode::DEDUPLICATION &&
-                events_timestamp[event_idx].has_value()) {
-                break;
-            }
-            if (events_timestamp[event_idx - 1].has_value()) {
-                const DateValueType& first_timestamp =
-                        events_timestamp[event_idx - 1].value().first;
-                DateValueType last_timestamp = first_timestamp;
-                TimeInterval interval(SECOND, window, false);
-                last_timestamp.template date_add_interval<SECOND>(interval);
-
-                if (window_funnel_mode != WindowFunnelMode::INCREASE) {
-                    if (timestamp <= last_timestamp) {
-                        events_timestamp[event_idx] = {first_timestamp, 
timestamp};
-                        if (event_idx + 1 == max_event_level) {
-                            // Usually, max event level is small.
-                            return max_event_level;
+        sorted = true;
+    }
+
+    template <WindowFunnelMode WINDOW_FUNNEL_MODE>
+    int _match_event_list(size_t& start_row, size_t row_count,

Review Comment:
   warning: function '_match_event_list' exceeds recommended size/complexity 
thresholds [readability-function-size]
   ```cpp
       int _match_event_list(size_t& start_row, size_t row_count,
           ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:168:** 
86 lines including whitespace and comments (threshold 80)
   ```cpp
       int _match_event_list(size_t& start_row, size_t row_count,
           ^
   ```
   
   </details>
   



##########
be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:
##########
@@ -72,138 +75,238 @@ WindowFunnelMode string_to_window_funnel_mode(const 
String& string) {
     }
 }
 
-template <typename DateValueType, typename NativeType>
+template <TypeIndex TYPE_INDEX, typename NativeType>
 struct WindowFunnelState {
-    std::vector<std::pair<DateValueType, int>> events;
-    int max_event_level;
-    bool sorted;
+    using DateValueType = std::conditional_t<TYPE_INDEX == 
TypeIndex::DateTimeV2,
+                                             DateV2Value<DateTimeV2ValueType>, 
VecDateTimeValue>;
+    int event_count = 0;
     int64_t window;
-    WindowFunnelMode window_funnel_mode;
     bool enable_mode;
+    WindowFunnelMode window_funnel_mode;
+    mutable MutableColumnPtr timestamp_column;
+    mutable MutableColumns event_columns;
+    ColumnVector<NativeType>::Container* timestamp_column_data;
+    std::vector<ColumnVector<UInt8>::Container*> event_columns_datas;
+    Block block;
+    SortDescription sort_description {1};
+    bool sorted;
+    bool is_merge;
 
     WindowFunnelState() {
-        sorted = true;
-        max_event_level = 0;
+        event_count = 0;
         window = 0;
         window_funnel_mode = WindowFunnelMode::INVALID;
+
+        sort_description[0].column_number = 0;
+        sort_description[0].direction = 1;
+        sort_description[0].nulls_direction = -1;
+        sorted = false;
+        is_merge = false;
+    }
+    WindowFunnelState(int arg_event_count) : WindowFunnelState() {
+        timestamp_column = ColumnVector<NativeType>::create();
+        timestamp_column_data =
+                
&assert_cast<ColumnVector<NativeType>&>(*timestamp_column).get_data();
+        event_count = arg_event_count;
+        event_columns.resize(event_count);
+        for (int i = 0; i < event_count; i++) {
+            event_columns[i] = ColumnVector<UInt8>::create();
+            event_columns_datas.emplace_back(
+                    
&assert_cast<ColumnVector<UInt8>&>(*event_columns[i]).get_data());
+        }
     }
 
     void reset() {
-        sorted = true;
-        max_event_level = 0;
         window = 0;
-        events.shrink_to_fit();
+        timestamp_column->clear();
+        for (auto& column : event_columns) {
+            column->clear();
+        }
+        block.clear_column_data();
+        sorted = false;
+        is_merge = false;
     }
 
-    void add(const DateValueType& timestamp, int event_idx, int event_num, 
int64_t win,
-             WindowFunnelMode mode) {
+    void add(const IColumn** arg_columns, ssize_t row_num, int64_t win, 
WindowFunnelMode mode) {
         window = win;
-        max_event_level = event_num;
         window_funnel_mode = enable_mode ? mode : WindowFunnelMode::DEFAULT;
 
-        if (sorted && events.size() > 0) {
-            if (events.back().first == timestamp) {
-                sorted = events.back().second <= event_idx;
-            } else {
-                sorted = events.back().first < timestamp;
-            }
+        timestamp_column_data->push_back(
+                assert_cast<const 
ColumnVector<NativeType>&>(*arg_columns[2]).get_data()[row_num]);
+        for (int i = 0; i < event_count; i++) {
+            event_columns_datas[i]->push_back(
+                    assert_cast<const ColumnVector<UInt8>&>(*arg_columns[3 + 
i])
+                            .get_data()[row_num]);
         }
-        events.emplace_back(timestamp, event_idx);
     }
 
     void sort() {
         if (sorted) {
             return;
         }
-        std::stable_sort(events.begin(), events.end());
-    }
+        if (!is_merge) {
+            Block tmp_block;
+            tmp_block.insert({std::move(timestamp_column),
+                              
DataTypeFactory::instance().create_data_type(TYPE_INDEX),
+                              "timestamp"});
+            for (int i = 0; i < event_count; i++) {
+                tmp_block.insert({std::move(event_columns[i]),
+                                  
DataTypeFactory::instance().create_data_type(TypeIndex::UInt8),
+                                  "event_" + std::to_string(i)});
+            }
 
-    int get() const {
-        if (max_event_level == 0) {
-            return 0;
+            block = tmp_block.clone_without_columns();
+            sort_block(tmp_block, block, sort_description, 0);
+        } else {
+            auto tmp_block = block.clone_without_columns();
+            sort_block(block, tmp_block, sort_description, 0);
+            block = std::move(tmp_block);
         }
-        std::vector<std::optional<std::pair<DateValueType, DateValueType>>> 
events_timestamp(
-                max_event_level);
-        bool is_first_set = false;
-        for (int64_t i = 0; i < events.size(); i++) {
-            const int& event_idx = events[i].second;
-            const DateValueType& timestamp = events[i].first;
-            if (event_idx == 0) {
-                events_timestamp[0] = {timestamp, timestamp};
-                is_first_set = true;
-                continue;
-            }
-            if (window_funnel_mode == WindowFunnelMode::DEDUPLICATION &&
-                events_timestamp[event_idx].has_value()) {
-                break;
-            }
-            if (events_timestamp[event_idx - 1].has_value()) {
-                const DateValueType& first_timestamp =
-                        events_timestamp[event_idx - 1].value().first;
-                DateValueType last_timestamp = first_timestamp;
-                TimeInterval interval(SECOND, window, false);
-                last_timestamp.template date_add_interval<SECOND>(interval);
-
-                if (window_funnel_mode != WindowFunnelMode::INCREASE) {
-                    if (timestamp <= last_timestamp) {
-                        events_timestamp[event_idx] = {first_timestamp, 
timestamp};
-                        if (event_idx + 1 == max_event_level) {
-                            // Usually, max event level is small.
-                            return max_event_level;
+        sorted = true;
+    }
+
+    template <WindowFunnelMode WINDOW_FUNNEL_MODE>
+    int _match_event_list(size_t& start_row, size_t row_count,

Review Comment:
   warning: function '_match_event_list' has cognitive complexity of 63 
(threshold 50) [readability-function-cognitive-complexity]
   ```cpp
       int _match_event_list(size_t& start_row, size_t row_count,
           ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:181:** 
+1, including nesting penalty of 0, nesting level increased to 1
   ```cpp
           if (match_row < row_count) {
           ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:190:** 
+2, including nesting penalty of 1, nesting level increased to 2
   ```cpp
               for (; column_idx < event_count + 1; column_idx++) {
               ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:194:** 
+3, including nesting penalty of 2, nesting level increased to 3
   ```cpp
                   if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::FIXED) 
{
                   ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:196:** 
+4, including nesting penalty of 3, nesting level increased to 4
   ```cpp
                       if (event_data[match_row] == 1) {
                       ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:199:** 
+5, including nesting penalty of 4, nesting level increased to 5
   ```cpp
                           if (current_timestamp <= end_timestamp) {
                           ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:207:** 
+3, including nesting penalty of 2, nesting level increased to 3
   ```cpp
                   if (match_row < row_count) {
                   ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:211:** 
+4, including nesting penalty of 3, nesting level increased to 4
   ```cpp
                       if (is_matched) {
                       ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:212:** 
+5, including nesting penalty of 4, nesting level increased to 5
   ```cpp
                           if constexpr (WINDOW_FUNNEL_MODE == 
WindowFunnelMode::INCREASE) {
                           ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:216:** 
+4, including nesting penalty of 3, nesting level increased to 4
   ```cpp
                       if (!is_matched) {
                       ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:219:** 
+4, including nesting penalty of 3, nesting level increased to 4
   ```cpp
                       if constexpr (WINDOW_FUNNEL_MODE == 
WindowFunnelMode::INCREASE) {
                       ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:223:** 
+4, including nesting penalty of 3, nesting level increased to 4
   ```cpp
                       if constexpr (WINDOW_FUNNEL_MODE == 
WindowFunnelMode::DEDUPLICATION) {
                       ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:225:** 
+5, including nesting penalty of 4, nesting level increased to 5
   ```cpp
                           if (match_row != last_match_row + 1) {
                           ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:227:** 
+6, including nesting penalty of 5, nesting level increased to 6
   ```cpp
                               for (int tmp_column_idx = 1; tmp_column_idx < 
column_idx;
                               ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:237:** 
+7, including nesting penalty of 6, nesting level increased to 7
   ```cpp
                                   if (dup_match_row >= rows_to_match) {
                                   ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:243:** 
+5, including nesting penalty of 4, nesting level increased to 5
   ```cpp
                           if (is_dup) {
                           ^
   ```
   **be/src/vec/aggregate_functions/aggregate_function_window_funnel.h:249:** 
+1, nesting level increased to 3
   ```cpp
                   } else {
                     ^
   ```
   
   </details>
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to