HappenLee commented on code in PR #49212:
URL: https://github.com/apache/doris/pull/49212#discussion_r2004749195


##########
be/src/vec/exprs/lambda_function/varray_map_function.cpp:
##########
@@ -184,57 +184,73 @@ class ArrayMapFunction : public LambdaFunction {
             data_types.push_back(col_type.get_nested_type());
         }
 
-        ColumnPtr result_col = nullptr;
+        MutableColumnPtr result_col = nullptr;
         DataTypePtr res_type;
         std::string res_name;
 
         //process first row
-        args.array_start = (*args.offsets_ptr)[args.current_row_idx - 1];
-        args.cur_size = (*args.offsets_ptr)[args.current_row_idx] - 
args.array_start;
-
-        while (args.current_row_idx < block->rows()) {
-            Block lambda_block;
-            for (int i = 0; i < names.size(); i++) {
-                ColumnWithTypeAndName data_column;
-                if (_contains_column_id(args, i) || i >= gap) {
-                    data_column = ColumnWithTypeAndName(data_types[i], 
names[i]);
+        args_info.array_start = 
(*args_info.offsets_ptr)[args_info.current_row_idx - 1];
+        args_info.cur_size =
+                (*args_info.offsets_ptr)[args_info.current_row_idx] - 
args_info.array_start;
+
+        // lambda block to exectute the lambda, and reuse the memory
+        Block lambda_block;
+        auto column_size = names.size();
+        MutableColumns columns(column_size);
+        while (args_info.current_row_idx < block->rows()) {
+            bool mem_reuse = lambda_block.mem_reuse();
+            for (int i = 0; i < column_size; i++) {
+                if (mem_reuse) {
+                    columns[i] = 
lambda_block.get_by_position(i).column->assume_mutable();
                 } else {
-                    data_column = ColumnWithTypeAndName(
-                            
data_types[i]->create_column_const_with_default_value(0), data_types[i],
-                            names[i]);
+                    if (_contains_column_id(output_slot_ref_indexs, i) || i >= 
gap) {
+                        // TODO: maybe could create const column, so not 
insert_many_from when extand data
+                        // but now here handle batch_size of array nested data 
every time, so maybe have different rows
+                        columns[i] = data_types[i]->create_column();
+                    } else {
+                        columns[i] = data_types[i]
+                                             
->create_column_const_with_default_value(0)
+                                             ->assume_mutable();
+                    }
                 }
-                lambda_block.insert(std::move(data_column));
             }
-
-            MutableColumns columns = lambda_block.mutate_columns();
+            // batch_size of array nested data every time inorder to avoid 
memory overflow
             while (columns[gap]->size() < batch_size) {
                 long max_step = batch_size - columns[gap]->size();
-                long current_step =
-                        std::min(max_step, (long)(args.cur_size - 
args.current_offset_in_array));
-                size_t pos = args.array_start + args.current_offset_in_array;
+                long current_step = std::min(
+                        max_step, (long)(args_info.cur_size - 
args_info.current_offset_in_array));
+                size_t pos = args_info.array_start + 
args_info.current_offset_in_array;
                 for (int i = 0; i < arguments.size(); ++i) {
                     columns[gap + i]->insert_range_from(*lambda_datas[i], pos, 
current_step);
                 }
-                args.current_offset_in_array += current_step;
-                args.current_repeat_times += current_step;
-                if (args.current_offset_in_array >= args.cur_size) {
-                    args.current_row_eos = true;
+                args_info.current_offset_in_array += current_step;
+                args_info.current_repeat_times += current_step;
+                if (args_info.current_offset_in_array >= args_info.cur_size) {
+                    args_info.current_row_eos = true;
                 }
-                _extend_data(columns, block, args, gap);
-                if (args.current_row_eos) {
-                    args.current_row_idx++;
-                    args.current_offset_in_array = 0;
-                    if (args.current_row_idx >= block->rows()) {
+                _extend_data(columns, block, args_info.current_repeat_times, 
gap,

Review Comment:
   Set `args_info.current_repeat_times = 0;` inside the `_extend_data` function.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to