HappenLee commented on code in PR #49212: URL: https://github.com/apache/doris/pull/49212#discussion_r2004749195
########## be/src/vec/exprs/lambda_function/varray_map_function.cpp: ########## @@ -184,57 +184,73 @@ class ArrayMapFunction : public LambdaFunction { data_types.push_back(col_type.get_nested_type()); } - ColumnPtr result_col = nullptr; + MutableColumnPtr result_col = nullptr; DataTypePtr res_type; std::string res_name; //process first row - args.array_start = (*args.offsets_ptr)[args.current_row_idx - 1]; - args.cur_size = (*args.offsets_ptr)[args.current_row_idx] - args.array_start; - - while (args.current_row_idx < block->rows()) { - Block lambda_block; - for (int i = 0; i < names.size(); i++) { - ColumnWithTypeAndName data_column; - if (_contains_column_id(args, i) || i >= gap) { - data_column = ColumnWithTypeAndName(data_types[i], names[i]); + args_info.array_start = (*args_info.offsets_ptr)[args_info.current_row_idx - 1]; + args_info.cur_size = + (*args_info.offsets_ptr)[args_info.current_row_idx] - args_info.array_start; + + // lambda block to exectute the lambda, and reuse the memory + Block lambda_block; + auto column_size = names.size(); + MutableColumns columns(column_size); + while (args_info.current_row_idx < block->rows()) { + bool mem_reuse = lambda_block.mem_reuse(); + for (int i = 0; i < column_size; i++) { + if (mem_reuse) { + columns[i] = lambda_block.get_by_position(i).column->assume_mutable(); } else { - data_column = ColumnWithTypeAndName( - data_types[i]->create_column_const_with_default_value(0), data_types[i], - names[i]); + if (_contains_column_id(output_slot_ref_indexs, i) || i >= gap) { + // TODO: maybe could create const column, so not insert_many_from when extand data + // but now here handle batch_size of array nested data every time, so maybe have different rows + columns[i] = data_types[i]->create_column(); + } else { + columns[i] = data_types[i] + ->create_column_const_with_default_value(0) + ->assume_mutable(); + } } - lambda_block.insert(std::move(data_column)); } - - MutableColumns columns = lambda_block.mutate_columns(); + // 
batch_size of array nested data every time inorder to avoid memory overflow while (columns[gap]->size() < batch_size) { long max_step = batch_size - columns[gap]->size(); - long current_step = - std::min(max_step, (long)(args.cur_size - args.current_offset_in_array)); - size_t pos = args.array_start + args.current_offset_in_array; + long current_step = std::min( + max_step, (long)(args_info.cur_size - args_info.current_offset_in_array)); + size_t pos = args_info.array_start + args_info.current_offset_in_array; for (int i = 0; i < arguments.size(); ++i) { columns[gap + i]->insert_range_from(*lambda_datas[i], pos, current_step); } - args.current_offset_in_array += current_step; - args.current_repeat_times += current_step; - if (args.current_offset_in_array >= args.cur_size) { - args.current_row_eos = true; + args_info.current_offset_in_array += current_step; + args_info.current_repeat_times += current_step; + if (args_info.current_offset_in_array >= args_info.cur_size) { + args_info.current_row_eos = true; } - _extend_data(columns, block, args, gap); - if (args.current_row_eos) { - args.current_row_idx++; - args.current_offset_in_array = 0; - if (args.current_row_idx >= block->rows()) { + _extend_data(columns, block, args_info.current_repeat_times, gap, Review Comment: Set `args_info.current_repeat_times = 0;` inside the `_extend_data` function. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org