zhangstar333 commented on code in PR #44365: URL: https://github.com/apache/doris/pull/44365#discussion_r1870821721
########## be/src/vec/exprs/lambda_function/varray_map_function.cpp: ########## @@ -113,75 +158,175 @@ class ArrayMapFunction : public LambdaFunction { const auto& off_data = assert_cast<const ColumnArray::ColumnOffsets&>( col_array.get_offsets_column()); array_column_offset = off_data.clone_resized(col_array.get_offsets_column().size()); + args.offsets_ptr = &col_array.get_offsets(); } else { // select array_map((x,y)->x+y,c_array1,[0,1,2,3]) from array_test2; // c_array1: [0,1,2,3,4,5,6,7,8,9] const auto& array_offsets = assert_cast<const ColumnArray::ColumnOffsets&>(*first_array_offsets) - .get_data(); + .get_data(); if (nested_array_column_rows != col_array.get_data_ptr()->size() || (!array_offsets.empty() && memcmp(array_offsets.data(), col_array.get_offsets().data(), sizeof(array_offsets[0]) * array_offsets.size()) != 0)) { return Status::InvalidArgument( - "in array map function, the input column size " - "are " - "not equal completely, nested column data rows 1st size is {}, {}th " - "size is {}.", - nested_array_column_rows, i + 1, col_array.get_data_ptr()->size()); + "in array map function, the input column size " + "are " + "not equal completely, nested column data rows 1st size is {}, {}th " + "size is {}.", + nested_array_column_rows, i + 1, col_array.get_data_ptr()->size()); } } - - // insert the data column to the new block - ColumnWithTypeAndName data_column {col_array.get_data_ptr(), col_type.get_nested_type(), - "R" + array_column_type_name.name}; - lambda_block.insert(std::move(data_column)); + lambda_datas[i] = col_array.get_data_ptr(); + names.push_back("R" + array_column_type_name.name); + data_types.push_back(col_type.get_nested_type()); } - //3. child[0]->execute(new_block) - RETURN_IF_ERROR(children[0]->execute(context, &lambda_block, result_column_id)); + ColumnPtr result_col = nullptr; + DataTypePtr res_type; + std::string res_name; + + //process first row + args.array_start = (*args.offsets_ptr)[args.current_row_idx - 1]; + args.cur_size = (*args.offsets_ptr)[args.current_row_idx] - args.array_start; + + while (args.current_row_idx < block->rows()) { + Block lambda_block; + for (int i = 0; i < names.size(); i++) { + ColumnWithTypeAndName data_column; + if (_contains_column_id(args, i)) { + data_column = ColumnWithTypeAndName(data_types[i], names[i]); + } else { + data_column = ColumnWithTypeAndName(data_types[i]->create_column_const_with_default_value(0), data_types[i], names[i]); Review Comment: input 0 maybe throw error in const column c'tor? ########## be/src/vec/exprs/lambda_function/varray_map_function.cpp: ########## @@ -113,75 +158,175 @@ class ArrayMapFunction : public LambdaFunction { const auto& off_data = assert_cast<const ColumnArray::ColumnOffsets&>( col_array.get_offsets_column()); array_column_offset = off_data.clone_resized(col_array.get_offsets_column().size()); + args.offsets_ptr = &col_array.get_offsets(); } else { // select array_map((x,y)->x+y,c_array1,[0,1,2,3]) from array_test2; // c_array1: [0,1,2,3,4,5,6,7,8,9] const auto& array_offsets = assert_cast<const ColumnArray::ColumnOffsets&>(*first_array_offsets) - .get_data(); + .get_data(); if (nested_array_column_rows != col_array.get_data_ptr()->size() || (!array_offsets.empty() && memcmp(array_offsets.data(), col_array.get_offsets().data(), sizeof(array_offsets[0]) * array_offsets.size()) != 0)) { return Status::InvalidArgument( - "in array map function, the input column size " - "are " - "not equal completely, nested column data rows 1st size is {}, {}th " - "size is {}.", - nested_array_column_rows, i + 1, col_array.get_data_ptr()->size()); + "in array map function, the input column size " + "are " + "not equal completely, nested column data rows 1st size is {}, {}th " + "size is {}.", + nested_array_column_rows, i + 1, col_array.get_data_ptr()->size()); } } - - // insert the data column to the new block - ColumnWithTypeAndName data_column {col_array.get_data_ptr(), col_type.get_nested_type(), - "R" + array_column_type_name.name}; - lambda_block.insert(std::move(data_column)); + lambda_datas[i] = col_array.get_data_ptr(); + names.push_back("R" + array_column_type_name.name); + data_types.push_back(col_type.get_nested_type()); } - //3. child[0]->execute(new_block) - RETURN_IF_ERROR(children[0]->execute(context, &lambda_block, result_column_id)); + ColumnPtr result_col = nullptr; + DataTypePtr res_type; + std::string res_name; + + //process first row + args.array_start = (*args.offsets_ptr)[args.current_row_idx - 1]; + args.cur_size = (*args.offsets_ptr)[args.current_row_idx] - args.array_start; + + while (args.current_row_idx < block->rows()) { + Block lambda_block; + for (int i = 0; i < names.size(); i++) { + ColumnWithTypeAndName data_column; + if (_contains_column_id(args, i)) { + data_column = ColumnWithTypeAndName(data_types[i], names[i]); + } else { + data_column = ColumnWithTypeAndName(data_types[i]->create_column_const_with_default_value(0), data_types[i], names[i]); + } + lambda_block.insert(std::move(data_column)); + } + + MutableColumns columns = lambda_block.mutate_columns(); + while (columns[gap]->size() < batch_size) { + long max_step = batch_size - columns[gap]->size(); + long current_step = std::min(max_step, (long)(args.cur_size - args.current_offset_in_array)); + size_t pos = args.array_start + args.current_offset_in_array; + for (int i = 0; i < arguments.size(); ++i) { + columns[gap + i]->insert_range_from(*lambda_datas[i], pos, current_step); + } + args.current_offset_in_array += current_step; + args.current_repeat_times += current_step; + if (args.current_offset_in_array >= args.cur_size) { + args.current_row_eos = true; + } + _extend_data(columns, block, args, gap); + if (args.current_row_eos) { + args.current_row_idx++; + args.current_offset_in_array = 0; + if (args.current_row_idx >= block->rows()) { + break; + } + args.current_row_eos = false; + args.array_start = (*args.offsets_ptr)[args.current_row_idx - 1]; + args.cur_size = (*args.offsets_ptr)[args.current_row_idx] - args.array_start; + } + } + + lambda_block.set_columns(std::move(columns)); - auto res_col = lambda_block.get_by_position(*result_column_id) - .column->convert_to_full_column_if_const(); - auto res_type = lambda_block.get_by_position(*result_column_id).type; - auto res_name = lambda_block.get_by_position(*result_column_id).name; + //3. child[0]->execute(new_block) + RETURN_IF_ERROR(children[0]->execute(context, &lambda_block, result_column_id)); + + auto res_col = lambda_block.get_by_position(*result_column_id) + .column->convert_to_full_column_if_const(); + res_type = lambda_block.get_by_position(*result_column_id).type; + res_name = lambda_block.get_by_position(*result_column_id).name; + if (!result_col) { + result_col = std::move(res_col); + } else { + MutableColumnPtr column = (*std::move(result_col)).mutate(); + column->insert_range_from(*res_col, 0, res_col->size()); + } + } //4. get the result column after execution, reassemble it into a new array column, and return. ColumnWithTypeAndName result_arr; if (result_type->is_nullable()) { if (res_type->is_nullable()) { result_arr = {ColumnNullable::create( - ColumnArray::create(res_col, std::move(array_column_offset)), - std::move(outside_null_map)), - result_type, res_name}; + ColumnArray::create(result_col, std::move(array_column_offset)), + std::move(outside_null_map)), + result_type, res_name}; } else { // deal with eg: select array_map(x -> x is null, [null, 1, 2]); // need to create the nested column null map for column array - auto nested_null_map = ColumnUInt8::create(res_col->size(), 0); + auto nested_null_map = ColumnUInt8::create(result_col->size(), 0); result_arr = { - ColumnNullable::create( - ColumnArray::create( - ColumnNullable::create(res_col, std::move(nested_null_map)), - std::move(array_column_offset)), - std::move(outside_null_map)), - result_type, res_name}; + ColumnNullable::create( + ColumnArray::create( + ColumnNullable::create(result_col, std::move(nested_null_map)), + std::move(array_column_offset)), + std::move(outside_null_map)), + result_type, res_name}; } } else { if (res_type->is_nullable()) { - result_arr = {ColumnArray::create(res_col, std::move(array_column_offset)), - result_type, res_name}; + result_arr = {ColumnArray::create(result_col, std::move(array_column_offset)), + result_type, res_name}; } else { - auto nested_null_map = ColumnUInt8::create(res_col->size(), 0); + auto nested_null_map = ColumnUInt8::create(result_col->size(), 0); result_arr = {ColumnArray::create( - ColumnNullable::create(res_col, std::move(nested_null_map)), - std::move(array_column_offset)), - result_type, res_name}; + ColumnNullable::create(result_col, std::move(nested_null_map)), + std::move(array_column_offset)), + result_type, res_name}; } } block->insert(std::move(result_arr)); *result_column_id = block->columns() - 1; + return Status::OK(); } + +private: + + bool _contains_column_id(LambdaArgs& args, int id) { + const auto it = std::find(args.output_slot_ref_indexs.begin(), args.output_slot_ref_indexs.end(), id); + return it != args.output_slot_ref_indexs.end(); + } + + void _set_column_ref_column_id(VExprSPtr expr, int gap) { + for (const auto& child : expr->children()) { + if (child->is_column_ref()) { + auto* ref = static_cast<VColumnRef*>(child.get()); + ref->set_gap(gap); + } else { + _set_column_ref_column_id(child, gap); + } + } + } + + void _collect_slot_ref_column_id(VExprSPtr expr, LambdaArgs& args) { + for (const auto& child : expr->children()) { + if (child->is_slot_ref()) { + const auto* ref = static_cast<VSlotRef*>(child.get()); + args.output_slot_ref_indexs.push_back(ref->column_id()); + } else { + _collect_slot_ref_column_id(child, args); + } + } + } + + void _extend_data(std::vector<MutableColumnPtr>& columns, Block* block, LambdaArgs& args,int size) { + if (!args.current_repeat_times || !size) { + return; + } + for (int i = 0; i < size; i++) { + if (_contains_column_id(args, i)) { + auto src_column = block->get_by_position(i).column->convert_to_full_column_if_const(); + columns[i]->insert_many_from(*src_column, args.current_row_idx, args.current_repeat_times); + } else { + // must be column const Review Comment: could add dcheck here, check(is_column_const(column)) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org