morningman commented on code in PR #23352: URL: https://github.com/apache/doris/pull/23352#discussion_r1302596086
########## be/src/vec/exec/format/csv/csv_reader.cpp: ########## @@ -509,32 +506,15 @@ Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, return Status::OK(); } - if (_is_load) { - for (int i = 0; i < _file_slot_descs.size(); ++i) { - auto src_slot_desc = _file_slot_descs[i]; - int col_idx = _col_idxs[i]; - // col idx is out of range, fill with null. - const Slice& value = - col_idx < _split_values.size() ? _split_values[col_idx] : _s_null_slice; - // For load task, we always read "string" from file, so use "write_string_column" - _text_converter->write_string_column(src_slot_desc, &columns[i], value.data, value.size, - _escape != 0); - } - } else { - // if _split_values.size > _file_slot_descs.size() - // we only take the first few columns - for (int i = 0; i < _file_slot_descs.size(); ++i) { - auto src_slot_desc = _file_slot_descs[i]; - int col_idx = _col_idxs[i]; - // col idx is out of range, fill with null. - const Slice& value = - col_idx < _split_values.size() ? _split_values[col_idx] : _s_null_slice; - IColumn* col_ptr = const_cast<IColumn*>( - block->get_by_position(_file_slot_idx_map[i]).column.get()); - // For query task, we will convert values to final column type, so use "write_vec_column" - _text_converter->write_vec_column(src_slot_desc, col_ptr, value.data, value.size, true, - false); - } + for (int i = 0; i < _file_slot_descs.size(); ++i) { + // auto src_slot_desc = _file_slot_descs[i]; + int col_idx = _col_idxs[i]; + // col idx is out of range, fill with null. + const Slice& value = + col_idx < _split_values.size() ? 
_split_values[col_idx] : _s_null_slice; + // For load task, we always read "string" from file, so use "write_string_column" Review Comment: modify the comment ########## be/src/vec/data_types/serde/data_type_array_serde.cpp: ########## @@ -140,6 +131,62 @@ Status DataTypeArraySerDe::deserialize_one_cell_from_text(IColumn& column, Slice offsets.emplace_back(offsets.back() + elem_deserialized); return st; } +Status DataTypeArraySerDe::deserialize_one_cell_from_csv(IColumn& column, Slice& slice, Review Comment: Add an empty line between the 2 methods. Do the same in other places. ########## gensrc/thrift/PlanNodes.thrift: ########## @@ -380,6 +380,7 @@ struct TFileScanRangeParams { 19: optional map<string, i32> slot_name_to_schema_pos 20: optional list<Exprs.TExpr> pre_filter_exprs_list 21: optional Types.TUniqueId load_id + 22: optional bool use_hive_text_serde // for text fileformat , if variable is true ,CsvReader will use hive_text_serde,else use json serde. Review Comment: Better to use an `enum` type to define the kind of text, in case we add other kinds of text in the future ########## be/src/vec/exec/format/csv/csv_reader.cpp: ########## @@ -509,32 +506,15 @@ Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, return Status::OK(); } - if (_is_load) { - for (int i = 0; i < _file_slot_descs.size(); ++i) { - auto src_slot_desc = _file_slot_descs[i]; - int col_idx = _col_idxs[i]; - // col idx is out of range, fill with null. - const Slice& value = - col_idx < _split_values.size() ? 
_split_values[col_idx] : _s_null_slice; - // For load task, we always read "string" from file, so use "write_string_column" - _text_converter->write_string_column(src_slot_desc, &columns[i], value.data, value.size, - _escape != 0); - } - } else { - // if _split_values.size > _file_slot_descs.size() - // we only take the first few columns - for (int i = 0; i < _file_slot_descs.size(); ++i) { - auto src_slot_desc = _file_slot_descs[i]; - int col_idx = _col_idxs[i]; - // col idx is out of range, fill with null. - const Slice& value = - col_idx < _split_values.size() ? _split_values[col_idx] : _s_null_slice; - IColumn* col_ptr = const_cast<IColumn*>( - block->get_by_position(_file_slot_idx_map[i]).column.get()); - // For query task, we will convert values to final column type, so use "write_vec_column" - _text_converter->write_vec_column(src_slot_desc, col_ptr, value.data, value.size, true, - false); - } + for (int i = 0; i < _file_slot_descs.size(); ++i) { + // auto src_slot_desc = _file_slot_descs[i]; Review Comment: remove unused code -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org