[GitHub] [doris] morningman commented on a diff in pull request #23352: [feature](csv_serde)1.append csv serde for serialize to csv and deserialize from csv. 2.let csvReader use csv serde not text_converter.

via GitHub Thu, 31 Aug 2023 08:50:15 -0700


morningman commented on code in PR #23352:
URL: https://github.com/apache/doris/pull/23352#discussion_r1302596086



##########
be/src/vec/exec/format/csv/csv_reader.cpp:
##########
@@ -509,32 +506,15 @@ Status CsvReader::_fill_dest_columns(const Slice& line, 
Block* block,
         return Status::OK();
     }
 
-    if (_is_load) {
-        for (int i = 0; i < _file_slot_descs.size(); ++i) {
-            auto src_slot_desc = _file_slot_descs[i];
-            int col_idx = _col_idxs[i];
-            // col idx is out of range, fill with null.
-            const Slice& value =
-                    col_idx < _split_values.size() ? _split_values[col_idx] : 
_s_null_slice;
-            // For load task, we always read "string" from file, so use 
"write_string_column"
-            _text_converter->write_string_column(src_slot_desc, &columns[i], 
value.data, value.size,
-                                                 _escape != 0);
-        }
-    } else {
-        // if _split_values.size > _file_slot_descs.size()
-        // we only take the first few columns
-        for (int i = 0; i < _file_slot_descs.size(); ++i) {
-            auto src_slot_desc = _file_slot_descs[i];
-            int col_idx = _col_idxs[i];
-            // col idx is out of range, fill with null.
-            const Slice& value =
-                    col_idx < _split_values.size() ? _split_values[col_idx] : 
_s_null_slice;
-            IColumn* col_ptr = const_cast<IColumn*>(
-                    
block->get_by_position(_file_slot_idx_map[i]).column.get());
-            // For query task, we will convert values to final column type, so 
use "write_vec_column"
-            _text_converter->write_vec_column(src_slot_desc, col_ptr, 
value.data, value.size, true,
-                                              false);
-        }
+    for (int i = 0; i < _file_slot_descs.size(); ++i) {
+        //            auto src_slot_desc = _file_slot_descs[i];
+        int col_idx = _col_idxs[i];
+        // col idx is out of range, fill with null.
+        const Slice& value =
+                col_idx < _split_values.size() ? _split_values[col_idx] : 
_s_null_slice;
+        // For load task, we always read "string" from file, so use 
"write_string_column"

Review Comment:
   Please update this comment; it still describes only the load task.



##########
be/src/vec/data_types/serde/data_type_array_serde.cpp:
##########
@@ -140,6 +131,62 @@ Status 
DataTypeArraySerDe::deserialize_one_cell_from_text(IColumn& column, Slice
     offsets.emplace_back(offsets.back() + elem_deserialized);
     return st;
 }
+Status DataTypeArraySerDe::deserialize_one_cell_from_csv(IColumn& column, 
Slice& slice,

Review Comment:
   Add an empty line between the two methods.
   The same applies in other places.



##########
gensrc/thrift/PlanNodes.thrift:
##########
@@ -380,6 +380,7 @@ struct TFileScanRangeParams {
     19: optional map<string, i32> slot_name_to_schema_pos
     20: optional list<Exprs.TExpr> pre_filter_exprs_list
     21: optional Types.TUniqueId load_id
+    22: optional bool use_hive_text_serde // for text fileformat , if variable 
is true ,CsvReader will use hive_text_serde,else use json serde.

Review Comment:
   It would be better to use an `enum` type to define the kind of text, in case we
   add other kinds of text in the future.



##########
be/src/vec/exec/format/csv/csv_reader.cpp:
##########
@@ -509,32 +506,15 @@ Status CsvReader::_fill_dest_columns(const Slice& line, 
Block* block,
         return Status::OK();
     }
 
-    if (_is_load) {
-        for (int i = 0; i < _file_slot_descs.size(); ++i) {
-            auto src_slot_desc = _file_slot_descs[i];
-            int col_idx = _col_idxs[i];
-            // col idx is out of range, fill with null.
-            const Slice& value =
-                    col_idx < _split_values.size() ? _split_values[col_idx] : 
_s_null_slice;
-            // For load task, we always read "string" from file, so use 
"write_string_column"
-            _text_converter->write_string_column(src_slot_desc, &columns[i], 
value.data, value.size,
-                                                 _escape != 0);
-        }
-    } else {
-        // if _split_values.size > _file_slot_descs.size()
-        // we only take the first few columns
-        for (int i = 0; i < _file_slot_descs.size(); ++i) {
-            auto src_slot_desc = _file_slot_descs[i];
-            int col_idx = _col_idxs[i];
-            // col idx is out of range, fill with null.
-            const Slice& value =
-                    col_idx < _split_values.size() ? _split_values[col_idx] : 
_s_null_slice;
-            IColumn* col_ptr = const_cast<IColumn*>(
-                    
block->get_by_position(_file_slot_idx_map[i]).column.get());
-            // For query task, we will convert values to final column type, so 
use "write_vec_column"
-            _text_converter->write_vec_column(src_slot_desc, col_ptr, 
value.data, value.size, true,
-                                              false);
-        }
+    for (int i = 0; i < _file_slot_descs.size(); ++i) {
+        //            auto src_slot_desc = _file_slot_descs[i];

Review Comment:
   Remove this unused, commented-out code.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[GitHub] [doris] morningman commented on a diff in pull request #23352: [feature](csv_serde)1.append csv serde for serialize to csv and deserialize from csv. 2.let csvReader use csv serde not text_converter.

Reply via email to