morningman commented on code in PR #12275:
URL: https://github.com/apache/doris/pull/12275#discussion_r960667846


##########
be/src/vec/exec/scan/new_file_scanner.cpp:
##########
@@ -164,4 +203,151 @@ Status 
NewFileScanner::_fill_columns_from_path(vectorized::Block* _block, size_t
     return Status::OK();
 }
 
+Status NewFileScanNode::_filter_input_block(Block *block) {

Review Comment:
   Why not implement this in the parent class `VScanner`?



##########
be/src/vec/exec/scan/new_file_text_scanner.cpp:
##########
@@ -26,8 +26,8 @@ namespace doris::vectorized {
 
 NewFileTextScanner::NewFileTextScanner(RuntimeState* state, NewFileScanNode* 
parent, int64_t limit,
                                        const TFileScanRange& scan_range, 
MemTracker* tracker,
-                                       RuntimeProfile* profile)
-        : NewFileScanner(state, parent, limit, scan_range, tracker, profile),
+                                       RuntimeProfile* profile, const 
std::vector<TExpr>& pre_filter_texprs)

Review Comment:
   How about passing `pre_filter_texprs` when calling `scanner->prepare()`, 
the same way we pass `_vconjunct_ctx` to it?



##########
be/src/vec/exec/scan/new_file_scanner.cpp:
##########
@@ -164,4 +203,151 @@ Status 
NewFileScanner::_fill_columns_from_path(vectorized::Block* _block, size_t
     return Status::OK();
 }
 
+Status NewFileScanNode::_filter_input_block(Block *block) {
+    auto origin_column_num = _src_block.columns();
+    // filter block
+    auto old_rows = _src_block.rows();
+    
RETURN_IF_ERROR(vectorized::VExprContext::filter_block(_vpre_filter_ctx_ptr, 
&_src_block,
+                                                           origin_column_num));
+    _counter->num_rows_unselected += old_rows - _src_block.rows();
+    return Status::OK();
+}
+
+Status NewFileScanNode::_convert_to_output_block(Block *output_block) {
+    if (_input_block_ptr == output_block) {
+        return Status::OK();
+    }
+    _fill_columns_from_path();
+    if (LIKELY(_src_block.rows() > 0)) {
+//        RETURN_IF_ERROR(BaseScanner::_filter_src_block());
+        RETURN_IF_ERROR(BaseScanner::_materialize_dest_block(dest_block));
+    }
+
+    return Status::OK();
+
+    return Status::OK();
+}
+
+void NewFileScanNode::_fill_columns_from_path() {
+    const TBrokerRangeDesc& range = _ranges.at(_next_range - 1);
+    if (range.__isset.num_of_columns_from_file) {
+        size_t start = range.num_of_columns_from_file;
+        size_t rows = _src_block.rows();
+
+        for (size_t i = 0; i < range.columns_from_path.size(); ++i) {
+            auto slot_desc = _src_slot_descs.at(i + start);
+            if (slot_desc == nullptr) continue;
+            auto is_nullable = slot_desc->is_nullable();
+            auto data_type = 
vectorized::DataTypeFactory::instance().create_data_type(TYPE_VARCHAR,
+                                                                               
       is_nullable);
+            auto data_column = data_type->create_column();
+            const std::string& column_from_path = range.columns_from_path[i];
+            for (size_t j = 0; j < rows; ++j) {
+                
data_column->insert_data(const_cast<char*>(column_from_path.c_str()),
+                                         column_from_path.size());
+            }
+            
_src_block.insert(vectorized::ColumnWithTypeAndName(std::move(data_column), 
data_type,
+                                                                
slot_desc->col_name()));
+        }
+    }
+}
+
+Status NewFileScanNode::_materialize_dest_block(vectorized::Block* dest_block) 
{

Review Comment:
   Should this be moved to the parent class?



##########
gensrc/thrift/PlanNodes.thrift:
##########
@@ -230,15 +230,17 @@ struct TFileScanRangeParams {
   2: optional TFileFormatType format_type;
   // use src_tuple_id to get all slots from src table include both file slot 
and partition slot.
   3: optional Types.TTupleId src_tuple_id;
+     // dest_tuple_id is the tuple id that need by scan node
+  4: required Types.TTupleId dest_tuple_id

Review Comment:
   You should not change the order of the fields



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to