[GitHub] [doris] xiaokang commented on a diff in pull request #16569: [Optimization](index) Optimization for no need to read raw data for index column that only in where clause

via GitHub Thu, 09 Feb 2023 05:13:55 -0800


xiaokang commented on code in PR #16569:
URL: https://github.com/apache/doris/pull/16569#discussion_r1101313266



##########
fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java:
##########
@@ -436,6 +441,60 @@ private void pushSortToOlapScan() {
         }
     }
 
+    /**
+     * outputColumnUniqueIds contain columns in OrderByExprs and outputExprs
+     * push output column unique id set to olap scan.

Review Comment:
   add more comment and example



##########
gensrc/thrift/PlanNodes.thrift:
##########
@@ -574,6 +574,8 @@ struct TOlapScanNode {
   11: optional bool enable_unique_key_merge_on_write
   12: optional TPushAggOp push_down_agg_type_opt
   13: optional bool use_topn_opt
+  14: optional list<Exprs.TExpr> ordering_exprs

Review Comment:
   what's ordering_exprs's usage?



##########
be/src/olap/reader.h:
##########
@@ -102,6 +102,7 @@ class TabletReader {
 
         std::vector<RowsetReaderSharedPtr> rs_readers;
         std::vector<uint32_t> return_columns;
+        std::set<int32_t> output_columns;

Review Comment:
   add commnt for difference between return_columns and output_columns



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1562,5 +1633,83 @@ Status 
SegmentIterator::current_block_row_locations(std::vector<RowLocation>* bl
     return Status::OK();
 }
 
+/**
+ * solution 1: where cluase included nodes are all `and` leaf nodes,
+ * predicate pushed down and remove from vconjunct.
+ *  for example: where A = 1 and B = 'test' and B like '%he%';
+ *      column A : `A = 1` pushed down, this column's predicates all pushed 
down,
+ *                  call _check_column_pred_all_push_down will return true.
+ *      column B : `B = 'test'` pushed down, but `B like '%he%'` remain in 
vconjunct,
+ *                  call _check_column_pred_all_push_down will return false.
+ *
+ * solution 2: where cluase included nodes are compound or other complex 
conditions,
+ * predicate pushed down but still remain in vconjunct.
+ *  for exmple: where (A = 1 and B = 'test') or B = 'hi' or (C like '%ye%' and 
C > 'aa');
+ *      column A : `A = 1` pushed down, check it applyed by index,
+ *                  call _check_column_pred_all_push_down will return true.
+ *      column B : `B = 'test'`, `B = 'hi'` all pushed down, check them all 
applyed by index,
+ *                  call _check_column_pred_all_push_down will return true.
+ *      column C : `C like '%ye%'` not pushed down, `C > 'aa'` pushed down, 
only `C > 'aa'` applyed by index,
+ *                  call _check_column_pred_all_push_down will return false.
+*/
+bool SegmentIterator::_check_column_pred_all_push_down(ColumnPredicate* 
predicate,
+                                                       bool in_compound) {
+    if (_remaining_vconjunct_root == nullptr) {
+        return true;
+    }
+    std::unordered_map<std::string, std::vector<ColumnPredicateInfo>>
+            column_pred_in_remaining_vconjunct;
+    auto column_name = _schema.column(predicate->column_id())->name();
+    _column_predicate_info.reset(new ColumnPredicateInfo());
+    _find_pred_in_remaining_vconjunct_root(_remaining_vconjunct_root,

Review Comment:
   should only get column_name related predicated



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -419,12 +424,18 @@ Status SegmentIterator::_apply_bitmap_index() {
     for (auto pred : _col_predicates) {
         int32_t unique_id = _schema.unique_id(pred->column_id());
         if (_bitmap_index_iterators.count(unique_id) < 1 ||
-            _bitmap_index_iterators[unique_id] == nullptr) {
+            _bitmap_index_iterators[unique_id] == nullptr || pred->type() == 
PredicateType::BF) {

Review Comment:
   add comment



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1562,5 +1633,83 @@ Status 
SegmentIterator::current_block_row_locations(std::vector<RowLocation>* bl
     return Status::OK();
 }
 
+/**
+ * solution 1: where cluase included nodes are all `and` leaf nodes,
+ * predicate pushed down and remove from vconjunct.
+ *  for example: where A = 1 and B = 'test' and B like '%he%';
+ *      column A : `A = 1` pushed down, this column's predicates all pushed 
down,
+ *                  call _check_column_pred_all_push_down will return true.
+ *      column B : `B = 'test'` pushed down, but `B like '%he%'` remain in 
vconjunct,
+ *                  call _check_column_pred_all_push_down will return false.
+ *
+ * solution 2: where cluase included nodes are compound or other complex 
conditions,
+ * predicate pushed down but still remain in vconjunct.
+ *  for exmple: where (A = 1 and B = 'test') or B = 'hi' or (C like '%ye%' and 
C > 'aa');
+ *      column A : `A = 1` pushed down, check it applyed by index,
+ *                  call _check_column_pred_all_push_down will return true.
+ *      column B : `B = 'test'`, `B = 'hi'` all pushed down, check them all 
applyed by index,
+ *                  call _check_column_pred_all_push_down will return true.
+ *      column C : `C like '%ye%'` not pushed down, `C > 'aa'` pushed down, 
only `C > 'aa'` applyed by index,
+ *                  call _check_column_pred_all_push_down will return false.
+*/
+bool SegmentIterator::_check_column_pred_all_push_down(ColumnPredicate* 
predicate,
+                                                       bool in_compound) {
+    if (_remaining_vconjunct_root == nullptr) {
+        return true;
+    }
+    std::unordered_map<std::string, std::vector<ColumnPredicateInfo>>
+            column_pred_in_remaining_vconjunct;

Review Comment:
   just use a vector



##########
fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java:
##########
@@ -436,6 +441,60 @@ private void pushSortToOlapScan() {
         }
     }
 
+    /**
+     * outputColumnUniqueIds contain columns in OrderByExprs and outputExprs
+     * push output column unique id set to olap scan.
+    */
+    private void pushOutColumnUniqueIdsToOlapScan(PlanFragment rootFragment, 
Analyzer analyzer) {
+        HashSet<Integer> outputColumnUniqueIds =  new HashSet<>();
+        ArrayList<Expr> outputExprs = rootFragment.getOutputExprs();
+        for (Expr expr : outputExprs) {
+            if (expr instanceof SlotRef) {
+                if (((SlotRef) expr).getColumn() != null) {
+                    outputColumnUniqueIds.add(((SlotRef) 
expr).getColumn().getUniqueId());
+                }
+            }
+        }
+
+        for (PlanFragment fragment : fragments) {
+            PlanNode node = fragment.getPlanRoot();
+            PlanNode parent = null;
+            while (node.getChildren().size() != 0) {
+                for (PlanNode childNode : node.getChildren()) {
+                    List<SlotId> outputSlotIds = childNode.getOutputSlotIds();
+                    if (outputSlotIds != null) {
+                        for (SlotId sid : outputSlotIds) {
+                            SlotDescriptor slotDesc = 
analyzer.getSlotDesc(sid);
+                            outputColumnUniqueIds.add(slotDesc.getUniqueId());
+                        }
+                    }
+                }
+                // OlapScanNode is the last node.
+                // So, just get the two node and check if they are SortNode 
and OlapScan.
+                parent = node;
+                node = node.getChildren().get(0);
+            }
+
+            if (parent instanceof SortNode) {

Review Comment:
   Is GROUP BY columns necessary to processed like ORDER BY columns?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [doris] xiaokang commented on a diff in pull request #16569: [Optimization](index) Optimization for no need to read raw data for index column that only in where clause

Reply via email to