xiaokang commented on code in PR #32620:
URL: https://github.com/apache/doris/pull/32620#discussion_r1561930236


##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,6 +53,75 @@ class VCompoundPred : public VectorizedFnCall {
 
     const std::string& expr_name() const override { return _expr_name; }
 
+    bool is_all_ones(const roaring::Roaring& r) {
+        return r.contains(0);
+        for (roaring::RoaringSetBitForwardIterator i = r.begin(); i != 
r.end(); ++i) {
+            if (*i == 0) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    //   1. when meet 'or' conjunct: a or b, if b can apply index, return all 
rows, so b should not be extracted
+    //   2. when meet 'and' conjunct, function with column b can not apply 
inverted index
+    //      eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not 
for index, so b should not be extracted
+    //          but a and array_contains(b, 1), b can be applied inverted 
index, which b can be extracted
+    Status eval_inverted_index(
+            VExprContext* context,
+            const std::unordered_map<ColumnId, 
std::pair<vectorized::NameAndTypePair,
+                                                         
segment_v2::InvertedIndexIterator*>>&
+                    colId_to_inverted_index_iter,
+            uint32_t num_rows, roaring::Roaring* bitmap) const override {
+        if (_op == TExprOpcode::COMPOUND_OR) {
+            for (auto child : _children) {
+                std::shared_ptr<roaring::Roaring> child_roaring =
+                        std::make_shared<roaring::Roaring>();
+                Status st = child->eval_inverted_index(context, 
colId_to_inverted_index_iter,
+                                                       num_rows, 
child_roaring.get());
+                if (!st.ok()) {
+                    continue;
+                }
+                *bitmap |= *child_roaring;
+                if (!child_roaring->isEmpty()) {

Review Comment:
   I think the check for NOT empty is wrong. Furthermore, short-circuiting for 
OR is different from AND: you should check that there is no zero in the total 
bitmap and, if so, skip the remaining exprs.



##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,6 +53,75 @@ class VCompoundPred : public VectorizedFnCall {
 
     const std::string& expr_name() const override { return _expr_name; }
 
+    bool is_all_ones(const roaring::Roaring& r) {
+        return r.contains(0);
+        for (roaring::RoaringSetBitForwardIterator i = r.begin(); i != 
r.end(); ++i) {
+            if (*i == 0) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    //   1. when meet 'or' conjunct: a or b, if b can apply index, return all 
rows, so b should not be extracted
+    //   2. when meet 'and' conjunct, function with column b can not apply 
inverted index
+    //      eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not 
for index, so b should not be extracted
+    //          but a and array_contains(b, 1), b can be applied inverted 
index, which b can be extracted
+    Status eval_inverted_index(
+            VExprContext* context,
+            const std::unordered_map<ColumnId, 
std::pair<vectorized::NameAndTypePair,
+                                                         
segment_v2::InvertedIndexIterator*>>&
+                    colId_to_inverted_index_iter,
+            uint32_t num_rows, roaring::Roaring* bitmap) const override {
+        if (_op == TExprOpcode::COMPOUND_OR) {
+            for (auto child : _children) {
+                std::shared_ptr<roaring::Roaring> child_roaring =
+                        std::make_shared<roaring::Roaring>();
+                Status st = child->eval_inverted_index(context, 
colId_to_inverted_index_iter,
+                                                       num_rows, 
child_roaring.get());
+                if (!st.ok()) {
+                    continue;
+                }
+                *bitmap |= *child_roaring;
+                if (!child_roaring->isEmpty()) {
+                    // means inverted index filter do not reduce any rows
+                    // the left expr no need to be extracted by inverted index,
+                    // and cur roaring is all rows which means this inverted 
index is not useful,
+                    // do not need to calculate with res bitmap
+                    return Status::OK();
+                }
+            }
+        } else if (_op == TExprOpcode::COMPOUND_AND) {
+            for (auto child : _children) {
+                std::shared_ptr<roaring::Roaring> child_roaring =
+                        std::make_shared<roaring::Roaring>();
+                Status st = child->eval_inverted_index(context, 
colId_to_inverted_index_iter,
+                                                       num_rows, 
child_roaring.get());
+                if (!st.ok()) {
+                    continue;
+                }
+                *bitmap &= *child_roaring;
+                if (child_roaring->isEmpty()) {

Review Comment:
   It's OK to check child_roaring here, but it's more accurate to check bitmap.



##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,6 +53,75 @@ class VCompoundPred : public VectorizedFnCall {
 
     const std::string& expr_name() const override { return _expr_name; }
 
+    bool is_all_ones(const roaring::Roaring& r) {
+        return r.contains(0);
+        for (roaring::RoaringSetBitForwardIterator i = r.begin(); i != 
r.end(); ++i) {
+            if (*i == 0) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    //   1. when meet 'or' conjunct: a or b, if b can apply index, return all 
rows, so b should not be extracted
+    //   2. when meet 'and' conjunct, function with column b can not apply 
inverted index
+    //      eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not 
for index, so b should not be extracted
+    //          but a and array_contains(b, 1), b can be applied inverted 
index, which b can be extracted
+    Status eval_inverted_index(
+            VExprContext* context,
+            const std::unordered_map<ColumnId, 
std::pair<vectorized::NameAndTypePair,
+                                                         
segment_v2::InvertedIndexIterator*>>&
+                    colId_to_inverted_index_iter,
+            uint32_t num_rows, roaring::Roaring* bitmap) const override {
+        if (_op == TExprOpcode::COMPOUND_OR) {
+            for (auto child : _children) {
+                std::shared_ptr<roaring::Roaring> child_roaring =
+                        std::make_shared<roaring::Roaring>();
+                Status st = child->eval_inverted_index(context, 
colId_to_inverted_index_iter,
+                                                       num_rows, 
child_roaring.get());
+                if (!st.ok()) {
+                    continue;
+                }
+                *bitmap |= *child_roaring;
+                if (!child_roaring->isEmpty()) {
+                    // means inverted index filter do not reduce any rows
+                    // the left expr no need to be extracted by inverted index,
+                    // and cur roaring is all rows which means this inverted 
index is not useful,
+                    // do not need to calculate with res bitmap
+                    return Status::OK();
+                }
+            }
+        } else if (_op == TExprOpcode::COMPOUND_AND) {
+            for (auto child : _children) {
+                std::shared_ptr<roaring::Roaring> child_roaring =
+                        std::make_shared<roaring::Roaring>();
+                Status st = child->eval_inverted_index(context, 
colId_to_inverted_index_iter,
+                                                       num_rows, 
child_roaring.get());
+                if (!st.ok()) {
+                    continue;
+                }
+                *bitmap &= *child_roaring;
+                if (child_roaring->isEmpty()) {
+                    // the left expr no need to be extracted by inverted 
index, just return 0 rows
+                    // res bitmap will be zero
+                    return Status::OK();
+                }
+            }
+        } else if (_op == TExprOpcode::COMPOUND_NOT) {
+            std::shared_ptr<roaring::Roaring> child_roaring = 
std::make_shared<roaring::Roaring>();
+            Status st = _children[0]->eval_inverted_index(context, 
colId_to_inverted_index_iter,
+                                                          num_rows, 
child_roaring.get());
+            if (!st.ok()) {
+                return st;
+            }
+            *bitmap -= *child_roaring;

Review Comment:
   Should this be `-= child_roaring` or `-child_roaring`?



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1209,6 +1226,34 @@ Status SegmentIterator::_apply_inverted_index() {
         }
     }
 
+    // support expr to evaluate inverted index
+    std::unordered_map<ColumnId, std::pair<vectorized::NameAndTypePair, 
InvertedIndexIterator*>>
+            iter_map;
+
+    for (auto col_id : _common_expr_columns) {
+        if (_check_apply_by_inverted_index(col_id)) {
+            iter_map[col_id] = std::make_pair(_storage_name_and_type[col_id],
+                                              
_inverted_index_iterators[col_id].get());
+        }
+    }
+    for (auto expr_ctx : _common_expr_ctxs_push_down) {
+        // _inverted_index_iterators has all column ids which has inverted 
index
+        // _common_expr_columns has all column ids from 
_common_expr_ctxs_push_down
+        // if current bitmap is already empty just return
+        if (_row_bitmap.isEmpty()) {
+            break;
+        }
+        roaring::Roaring bitmap = _row_bitmap;

Review Comment:
   It's expensive to copy a large bitmap.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to