xiaokang commented on code in PR #32620:
URL: https://github.com/apache/doris/pull/32620#discussion_r1548824626


##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,6 +53,56 @@ class VCompoundPred : public VectorizedFnCall {
 
     const std::string& expr_name() const override { return _expr_name; }
 
+    bool is_all_ones(const roaring::Roaring& r) {
+        return r.contains(0);
+        for (roaring::RoaringSetBitForwardIterator i = r.begin(); i != 
r.end(); ++i) {
+            if (*i == 0) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    //   1. when meet 'or' conjunct: a or b, if b can apply index, return all 
rows, so b should not be extracted
+    //   2. when meet 'and' conjunct, function with column b can not apply 
inverted index
+    //      eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not 
for index, so b should not be extracted
+    //          but a and array_contains(b, 1), b can be applied inverted 
index, which b can be extracted
+    Status eval_inverted_index(
+            VExprContext* context,
+            const std::unordered_map<ColumnId, 
std::pair<vectorized::NameAndTypePair,
+                                                         
segment_v2::InvertedIndexIterator*>>&
+                    colId_invertedIndexIter_mapping,
+            uint32_t num_rows, roaring::Roaring* bitmap) const override {
+        if (_op == TExprOpcode::COMPOUND_OR) {
+            for (auto child : _children) {
+                Status st = child->eval_inverted_index(context, 
colId_invertedIndexIter_mapping,

Review Comment:
   It implies AND



##########
be/src/vec/functions/array/function_array_index.h:
##########
@@ -70,6 +74,11 @@ struct ArrayCountEqual {
     static constexpr void apply(ResultType& current, size_t j) noexcept { 
++current; }
 };
 
+struct ParamValue {
+    PrimitiveType type;
+    Field query_value;

Review Comment:
   just value to be more generic



##########
be/src/vec/exprs/vexpr_context.h:
##########
@@ -69,6 +70,21 @@ class VExprContext {
         return _fn_contexts[i].get();
     }
 
+    // execute expr with inverted index which column a, b has inverted indexes
+    //  but some situation although column b has indexes, but apply index is 
not useful, we should
+    //  skip this expr, just do not apply index anymore.
+    /**
+     * @param colId_invertedIndexIter_mapping contains all column id to 
inverted index iterator mapping from segmentIterator
+     * @param num_rows number of rows in one segment.
+     * @param bitmap roaring bitmap to store the result. 0 is present filed by 
index.
+     * @return status not ok means execute failed.
+     */
+    [[nodiscard]] Status eval_inverted_indexs(
+            const std::unordered_map<ColumnId, 
std::pair<vectorized::NameAndTypePair,
+                                                         
segment_v2::InvertedIndexIterator*>>&
+                    colId_invertedIndexIter_mapping,

Review Comment:
   use consistent variable name style: colid_to_inverted_index_iter



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1209,6 +1226,33 @@ Status SegmentIterator::_apply_inverted_index() {
         }
     }
 
+    // support expr to evaluate inverted index
+    std::unordered_map<ColumnId, std::pair<vectorized::NameAndTypePair, 
InvertedIndexIterator*>>
+            iter_map;
+
+    for (auto col_id : _common_expr_columns) {
+        if (_check_apply_by_inverted_index(col_id)) {
+            iter_map[col_id] = std::make_pair(_storage_name_and_type[col_id],
+                                              
_inverted_index_iterators[col_id].get());
+        }
+    }
+    for (auto exprCtx : _common_expr_ctxs_push_down) {
+        // _inverted_index_iterators has all column ids which has inverted 
index
+        // _common_expr_columns has all column ids from 
_common_expr_ctxs_push_down
+        // if current bitmap is already empty just return
+        if (_row_bitmap.isEmpty()) {
+            break;
+        }
+        roaring::Roaring bitmap = _row_bitmap;
+        const Status st = exprCtx->eval_inverted_indexs(iter_map, num_rows(), 
&bitmap);
+        if (!st.ok()) {
+            LOG(WARNING) << "failed to evaluate index in expr" << 
exprCtx->root()->debug_string()
+                         << ", error msg: " << st;
+        } else {
+            _row_bitmap &= bitmap;

Review Comment:
   `_row_bitmap &= bitmap` implies that there is AND logic between 
_common_expr_ctxs_push_down and predicates. It's not always true.



##########
be/src/vec/functions/array/function_array_index.h:
##########
@@ -87,6 +96,58 @@ class FunctionArrayIndex : public IFunction {
 
     bool use_default_implementation_for_nulls() const override { return false; 
}
 
+    Status open(FunctionContext* context, FunctionContext::FunctionStateScope 
scope) override {
+        if (scope == FunctionContext::THREAD_LOCAL) {
+            return Status::OK();

Review Comment:
   why?



##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,6 +53,56 @@ class VCompoundPred : public VectorizedFnCall {
 
     const std::string& expr_name() const override { return _expr_name; }
 
+    bool is_all_ones(const roaring::Roaring& r) {
+        return r.contains(0);
+        for (roaring::RoaringSetBitForwardIterator i = r.begin(); i != 
r.end(); ++i) {
+            if (*i == 0) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    //   1. when meet 'or' conjunct: a or b, if b can apply index, return all 
rows, so b should not be extracted
+    //   2. when meet 'and' conjunct, function with column b can not apply 
inverted index
+    //      eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not 
for index, so b should not be extracted
+    //          but a and array_contains(b, 1), b can be applied inverted 
index, which b can be extracted
+    Status eval_inverted_index(
+            VExprContext* context,
+            const std::unordered_map<ColumnId, 
std::pair<vectorized::NameAndTypePair,
+                                                         
segment_v2::InvertedIndexIterator*>>&
+                    colId_invertedIndexIter_mapping,
+            uint32_t num_rows, roaring::Roaring* bitmap) const override {
+        if (_op == TExprOpcode::COMPOUND_OR) {
+            for (auto child : _children) {
+                Status st = child->eval_inverted_index(context, 
colId_invertedIndexIter_mapping,
+                                                       num_rows, bitmap);
+                if (!st.ok()) {
+                    return st;
+                }
+                if (!bitmap->contains(
+                            0)) { // the left expr no need to be extracted by 
inverted index
+                    return Status::OK();
+                }
+            }
+        } else if (_op == TExprOpcode::COMPOUND_AND) {
+            for (auto child : _children) {
+                Status st = child->eval_inverted_index(context, 
colId_invertedIndexIter_mapping,
+                                                       num_rows, bitmap);
+                if (!st.ok()) {
+                    return st;
+                }
+                if (bitmap->isEmpty()) { // the left expr no need to be 
extracted by inverted index
+                    return Status::OK();
+                }
+            }
+        } else {
+            return Status::InternalError(

Review Comment:
   process NOT



##########
be/src/olap/rowset/segment_v2/inverted_index_reader.h:
##########
@@ -72,6 +72,75 @@ class InvertedIndexIterator;
 class InvertedIndexQueryCacheHandle;
 class InvertedIndexFileReader;
 
+struct PrimitiveTypeConvertorHelper {

Review Comment:
   It can be moved to more generic file like PrimitiveTypeTraits.



##########
be/src/olap/rowset/segment_v2/inverted_index_reader.cpp:
##########
@@ -252,11 +252,12 @@ Status 
FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeSta
 
 Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* 
runtime_state,
                                   const std::string& column_name, const void* 
query_value,
-                                  InvertedIndexQueryType query_type,
+                                  PrimitiveType primitiveType, 
InvertedIndexQueryType query_type,
                                   std::shared_ptr<roaring::Roaring>& bit_map) {
     SCOPED_RAW_TIMER(&stats->inverted_index_query_timer);
-
-    std::string search_str = reinterpret_cast<const 
StringRef*>(query_value)->to_string();
+    auto&& storage_value =

Review Comment:
   The duplicate code can be moved to 
`InvertedIndexIterator::read_from_inverted_index`



##########
be/src/vec/functions/function.h:
##########
@@ -395,6 +404,14 @@ class IFunction : public 
std::enable_shared_from_this<IFunction>,
         return Status::OK();
     }
 
+    Status eval_inverted_index(FunctionContext* context,
+                               const vectorized::NameAndTypePair& 
data_type_with_name,
+                               segment_v2::InvertedIndexIterator* iter, 
uint32_t num_rows,
+                               roaring::Roaring* bitmap) const override {
+        LOG(FATAL) << "eval_inverted_index is not implemented for IFunction";

Review Comment:
   Why not return ERROR



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to