xiaokang commented on code in PR #32620: URL: https://github.com/apache/doris/pull/32620#discussion_r1548824626
########## be/src/vec/exprs/vcompound_pred.h: ########## @@ -53,6 +53,56 @@ class VCompoundPred : public VectorizedFnCall { const std::string& expr_name() const override { return _expr_name; } + bool is_all_ones(const roaring::Roaring& r) { + return r.contains(0); + for (roaring::RoaringSetBitForwardIterator i = r.begin(); i != r.end(); ++i) { + if (*i == 0) { + return false; + } + } + return true; + } + + // 1. when meet 'or' conjunct: a or b, if b can apply index, return all rows, so b should not be extracted + // 2. when meet 'and' conjunct, function with column b can not apply inverted index + // eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not for index, so b should not be extracted + // but a and array_contains(b, 1), b can be applied inverted index, which b can be extracted + Status eval_inverted_index( + VExprContext* context, + const std::unordered_map<ColumnId, std::pair<vectorized::NameAndTypePair, + segment_v2::InvertedIndexIterator*>>& + colId_invertedIndexIter_mapping, + uint32_t num_rows, roaring::Roaring* bitmap) const override { + if (_op == TExprOpcode::COMPOUND_OR) { + for (auto child : _children) { + Status st = child->eval_inverted_index(context, colId_invertedIndexIter_mapping, Review Comment: It implies AND ########## be/src/vec/functions/array/function_array_index.h: ########## @@ -70,6 +74,11 @@ struct ArrayCountEqual { static constexpr void apply(ResultType& current, size_t j) noexcept { ++current; } }; +struct ParamValue { + PrimitiveType type; + Field query_value; Review Comment: just value to be more generic ########## be/src/vec/exprs/vexpr_context.h: ########## @@ -69,6 +70,21 @@ class VExprContext { return _fn_contexts[i].get(); } + // execute expr with inverted index which column a, b has inverted indexes + // but some situation although column b has indexes, but apply index is not useful, we should + // skip this expr, just do not apply index anymore. + /** + * @param colId_invertedIndexIter_mapping contains all column id to inverted index iterator mapping from segmentIterator + * @param num_rows number of rows in one segment. + * @param bitmap roaring bitmap to store the result. 0 is present filed by index. + * @return status not ok means execute failed. + */ + [[nodiscard]] Status eval_inverted_indexs( + const std::unordered_map<ColumnId, std::pair<vectorized::NameAndTypePair, + segment_v2::InvertedIndexIterator*>>& + colId_invertedIndexIter_mapping, Review Comment: use consistent variable name style: colid_to_inverted_index_iter ########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -1209,6 +1226,33 @@ Status SegmentIterator::_apply_inverted_index() { } } + // support expr to evaluate inverted index + std::unordered_map<ColumnId, std::pair<vectorized::NameAndTypePair, InvertedIndexIterator*>> + iter_map; + + for (auto col_id : _common_expr_columns) { + if (_check_apply_by_inverted_index(col_id)) { + iter_map[col_id] = std::make_pair(_storage_name_and_type[col_id], + _inverted_index_iterators[col_id].get()); + } + } + for (auto exprCtx : _common_expr_ctxs_push_down) { + // _inverted_index_iterators has all column ids which has inverted index + // _common_expr_columns has all column ids from _common_expr_ctxs_push_down + // if current bitmap is already empty just return + if (_row_bitmap.isEmpty()) { + break; + } + roaring::Roaring bitmap = _row_bitmap; + const Status st = exprCtx->eval_inverted_indexs(iter_map, num_rows(), &bitmap); + if (!st.ok()) { + LOG(WARNING) << "failed to evaluate index in expr" << exprCtx->root()->debug_string() + << ", error msg: " << st; + } else { + _row_bitmap &= bitmap; Review Comment: `_row_bitmap &= bitmap` implies that there is AND logic between _common_expr_ctxs_push_down and predicates. It's not always true. ########## be/src/vec/functions/array/function_array_index.h: ########## @@ -87,6 +96,58 @@ class FunctionArrayIndex : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } + Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if (scope == FunctionContext::THREAD_LOCAL) { + return Status::OK(); Review Comment: why? ########## be/src/vec/exprs/vcompound_pred.h: ########## @@ -53,6 +53,56 @@ class VCompoundPred : public VectorizedFnCall { const std::string& expr_name() const override { return _expr_name; } + bool is_all_ones(const roaring::Roaring& r) { + return r.contains(0); + for (roaring::RoaringSetBitForwardIterator i = r.begin(); i != r.end(); ++i) { + if (*i == 0) { + return false; + } + } + return true; + } + + // 1. when meet 'or' conjunct: a or b, if b can apply index, return all rows, so b should not be extracted + // 2. when meet 'and' conjunct, function with column b can not apply inverted index + // eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not for index, so b should not be extracted + // but a and array_contains(b, 1), b can be applied inverted index, which b can be extracted + Status eval_inverted_index( + VExprContext* context, + const std::unordered_map<ColumnId, std::pair<vectorized::NameAndTypePair, + segment_v2::InvertedIndexIterator*>>& + colId_invertedIndexIter_mapping, + uint32_t num_rows, roaring::Roaring* bitmap) const override { + if (_op == TExprOpcode::COMPOUND_OR) { + for (auto child : _children) { + Status st = child->eval_inverted_index(context, colId_invertedIndexIter_mapping, + num_rows, bitmap); + if (!st.ok()) { + return st; + } + if (!bitmap->contains( + 0)) { // the left expr no need to be extracted by inverted index + return Status::OK(); + } + } + } else if (_op == TExprOpcode::COMPOUND_AND) { + for (auto child : _children) { + Status st = child->eval_inverted_index(context, colId_invertedIndexIter_mapping, + num_rows, bitmap); + if (!st.ok()) { + return st; + } + if (bitmap->isEmpty()) { // the left expr no need to be extracted by inverted index + return Status::OK(); + } + } + } else { + return Status::InternalError( Review Comment: process NOT ########## be/src/olap/rowset/segment_v2/inverted_index_reader.h: ########## @@ -72,6 +72,75 @@ class InvertedIndexIterator; class InvertedIndexQueryCacheHandle; class InvertedIndexFileReader; +struct PrimitiveTypeConvertorHelper { Review Comment: It can be moved to more generic file like PrimitiveTypeTraits. ########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -252,11 +252,12 @@ Status FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeSta Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, + PrimitiveType primitiveType, InvertedIndexQueryType query_type, std::shared_ptr<roaring::Roaring>& bit_map) { SCOPED_RAW_TIMER(&stats->inverted_index_query_timer); - - std::string search_str = reinterpret_cast<const StringRef*>(query_value)->to_string(); + auto&& storage_value = Review Comment: The duplicate code can be moved to `InvertedIndexIterator::read_from_inverted_index` ########## be/src/vec/functions/function.h: ########## @@ -395,6 +404,14 @@ class IFunction : public std::enable_shared_from_this<IFunction>, return Status::OK(); } + Status eval_inverted_index(FunctionContext* context, + const vectorized::NameAndTypePair& data_type_with_name, + segment_v2::InvertedIndexIterator* iter, uint32_t num_rows, + roaring::Roaring* bitmap) const override { + LOG(FATAL) << "eval_inverted_index is not implemented for IFunction"; Review Comment: Why not return ERROR -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org