xiaokang commented on code in PR #41286: URL: https://github.com/apache/doris/pull/41286#discussion_r1777868603
########## be/src/vec/functions/array/function_arrays_overlap.h: ########## @@ -128,6 +129,87 @@ class FunctionArraysOverlap : public IFunction { return make_nullable(std::make_shared<DataTypeUInt8>()); } + /** + * eval inverted index. we can filter array rows with inverted index iter + * array_overlap(array, []) -> array_overlap(array, const value) + */ + Status evaluate_inverted_index( + const ColumnsWithTypeAndName& arguments, + const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names, + std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows, + segment_v2::InvertedIndexResultBitmap& bitmap_result) const override { + DCHECK(arguments.size() == 1); + DCHECK(data_type_with_names.size() == 1); + DCHECK(iterators.size() == 1); + auto* iter = iterators[0]; + if (iter == nullptr) { + return Status::OK(); + } + auto data_type_with_name = data_type_with_names[0]; + if (iter->get_inverted_index_reader_type() == + segment_v2::InvertedIndexReaderType::FULLTEXT) { + return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>( + "Inverted index evaluate skipped, FULLTEXT reader can not support " + "array_overlap"); + } + // in arrays_overlap param is array Field and const Field + ColumnPtr arg_column = arguments[0].column; + DataTypePtr arg_type = arguments[0].type; + if ((is_column_nullable(*arg_column) && !is_column_const(*remove_nullable(arg_column))) || + !is_column_const(*arg_column)) { Review Comment: Logic error: if arg_column is nullable, this condition is always true. ########## be/src/vec/functions/array/function_arrays_overlap.h: ########## @@ -128,6 +129,87 @@ class FunctionArraysOverlap : public IFunction { return make_nullable(std::make_shared<DataTypeUInt8>()); } + /** + * eval inverted index. 
we can filter array rows with inverted index iter + * array_overlap(array, []) -> array_overlap(array, const value) + */ + Status evaluate_inverted_index( + const ColumnsWithTypeAndName& arguments, + const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names, + std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows, + segment_v2::InvertedIndexResultBitmap& bitmap_result) const override { + DCHECK(arguments.size() == 1); + DCHECK(data_type_with_names.size() == 1); + DCHECK(iterators.size() == 1); + auto* iter = iterators[0]; + if (iter == nullptr) { + return Status::OK(); + } + auto data_type_with_name = data_type_with_names[0]; + if (iter->get_inverted_index_reader_type() == + segment_v2::InvertedIndexReaderType::FULLTEXT) { + return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>( + "Inverted index evaluate skipped, FULLTEXT reader can not support " + "array_overlap"); + } + // in arrays_overlap param is array Field and const Field + ColumnPtr arg_column = arguments[0].column; + DataTypePtr arg_type = arguments[0].type; + if ((is_column_nullable(*arg_column) && !is_column_const(*remove_nullable(arg_column))) || + !is_column_const(*arg_column)) { + // if not we should skip inverted index and evaluate in expression + return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>( + "Inverted index evaluate skipped, array_overlap only support const value"); + } + + Field param_value; + arguments[0].column->get(0, param_value); + DCHECK(is_array(remove_nullable(arguments[0].type))); + auto nested_param_type = + check_and_get_data_type<DataTypeArray>(remove_nullable(arguments[0].type).get()) + ->get_nested_type() + ->get_type_as_type_descriptor() + .type; + // The current implementation for the inverted index of arrays cannot handle cases where the array contains null values, + // meaning an item in the array is null. 
+ if (param_value.is_null()) { + return Status::OK(); + } + std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>(); + std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>(); + if (iter->has_null()) { + segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle; + RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); + null_bitmap = null_bitmap_cache_handle.get_bitmap(); + } + std::unique_ptr<InvertedIndexQueryParamFactory> query_param = nullptr; + const Array& query_val = param_value.get<Array>(); + for (size_t i = 0; i < query_val.size(); ++i) { + Field nested_query_val = query_val[i]; + std::shared_ptr<roaring::Roaring> single_res = std::make_shared<roaring::Roaring>(); + RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( + nested_param_type, &nested_query_val, query_param)); + Status st = iter->read_from_inverted_index( + data_type_with_name.first, query_param->get_value(), + segment_v2::InvertedIndexQueryType::EQUAL_QUERY, num_rows, single_res); + if (st.code() == ErrorCode::INVERTED_INDEX_NO_TERMS) { + // if analyzed param with no term, we do not filter any rows + // return all rows with OK status + roaring->addRange(0, num_rows); + break; + } else if (st != Status::OK()) { + return st; + } + *roaring |= *single_res; + } + + segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap); + bitmap_result = result; Review Comment: Should you overwrite bitmap_result, or `&` it with the existing value? ########## be/src/vec/functions/array/function_arrays_overlap.h: ########## @@ -128,6 +129,87 @@ class FunctionArraysOverlap : public IFunction { return make_nullable(std::make_shared<DataTypeUInt8>()); } + /** + * eval inverted index. 
we can filter array rows with inverted index iter + * array_overlap(array, []) -> array_overlap(array, const value) + */ + Status evaluate_inverted_index( + const ColumnsWithTypeAndName& arguments, + const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names, + std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows, + segment_v2::InvertedIndexResultBitmap& bitmap_result) const override { + DCHECK(arguments.size() == 1); + DCHECK(data_type_with_names.size() == 1); + DCHECK(iterators.size() == 1); + auto* iter = iterators[0]; + if (iter == nullptr) { + return Status::OK(); + } + auto data_type_with_name = data_type_with_names[0]; + if (iter->get_inverted_index_reader_type() == + segment_v2::InvertedIndexReaderType::FULLTEXT) { + return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>( + "Inverted index evaluate skipped, FULLTEXT reader can not support " + "array_overlap"); + } + // in arrays_overlap param is array Field and const Field + ColumnPtr arg_column = arguments[0].column; + DataTypePtr arg_type = arguments[0].type; + if ((is_column_nullable(*arg_column) && !is_column_const(*remove_nullable(arg_column))) || + !is_column_const(*arg_column)) { + // if not we should skip inverted index and evaluate in expression + return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>( + "Inverted index evaluate skipped, array_overlap only support const value"); + } + + Field param_value; + arguments[0].column->get(0, param_value); + DCHECK(is_array(remove_nullable(arguments[0].type))); + auto nested_param_type = + check_and_get_data_type<DataTypeArray>(remove_nullable(arguments[0].type).get()) + ->get_nested_type() + ->get_type_as_type_descriptor() + .type; + // The current implementation for the inverted index of arrays cannot handle cases where the array contains null values, + // meaning an item in the array is null. 
+ if (param_value.is_null()) { + return Status::OK(); + } + std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>(); + std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>(); + if (iter->has_null()) { + segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle; + RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); + null_bitmap = null_bitmap_cache_handle.get_bitmap(); + } + std::unique_ptr<InvertedIndexQueryParamFactory> query_param = nullptr; + const Array& query_val = param_value.get<Array>(); + for (size_t i = 0; i < query_val.size(); ++i) { + Field nested_query_val = query_val[i]; + std::shared_ptr<roaring::Roaring> single_res = std::make_shared<roaring::Roaring>(); + RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( + nested_param_type, &nested_query_val, query_param)); + Status st = iter->read_from_inverted_index( + data_type_with_name.first, query_param->get_value(), + segment_v2::InvertedIndexQueryType::EQUAL_QUERY, num_rows, single_res); + if (st.code() == ErrorCode::INVERTED_INDEX_NO_TERMS) { + // if analyzed param with no term, we do not filter any rows Review Comment: Why do you need `analyze` for array_overlaps ? ########## be/src/vec/functions/array/function_arrays_overlap.h: ########## @@ -128,6 +129,87 @@ class FunctionArraysOverlap : public IFunction { return make_nullable(std::make_shared<DataTypeUInt8>()); } + /** + * eval inverted index. we can filter array rows with inverted index iter + * array_overlap(array, []) -> array_overlap(array, const value) Review Comment: What do you mean by `array_overlap(array, []) -> array_overlap(array, const value)` ? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org