This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 603452d8804 [test](inverted index) add an Inverted Index Testing Switch (#38077) 603452d8804 is described below commit 603452d880445fba4bbf83df613a7cc3ecf08ac6 Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com> AuthorDate: Fri Jul 19 18:21:26 2024 +0800 [test](inverted index) add an Inverted Index Testing Switch (#38077) ## Proposed changes 1. enable_fallback_on_missing_inverted_index and enable_match_without_inverted_index,Default is true. --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 4 +- be/src/vec/functions/match.cpp | 60 ++++++++++++---------- be/src/vec/functions/match.h | 45 ++++++++-------- .../java/org/apache/doris/qe/SessionVariable.java | 21 ++++++++ gensrc/thrift/PaloInternalService.thrift | 6 ++- .../inverted_index_p0/test_no_index_match.groovy | 16 +++++- 6 files changed, 101 insertions(+), 51 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 61be47cced7..f05dfc44251 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -996,7 +996,9 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() { } bool SegmentIterator::_downgrade_without_index(Status res, bool need_remaining) { - if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND || + bool is_fallback = + _opts.runtime_state->query_options().enable_fallback_on_missing_inverted_index; + if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND && is_fallback) || res.code() == ErrorCode::INVERTED_INDEX_BYPASS || res.code() == ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED || (res.code() == ErrorCode::INVERTED_INDEX_NO_TERMS && need_remaining)) { diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp index fb70a684cb1..525e5b1fc2d 100644 --- a/be/src/vec/functions/match.cpp +++ b/be/src/vec/functions/match.cpp @@ -95,7 +95,7 @@ Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block, // set default value to 0, and match functions only need to set 1/true vec_res.resize_fill(input_rows_count); RETURN_IF_ERROR(execute_match( - column_name, match_query_str, input_rows_count, values, inverted_index_ctx, + context, column_name, match_query_str, input_rows_count, values, inverted_index_ctx, (array_col ? &(array_col->get_offsets()) : nullptr), vec_res)); block.replace_by_position(result, std::move(res)); } else { @@ -116,6 +116,10 @@ inline doris::segment_v2::InvertedIndexQueryType FunctionMatchBase::get_query_ty return doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY; } else if (fn_name == MATCH_PHRASE_FUNCTION) { return doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY; + } else if (fn_name == MATCH_PHRASE_PREFIX_FUNCTION) { + return doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY; + } else if (fn_name == MATCH_PHRASE_REGEXP_FUNCTION) { + return doris::segment_v2::InvertedIndexQueryType::MATCH_REGEXP_QUERY; } return doris::segment_v2::InvertedIndexQueryType::UNKNOWN_QUERY; } @@ -151,16 +155,27 @@ inline std::vector<std::string> FunctionMatchBase::analyse_data_token( return data_tokens; } -Status FunctionMatchAny::execute_match(const std::string& column_name, +Status FunctionMatchBase::check(FunctionContext* context, const std::string& function_name) const { + if (!context->state()->query_options().enable_match_without_inverted_index) { + return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( + "{} not support execute_match", function_name); + } + + DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", { + return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( + "{} not support execute_match", function_name); + }); + + return Status::OK(); +} + +Status FunctionMatchAny::execute_match(FunctionContext* context, const std::string& column_name, const std::string& match_query_str, size_t input_rows_count, const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const { - DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", { - return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( - "FunctionMatchAny not support execute_match"); - }) + RETURN_IF_ERROR(check(context, name)); doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN; if (inverted_index_ctx) { @@ -201,16 +216,13 @@ Status FunctionMatchAny::execute_match(const std::string& column_name, return Status::OK(); } -Status FunctionMatchAll::execute_match(const std::string& column_name, +Status FunctionMatchAll::execute_match(FunctionContext* context, const std::string& column_name, const std::string& match_query_str, size_t input_rows_count, const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const { - DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", { - return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( - "FunctionMatchAll not support execute_match"); - }) + RETURN_IF_ERROR(check(context, name)); doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN; if (inverted_index_ctx) { @@ -257,16 +269,13 @@ Status FunctionMatchAll::execute_match(const std::string& column_name, return Status::OK(); } -Status FunctionMatchPhrase::execute_match(const std::string& column_name, +Status FunctionMatchPhrase::execute_match(FunctionContext* context, const std::string& column_name, const std::string& match_query_str, size_t input_rows_count, const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const { - DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", { - return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( - "FunctionMatchPhrase not support execute_match"); - }) + RETURN_IF_ERROR(check(context, name)); doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN; if (inverted_index_ctx) { @@ -330,13 +339,11 @@ Status FunctionMatchPhrase::execute_match(const std::string& column_name, } Status FunctionMatchPhrasePrefix::execute_match( - const std::string& column_name, const std::string& match_query_str, size_t input_rows_count, - const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx, - const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const { - DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", { - return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( - "FunctionMatchPhrasePrefix not support execute_match"); - }) + FunctionContext* context, const std::string& column_name, + const std::string& match_query_str, size_t input_rows_count, const ColumnString* string_col, + InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets, + ColumnUInt8::Container& result) const { + RETURN_IF_ERROR(check(context, name)); doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN; if (inverted_index_ctx) { @@ -399,16 +406,13 @@ Status FunctionMatchPhrasePrefix::execute_match( return Status::OK(); } -Status FunctionMatchRegexp::execute_match(const std::string& column_name, +Status FunctionMatchRegexp::execute_match(FunctionContext* context, const std::string& column_name, const std::string& match_query_str, size_t input_rows_count, const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const { - DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", { - return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( - "FunctionMatchRegexp not support execute_match"); - }) + RETURN_IF_ERROR(check(context, name)); doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN; if (inverted_index_ctx) { diff --git a/be/src/vec/functions/match.h b/be/src/vec/functions/match.h index aaa7d206c03..1265980987c 100644 --- a/be/src/vec/functions/match.h +++ b/be/src/vec/functions/match.h @@ -53,6 +53,8 @@ namespace doris::vectorized { const std::string MATCH_ANY_FUNCTION = "match_any"; const std::string MATCH_ALL_FUNCTION = "match_all"; const std::string MATCH_PHRASE_FUNCTION = "match_phrase"; +const std::string MATCH_PHRASE_PREFIX_FUNCTION = "match_phrase_prefix"; +const std::string MATCH_PHRASE_REGEXP_FUNCTION = "match_regexp"; class FunctionMatchBase : public IFunction { public: @@ -70,8 +72,9 @@ public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override; - virtual Status execute_match(const std::string& column_name, const std::string& match_query_str, - size_t input_rows_count, const ColumnString* string_col, + virtual Status execute_match(FunctionContext* context, const std::string& column_name, + const std::string& match_query_str, size_t input_rows_count, + const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const = 0; @@ -84,6 +87,8 @@ public: int32_t current_block_row_idx, const ColumnArray::Offsets64* array_offsets, int32_t& current_src_array_offset) const; + + Status check(FunctionContext* context, const std::string& function_name) const; }; class FunctionMatchAny : public FunctionMatchBase { @@ -93,9 +98,9 @@ public: String get_name() const override { return name; } - Status execute_match(const std::string& column_name, const std::string& match_query_str, - size_t input_rows_count, const ColumnString* string_col, - InvertedIndexCtx* inverted_index_ctx, + Status execute_match(FunctionContext* context, const std::string& column_name, + const std::string& match_query_str, size_t input_rows_count, + const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const override; }; @@ -107,9 +112,9 @@ public: String get_name() const override { return name; } - Status execute_match(const std::string& column_name, const std::string& match_query_str, - size_t input_rows_count, const ColumnString* string_col, - InvertedIndexCtx* inverted_index_ctx, + Status execute_match(FunctionContext* context, const std::string& column_name, + const std::string& match_query_str, size_t input_rows_count, + const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const override; }; @@ -121,9 +126,9 @@ public: String get_name() const override { return name; } - Status execute_match(const std::string& column_name, const std::string& match_query_str, - size_t input_rows_count, const ColumnString* string_col, - InvertedIndexCtx* inverted_index_ctx, + Status execute_match(FunctionContext* context, const std::string& column_name, + const std::string& match_query_str, size_t input_rows_count, + const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const override; }; @@ -135,9 +140,9 @@ public: String get_name() const override { return name; } - Status execute_match(const std::string& column_name, const std::string& match_query_str, - size_t input_rows_count, const ColumnString* string_col, - InvertedIndexCtx* inverted_index_ctx, + Status execute_match(FunctionContext* context, const std::string& column_name, + const std::string& match_query_str, size_t input_rows_count, + const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const override; }; @@ -149,9 +154,9 @@ public: String get_name() const override { return name; } - Status execute_match(const std::string& column_name, const std::string& match_query_str, - size_t input_rows_count, const ColumnString* string_col, - InvertedIndexCtx* inverted_index_ctx, + Status execute_match(FunctionContext* context, const std::string& column_name, + const std::string& match_query_str, size_t input_rows_count, + const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const override; }; @@ -163,9 +168,9 @@ public: String get_name() const override { return name; } - Status execute_match(const std::string& column_name, const std::string& match_query_str, - size_t input_rows_count, const ColumnString* string_col, - InvertedIndexCtx* inverted_index_ctx, + Status execute_match(FunctionContext* context, const std::string& column_name, + const std::string& match_query_str, size_t input_rows_count, + const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const override { return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 33f143eb98e..b49c8a27bc6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -633,6 +633,9 @@ public class SessionVariable implements Serializable, Writable { public static final String DISABLE_EMPTY_PARTITION_PRUNE = "disable_empty_partition_prune"; // CLOUD_VARIABLES_BEGIN + public static final String ENABLE_MATCH_WITHOUT_INVERTED_INDEX = "enable_match_without_inverted_index"; + public static final String ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX = "enable_fallback_on_missing_inverted_index"; + /** * If set false, user couldn't submit analyze SQL and FE won't allocate any related resources. */ @@ -2036,6 +2039,20 @@ public class SessionVariable implements Serializable, Writable { }) public boolean enableESParallelScroll = true; + @VariableMgr.VarAttr(name = ENABLE_MATCH_WITHOUT_INVERTED_INDEX, description = { + "开启无索引match查询功能,建议正式环境保持开启", + "Enable no-index match query functionality." + + " it is recommended to keep this enabled in the production environment." + }) + public boolean enableMatchWithoutInvertedIndex = true; + + @VariableMgr.VarAttr(name = ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX, description = { + "开启后在没有找到索引的情况下直接查询报错,建议正式环境保持开启", + "After enabling, it will directly query and report an error if no index is found." + + " It is recommended to keep this enabled in the production environment." + }) + public boolean enableFallbackOnMissingInvertedIndex = true; + public void setEnableEsParallelScroll(boolean enableESParallelScroll) { this.enableESParallelScroll = enableESParallelScroll; } @@ -3606,6 +3623,10 @@ public class SessionVariable implements Serializable, Writable { tResult.setEnableShortCircuitQueryAccessColumnStore(enableShortCircuitQueryAcessColumnStore); tResult.setReadCsvEmptyLineAsNull(readCsvEmptyLineAsNull); tResult.setSerdeDialect(getSerdeDialect()); + + tResult.setEnableMatchWithoutInvertedIndex(enableMatchWithoutInvertedIndex); + tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex); + tResult.setKeepCarriageReturn(keepCarriageReturn); return tResult; } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 9300c22a398..1d91c855139 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -317,7 +317,11 @@ struct TQueryOptions { 118: optional TSerdeDialect serde_dialect = TSerdeDialect.DORIS; - 119: optional bool keep_carriage_return = false; // \n,\r\n split line in CSV. + 119: optional bool enable_match_without_inverted_index = true; + + 120: optional bool enable_fallback_on_missing_inverted_index = true; + + 121: optional bool keep_carriage_return = false; // \n,\r\n split line in CSV. // For cloud, to control if the content would be written into file cache 1000: optional bool disable_file_cache = false } diff --git a/regression-test/suites/inverted_index_p0/test_no_index_match.groovy b/regression-test/suites/inverted_index_p0/test_no_index_match.groovy index 0676fd4fc9d..cfa94b514a7 100644 --- a/regression-test/suites/inverted_index_p0/test_no_index_match.groovy +++ b/regression-test/suites/inverted_index_p0/test_no_index_match.groovy @@ -95,7 +95,21 @@ suite("test_no_index_match", "p0") { qt_sql """ select count() from ${testTable_unique} where (request match_phrase '欧冶工业品'); """ qt_sql """ select count() from ${testTable_unique} where (request match_phrase_prefix '欧冶工业品'); """ } finally { - } + } + + try { + """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) */ count() from ${testTable_unique} where (request match_phrase 'hm bg'); """ + } catch (Exception e) { + log.info(e.getMessage()); + assertTrue(e.getMessage().contains("match_phrase not support execute_match")) + } + + try { + """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) */ count() from ${testTable_unique} where (request match_phrase_prefix 'hm b'); """ + } catch (Exception e) { + log.info(e.getMessage()); + assertTrue(e.getMessage().contains("match_phrase_prefix not support execute_match")) + } } finally { } } \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org