This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new a4324085138 [fix] (inverted index) Fix match function without inverted index (#38989) (#39221)
a4324085138 is described below

commit a4324085138c071c93eaaa5bea890981d559d651
Author: Sun Chenyang <csun5...@gmail.com>
AuthorDate: Tue Aug 13 17:18:54 2024 +0800

    [fix] (inverted index) Fix match function without inverted index (#38989) (#39221)
    
    ## Proposed changes
    
    pick from #38989
---
 be/src/vec/exprs/vmatch_predicate.cpp              |   8 ++
 be/src/vec/functions/match.cpp                     | 108 +++++++++------------
 be/src/vec/functions/match.h                       |   4 +
 .../apache/doris/analysis/InvertedIndexUtil.java   |  12 +++
 .../org/apache/doris/analysis/MatchPredicate.java  |  25 +++--
 .../main/java/org/apache/doris/catalog/Index.java  |  12 +++
 .../glue/translator/ExpressionTranslator.java      |  15 +--
 gensrc/thrift/Exprs.thrift                         |   2 +
 .../test_match_without_index.groovy                |  89 +++++++++++++++++
 9 files changed, 191 insertions(+), 84 deletions(-)

diff --git a/be/src/vec/exprs/vmatch_predicate.cpp 
b/be/src/vec/exprs/vmatch_predicate.cpp
index 757847707a2..9743edfb29b 100644
--- a/be/src/vec/exprs/vmatch_predicate.cpp
+++ b/be/src/vec/exprs/vmatch_predicate.cpp
@@ -17,6 +17,7 @@
 
 #include "vec/exprs/vmatch_predicate.h"
 
+#include <CLucene.h> // IWYU pragma: keep
 #include <fmt/format.h>
 #include <fmt/ranges.h> // IWYU pragma: keep
 #include <gen_cpp/Exprs_types.h>
@@ -29,6 +30,7 @@
 #include <string_view>
 #include <vector>
 
+#include "CLucene/analysis/standard95/StandardTokenizer.h"
 #include "common/status.h"
 #include "olap/rowset/segment_v2/inverted_index_reader.h"
 #include "vec/core/block.h"
@@ -53,6 +55,12 @@ VMatchPredicate::VMatchPredicate(const TExprNode& node) : 
VExpr(node) {
     _inverted_index_ctx->parser_mode = node.match_predicate.parser_mode;
     _inverted_index_ctx->char_filter_map = 
node.match_predicate.char_filter_map;
     _analyzer = 
InvertedIndexReader::create_analyzer(_inverted_index_ctx.get());
+    _analyzer->set_lowercase(node.match_predicate.parser_lowercase);
+    if (node.match_predicate.parser_stopwords == "none") {
+        _analyzer->set_stopwords(nullptr);
+    } else {
+        _analyzer->set_stopwords(&lucene::analysis::standard95::stop_words);
+    }
     _inverted_index_ctx->analyzer = _analyzer.get();
 }
 
diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp
index c6c9d2b0bfb..33500b61b32 100644
--- a/be/src/vec/functions/match.cpp
+++ b/be/src/vec/functions/match.cpp
@@ -123,6 +123,23 @@ inline doris::segment_v2::InvertedIndexQueryType 
FunctionMatchBase::get_query_ty
     return doris::segment_v2::InvertedIndexQueryType::UNKNOWN_QUERY;
 }
 
+void FunctionMatchBase::analyse_query_str_token(std::vector<std::string>* 
query_tokens,
+                                                InvertedIndexCtx* 
inverted_index_ctx,
+                                                const std::string& 
match_query_str,
+                                                const std::string& 
column_name) {
+    VLOG_DEBUG << "begin to run " << get_name() << ", parser_type: "
+               << 
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type);
+    if (inverted_index_ctx->parser_type == 
InvertedIndexParserType::PARSER_NONE) {
+        query_tokens->emplace_back(match_query_str);
+        return;
+    }
+    auto reader = 
doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
+                                                                        
match_query_str);
+    doris::segment_v2::InvertedIndexReader::get_analyse_result(
+            *query_tokens, reader.get(), inverted_index_ctx->analyzer, 
column_name,
+            get_query_type_from_fn_name());
+}
+
 inline std::vector<std::string> FunctionMatchBase::analyse_data_token(
         const std::string& column_name, InvertedIndexCtx* inverted_index_ctx,
         const ColumnString* string_col, int32_t current_block_row_idx,
@@ -133,10 +150,15 @@ inline std::vector<std::string> 
FunctionMatchBase::analyse_data_token(
         for (auto next_src_array_offset = 
(*array_offsets)[current_block_row_idx];
              current_src_array_offset < next_src_array_offset; 
++current_src_array_offset) {
             const auto& str_ref = 
string_col->get_data_at(current_src_array_offset);
+            if (inverted_index_ctx->parser_type == 
InvertedIndexParserType::PARSER_NONE) {
+                data_tokens.emplace_back(str_ref.to_string());
+                continue;
+            }
             auto reader = 
doris::segment_v2::InvertedIndexReader::create_reader(
                     inverted_index_ctx, str_ref.to_string());
 
             std::vector<std::string> element_tokens;
+
             doris::segment_v2::InvertedIndexReader::get_analyse_result(
                     element_tokens, reader.get(), 
inverted_index_ctx->analyzer, column_name,
                     query_type, false);
@@ -144,12 +166,15 @@ inline std::vector<std::string> 
FunctionMatchBase::analyse_data_token(
         }
     } else {
         const auto& str_ref = string_col->get_data_at(current_block_row_idx);
-        auto reader = 
doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
-                                                                            
str_ref.to_string());
-
-        
doris::segment_v2::InvertedIndexReader::get_analyse_result(data_tokens, 
reader.get(),
-                                                                   
inverted_index_ctx->analyzer,
-                                                                   
column_name, query_type, false);
+        if (inverted_index_ctx->parser_type == 
InvertedIndexParserType::PARSER_NONE) {
+            data_tokens.emplace_back(str_ref.to_string());
+        } else {
+            auto reader = 
doris::segment_v2::InvertedIndexReader::create_reader(
+                    inverted_index_ctx, str_ref.to_string());
+            doris::segment_v2::InvertedIndexReader::get_analyse_result(
+                    data_tokens, reader.get(), inverted_index_ctx->analyzer, 
column_name,
+                    query_type, false);
+        }
     }
     return data_tokens;
 }
@@ -176,23 +201,14 @@ Status FunctionMatchAny::execute_match(FunctionContext* 
context, const std::stri
                                        ColumnUInt8::Container& result) {
     RETURN_IF_ERROR(check(context, name));
 
-    doris::InvertedIndexParserType parser_type = 
doris::InvertedIndexParserType::PARSER_UNKNOWN;
-    if (inverted_index_ctx) {
-        parser_type = inverted_index_ctx->parser_type;
-    }
-    VLOG_DEBUG << "begin to run FunctionMatchAny::execute_match, parser_type: "
-               << inverted_index_parser_type_to_string(parser_type);
-    auto reader = 
doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
-                                                                        
match_query_str);
     std::vector<std::string> query_tokens;
-    doris::segment_v2::InvertedIndexReader::get_analyse_result(
-            query_tokens, reader.get(), inverted_index_ctx->analyzer, 
column_name,
-            doris::segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY);
+    analyse_query_str_token(&query_tokens, inverted_index_ctx, 
match_query_str, column_name);
     if (query_tokens.empty()) {
         LOG(WARNING) << fmt::format(
                 "token parser result is empty for query, "
                 "please check your query: '{}' and index parser: '{}'",
-                match_query_str, 
inverted_index_parser_type_to_string(parser_type));
+                match_query_str,
+                
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
         return Status::OK();
     }
 
@@ -223,23 +239,14 @@ Status FunctionMatchAll::execute_match(FunctionContext* 
context, const std::stri
                                        ColumnUInt8::Container& result) {
     RETURN_IF_ERROR(check(context, name));
 
-    doris::InvertedIndexParserType parser_type = 
doris::InvertedIndexParserType::PARSER_UNKNOWN;
-    if (inverted_index_ctx) {
-        parser_type = inverted_index_ctx->parser_type;
-    }
-    VLOG_DEBUG << "begin to run FunctionMatchAll::execute_match, parser_type: "
-               << inverted_index_parser_type_to_string(parser_type);
-    auto reader = 
doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
-                                                                        
match_query_str);
     std::vector<std::string> query_tokens;
-    doris::segment_v2::InvertedIndexReader::get_analyse_result(
-            query_tokens, reader.get(), inverted_index_ctx->analyzer, 
column_name,
-            doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY);
+    analyse_query_str_token(&query_tokens, inverted_index_ctx, 
match_query_str, column_name);
     if (query_tokens.empty()) {
         LOG(WARNING) << fmt::format(
                 "token parser result is empty for query, "
                 "please check your query: '{}' and index parser: '{}'",
-                match_query_str, 
inverted_index_parser_type_to_string(parser_type));
+                match_query_str,
+                
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
         return Status::OK();
     }
 
@@ -276,23 +283,14 @@ Status 
FunctionMatchPhrase::execute_match(FunctionContext* context, const std::s
                                           ColumnUInt8::Container& result) {
     RETURN_IF_ERROR(check(context, name));
 
-    doris::InvertedIndexParserType parser_type = 
doris::InvertedIndexParserType::PARSER_UNKNOWN;
-    if (inverted_index_ctx) {
-        parser_type = inverted_index_ctx->parser_type;
-    }
-    VLOG_DEBUG << "begin to run FunctionMatchPhrase::execute_match, 
parser_type: "
-               << inverted_index_parser_type_to_string(parser_type);
-    auto reader = 
doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
-                                                                        
match_query_str);
     std::vector<std::string> query_tokens;
-    doris::segment_v2::InvertedIndexReader::get_analyse_result(
-            query_tokens, reader.get(), inverted_index_ctx->analyzer, 
column_name,
-            doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY);
+    analyse_query_str_token(&query_tokens, inverted_index_ctx, 
match_query_str, column_name);
     if (query_tokens.empty()) {
         VLOG_DEBUG << fmt::format(
                 "token parser result is empty for query, "
                 "please check your query: '{}' and index parser: '{}'",
-                match_query_str, 
inverted_index_parser_type_to_string(parser_type));
+                match_query_str,
+                
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
         return Status::OK();
     }
 
@@ -344,25 +342,14 @@ Status FunctionMatchPhrasePrefix::execute_match(
         ColumnUInt8::Container& result) {
     RETURN_IF_ERROR(check(context, name));
 
-    doris::InvertedIndexParserType parser_type = 
doris::InvertedIndexParserType::PARSER_UNKNOWN;
-    if (inverted_index_ctx) {
-        parser_type = inverted_index_ctx->parser_type;
-    }
-    VLOG_DEBUG << "begin to run FunctionMatchPhrasePrefix::execute_match, 
parser_type: "
-               << inverted_index_parser_type_to_string(parser_type);
-
-    auto reader = 
doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
-                                                                        
match_query_str);
     std::vector<std::string> query_tokens;
-    doris::segment_v2::InvertedIndexReader::get_analyse_result(
-            query_tokens, reader.get(), inverted_index_ctx->analyzer, 
column_name,
-            
doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY);
-
+    analyse_query_str_token(&query_tokens, inverted_index_ctx, 
match_query_str, column_name);
     if (query_tokens.empty()) {
         VLOG_DEBUG << fmt::format(
                 "token parser result is empty for query, "
                 "please check your query: '{}' and index parser: '{}'",
-                match_query_str, 
inverted_index_parser_type_to_string(parser_type));
+                match_query_str,
+                
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
         return Status::OK();
     }
 
@@ -413,18 +400,15 @@ Status 
FunctionMatchRegexp::execute_match(FunctionContext* context, const std::s
                                           ColumnUInt8::Container& result) {
     RETURN_IF_ERROR(check(context, name));
 
-    doris::InvertedIndexParserType parser_type = 
doris::InvertedIndexParserType::PARSER_UNKNOWN;
-    if (inverted_index_ctx) {
-        parser_type = inverted_index_ctx->parser_type;
-    }
     VLOG_DEBUG << "begin to run FunctionMatchRegexp::execute_match, 
parser_type: "
-               << inverted_index_parser_type_to_string(parser_type);
+               << 
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type);
 
     if (match_query_str.empty()) {
         VLOG_DEBUG << fmt::format(
                 "token parser result is empty for query, "
                 "please check your query: '{}' and index parser: '{}'",
-                match_query_str, 
inverted_index_parser_type_to_string(parser_type));
+                match_query_str,
+                
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
         return Status::OK();
     }
 
diff --git a/be/src/vec/functions/match.h b/be/src/vec/functions/match.h
index 167d974c80c..10710998b11 100644
--- a/be/src/vec/functions/match.h
+++ b/be/src/vec/functions/match.h
@@ -81,6 +81,10 @@ public:
 
     doris::segment_v2::InvertedIndexQueryType get_query_type_from_fn_name();
 
+    void analyse_query_str_token(std::vector<std::string>* query_tokens,
+                                 InvertedIndexCtx* inverted_index_ctx,
+                                 const std::string& match_query_str, const 
std::string& field_name);
+
     std::vector<std::string> analyse_data_token(const std::string& column_name,
                                                 InvertedIndexCtx* 
inverted_index_ctx,
                                                 const ColumnString* string_col,
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
index ea06db40c1d..2e71596b4b2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
@@ -101,6 +101,18 @@ public class InvertedIndexUtil {
         return charFilterMap;
     }
 
+    public static boolean getInvertedIndexParserLowercase(Map<String, String> 
properties) {
+        String lowercase = properties == null ? null : 
properties.get(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
+        // default is true if not set
+        return lowercase != null ? Boolean.parseBoolean(lowercase) : true;
+    }
+
+    public static String getInvertedIndexParserStopwords(Map<String, String> 
properties) {
+        String stopwrods = properties == null ? null : 
properties.get(INVERTED_INDEX_PARSER_STOPWORDS_KEY);
+        // default is "" if not set
+        return stopwrods != null ? stopwrods : "";
+    }
+
     public static void checkInvertedIndexParser(String indexColName, 
PrimitiveType colType,
             Map<String, String> properties) throws AnalysisException {
         String parser = null;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
index 8dcf38de2ed..428828d74bb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
@@ -142,6 +142,8 @@ public class MatchPredicate extends Predicate {
     private String invertedIndexParser;
     private String invertedIndexParserMode;
     private Map<String, String> invertedIndexCharFilter;
+    private boolean invertedIndexParserLowercase = true;
+    private String invertedIndexParserStopwords = "";
 
     public MatchPredicate(Operator op, Expr e1, Expr e2) {
         super();
@@ -162,23 +164,22 @@ public class MatchPredicate extends Predicate {
         invertedIndexParser = other.invertedIndexParser;
         invertedIndexParserMode = other.invertedIndexParserMode;
         invertedIndexCharFilter = other.invertedIndexCharFilter;
+        invertedIndexParserLowercase = other.invertedIndexParserLowercase;
+        invertedIndexParserStopwords = other.invertedIndexParserStopwords;
     }
 
     /**
      * use for Nereids ONLY
      */
     public MatchPredicate(Operator op, Expr e1, Expr e2, Type retType,
-            NullableMode nullableMode, String invertedIndexParser, String 
invertedIndexParserMode,
-            Map<String, String> invertedIndexCharFilter) {
+            NullableMode nullableMode, Index invertedIndex) {
         this(op, e1, e2);
-        if (invertedIndexParser != null) {
-            this.invertedIndexParser = invertedIndexParser;
-        }
-        if (invertedIndexParserMode != null) {
-            this.invertedIndexParserMode = invertedIndexParserMode;
-        }
-        if (invertedIndexParserMode != null) {
-            this.invertedIndexCharFilter = invertedIndexCharFilter;
+        if (invertedIndex != null) {
+            this.invertedIndexParser = invertedIndex.getInvertedIndexParser();
+            this.invertedIndexParserMode = 
invertedIndex.getInvertedIndexParserMode();
+            this.invertedIndexCharFilter = 
invertedIndex.getInvertedIndexCharFilter();
+            this.invertedIndexParserLowercase = 
invertedIndex.getInvertedIndexParserLowercase();
+            this.invertedIndexParserStopwords = 
invertedIndex.getInvertedIndexParserStopwords();
         }
         fn = new Function(new FunctionName(op.name), 
Lists.newArrayList(e1.getType(), e2.getType()), retType,
                 false, true, nullableMode);
@@ -212,6 +213,8 @@ public class MatchPredicate extends Predicate {
         msg.setOpcode(op.getOpcode());
         msg.match_predicate = new TMatchPredicate(invertedIndexParser, 
invertedIndexParserMode);
         msg.match_predicate.setCharFilterMap(invertedIndexCharFilter);
+        msg.match_predicate.setParserLowercase(invertedIndexParserLowercase);
+        msg.match_predicate.setParserStopwords(invertedIndexParserStopwords);
     }
 
     @Override
@@ -249,6 +252,8 @@ public class MatchPredicate extends Predicate {
                             invertedIndexParser = 
index.getInvertedIndexParser();
                             invertedIndexParserMode = 
index.getInvertedIndexParserMode();
                             invertedIndexCharFilter = 
index.getInvertedIndexCharFilter();
+                            invertedIndexParserLowercase = 
index.getInvertedIndexParserLowercase();
+                            invertedIndexParserStopwords = 
index.getInvertedIndexParserStopwords();
                             break;
                         }
                     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java
index 41c11d2d987..b05c1d0afd4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java
@@ -154,6 +154,18 @@ public class Index implements Writable {
         return InvertedIndexUtil.getInvertedIndexCharFilter(properties);
     }
 
+    public boolean getInvertedIndexParserLowercase() {
+        return InvertedIndexUtil.getInvertedIndexParserLowercase(properties);
+    }
+
+    public String getInvertedIndexParserStopwords() {
+        return InvertedIndexUtil.getInvertedIndexParserStopwords(properties);
+    }
+
+    public boolean isLightIndexChangeSupported() {
+        return indexType == IndexDef.IndexType.INVERTED;
+    }
+
     public String getComment() {
         return comment;
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
index 581ed8f3dda..3892001f332 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
@@ -31,7 +31,6 @@ import org.apache.doris.analysis.FunctionCallExpr;
 import org.apache.doris.analysis.FunctionName;
 import org.apache.doris.analysis.FunctionParams;
 import org.apache.doris.analysis.IndexDef;
-import org.apache.doris.analysis.InvertedIndexUtil;
 import org.apache.doris.analysis.IsNullPredicate;
 import org.apache.doris.analysis.MatchPredicate;
 import org.apache.doris.analysis.OrderByElement;
@@ -96,9 +95,7 @@ import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.Optional;
 import java.util.stream.Collectors;
 
@@ -188,9 +185,7 @@ public class ExpressionTranslator extends 
DefaultExpressionVisitor<Expr, PlanTra
 
     @Override
     public Expr visitMatch(Match match, PlanTranslatorContext context) {
-        String invertedIndexParser = 
InvertedIndexUtil.INVERTED_INDEX_PARSER_UNKNOWN;
-        String invertedIndexParserMode = 
InvertedIndexUtil.INVERTED_INDEX_PARSER_COARSE_GRANULARITY;
-        Map<String, String> invertedIndexCharFilter = new HashMap<>();
+        Index invertedIndex = null;
         SlotRef left = (SlotRef) match.left().accept(this, context);
         OlapTable olapTbl = 
Optional.ofNullable(getOlapTableFromSlotDesc(left.getDesc()))
                                     .orElse(getOlapTableDirectly(left));
@@ -205,9 +200,7 @@ public class ExpressionTranslator extends 
DefaultExpressionVisitor<Expr, PlanTra
                 if (index.getIndexType() == IndexDef.IndexType.INVERTED) {
                     List<String> columns = index.getColumns();
                     if (columns != null && !columns.isEmpty() && 
left.getColumnName().equals(columns.get(0))) {
-                        invertedIndexParser = index.getInvertedIndexParser();
-                        invertedIndexParserMode = 
index.getInvertedIndexParserMode();
-                        invertedIndexCharFilter = 
index.getInvertedIndexCharFilter();
+                        invertedIndex = index;
                         break;
                     }
                 }
@@ -220,9 +213,7 @@ public class ExpressionTranslator extends 
DefaultExpressionVisitor<Expr, PlanTra
             match.right().accept(this, context),
             match.getDataType().toCatalogDataType(),
             NullableMode.DEPEND_ON_ARGUMENT,
-            invertedIndexParser,
-            invertedIndexParserMode,
-            invertedIndexCharFilter);
+            invertedIndex);
     }
 
     @Override
diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift
index e102babca21..925a96d12b8 100644
--- a/gensrc/thrift/Exprs.thrift
+++ b/gensrc/thrift/Exprs.thrift
@@ -143,6 +143,8 @@ struct TMatchPredicate {
   1: required string parser_type;
   2: required string parser_mode;
   3: optional map<string, string> char_filter_map;
+  4: optional bool parser_lowercase = true;
+  5: optional string parser_stopwords = "";
 }
 
 struct TLiteralPredicate {
diff --git 
a/regression-test/suites/inverted_index_p0/test_match_without_index.groovy 
b/regression-test/suites/inverted_index_p0/test_match_without_index.groovy
new file mode 100644
index 00000000000..94cee179ed2
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_match_without_index.groovy
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_match_without_index", "p0") {
+
+    def testTable = "test_match_without_index"
+    sql "DROP TABLE IF EXISTS ${testTable}"
+    sql """
+        CREATE TABLE ${testTable} (
+          `@timestamp` int(11) NULL COMMENT "",
+          `clientip` string NULL COMMENT "",
+          `request` string NULL COMMENT "",
+          `status` string NULL COMMENT "",
+          `size` int NULL COMMENT "",
+           INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '',
+            INDEX request_idx (`request`) USING INVERTED 
PROPERTIES("parser"="unicode", "lower_case" = "false") COMMENT '',
+            INDEX status_idx (`status`) USING INVERTED COMMENT '',
+            INDEX size_idx (`size`) USING INVERTED COMMENT ''
+          ) ENGINE=OLAP
+          DUPLICATE KEY(`@timestamp`)
+          COMMENT "OLAP"
+          DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1
+          PROPERTIES (
+          "replication_allocation" = "tag.location.default: 1"
+        );
+      """
+
+    sql """ INSERT INTO ${testTable} VALUES (123, '17.0.0.0', 'HTTP GET', 
'200', 20); """
+    sql """ INSERT INTO ${testTable} VALUES (123, '17.0.0.0', 'Life is like a 
box of chocolates, you never know what you are going to get.', '200', 20); """
+    // sql """ """
+
+    List<Object> match_res_without_index = new ArrayList<>();
+    List<Object> match_res_with_index =new ArrayList<>();
+    def create_sql = {
+        List<String> list = new ArrayList<>()
+        list.add(" select count() from ${testTable} where clientip 
match_phrase '17' ");
+        list.add(" select count() from ${testTable} where clientip match_all 
'17' ");
+        list.add(" select count() from ${testTable} where clientip match_any 
'17' ");
+        list.add(" select count() from ${testTable} where request match_any 
'get' ");
+        list.add(" select count() from ${testTable} where request 
match_phrase_prefix 'like box' ");
+        return list;
+    }
+
+    def execute_sql = { resultList, sqlList ->
+        for (sqlStr in sqlList) {
+            def sqlResult = sql """ ${sqlStr} """
+            resultList.add(sqlResult)
+        }
+    }
+    
+    def compare_result = { executedSql ->
+        assertEquals(match_res_without_index.size(), 
match_res_with_index.size())
+        for (int i = 0; i < match_res_without_index.size(); i++) {
+            if (match_res_without_index[i] != match_res_with_index[i]) {
+                logger.info("sql is {}", executedSql[i])
+                logger.info("match_res_without_index is {}", 
match_res_without_index[i])
+                logger.info("match_res_with_index is {}", 
match_res_with_index[i])
+                assertTrue(false)
+            }
+        }
+    }
+
+    def index_sql = create_sql.call()
+    try {
+        
GetDebugPoint().enableDebugPointForAllBEs("return_inverted_index_bypass")
+        execute_sql.call(match_res_without_index, index_sql)
+    
+    } finally {
+        
GetDebugPoint().disableDebugPointForAllBEs("return_inverted_index_bypass")
+        execute_sql.call(match_res_with_index, index_sql)
+        compare_result.call(index_sql)
+    }
+    
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to