This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 300dc60c864 [feature](search) add exact dsl for search function 
(#56710)
300dc60c864 is described below

commit 300dc60c864dc7eb6d91f08263d8f2fbefb622d4
Author: Jack <[email protected]>
AuthorDate: Mon Oct 6 15:33:28 2025 +0800

    [feature](search) add exact dsl for search function (#56710)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #56139
    
    Problem Summary:
    This PR adds EXACT DSL functionality to the search function, enabling
    exact string matching without tokenization. This feature complements
    existing ANY/ALL operators that work with tokenized indexes by providing
    strict string equality matching.
---
 be/src/vec/functions/function_search.cpp           |  17 ++-
 .../org/apache/doris/nereids/search/SearchLexer.g4 |   7 +-
 .../apache/doris/nereids/search/SearchParser.g4    |   4 +-
 .../functions/scalar/SearchDslParser.java          |  11 ++
 .../data/search/test_search_exact_basic.out        |   9 ++
 .../data/search/test_search_exact_lowercase.out    |  32 ++++++
 .../data/search/test_search_exact_match.out        |  71 +++++++++++++
 .../data/search/test_search_exact_multi_index.out  |  28 +++++
 .../suites/search/test_search_exact_basic.groovy   |  55 ++++++++++
 .../search/test_search_exact_lowercase.groovy      |  69 +++++++++++++
 .../suites/search/test_search_exact_match.groovy   | 115 +++++++++++++++++++++
 .../search/test_search_exact_multi_index.groovy    |  70 +++++++++++++
 12 files changed, 483 insertions(+), 5 deletions(-)

diff --git a/be/src/vec/functions/function_search.cpp 
b/be/src/vec/functions/function_search.cpp
index 780488db7c0..72cb12d886a 100644
--- a/be/src/vec/functions/function_search.cpp
+++ b/be/src/vec/functions/function_search.cpp
@@ -279,7 +279,8 @@ FunctionSearch::ClauseTypeCategory 
FunctionSearch::get_clause_type_category(
     if (clause_type == "AND" || clause_type == "OR" || clause_type == "NOT") {
         return ClauseTypeCategory::COMPOUND;
     } else if (clause_type == "TERM" || clause_type == "PREFIX" || clause_type 
== "WILDCARD" ||
-               clause_type == "REGEXP" || clause_type == "RANGE" || 
clause_type == "LIST") {
+               clause_type == "REGEXP" || clause_type == "RANGE" || 
clause_type == "LIST" ||
+               clause_type == "EXACT") {
         // Non-tokenized queries: exact matching, pattern matching, range, 
list operations
         return ClauseTypeCategory::NON_TOKENIZED;
     } else if (clause_type == "PHRASE" || clause_type == "MATCH" || 
clause_type == "ANY" ||
@@ -349,6 +350,9 @@ InvertedIndexQueryType 
FunctionSearch::clause_type_to_query_type(
             {"MATCH", InvertedIndexQueryType::MATCH_ANY_QUERY},
             {"ANY", InvertedIndexQueryType::MATCH_ANY_QUERY},
             {"ALL", InvertedIndexQueryType::MATCH_ALL_QUERY},
+
+            // Exact match without tokenization
+            {"EXACT", InvertedIndexQueryType::EQUAL_QUERY},
     };
 
     auto it = clause_type_map.find(clause_type);
@@ -532,6 +536,17 @@ Status FunctionSearch::build_leaf_query(const 
FunctionSearch& function, const TS
     }
 
     if (category == FunctionSearch::ClauseTypeCategory::NON_TOKENIZED) {
+        if (clause_type == "EXACT") {
+            // EXACT match: exact string matching without tokenization
+            // Note: EXACT prefers untokenized index (STRING_TYPE) which 
doesn't support lowercase
+            // If only tokenized index exists, EXACT may return empty results 
because
+            // tokenized indexes store individual tokens, not complete strings
+            *out = make_term_query(value_wstr);
+            VLOG_DEBUG << "search: EXACT clause processed, field=" << 
field_name << ", value='"
+                       << value << "'";
+            return Status::OK();
+        }
+
         if (clause_type == "PREFIX" || clause_type == "WILDCARD" || 
clause_type == "REGEXP" ||
             clause_type == "RANGE" || clause_type == "LIST") {
             VLOG_DEBUG << "search: clause type '" << clause_type
diff --git 
a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4 
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
index 0f4b1b62eb9..812aaf4a7ef 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
@@ -58,9 +58,10 @@ REGEXP   : '/' (~[/] | '\\/')* '/' ;
 LBRACKET : '[' -> pushMode(RANGE_MODE) ;
 LBRACE   : '{' -> pushMode(RANGE_MODE) ;
 
-IN_LPAREN  : [Ii][Nn] '(' -> pushMode(LIST_MODE) ;
-ANY_LPAREN : [Aa][Nn][Yy] '(' -> pushMode(STRING_MODE) ;
-ALL_LPAREN : [Aa][Ll][Ll] '(' -> pushMode(STRING_MODE) ;
+IN_LPAREN    : [Ii][Nn] '(' -> pushMode(LIST_MODE) ;
+ANY_LPAREN   : [Aa][Nn][Yy] '(' -> pushMode(STRING_MODE) ;
+ALL_LPAREN   : [Aa][Ll][Ll] '(' -> pushMode(STRING_MODE) ;
+EXACT_LPAREN : [Ee][Xx][Aa][Cc][Tt] '(' -> pushMode(STRING_MODE) ;
 
 WS : [ \t\r\n\u3000]+ -> skip ;
 
diff --git 
a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4 
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
index 0b4d4f66bbb..a2a357450bf 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
@@ -37,6 +37,7 @@ searchValue
     | rangeValue
     | listValue
     | anyAllValue
+    | exactValue
     ;
 
 rangeValue
@@ -47,4 +48,5 @@ rangeValue
 rangeEndpoint : RANGE_NUMBER | RANGE_STAR ;
 
 listValue   : IN_LPAREN LIST_TERM* LIST_RPAREN ;
-anyAllValue : (ANY_LPAREN | ALL_LPAREN) STRING_CONTENT? STRING_RPAREN ;
\ No newline at end of file
+anyAllValue : (ANY_LPAREN | ALL_LPAREN) STRING_CONTENT? STRING_RPAREN ;
+exactValue  : EXACT_LPAREN STRING_CONTENT? STRING_RPAREN ;
\ No newline at end of file
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index 21c1e32e405..14ec3e03f96 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -125,6 +125,7 @@ public class SearchDslParser {
         LIST,       // field:IN(value1 value2)
         ANY,        // field:ANY(value) - any match
         ALL,        // field:ALL(value) - all match
+        EXACT,      // field:EXACT(value) - exact match without tokenization
         AND,        // clause1 AND clause2
         OR,         // clause1 OR clause2
         NOT         // NOT clause
@@ -298,6 +299,10 @@ public class SearchDslParser {
                 return createAnyAllNode(fieldName, 
ctx.anyAllValue().getText());
             }
 
+            if (ctx.exactValue() != null) {
+                return createExactNode(fieldName, ctx.exactValue().getText());
+            }
+
             // Fallback for unknown types
             return createTermNode(fieldName, ctx.getText());
         }
@@ -372,6 +377,12 @@ public class SearchDslParser {
             return new QsNode(QsClauseType.ANY, fieldName, innerContent);
         }
 
+        private QsNode createExactNode(String fieldName, String exactText) {
+            // Extract content between parentheses
+            String innerContent = extractParenthesesContent(exactText);
+            return new QsNode(QsClauseType.EXACT, fieldName, innerContent);
+        }
+
         private String extractParenthesesContent(String text) {
             int openParen = text.indexOf('(');
             int closeParen = text.lastIndexOf(')');
diff --git a/regression-test/data/search/test_search_exact_basic.out 
b/regression-test/data/search/test_search_exact_basic.out
new file mode 100644
index 00000000000..2983c4c3ec8
--- /dev/null
+++ b/regression-test/data/search/test_search_exact_basic.out
@@ -0,0 +1,9 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !exact_whole --
+2      apple banana
+
+-- !exact_partial --
+1      apple
+
+-- !exact_case --
+4      Apple
diff --git a/regression-test/data/search/test_search_exact_lowercase.out 
b/regression-test/data/search/test_search_exact_lowercase.out
new file mode 100644
index 00000000000..5e7ebaacb64
--- /dev/null
+++ b/regression-test/data/search/test_search_exact_lowercase.out
@@ -0,0 +1,32 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !normal_exact_lower --
+2      hello world
+
+-- !normal_exact_upper --
+3      HELLO WORLD
+
+-- !normal_exact_mixed --
+1      Hello World
+
+-- !mixed_exact_lower --
+2      hello world
+
+-- !mixed_exact_upper --
+3      HELLO WORLD
+
+-- !mixed_exact_mixed --
+1      Hello World
+
+-- !mixed_any_lowercase --
+1      Hello World
+2      hello world
+3      HELLO WORLD
+4      HeLLo WoRLd
+
+-- !exact_case_sensitive --
+5      Test Case
+
+-- !any_case_insensitive --
+5
+6
+
diff --git a/regression-test/data/search/test_search_exact_match.out 
b/regression-test/data/search/test_search_exact_match.out
new file mode 100644
index 00000000000..37218de9a1e
--- /dev/null
+++ b/regression-test/data/search/test_search_exact_match.out
@@ -0,0 +1,71 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !exact_basic --
+1      Hello World
+
+-- !exact_no_partial --
+4      Hello
+
+-- !exact_case_sensitive --
+2      hello world
+
+-- !exact_different_case --
+3      HELLO WORLD
+
+-- !exact_spaces --
+6      a b c
+
+-- !exact_vs_any_exact --
+
+-- !exact_vs_any_any --
+1      This is a test document
+2      Another test document
+3      Third test document
+
+-- !exact_mixed_index --
+7      machine learning
+
+-- !exact_special_chars --
+9      Special!@#Chars
+
+-- !exact_and --
+1      Hello World
+
+-- !exact_or --
+1      Hello World
+2      hello world
+
+-- !exact_not --
+1      Hello World
+2      hello world
+4      Hello
+5      World
+6      a b c
+7      machine learning
+8      deep learning
+9      Special!@#Chars
+
+-- !exact_null --
+
+-- !exact_multiple --
+1      Hello World     test keyword
+
+-- !term_query --
+4      Hello
+
+-- !exact_query --
+4      Hello
+
+-- !exact_and_all --
+1      Hello World     This is a test document
+
+-- !exact_case_hello --
+4      hello
+
+-- !exact_case_HELLO --
+
+-- !exact_mixed_exact_match --
+1      tokenized value
+
+-- !any_mixed_token_match --
+1      tokenized value
+
diff --git a/regression-test/data/search/test_search_exact_multi_index.out 
b/regression-test/data/search/test_search_exact_multi_index.out
new file mode 100644
index 00000000000..fadcfd5d98c
--- /dev/null
+++ b/regression-test/data/search/test_search_exact_multi_index.out
@@ -0,0 +1,28 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !exact_full_match --
+1      machine learning
+
+-- !exact_no_partial --
+3      machine
+
+-- !any_token_match --
+1      machine learning
+2      deep learning
+3      machine
+4      learning
+
+-- !all_token_match --
+1      machine learning
+
+-- !exact_strict --
+2      deep learning
+
+-- !any_loose --
+1      machine learning
+2      deep learning
+4      learning
+
+-- !mixed_exact_any --
+1      machine learning
+5      artificial intelligence
+
diff --git a/regression-test/suites/search/test_search_exact_basic.groovy 
b/regression-test/suites/search/test_search_exact_basic.groovy
new file mode 100644
index 00000000000..cf5701ac98f
--- /dev/null
+++ b/regression-test/suites/search/test_search_exact_basic.groovy
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_search_exact_basic") {
+    def tableName = "exact_basic_test"
+
+    sql "DROP TABLE IF EXISTS ${tableName}"
+
+    // Simple table with basic index
+    sql """
+        CREATE TABLE ${tableName} (
+            id INT,
+            name VARCHAR(200),
+            INDEX idx_name (name) USING INVERTED
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES ("replication_allocation" = "tag.location.default: 1")
+    """
+
+    // Insert simple test data
+    sql """INSERT INTO ${tableName} VALUES
+        (1, 'apple'),
+        (2, 'apple banana'),
+        (3, 'banana'),
+        (4, 'Apple'),
+        (5, 'APPLE'),
+        (6, 'apple banana cherry')
+    """
+
+    Thread.sleep(3000)
+
+    // Test 1: EXACT should match the whole value
+    qt_exact_whole "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, 
name FROM ${tableName} WHERE search('name:EXACT(apple banana)') ORDER BY id"
+
+    // Test 2: EXACT should not match partial
+    qt_exact_partial "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ 
id, name FROM ${tableName} WHERE search('name:EXACT(apple)') ORDER BY id"
+
+    // Test 3: EXACT is case sensitive (without lowercase config)
+    qt_exact_case "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, 
name FROM ${tableName} WHERE search('name:EXACT(Apple)') ORDER BY id"
+}
diff --git a/regression-test/suites/search/test_search_exact_lowercase.groovy 
b/regression-test/suites/search/test_search_exact_lowercase.groovy
new file mode 100644
index 00000000000..9d1b3756cbc
--- /dev/null
+++ b/regression-test/suites/search/test_search_exact_lowercase.groovy
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_search_exact_lowercase") {
+    def tableName = "exact_lowercase_test"
+
+    sql "DROP TABLE IF EXISTS ${tableName}"
+
+    // EXACT on mixed indexes: prefers untokenized, but untokenized index 
doesn't support lowercase
+    // So EXACT is always case-sensitive regardless of lowercase config
+    // This test verifies that EXACT behavior is consistent
+    sql """
+        CREATE TABLE ${tableName} (
+            id INT,
+            text_normal VARCHAR(200),
+            text_mixed VARCHAR(200),
+            INDEX idx_normal (text_normal) USING INVERTED,
+            INDEX idx_mixed_tokenized (text_mixed) USING INVERTED 
PROPERTIES("parser" = "unicode", "lower_case" = "true"),
+            INDEX idx_mixed_untokenized (text_mixed) USING INVERTED
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES ("replication_allocation" = "tag.location.default: 1")
+    """
+
+    // Insert test data with various cases
+    sql """INSERT INTO ${tableName} VALUES
+        (1, 'Hello World', 'Hello World'),
+        (2, 'hello world', 'hello world'),
+        (3, 'HELLO WORLD', 'HELLO WORLD'),
+        (4, 'HeLLo WoRLd', 'HeLLo WoRLd'),
+        (5, 'Test Case', 'Test Case'),
+        (6, 'TEST CASE', 'TEST CASE')
+    """
+
+    Thread.sleep(3000)
+
+    // Test 1: EXACT on normal field (untokenized) - case sensitive
+    qt_normal_exact_lower "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, text_normal FROM ${tableName} WHERE search('text_normal:EXACT(hello 
world)') ORDER BY id"
+    qt_normal_exact_upper "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, text_normal FROM ${tableName} WHERE search('text_normal:EXACT(HELLO 
WORLD)') ORDER BY id"
+    qt_normal_exact_mixed "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, text_normal FROM ${tableName} WHERE search('text_normal:EXACT(Hello 
World)') ORDER BY id"
+
+    // Test 2: EXACT on mixed index field
+    // EXACT prefers untokenized index, so it's case sensitive (lowercase 
config is ignored)
+    qt_mixed_exact_lower "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, text_mixed FROM ${tableName} WHERE search('text_mixed:EXACT(hello 
world)') ORDER BY id"
+    qt_mixed_exact_upper "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, text_mixed FROM ${tableName} WHERE search('text_mixed:EXACT(HELLO 
WORLD)') ORDER BY id"
+    qt_mixed_exact_mixed "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, text_mixed FROM ${tableName} WHERE search('text_mixed:EXACT(Hello 
World)') ORDER BY id"
+
+    // Test 3: Verify that ANY on mixed index uses tokenized index with 
lowercase
+    qt_mixed_any_lowercase "SELECT 
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, text_mixed FROM 
${tableName} WHERE search('text_mixed:ANY(hello world)') ORDER BY id"
+
+    // Test 4: Compare EXACT (case-sensitive) vs ANY (with lowercase)
+    qt_exact_case_sensitive "SELECT 
/*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM ${tableName} WHERE 
search('text_mixed:EXACT(Test Case)') ORDER BY id"
+    qt_any_case_insensitive "SELECT 
/*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM ${tableName} WHERE 
search('text_mixed:ANY(test case)') ORDER BY id"
+}
diff --git a/regression-test/suites/search/test_search_exact_match.groovy 
b/regression-test/suites/search/test_search_exact_match.groovy
new file mode 100644
index 00000000000..307963b19b1
--- /dev/null
+++ b/regression-test/suites/search/test_search_exact_match.groovy
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_search_exact_match") {
+    def tableName = "search_exact_test_table"
+
+    sql "DROP TABLE IF EXISTS ${tableName}"
+
+    // Create test table with different index configurations
+    sql """
+        CREATE TABLE ${tableName} (
+            id INT,
+            title VARCHAR(255),
+            content TEXT,
+            keyword VARCHAR(200),
+            mixed_index VARCHAR(200),
+            INDEX idx_title (title) USING INVERTED,
+            INDEX idx_content (content) USING INVERTED PROPERTIES("parser" = 
"english"),
+            INDEX idx_keyword (keyword) USING INVERTED,
+            INDEX idx_mixed_tokenized (mixed_index) USING INVERTED 
PROPERTIES("parser" = "standard"),
+            INDEX idx_mixed_untokenized (mixed_index) USING INVERTED
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 3
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        )
+    """
+
+    // Insert test data for EXACT matching
+    sql """INSERT INTO ${tableName} VALUES
+        (1, 'Hello World', 'This is a test document', 'test keyword', 
'tokenized value'),
+        (2, 'hello world', 'Another test document', 'hello world', 'another 
value'),
+        (3, 'HELLO WORLD', 'Third test document', 'HELLO WORLD', 'UPPER 
VALUE'),
+        (4, 'Hello', 'Just hello', 'hello', 'hello'),
+        (5, 'World', 'Just world', 'world', 'world'),
+        (6, 'a b c', 'Space separated', 'a b c', 'a b c'),
+        (7, 'machine learning', 'About ML', 'ML', 'machine learning'),
+        (8, 'deep learning', 'About DL', 'DL', 'deep learning'),
+        (9, 'Special!@#Chars', 'Special characters test', 'special!@#chars', 
'special chars')
+    """
+
+    // Wait for index building
+    Thread.sleep(5000)
+
+    // Test 1: EXACT match without tokenization - exact case matching
+    qt_exact_basic "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, 
title FROM ${tableName} WHERE search('title:EXACT(Hello World)') ORDER BY id"
+
+    // Test 2: EXACT match should not find partial matches (unlike ANY)
+    qt_exact_no_partial "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, title FROM ${tableName} WHERE search('title:EXACT(Hello)') ORDER BY id"
+
+    // Test 3: EXACT match on field without tokenization - case sensitive
+    qt_exact_case_sensitive "SELECT 
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, keyword FROM ${tableName} 
WHERE search('keyword:EXACT(hello world)') ORDER BY id"
+
+    // Test 4: EXACT match with different case
+    qt_exact_different_case "SELECT 
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, keyword FROM ${tableName} 
WHERE search('keyword:EXACT(HELLO WORLD)') ORDER BY id"
+
+    // Test 5: EXACT match with spaces
+    qt_exact_spaces "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ 
id, keyword FROM ${tableName} WHERE search('keyword:EXACT(a b c)') ORDER BY id"
+
+    // Test 6: Compare EXACT vs ANY on tokenized field
+    qt_exact_vs_any_exact "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, content FROM ${tableName} WHERE search('content:EXACT(test document)') 
ORDER BY id"
+    qt_exact_vs_any_any "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, content FROM ${tableName} WHERE search('content:ALL(test document)') 
ORDER BY id"
+
+    // Test 7: EXACT on field with mixed index (both tokenized and untokenized)
+    // EXACT should use the untokenized index
+    qt_exact_mixed_index "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, mixed_index FROM ${tableName} WHERE search('mixed_index:EXACT(machine 
learning)') ORDER BY id"
+
+    // Test 8: EXACT with special characters
+    qt_exact_special_chars "SELECT 
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} 
WHERE search('title:EXACT(Special!@#Chars)') ORDER BY id"
+
+    // Test 9: EXACT in boolean queries
+    qt_exact_and "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, 
title FROM ${tableName} WHERE search('title:EXACT(Hello World) AND 
content:test') ORDER BY id"
+    qt_exact_or "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, 
title FROM ${tableName} WHERE search('title:EXACT(Hello World) OR 
title:EXACT(hello world)') ORDER BY id"
+    qt_exact_not "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, 
title FROM ${tableName} WHERE search('NOT title:EXACT(HELLO WORLD)') ORDER BY 
id LIMIT 10"
+
+    // Test 10: EXACT with NULL values
+    sql """INSERT INTO ${tableName} VALUES (10, NULL, 'null title test', 
'null', 'null')"""
+    Thread.sleep(2000)
+    qt_exact_null "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, 
title FROM ${tableName} WHERE search('title:EXACT(NULL)') ORDER BY id"
+
+    // Test 11: Multiple EXACT conditions
+    qt_exact_multiple "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ 
id, title, keyword FROM ${tableName} WHERE search('title:EXACT(Hello World) AND 
keyword:EXACT(test keyword)') ORDER BY id"
+
+    // Test 12: EXACT vs regular term query comparison
+    qt_term_query "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, 
title FROM ${tableName} WHERE search('title:Hello') ORDER BY id"
+    qt_exact_query "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, 
title FROM ${tableName} WHERE search('title:EXACT(Hello)') ORDER BY id"
+
+    // Test 13: EXACT with ALL - combining different operators
+    qt_exact_and_all "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ 
id, title, content FROM ${tableName} WHERE search('title:EXACT(Hello World) AND 
content:ALL(test document)') ORDER BY id"
+
+    // Test 14: Case sensitivity without lowercase config
+    qt_exact_case_hello "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, keyword FROM ${tableName} WHERE search('keyword:EXACT(hello)') ORDER BY 
id"
+    qt_exact_case_HELLO "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, keyword FROM ${tableName} WHERE search('keyword:EXACT(HELLO)') ORDER BY 
id"
+
+    // Test 15: EXACT on mixed_index should prefer untokenized index
+    // So it should match exact string, not tokens
+    qt_exact_mixed_exact_match "SELECT 
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, mixed_index FROM 
${tableName} WHERE search('mixed_index:EXACT(tokenized value)') ORDER BY id"
+    qt_any_mixed_token_match "SELECT 
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, mixed_index FROM 
${tableName} WHERE search('mixed_index:ALL(tokenized value)') ORDER BY id"
+
+}
diff --git a/regression-test/suites/search/test_search_exact_multi_index.groovy 
b/regression-test/suites/search/test_search_exact_multi_index.groovy
new file mode 100644
index 00000000000..ab361dc45c0
--- /dev/null
+++ b/regression-test/suites/search/test_search_exact_multi_index.groovy
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_search_exact_multi_index") {
+    def tableName = "exact_multi_index_test"
+
+    sql "DROP TABLE IF EXISTS ${tableName}"
+
+    // Table with multiple indexes on the same column
+    // This tests that EXACT uses the untokenized index (STRING_TYPE)
+    // while ANY/ALL use the tokenized index (FULLTEXT)
+    sql """
+        CREATE TABLE ${tableName} (
+            id INT,
+            content VARCHAR(200),
+            INDEX idx_untokenized (content) USING INVERTED,
+            INDEX idx_tokenized (content) USING INVERTED PROPERTIES("parser" = 
"standard")
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES ("replication_allocation" = "tag.location.default: 1")
+    """
+
+    // Insert test data
+    sql """INSERT INTO ${tableName} VALUES
+        (1, 'machine learning'),
+        (2, 'deep learning'),
+        (3, 'machine'),
+        (4, 'learning'),
+        (5, 'artificial intelligence'),
+        (6, 'natural language processing')
+    """
+
+    Thread.sleep(3000)
+
+    // Test 1: EXACT should use untokenized index - exact match only
+    qt_exact_full_match "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, content FROM ${tableName} WHERE search('content:EXACT(machine 
learning)') ORDER BY id"
+
+    // Test 2: EXACT should not match partial tokens
+    qt_exact_no_partial "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) 
*/ id, content FROM ${tableName} WHERE search('content:EXACT(machine)') ORDER 
BY id"
+
+    // Test 3: ANY should use tokenized index - matches any token
+    qt_any_token_match "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ 
id, content FROM ${tableName} WHERE search('content:ANY(machine learning)') 
ORDER BY id"
+
+    // Test 4: ALL should use tokenized index - matches all tokens
+    qt_all_token_match "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ 
id, content FROM ${tableName} WHERE search('content:ALL(machine learning)') 
ORDER BY id"
+
+    // Test 5: Verify EXACT vs ANY behavior difference
+    // EXACT: only exact string match
+    // ANY: any token matches
+    qt_exact_strict "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ 
id, content FROM ${tableName} WHERE search('content:EXACT(deep learning)') 
ORDER BY id"
+    qt_any_loose "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, 
content FROM ${tableName} WHERE search('content:ANY(deep learning)') ORDER BY 
id"
+
+    // Test 6: Multiple conditions with EXACT and ANY
+    qt_mixed_exact_any "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ 
id, content FROM ${tableName} WHERE search('content:EXACT(machine learning) OR 
content:ANY(intelligence)') ORDER BY id"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to