This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 300dc60c864 [feature](search) add exact dsl for search function
(#56710)
300dc60c864 is described below
commit 300dc60c864dc7eb6d91f08263d8f2fbefb622d4
Author: Jack <[email protected]>
AuthorDate: Mon Oct 6 15:33:28 2025 +0800
[feature](search) add exact dsl for search function (#56710)
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #56139
Problem Summary:
This PR adds EXACT DSL functionality to the search function, enabling
exact string matching without tokenization. This feature complements
existing ANY/ALL operators that work with tokenized indexes by providing
strict string equality matching.
---
be/src/vec/functions/function_search.cpp | 17 ++-
.../org/apache/doris/nereids/search/SearchLexer.g4 | 7 +-
.../apache/doris/nereids/search/SearchParser.g4 | 4 +-
.../functions/scalar/SearchDslParser.java | 11 ++
.../data/search/test_search_exact_basic.out | 9 ++
.../data/search/test_search_exact_lowercase.out | 32 ++++++
.../data/search/test_search_exact_match.out | 71 +++++++++++++
.../data/search/test_search_exact_multi_index.out | 28 +++++
.../suites/search/test_search_exact_basic.groovy | 55 ++++++++++
.../search/test_search_exact_lowercase.groovy | 69 +++++++++++++
.../suites/search/test_search_exact_match.groovy | 115 +++++++++++++++++++++
.../search/test_search_exact_multi_index.groovy | 70 +++++++++++++
12 files changed, 483 insertions(+), 5 deletions(-)
diff --git a/be/src/vec/functions/function_search.cpp
b/be/src/vec/functions/function_search.cpp
index 780488db7c0..72cb12d886a 100644
--- a/be/src/vec/functions/function_search.cpp
+++ b/be/src/vec/functions/function_search.cpp
@@ -279,7 +279,8 @@ FunctionSearch::ClauseTypeCategory
FunctionSearch::get_clause_type_category(
if (clause_type == "AND" || clause_type == "OR" || clause_type == "NOT") {
return ClauseTypeCategory::COMPOUND;
} else if (clause_type == "TERM" || clause_type == "PREFIX" || clause_type
== "WILDCARD" ||
- clause_type == "REGEXP" || clause_type == "RANGE" ||
clause_type == "LIST") {
+ clause_type == "REGEXP" || clause_type == "RANGE" ||
clause_type == "LIST" ||
+ clause_type == "EXACT") {
// Non-tokenized queries: exact matching, pattern matching, range,
list operations
return ClauseTypeCategory::NON_TOKENIZED;
} else if (clause_type == "PHRASE" || clause_type == "MATCH" ||
clause_type == "ANY" ||
@@ -349,6 +350,9 @@ InvertedIndexQueryType
FunctionSearch::clause_type_to_query_type(
{"MATCH", InvertedIndexQueryType::MATCH_ANY_QUERY},
{"ANY", InvertedIndexQueryType::MATCH_ANY_QUERY},
{"ALL", InvertedIndexQueryType::MATCH_ALL_QUERY},
+
+ // Exact match without tokenization
+ {"EXACT", InvertedIndexQueryType::EQUAL_QUERY},
};
auto it = clause_type_map.find(clause_type);
@@ -532,6 +536,17 @@ Status FunctionSearch::build_leaf_query(const
FunctionSearch& function, const TS
}
if (category == FunctionSearch::ClauseTypeCategory::NON_TOKENIZED) {
+ if (clause_type == "EXACT") {
+ // EXACT match: exact string matching without tokenization
+ // Note: EXACT prefers untokenized index (STRING_TYPE) which
doesn't support lowercase
+ // If only tokenized index exists, EXACT may return empty results
because
+ // tokenized indexes store individual tokens, not complete strings
+ *out = make_term_query(value_wstr);
+ VLOG_DEBUG << "search: EXACT clause processed, field=" <<
field_name << ", value='"
+ << value << "'";
+ return Status::OK();
+ }
+
if (clause_type == "PREFIX" || clause_type == "WILDCARD" ||
clause_type == "REGEXP" ||
clause_type == "RANGE" || clause_type == "LIST") {
VLOG_DEBUG << "search: clause type '" << clause_type
diff --git
a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
index 0f4b1b62eb9..812aaf4a7ef 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
@@ -58,9 +58,10 @@ REGEXP : '/' (~[/] | '\\/')* '/' ;
LBRACKET : '[' -> pushMode(RANGE_MODE) ;
LBRACE : '{' -> pushMode(RANGE_MODE) ;
-IN_LPAREN : [Ii][Nn] '(' -> pushMode(LIST_MODE) ;
-ANY_LPAREN : [Aa][Nn][Yy] '(' -> pushMode(STRING_MODE) ;
-ALL_LPAREN : [Aa][Ll][Ll] '(' -> pushMode(STRING_MODE) ;
+IN_LPAREN : [Ii][Nn] '(' -> pushMode(LIST_MODE) ;
+ANY_LPAREN : [Aa][Nn][Yy] '(' -> pushMode(STRING_MODE) ;
+ALL_LPAREN : [Aa][Ll][Ll] '(' -> pushMode(STRING_MODE) ;
+EXACT_LPAREN : [Ee][Xx][Aa][Cc][Tt] '(' -> pushMode(STRING_MODE) ;
WS : [ \t\r\n\u3000]+ -> skip ;
diff --git
a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
index 0b4d4f66bbb..a2a357450bf 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
@@ -37,6 +37,7 @@ searchValue
| rangeValue
| listValue
| anyAllValue
+ | exactValue
;
rangeValue
@@ -47,4 +48,5 @@ rangeValue
rangeEndpoint : RANGE_NUMBER | RANGE_STAR ;
listValue : IN_LPAREN LIST_TERM* LIST_RPAREN ;
-anyAllValue : (ANY_LPAREN | ALL_LPAREN) STRING_CONTENT? STRING_RPAREN ;
\ No newline at end of file
+anyAllValue : (ANY_LPAREN | ALL_LPAREN) STRING_CONTENT? STRING_RPAREN ;
+exactValue : EXACT_LPAREN STRING_CONTENT? STRING_RPAREN ;
\ No newline at end of file
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index 21c1e32e405..14ec3e03f96 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -125,6 +125,7 @@ public class SearchDslParser {
LIST, // field:IN(value1 value2)
ANY, // field:ANY(value) - any match
ALL, // field:ALL(value) - all match
+ EXACT, // field:EXACT(value) - exact match without tokenization
AND, // clause1 AND clause2
OR, // clause1 OR clause2
NOT // NOT clause
@@ -298,6 +299,10 @@ public class SearchDslParser {
return createAnyAllNode(fieldName,
ctx.anyAllValue().getText());
}
+ if (ctx.exactValue() != null) {
+ return createExactNode(fieldName, ctx.exactValue().getText());
+ }
+
// Fallback for unknown types
return createTermNode(fieldName, ctx.getText());
}
@@ -372,6 +377,12 @@ public class SearchDslParser {
return new QsNode(QsClauseType.ANY, fieldName, innerContent);
}
+ private QsNode createExactNode(String fieldName, String exactText) {
+ // Extract content between parentheses
+ String innerContent = extractParenthesesContent(exactText);
+ return new QsNode(QsClauseType.EXACT, fieldName, innerContent);
+ }
+
private String extractParenthesesContent(String text) {
int openParen = text.indexOf('(');
int closeParen = text.lastIndexOf(')');
diff --git a/regression-test/data/search/test_search_exact_basic.out
b/regression-test/data/search/test_search_exact_basic.out
new file mode 100644
index 00000000000..2983c4c3ec8
--- /dev/null
+++ b/regression-test/data/search/test_search_exact_basic.out
@@ -0,0 +1,9 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !exact_whole --
+2 apple banana
+
+-- !exact_partial --
+1 apple
+
+-- !exact_case --
+4 Apple
diff --git a/regression-test/data/search/test_search_exact_lowercase.out
b/regression-test/data/search/test_search_exact_lowercase.out
new file mode 100644
index 00000000000..5e7ebaacb64
--- /dev/null
+++ b/regression-test/data/search/test_search_exact_lowercase.out
@@ -0,0 +1,32 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !normal_exact_lower --
+2 hello world
+
+-- !normal_exact_upper --
+3 HELLO WORLD
+
+-- !normal_exact_mixed --
+1 Hello World
+
+-- !mixed_exact_lower --
+2 hello world
+
+-- !mixed_exact_upper --
+3 HELLO WORLD
+
+-- !mixed_exact_mixed --
+1 Hello World
+
+-- !mixed_any_lowercase --
+1 Hello World
+2 hello world
+3 HELLO WORLD
+4 HeLLo WoRLd
+
+-- !exact_case_sensitive --
+5 Test Case
+
+-- !any_case_insensitive --
+5
+6
+
diff --git a/regression-test/data/search/test_search_exact_match.out
b/regression-test/data/search/test_search_exact_match.out
new file mode 100644
index 00000000000..37218de9a1e
--- /dev/null
+++ b/regression-test/data/search/test_search_exact_match.out
@@ -0,0 +1,71 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !exact_basic --
+1 Hello World
+
+-- !exact_no_partial --
+4 Hello
+
+-- !exact_case_sensitive --
+2 hello world
+
+-- !exact_different_case --
+3 HELLO WORLD
+
+-- !exact_spaces --
+6 a b c
+
+-- !exact_vs_any_exact --
+
+-- !exact_vs_any_any --
+1 This is a test document
+2 Another test document
+3 Third test document
+
+-- !exact_mixed_index --
+7 machine learning
+
+-- !exact_special_chars --
+9 Special!@#Chars
+
+-- !exact_and --
+1 Hello World
+
+-- !exact_or --
+1 Hello World
+2 hello world
+
+-- !exact_not --
+1 Hello World
+2 hello world
+4 Hello
+5 World
+6 a b c
+7 machine learning
+8 deep learning
+9 Special!@#Chars
+
+-- !exact_null --
+
+-- !exact_multiple --
+1 Hello World test keyword
+
+-- !term_query --
+4 Hello
+
+-- !exact_query --
+4 Hello
+
+-- !exact_and_all --
+1 Hello World This is a test document
+
+-- !exact_case_hello --
+4 hello
+
+-- !exact_case_HELLO --
+
+-- !exact_mixed_exact_match --
+1 tokenized value
+
+-- !any_mixed_token_match --
+1 tokenized value
+
diff --git a/regression-test/data/search/test_search_exact_multi_index.out
b/regression-test/data/search/test_search_exact_multi_index.out
new file mode 100644
index 00000000000..fadcfd5d98c
--- /dev/null
+++ b/regression-test/data/search/test_search_exact_multi_index.out
@@ -0,0 +1,28 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !exact_full_match --
+1 machine learning
+
+-- !exact_no_partial --
+3 machine
+
+-- !any_token_match --
+1 machine learning
+2 deep learning
+3 machine
+4 learning
+
+-- !all_token_match --
+1 machine learning
+
+-- !exact_strict --
+2 deep learning
+
+-- !any_loose --
+1 machine learning
+2 deep learning
+4 learning
+
+-- !mixed_exact_any --
+1 machine learning
+5 artificial intelligence
+
diff --git a/regression-test/suites/search/test_search_exact_basic.groovy
b/regression-test/suites/search/test_search_exact_basic.groovy
new file mode 100644
index 00000000000..cf5701ac98f
--- /dev/null
+++ b/regression-test/suites/search/test_search_exact_basic.groovy
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_search_exact_basic") {
+ def tableName = "exact_basic_test"
+
+ sql "DROP TABLE IF EXISTS ${tableName}"
+
+ // Simple table with basic index
+ sql """
+ CREATE TABLE ${tableName} (
+ id INT,
+ name VARCHAR(200),
+ INDEX idx_name (name) USING INVERTED
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES ("replication_allocation" = "tag.location.default: 1")
+ """
+
+ // Insert simple test data
+ sql """INSERT INTO ${tableName} VALUES
+ (1, 'apple'),
+ (2, 'apple banana'),
+ (3, 'banana'),
+ (4, 'Apple'),
+ (5, 'APPLE'),
+ (6, 'apple banana cherry')
+ """
+
+ Thread.sleep(3000)
+
+ // Test 1: EXACT should match the whole value
+ qt_exact_whole "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id,
name FROM ${tableName} WHERE search('name:EXACT(apple banana)') ORDER BY id"
+
+ // Test 2: EXACT should not match partial
+ qt_exact_partial "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */
id, name FROM ${tableName} WHERE search('name:EXACT(apple)') ORDER BY id"
+
+ // Test 3: EXACT is case sensitive (without lowercase config)
+ qt_exact_case "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id,
name FROM ${tableName} WHERE search('name:EXACT(Apple)') ORDER BY id"
+}
diff --git a/regression-test/suites/search/test_search_exact_lowercase.groovy
b/regression-test/suites/search/test_search_exact_lowercase.groovy
new file mode 100644
index 00000000000..9d1b3756cbc
--- /dev/null
+++ b/regression-test/suites/search/test_search_exact_lowercase.groovy
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_search_exact_lowercase") {
+ def tableName = "exact_lowercase_test"
+
+ sql "DROP TABLE IF EXISTS ${tableName}"
+
+ // EXACT on mixed indexes: prefers untokenized, but untokenized index
doesn't support lowercase
+ // So EXACT is always case-sensitive regardless of lowercase config
+ // This test verifies that EXACT behavior is consistent
+ sql """
+ CREATE TABLE ${tableName} (
+ id INT,
+ text_normal VARCHAR(200),
+ text_mixed VARCHAR(200),
+ INDEX idx_normal (text_normal) USING INVERTED,
+ INDEX idx_mixed_tokenized (text_mixed) USING INVERTED
PROPERTIES("parser" = "unicode", "lower_case" = "true"),
+ INDEX idx_mixed_untokenized (text_mixed) USING INVERTED
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES ("replication_allocation" = "tag.location.default: 1")
+ """
+
+ // Insert test data with various cases
+ sql """INSERT INTO ${tableName} VALUES
+ (1, 'Hello World', 'Hello World'),
+ (2, 'hello world', 'hello world'),
+ (3, 'HELLO WORLD', 'HELLO WORLD'),
+ (4, 'HeLLo WoRLd', 'HeLLo WoRLd'),
+ (5, 'Test Case', 'Test Case'),
+ (6, 'TEST CASE', 'TEST CASE')
+ """
+
+ Thread.sleep(3000)
+
+ // Test 1: EXACT on normal field (untokenized) - case sensitive
+ qt_normal_exact_lower "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, text_normal FROM ${tableName} WHERE search('text_normal:EXACT(hello
world)') ORDER BY id"
+ qt_normal_exact_upper "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, text_normal FROM ${tableName} WHERE search('text_normal:EXACT(HELLO
WORLD)') ORDER BY id"
+ qt_normal_exact_mixed "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, text_normal FROM ${tableName} WHERE search('text_normal:EXACT(Hello
World)') ORDER BY id"
+
+ // Test 2: EXACT on mixed index field
+ // EXACT prefers untokenized index, so it's case sensitive (lowercase
config is ignored)
+ qt_mixed_exact_lower "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, text_mixed FROM ${tableName} WHERE search('text_mixed:EXACT(hello
world)') ORDER BY id"
+ qt_mixed_exact_upper "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, text_mixed FROM ${tableName} WHERE search('text_mixed:EXACT(HELLO
WORLD)') ORDER BY id"
+ qt_mixed_exact_mixed "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, text_mixed FROM ${tableName} WHERE search('text_mixed:EXACT(Hello
World)') ORDER BY id"
+
+ // Test 3: Verify that ANY on mixed index uses tokenized index with
lowercase
+ qt_mixed_any_lowercase "SELECT
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, text_mixed FROM
${tableName} WHERE search('text_mixed:ANY(hello world)') ORDER BY id"
+
+ // Test 4: Compare EXACT (case-sensitive) vs ANY (with lowercase)
+ qt_exact_case_sensitive "SELECT
/*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM ${tableName} WHERE
search('text_mixed:EXACT(Test Case)') ORDER BY id"
+ qt_any_case_insensitive "SELECT
/*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM ${tableName} WHERE
search('text_mixed:ANY(test case)') ORDER BY id"
+}
diff --git a/regression-test/suites/search/test_search_exact_match.groovy
b/regression-test/suites/search/test_search_exact_match.groovy
new file mode 100644
index 00000000000..307963b19b1
--- /dev/null
+++ b/regression-test/suites/search/test_search_exact_match.groovy
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_search_exact_match") {
+ def tableName = "search_exact_test_table"
+
+ sql "DROP TABLE IF EXISTS ${tableName}"
+
+ // Create test table with different index configurations
+ sql """
+ CREATE TABLE ${tableName} (
+ id INT,
+ title VARCHAR(255),
+ content TEXT,
+ keyword VARCHAR(200),
+ mixed_index VARCHAR(200),
+ INDEX idx_title (title) USING INVERTED,
+ INDEX idx_content (content) USING INVERTED PROPERTIES("parser" =
"english"),
+ INDEX idx_keyword (keyword) USING INVERTED,
+ INDEX idx_mixed_tokenized (mixed_index) USING INVERTED
PROPERTIES("parser" = "standard"),
+ INDEX idx_mixed_untokenized (mixed_index) USING INVERTED
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 3
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ )
+ """
+
+ // Insert test data for EXACT matching
+ sql """INSERT INTO ${tableName} VALUES
+ (1, 'Hello World', 'This is a test document', 'test keyword',
'tokenized value'),
+ (2, 'hello world', 'Another test document', 'hello world', 'another
value'),
+ (3, 'HELLO WORLD', 'Third test document', 'HELLO WORLD', 'UPPER
VALUE'),
+ (4, 'Hello', 'Just hello', 'hello', 'hello'),
+ (5, 'World', 'Just world', 'world', 'world'),
+ (6, 'a b c', 'Space separated', 'a b c', 'a b c'),
+ (7, 'machine learning', 'About ML', 'ML', 'machine learning'),
+ (8, 'deep learning', 'About DL', 'DL', 'deep learning'),
+ (9, 'Special!@#Chars', 'Special characters test', 'special!@#chars',
'special chars')
+ """
+
+ // Wait for index building
+ Thread.sleep(5000)
+
+ // Test 1: EXACT match without tokenization - exact case matching
+ qt_exact_basic "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id,
title FROM ${tableName} WHERE search('title:EXACT(Hello World)') ORDER BY id"
+
+ // Test 2: EXACT match should not find partial matches (unlike ANY)
+ qt_exact_no_partial "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, title FROM ${tableName} WHERE search('title:EXACT(Hello)') ORDER BY id"
+
+ // Test 3: EXACT match on field without tokenization - case sensitive
+ qt_exact_case_sensitive "SELECT
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, keyword FROM ${tableName}
WHERE search('keyword:EXACT(hello world)') ORDER BY id"
+
+ // Test 4: EXACT match with different case
+ qt_exact_different_case "SELECT
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, keyword FROM ${tableName}
WHERE search('keyword:EXACT(HELLO WORLD)') ORDER BY id"
+
+ // Test 5: EXACT match with spaces
+ qt_exact_spaces "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */
id, keyword FROM ${tableName} WHERE search('keyword:EXACT(a b c)') ORDER BY id"
+
+ // Test 6: Compare EXACT vs ANY on tokenized field
+ qt_exact_vs_any_exact "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, content FROM ${tableName} WHERE search('content:EXACT(test document)')
ORDER BY id"
+ qt_exact_vs_any_any "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, content FROM ${tableName} WHERE search('content:ALL(test document)')
ORDER BY id"
+
+ // Test 7: EXACT on field with mixed index (both tokenized and untokenized)
+ // EXACT should use the untokenized index
+ qt_exact_mixed_index "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, mixed_index FROM ${tableName} WHERE search('mixed_index:EXACT(machine
learning)') ORDER BY id"
+
+ // Test 8: EXACT with special characters
+ qt_exact_special_chars "SELECT
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName}
WHERE search('title:EXACT(Special!@#Chars)') ORDER BY id"
+
+ // Test 9: EXACT in boolean queries
+ qt_exact_and "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id,
title FROM ${tableName} WHERE search('title:EXACT(Hello World) AND
content:test') ORDER BY id"
+ qt_exact_or "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id,
title FROM ${tableName} WHERE search('title:EXACT(Hello World) OR
title:EXACT(hello world)') ORDER BY id"
+ qt_exact_not "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id,
title FROM ${tableName} WHERE search('NOT title:EXACT(HELLO WORLD)') ORDER BY
id LIMIT 10"
+
+ // Test 10: EXACT with NULL values
+ sql """INSERT INTO ${tableName} VALUES (10, NULL, 'null title test',
'null', 'null')"""
+ Thread.sleep(2000)
+ qt_exact_null "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id,
title FROM ${tableName} WHERE search('title:EXACT(NULL)') ORDER BY id"
+
+ // Test 11: Multiple EXACT conditions
+ qt_exact_multiple "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */
id, title, keyword FROM ${tableName} WHERE search('title:EXACT(Hello World) AND
keyword:EXACT(test keyword)') ORDER BY id"
+
+ // Test 12: EXACT vs regular term query comparison
+ qt_term_query "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id,
title FROM ${tableName} WHERE search('title:Hello') ORDER BY id"
+ qt_exact_query "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id,
title FROM ${tableName} WHERE search('title:EXACT(Hello)') ORDER BY id"
+
+ // Test 13: EXACT with ALL - combining different operators
+ qt_exact_and_all "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */
id, title, content FROM ${tableName} WHERE search('title:EXACT(Hello World) AND
content:ALL(test document)') ORDER BY id"
+
+ // Test 14: Case sensitivity without lowercase config
+ qt_exact_case_hello "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, keyword FROM ${tableName} WHERE search('keyword:EXACT(hello)') ORDER BY
id"
+ qt_exact_case_HELLO "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, keyword FROM ${tableName} WHERE search('keyword:EXACT(HELLO)') ORDER BY
id"
+
+ // Test 15: EXACT on mixed_index should prefer untokenized index
+ // So it should match exact string, not tokens
+ qt_exact_mixed_exact_match "SELECT
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, mixed_index FROM
${tableName} WHERE search('mixed_index:EXACT(tokenized value)') ORDER BY id"
+ qt_any_mixed_token_match "SELECT
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, mixed_index FROM
${tableName} WHERE search('mixed_index:ALL(tokenized value)') ORDER BY id"
+
+}
diff --git a/regression-test/suites/search/test_search_exact_multi_index.groovy
b/regression-test/suites/search/test_search_exact_multi_index.groovy
new file mode 100644
index 00000000000..ab361dc45c0
--- /dev/null
+++ b/regression-test/suites/search/test_search_exact_multi_index.groovy
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_search_exact_multi_index") {
+ def tableName = "exact_multi_index_test"
+
+ sql "DROP TABLE IF EXISTS ${tableName}"
+
+ // Table with multiple indexes on the same column
+ // This tests that EXACT uses the untokenized index (STRING_TYPE)
+ // while ANY/ALL use the tokenized index (FULLTEXT)
+ sql """
+ CREATE TABLE ${tableName} (
+ id INT,
+ content VARCHAR(200),
+ INDEX idx_untokenized (content) USING INVERTED,
+ INDEX idx_tokenized (content) USING INVERTED PROPERTIES("parser" =
"standard")
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES ("replication_allocation" = "tag.location.default: 1")
+ """
+
+ // Insert test data
+ sql """INSERT INTO ${tableName} VALUES
+ (1, 'machine learning'),
+ (2, 'deep learning'),
+ (3, 'machine'),
+ (4, 'learning'),
+ (5, 'artificial intelligence'),
+ (6, 'natural language processing')
+ """
+
+ Thread.sleep(3000)
+
+ // Test 1: EXACT should use untokenized index - exact match only
+ qt_exact_full_match "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, content FROM ${tableName} WHERE search('content:EXACT(machine
learning)') ORDER BY id"
+
+ // Test 2: EXACT should not match partial tokens
+ qt_exact_no_partial "SELECT /*+SET_VAR(enable_common_expr_pushdown=true)
*/ id, content FROM ${tableName} WHERE search('content:EXACT(machine)') ORDER
BY id"
+
+ // Test 3: ANY should use tokenized index - matches any token
+ qt_any_token_match "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */
id, content FROM ${tableName} WHERE search('content:ANY(machine learning)')
ORDER BY id"
+
+ // Test 4: ALL should use tokenized index - matches all tokens
+ qt_all_token_match "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */
id, content FROM ${tableName} WHERE search('content:ALL(machine learning)')
ORDER BY id"
+
+ // Test 5: Verify EXACT vs ANY behavior difference
+ // EXACT: only exact string match
+ // ANY: any token matches
+ qt_exact_strict "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */
id, content FROM ${tableName} WHERE search('content:EXACT(deep learning)')
ORDER BY id"
+ qt_any_loose "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id,
content FROM ${tableName} WHERE search('content:ANY(deep learning)') ORDER BY
id"
+
+ // Test 6: Multiple conditions with EXACT and ANY
+ qt_mixed_exact_any "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */
id, content FROM ${tableName} WHERE search('content:EXACT(machine learning) OR
content:ANY(intelligence)') ORDER BY id"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]