This is an automated email from the ASF dual-hosted git repository.

airborne12 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c24d454f15c [fix](search) reject Lucene-syntax SEARCH on columns without inverted index (#63637)
c24d454f15c is described below

commit c24d454f15cee2d937ef4749270a3ecb449eafe6
Author: Jack <[email protected]>
AuthorDate: Wed May 27 20:29:42 2026 +0800

    [fix](search) reject Lucene-syntax SEARCH on columns without inverted index (#63637)
    
    ## Proposed changes
    
    Issue Number: close #N/A
    
    ### What problem does this PR solve?
    
    SEARCH (Lucene syntax) predicates against columns that have no inverted
    index silently fall back to an empty bitmap on BE (`vsearch.cpp` and
    `function_search.cpp` only log a WARNING then `return Status::OK()` with
    an empty result), making the query look like *no rows matched*. That is
    indistinguishable from a successful query that simply found nothing and
    misleads users.
    
    This PR adds a planning-time check in `RewriteSearchToSlots`, matching
    the existing "column does not exist" behavior — fail fast with a clear
    `AnalysisException` instead of letting BE silently return FALSE.
    
    - **Normal columns**: require
    `OlapTable.getInvertedIndex(column, null) != null`.
    - **Variant subcolumns** (`parent.path`): require any `INVERTED` index
    whose first column equals the parent variant column (compared
    case-insensitively); the concrete subcolumn binding is still resolved
    per-segment in BE, consistent with the `is_variant_sub` branch in
    `function_search.cpp`.
    
    Also hardens `OlapTable.getInvertedIndex` against a NullPointerException
    when the table has no `TableIndexes` set: it now returns `null` instead of
    dereferencing the null `indexes` field.
    
    Error message example:
    
    ```
    Field 'msg_body' has no inverted index, cannot be used in search: msg_body:error.
    Create an inverted index on the column first (ALTER TABLE ... ADD INDEX ... USING INVERTED).
    ```
---
 .../java/org/apache/doris/catalog/OlapTable.java   |   3 +
 .../rules/rewrite/RewriteSearchToSlots.java        |  63 +++++++++++-
 .../rules/rewrite/RewriteSearchToSlotsTest.java    | 109 ++++++++++++++++++++-
 .../suites/search/test_search_function.groovy      |  13 ++-
 4 files changed, 183 insertions(+), 5 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 38cd24c9e2a..30373f0a3c2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -4004,6 +4004,9 @@ public class OlapTable extends Table implements 
MTMVRelatedTableIf, GsonPostProc
     }
 
     public Index getInvertedIndex(Column column, List<String> subPath, String 
analyzer) {
+        if (indexes == null) {
+            return null;
+        }
         List<Index> invertedIndexes = new ArrayList<>();
         for (Index index : indexes.getIndexes()) {
             if (index.getIndexType() == IndexType.INVERTED) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
index 8c257d5060f..ca5d2d93b29 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
@@ -18,6 +18,10 @@
 package org.apache.doris.nereids.rules.rewrite;
 
 import org.apache.doris.analysis.SearchDslParser;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Index;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.info.IndexType;
 import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.rules.Rule;
 import org.apache.doris.nereids.rules.RuleType;
@@ -126,17 +130,24 @@ public class RewriteSearchToSlots extends 
OneRewriteRuleFactory {
                                 "Field '%s' is not VARIANT type for subcolumn 
access: %s",
                                 parentFieldName, search.getDslString()));
                     }
+                    String normalizedParentFieldName = parentSlot.getName();
+
+                    // Check the parent variant column has at least one 
INVERTED index. The concrete
+                    // subcolumn binding is resolved per-segment in BE, so we 
only enforce the parent
+                    // level here. See function_search.cpp is_variant_sub 
branch.
+                    checkInvertedIndexExists(scan.getTable(), 
normalizedParentFieldName,
+                            search.getDslString(), true);
 
                     // Create ElementAt expression for variant subcolumn
                     // This will be converted to an extracted column slot by 
VariantSubPathPruning rule
                     // If the subcolumn doesn't exist, ElementAt will remain 
and BE will handle it gracefully
                     childExpr = new ElementAt(parentSlot, new 
StringLiteral(subcolumnPath));
-                    normalizedFieldName = originalFieldName; // Keep full path 
for field binding
+                    normalizedFieldName = normalizedParentFieldName + "." + 
subcolumnPath;
 
                     LOG.info(
                             "Created ElementAt expression for variant 
subcolumn: parent='{}', "
                                     + "subcolumn='{}', field_name='{}'",
-                            parentFieldName, subcolumnPath, 
normalizedFieldName);
+                            normalizedParentFieldName, subcolumnPath, 
normalizedFieldName);
                 } else {
                     // Normal field - find slot directly
                     Slot slot = findSlotByName(originalFieldName, scan);
@@ -145,6 +156,7 @@ public class RewriteSearchToSlots extends 
OneRewriteRuleFactory {
                                 "Field '%s' not found in table for search: %s",
                                 originalFieldName, search.getDslString()));
                     }
+                    checkInvertedIndexExists(scan.getTable(), slot.getName(), 
search.getDslString(), false);
                     childExpr = slot;
                     normalizedFieldName = slot.getName();
                 }
@@ -167,6 +179,53 @@ public class RewriteSearchToSlots extends 
OneRewriteRuleFactory {
         }
     }
 
+    /**
+     * Ensure the column referenced by a Lucene-syntax SEARCH predicate has an 
inverted index.
+     * Without this check the BE path would silently fall back to an empty 
bitmap (i.e. all FALSE),
+     * which is indistinguishable from "no rows matched" to the user. Throw at 
planning time so the
+     * behavior is consistent with referencing a non-existent column.
+     *
+     * @param table         table backing the LogicalOlapScan
+     * @param columnName    column name (parent column name when 
isVariantParent)
+     * @param dsl           original DSL, used in the error message
+     * @param isVariantParent true when {@code columnName} is the parent of a 
variant subcolumn
+     *                        access (e.g. {@code msg.body}); for that case 
any INVERTED index on
+     *                        the parent column is accepted because the 
concrete subcolumn binding
+     *                        is resolved per-segment in BE.
+     */
+    private void checkInvertedIndexExists(OlapTable table, String columnName, 
String dsl,
+            boolean isVariantParent) {
+        Column column = table.getColumn(columnName);
+        if (column == null) {
+            // Field existence is already validated by findSlotByName; if we 
reach here the schema
+            // changed concurrently. Surface a clear error rather than fall 
through.
+            throw new AnalysisException(String.format(
+                    "Column '%s' not found in table '%s' for search: %s",
+                    columnName, table.getName(), dsl));
+        }
+
+        if (isVariantParent) {
+            for (Index index : table.getIndexes()) {
+                if (index.getIndexType() != IndexType.INVERTED) {
+                    continue;
+                }
+                List<String> columns = index.getColumns();
+                if (columns != null && !columns.isEmpty()
+                        && columnName.equalsIgnoreCase(columns.get(0))) {
+                    return;
+                }
+            }
+        } else if (table.getInvertedIndex(column, null) != null) {
+            return;
+        }
+
+        throw new AnalysisException(String.format(
+                "Field '%s' has no inverted index, cannot be used in search: 
%s. "
+                        + "Create an inverted index on the column first "
+                        + "(ALTER TABLE ... ADD INDEX ... USING INVERTED).",
+                columnName, dsl));
+    }
+
     private Slot findSlotByName(String fieldName, LogicalOlapScan scan) {
         // Direct match only - variant subcolumns are handled by caller
         for (Slot slot : scan.getOutput()) {
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java
index d4e54cf92da..a72b6ff8dd0 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java
@@ -18,16 +18,27 @@
 package org.apache.doris.nereids.rules.rewrite;
 
 import org.apache.doris.analysis.SearchDslParser;
+import org.apache.doris.catalog.AggregateType;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Index;
+import org.apache.doris.catalog.KeysType;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.PartitionInfo;
+import org.apache.doris.catalog.TableIndexes;
+import org.apache.doris.catalog.Type;
+import org.apache.doris.catalog.info.IndexType;
 import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.rules.Rule;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.SearchExpression;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Search;
 import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
 import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
 import org.apache.doris.nereids.types.StringType;
 import org.apache.doris.nereids.util.PlanConstructor;
+import org.apache.doris.thrift.TStorageType;
 
 import com.google.common.collect.ImmutableList;
 import org.junit.jupiter.api.Assertions;
@@ -229,7 +240,7 @@ public class RewriteSearchToSlotsTest {
     @Test
     public void testRewriteSearchHandlesCaseInsensitiveField() throws 
Exception {
         LogicalOlapScan scan = new 
LogicalOlapScan(PlanConstructor.getNextRelationId(),
-                PlanConstructor.student, ImmutableList.of("db"));
+                buildStudentWithInvertedIndexOnName(100L), 
ImmutableList.of("db"));
         Search searchFunc = new Search(new StringLiteral("NAME:alice"));
 
         Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod(
@@ -250,6 +261,31 @@ public class RewriteSearchToSlotsTest {
         Assertions.assertEquals("name", normalizedPlan.getRoot().getField());
     }
 
+    @Test
+    public void testRewriteSearchHandlesCaseInsensitiveVariantParentField() 
throws Exception {
+        LogicalOlapScan scan = new 
LogicalOlapScan(PlanConstructor.getNextRelationId(),
+                buildVariantTableWithInvertedIndex(102L), 
ImmutableList.of("db"));
+        Search searchFunc = new Search(new StringLiteral("V.foo:bar"));
+
+        Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod(
+                "rewriteSearch", Search.class, LogicalOlapScan.class);
+        rewriteMethod.setAccessible(true);
+
+        Object rewritten = rewriteMethod.invoke(rewriteRule, searchFunc, scan);
+        Assertions.assertInstanceOf(SearchExpression.class, rewritten);
+
+        SearchExpression searchExpression = (SearchExpression) rewritten;
+        Assertions.assertEquals(1, searchExpression.getSlotChildren().size());
+        Assertions.assertTrue(searchExpression.getSlotChildren().get(0) 
instanceof ElementAt);
+        ElementAt elementAt = (ElementAt) 
searchExpression.getSlotChildren().get(0);
+        Assertions.assertTrue(elementAt.child(0) instanceof SlotReference);
+        Assertions.assertEquals("v", ((SlotReference) 
elementAt.child(0)).getName());
+
+        SearchDslParser.QsPlan normalizedPlan = searchExpression.getQsPlan();
+        Assertions.assertEquals("v.foo", 
normalizedPlan.getFieldBindings().get(0).getFieldName());
+        Assertions.assertEquals("v.foo", normalizedPlan.getRoot().getField());
+    }
+
     @Test
     public void testRewriteSearchThrowsWhenFieldMissing() throws Exception {
         LogicalOlapScan scan = new 
LogicalOlapScan(PlanConstructor.getNextRelationId(),
@@ -266,4 +302,75 @@ public class RewriteSearchToSlotsTest {
         Assertions.assertInstanceOf(AnalysisException.class, 
thrown.getCause());
         
Assertions.assertTrue(thrown.getCause().getMessage().contains("unknown_field"));
     }
+
+    @Test
+    public void testRewriteSearchThrowsWhenColumnHasNoInvertedIndex() throws 
Exception {
+        // PlanConstructor.student has the 'name' column but no inverted index 
on it. The rewrite
+        // must surface a clear error instead of letting BE silently return an 
empty bitmap.
+        LogicalOlapScan scan = new 
LogicalOlapScan(PlanConstructor.getNextRelationId(),
+                PlanConstructor.student, ImmutableList.of("db"));
+        Search searchFunc = new Search(new StringLiteral("name:alice"));
+
+        Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod(
+                "rewriteSearch", Search.class, LogicalOlapScan.class);
+        rewriteMethod.setAccessible(true);
+
+        InvocationTargetException thrown = 
Assertions.assertThrows(InvocationTargetException.class,
+                () -> rewriteMethod.invoke(rewriteRule, searchFunc, scan));
+        Assertions.assertNotNull(thrown.getCause());
+        Assertions.assertInstanceOf(AnalysisException.class, 
thrown.getCause());
+        
Assertions.assertTrue(thrown.getCause().getMessage().contains("inverted index"),
+                "Error message should mention inverted index, got: " + 
thrown.getCause().getMessage());
+        Assertions.assertTrue(thrown.getCause().getMessage().contains("name"));
+    }
+
+    @Test
+    public void testRewriteSearchSucceedsWhenColumnHasInvertedIndex() throws 
Exception {
+        LogicalOlapScan scan = new 
LogicalOlapScan(PlanConstructor.getNextRelationId(),
+                buildStudentWithInvertedIndexOnName(101L), 
ImmutableList.of("db"));
+        Search searchFunc = new Search(new StringLiteral("name:alice"));
+
+        Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod(
+                "rewriteSearch", Search.class, LogicalOlapScan.class);
+        rewriteMethod.setAccessible(true);
+
+        Object rewritten = rewriteMethod.invoke(rewriteRule, searchFunc, scan);
+        Assertions.assertInstanceOf(SearchExpression.class, rewritten);
+
+        SearchExpression searchExpression = (SearchExpression) rewritten;
+        Assertions.assertEquals(1, searchExpression.getSlotChildren().size());
+        Assertions.assertTrue(searchExpression.getSlotChildren().get(0) 
instanceof SlotReference);
+        Assertions.assertEquals("name",
+                ((SlotReference) 
searchExpression.getSlotChildren().get(0)).getName());
+    }
+
+    private static OlapTable buildStudentWithInvertedIndexOnName(long tableId) 
{
+        List<Column> columns = ImmutableList.of(
+                new Column("id", Type.INT, true, AggregateType.NONE, "0", ""),
+                new Column("gender", Type.INT, false, AggregateType.NONE, "0", 
""),
+                new Column("name", Type.STRING, true, AggregateType.NONE, "", 
""),
+                new Column("age", Type.INT, true, AggregateType.NONE, "", ""));
+        Index invertedOnName = new Index(1L, "idx_name", 
ImmutableList.of("name"),
+                IndexType.INVERTED, null, "");
+        OlapTable table = new OlapTable(tableId, 
"student_with_inverted_index", false, columns,
+                KeysType.PRIMARY_KEYS, new PartitionInfo(), null,
+                new TableIndexes(ImmutableList.of(invertedOnName)));
+        table.setIndexMeta(-1, "student_with_inverted_index", 
table.getFullSchema(),
+                0, 0, (short) 0, TStorageType.COLUMN, KeysType.PRIMARY_KEYS);
+        return table;
+    }
+
+    private static OlapTable buildVariantTableWithInvertedIndex(long tableId) {
+        List<Column> columns = ImmutableList.of(
+                new Column("id", Type.INT, true, AggregateType.NONE, "0", ""),
+                new Column("v", Type.VARIANT, false, AggregateType.NONE, "", 
""));
+        Index invertedOnVariant = new Index(2L, "idx_v", ImmutableList.of("v"),
+                IndexType.INVERTED, null, "");
+        OlapTable table = new OlapTable(tableId, 
"variant_with_inverted_index", false, columns,
+                KeysType.PRIMARY_KEYS, new PartitionInfo(), null,
+                new TableIndexes(ImmutableList.of(invertedOnVariant)));
+        table.setIndexMeta(-1, "variant_with_inverted_index", 
table.getFullSchema(),
+                0, 0, (short) 0, TStorageType.COLUMN, KeysType.PRIMARY_KEYS);
+        return table;
+    }
 }
diff --git a/regression-test/suites/search/test_search_function.groovy 
b/regression-test/suites/search/test_search_function.groovy
index 61ee8e4b026..25fb08cef5a 100644
--- a/regression-test/suites/search/test_search_function.groovy
+++ b/regression-test/suites/search/test_search_function.groovy
@@ -153,11 +153,20 @@ suite("test_search_function", "p0") {
     // Test 21: ALL query test
     qt_sql "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title 
FROM ${indexTableName} WHERE search('tags:ALL(machine learning)') ORDER BY id"
 
-    // Test 22: Search on non-indexed table (will throw exception)
+    // Test 22: Search on non-indexed table — must now throw at FE planning 
time.
+    // After the fix for Jira CIR-20006, RewriteSearchToSlots refuses to 
rewrite
+    // a SEARCH predicate against a column that has no inverted index, with an
+    // AnalysisException that names the column and points at "inverted index".
+    boolean threw = false
     try {
         sql """SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, 
title FROM ${tableName} WHERE search('title:Machine') ORDER BY id"""
     } catch (Exception e) {
+        threw = true
         logger.info(e.getMessage())
-        assertTrue(e.getMessage().contains("SearchExpr should not be executed 
without inverted index"))
+        assertTrue(e.getMessage().contains("inverted index"),
+                   "expected error to mention 'inverted index', got: 
${e.getMessage()}")
+        assertTrue(e.getMessage().contains("title"),
+                   "expected error to mention 'title', got: ${e.getMessage()}")
     }
+    assertTrue(threw, "expected AnalysisException for SEARCH on column without 
inverted index")
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to