This is an automated email from the ASF dual-hosted git repository.
airborne12 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c24d454f15c [fix](search) reject Lucene-syntax SEARCH on columns
without inverted index (#63637)
c24d454f15c is described below
commit c24d454f15cee2d937ef4749270a3ecb449eafe6
Author: Jack <[email protected]>
AuthorDate: Wed May 27 20:29:42 2026 +0800
[fix](search) reject Lucene-syntax SEARCH on columns without inverted index
(#63637)
## Proposed changes
Issue Number: close #N/A
### What problem does this PR solve?
SEARCH (Lucene syntax) predicates against columns that have no inverted
index silently fall back to an empty bitmap on BE (`vsearch.cpp` and
`function_search.cpp` only log a WARNING then `return Status::OK()` with
an empty result), making the query look like *no rows matched*. That is
indistinguishable from a successful query that simply found nothing and
misleads users.
This PR adds a planning-time check in `RewriteSearchToSlots`, matching
the existing "column does not exist" behavior — fail fast with a clear
`AnalysisException` instead of letting BE silently return FALSE.
- **Normal columns**: require
`OlapTable.getInvertedIndex(column, null) != null`.
- **Variant subcolumns** (`parent.path`): require at least one `INVERTED`
index whose first column matches the parent variant column (compared
case-insensitively); the concrete subcolumn binding is still resolved
per-segment in BE, consistent with the `is_variant_sub` branch in
`function_search.cpp`.
Also hardens `OlapTable.getInvertedIndex` against NPE when the table has
no `TableIndexes` set (returns `null` instead of dereferencing).
Error message example:
```
Field 'msg_body' has no inverted index, cannot be used in search: msg_body:error. Create an inverted index on the column first (ALTER TABLE ... ADD INDEX ... USING INVERTED).
```
---
.../java/org/apache/doris/catalog/OlapTable.java | 3 +
.../rules/rewrite/RewriteSearchToSlots.java | 63 +++++++++++-
.../rules/rewrite/RewriteSearchToSlotsTest.java | 109 ++++++++++++++++++++-
.../suites/search/test_search_function.groovy | 13 ++-
4 files changed, 183 insertions(+), 5 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 38cd24c9e2a..30373f0a3c2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -4004,6 +4004,9 @@ public class OlapTable extends Table implements
MTMVRelatedTableIf, GsonPostProc
}
public Index getInvertedIndex(Column column, List<String> subPath, String
analyzer) {
+ if (indexes == null) {
+ return null;
+ }
List<Index> invertedIndexes = new ArrayList<>();
for (Index index : indexes.getIndexes()) {
if (index.getIndexType() == IndexType.INVERTED) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
index 8c257d5060f..ca5d2d93b29 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
@@ -18,6 +18,10 @@
package org.apache.doris.nereids.rules.rewrite;
import org.apache.doris.analysis.SearchDslParser;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Index;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.info.IndexType;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
@@ -126,17 +130,24 @@ public class RewriteSearchToSlots extends
OneRewriteRuleFactory {
"Field '%s' is not VARIANT type for subcolumn
access: %s",
parentFieldName, search.getDslString()));
}
+ String normalizedParentFieldName = parentSlot.getName();
+
+ // Check the parent variant column has at least one
INVERTED index. The concrete
+ // subcolumn binding is resolved per-segment in BE, so we
only enforce the parent
+ // level here. See function_search.cpp is_variant_sub
branch.
+ checkInvertedIndexExists(scan.getTable(),
normalizedParentFieldName,
+ search.getDslString(), true);
// Create ElementAt expression for variant subcolumn
// This will be converted to an extracted column slot by
VariantSubPathPruning rule
// If the subcolumn doesn't exist, ElementAt will remain
and BE will handle it gracefully
childExpr = new ElementAt(parentSlot, new
StringLiteral(subcolumnPath));
- normalizedFieldName = originalFieldName; // Keep full path
for field binding
+ normalizedFieldName = normalizedParentFieldName + "." +
subcolumnPath;
LOG.info(
"Created ElementAt expression for variant
subcolumn: parent='{}', "
+ "subcolumn='{}', field_name='{}'",
- parentFieldName, subcolumnPath,
normalizedFieldName);
+ normalizedParentFieldName, subcolumnPath,
normalizedFieldName);
} else {
// Normal field - find slot directly
Slot slot = findSlotByName(originalFieldName, scan);
@@ -145,6 +156,7 @@ public class RewriteSearchToSlots extends
OneRewriteRuleFactory {
"Field '%s' not found in table for search: %s",
originalFieldName, search.getDslString()));
}
+ checkInvertedIndexExists(scan.getTable(), slot.getName(),
search.getDslString(), false);
childExpr = slot;
normalizedFieldName = slot.getName();
}
@@ -167,6 +179,53 @@ public class RewriteSearchToSlots extends
OneRewriteRuleFactory {
}
}
+ /**
+ * Ensure the column referenced by a Lucene-syntax SEARCH predicate has an
inverted index.
+ * Without this check the BE path would silently fall back to an empty
bitmap (i.e. all FALSE),
+ * which is indistinguishable from "no rows matched" to the user. Throw at
planning time so the
+ * behavior is consistent with referencing a non-existent column.
+ *
+ * @param table table backing the LogicalOlapScan
+ * @param columnName column name (parent column name when
isVariantParent)
+ * @param dsl original DSL, used in the error message
+ * @param isVariantParent true when {@code columnName} is the parent of a
variant subcolumn
+ * access (e.g. {@code msg.body}); for that case
any INVERTED index on
+ * the parent column is accepted because the
concrete subcolumn binding
+ * is resolved per-segment in BE.
+ */
+ private void checkInvertedIndexExists(OlapTable table, String columnName,
String dsl,
+ boolean isVariantParent) {
+ Column column = table.getColumn(columnName);
+ if (column == null) {
+ // Field existence is already validated by findSlotByName; if we
reach here the schema
+ // changed concurrently. Surface a clear error rather than fall
through.
+ throw new AnalysisException(String.format(
+ "Column '%s' not found in table '%s' for search: %s",
+ columnName, table.getName(), dsl));
+ }
+
+ if (isVariantParent) {
+ for (Index index : table.getIndexes()) {
+ if (index.getIndexType() != IndexType.INVERTED) {
+ continue;
+ }
+ List<String> columns = index.getColumns();
+ if (columns != null && !columns.isEmpty()
+ && columnName.equalsIgnoreCase(columns.get(0))) {
+ return;
+ }
+ }
+ } else if (table.getInvertedIndex(column, null) != null) {
+ return;
+ }
+
+ throw new AnalysisException(String.format(
+ "Field '%s' has no inverted index, cannot be used in search:
%s. "
+ + "Create an inverted index on the column first "
+ + "(ALTER TABLE ... ADD INDEX ... USING INVERTED).",
+ columnName, dsl));
+ }
+
private Slot findSlotByName(String fieldName, LogicalOlapScan scan) {
// Direct match only - variant subcolumns are handled by caller
for (Slot slot : scan.getOutput()) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java
index d4e54cf92da..a72b6ff8dd0 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java
@@ -18,16 +18,27 @@
package org.apache.doris.nereids.rules.rewrite;
import org.apache.doris.analysis.SearchDslParser;
+import org.apache.doris.catalog.AggregateType;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Index;
+import org.apache.doris.catalog.KeysType;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.PartitionInfo;
+import org.apache.doris.catalog.TableIndexes;
+import org.apache.doris.catalog.Type;
+import org.apache.doris.catalog.info.IndexType;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.SearchExpression;
import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Search;
import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
import org.apache.doris.nereids.types.StringType;
import org.apache.doris.nereids.util.PlanConstructor;
+import org.apache.doris.thrift.TStorageType;
import com.google.common.collect.ImmutableList;
import org.junit.jupiter.api.Assertions;
@@ -229,7 +240,7 @@ public class RewriteSearchToSlotsTest {
@Test
public void testRewriteSearchHandlesCaseInsensitiveField() throws
Exception {
LogicalOlapScan scan = new
LogicalOlapScan(PlanConstructor.getNextRelationId(),
- PlanConstructor.student, ImmutableList.of("db"));
+ buildStudentWithInvertedIndexOnName(100L),
ImmutableList.of("db"));
Search searchFunc = new Search(new StringLiteral("NAME:alice"));
Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod(
@@ -250,6 +261,31 @@ public class RewriteSearchToSlotsTest {
Assertions.assertEquals("name", normalizedPlan.getRoot().getField());
}
+ @Test
+ public void testRewriteSearchHandlesCaseInsensitiveVariantParentField()
throws Exception {
+ LogicalOlapScan scan = new
LogicalOlapScan(PlanConstructor.getNextRelationId(),
+ buildVariantTableWithInvertedIndex(102L),
ImmutableList.of("db"));
+ Search searchFunc = new Search(new StringLiteral("V.foo:bar"));
+
+ Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod(
+ "rewriteSearch", Search.class, LogicalOlapScan.class);
+ rewriteMethod.setAccessible(true);
+
+ Object rewritten = rewriteMethod.invoke(rewriteRule, searchFunc, scan);
+ Assertions.assertInstanceOf(SearchExpression.class, rewritten);
+
+ SearchExpression searchExpression = (SearchExpression) rewritten;
+ Assertions.assertEquals(1, searchExpression.getSlotChildren().size());
+ Assertions.assertTrue(searchExpression.getSlotChildren().get(0)
instanceof ElementAt);
+ ElementAt elementAt = (ElementAt)
searchExpression.getSlotChildren().get(0);
+ Assertions.assertTrue(elementAt.child(0) instanceof SlotReference);
+ Assertions.assertEquals("v", ((SlotReference)
elementAt.child(0)).getName());
+
+ SearchDslParser.QsPlan normalizedPlan = searchExpression.getQsPlan();
+ Assertions.assertEquals("v.foo",
normalizedPlan.getFieldBindings().get(0).getFieldName());
+ Assertions.assertEquals("v.foo", normalizedPlan.getRoot().getField());
+ }
+
@Test
public void testRewriteSearchThrowsWhenFieldMissing() throws Exception {
LogicalOlapScan scan = new
LogicalOlapScan(PlanConstructor.getNextRelationId(),
@@ -266,4 +302,75 @@ public class RewriteSearchToSlotsTest {
Assertions.assertInstanceOf(AnalysisException.class,
thrown.getCause());
Assertions.assertTrue(thrown.getCause().getMessage().contains("unknown_field"));
}
+
+ @Test
+ public void testRewriteSearchThrowsWhenColumnHasNoInvertedIndex() throws
Exception {
+ // PlanConstructor.student has the 'name' column but no inverted index
on it. The rewrite
+ // must surface a clear error instead of letting BE silently return an
empty bitmap.
+ LogicalOlapScan scan = new
LogicalOlapScan(PlanConstructor.getNextRelationId(),
+ PlanConstructor.student, ImmutableList.of("db"));
+ Search searchFunc = new Search(new StringLiteral("name:alice"));
+
+ Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod(
+ "rewriteSearch", Search.class, LogicalOlapScan.class);
+ rewriteMethod.setAccessible(true);
+
+ InvocationTargetException thrown =
Assertions.assertThrows(InvocationTargetException.class,
+ () -> rewriteMethod.invoke(rewriteRule, searchFunc, scan));
+ Assertions.assertNotNull(thrown.getCause());
+ Assertions.assertInstanceOf(AnalysisException.class,
thrown.getCause());
+
Assertions.assertTrue(thrown.getCause().getMessage().contains("inverted index"),
+ "Error message should mention inverted index, got: " +
thrown.getCause().getMessage());
+ Assertions.assertTrue(thrown.getCause().getMessage().contains("name"));
+ }
+
+ @Test
+ public void testRewriteSearchSucceedsWhenColumnHasInvertedIndex() throws
Exception {
+ LogicalOlapScan scan = new
LogicalOlapScan(PlanConstructor.getNextRelationId(),
+ buildStudentWithInvertedIndexOnName(101L),
ImmutableList.of("db"));
+ Search searchFunc = new Search(new StringLiteral("name:alice"));
+
+ Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod(
+ "rewriteSearch", Search.class, LogicalOlapScan.class);
+ rewriteMethod.setAccessible(true);
+
+ Object rewritten = rewriteMethod.invoke(rewriteRule, searchFunc, scan);
+ Assertions.assertInstanceOf(SearchExpression.class, rewritten);
+
+ SearchExpression searchExpression = (SearchExpression) rewritten;
+ Assertions.assertEquals(1, searchExpression.getSlotChildren().size());
+ Assertions.assertTrue(searchExpression.getSlotChildren().get(0)
instanceof SlotReference);
+ Assertions.assertEquals("name",
+ ((SlotReference)
searchExpression.getSlotChildren().get(0)).getName());
+ }
+
+ private static OlapTable buildStudentWithInvertedIndexOnName(long tableId)
{
+ List<Column> columns = ImmutableList.of(
+ new Column("id", Type.INT, true, AggregateType.NONE, "0", ""),
+ new Column("gender", Type.INT, false, AggregateType.NONE, "0",
""),
+ new Column("name", Type.STRING, true, AggregateType.NONE, "",
""),
+ new Column("age", Type.INT, true, AggregateType.NONE, "", ""));
+ Index invertedOnName = new Index(1L, "idx_name",
ImmutableList.of("name"),
+ IndexType.INVERTED, null, "");
+ OlapTable table = new OlapTable(tableId,
"student_with_inverted_index", false, columns,
+ KeysType.PRIMARY_KEYS, new PartitionInfo(), null,
+ new TableIndexes(ImmutableList.of(invertedOnName)));
+ table.setIndexMeta(-1, "student_with_inverted_index",
table.getFullSchema(),
+ 0, 0, (short) 0, TStorageType.COLUMN, KeysType.PRIMARY_KEYS);
+ return table;
+ }
+
+ private static OlapTable buildVariantTableWithInvertedIndex(long tableId) {
+ List<Column> columns = ImmutableList.of(
+ new Column("id", Type.INT, true, AggregateType.NONE, "0", ""),
+ new Column("v", Type.VARIANT, false, AggregateType.NONE, "",
""));
+ Index invertedOnVariant = new Index(2L, "idx_v", ImmutableList.of("v"),
+ IndexType.INVERTED, null, "");
+ OlapTable table = new OlapTable(tableId,
"variant_with_inverted_index", false, columns,
+ KeysType.PRIMARY_KEYS, new PartitionInfo(), null,
+ new TableIndexes(ImmutableList.of(invertedOnVariant)));
+ table.setIndexMeta(-1, "variant_with_inverted_index",
table.getFullSchema(),
+ 0, 0, (short) 0, TStorageType.COLUMN, KeysType.PRIMARY_KEYS);
+ return table;
+ }
}
diff --git a/regression-test/suites/search/test_search_function.groovy
b/regression-test/suites/search/test_search_function.groovy
index 61ee8e4b026..25fb08cef5a 100644
--- a/regression-test/suites/search/test_search_function.groovy
+++ b/regression-test/suites/search/test_search_function.groovy
@@ -153,11 +153,20 @@ suite("test_search_function", "p0") {
// Test 21: ALL query test
qt_sql "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title
FROM ${indexTableName} WHERE search('tags:ALL(machine learning)') ORDER BY id"
- // Test 22: Search on non-indexed table (will throw exception)
+ // Test 22: Search on non-indexed table — must now throw at FE planning
time.
+ // After the fix for Jira CIR-20006, RewriteSearchToSlots refuses to
rewrite
+ // a SEARCH predicate against a column that has no inverted index, with an
+ // AnalysisException that names the column and points at "inverted index".
+ boolean threw = false
try {
sql """SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id,
title FROM ${tableName} WHERE search('title:Machine') ORDER BY id"""
} catch (Exception e) {
+ threw = true
logger.info(e.getMessage())
- assertTrue(e.getMessage().contains("SearchExpr should not be executed
without inverted index"))
+ assertTrue(e.getMessage().contains("inverted index"),
+ "expected error to mention 'inverted index', got:
${e.getMessage()}")
+ assertTrue(e.getMessage().contains("title"),
+ "expected error to mention 'title', got: ${e.getMessage()}")
}
+ assertTrue(threw, "expected AnalysisException for SEARCH on column without
inverted index")
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]