This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 4b78331a543 [pick](branch-4.0)[fix](inverted index) Make
select_best_reader deterministic for multi-index columns #61596 (#61692)
4b78331a543 is described below
commit 4b78331a5439a950feaf5d5f28d146c1e08a93ac
Author: Jack <[email protected]>
AuthorDate: Wed Mar 25 15:11:58 2026 +0800
[pick](branch-4.0)[fix](inverted index) Make select_best_reader
deterministic for multi-index
columns #61596 (#61692)
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #61596
Problem Summary:
Pick #61596 to branch-4.0.
When a column has multiple inverted indexes (e.g., different analyzers),
select_best_reader could return different readers depending on
std::unordered_map iteration order, causing non-deterministic behavior.
This fix sorts candidate readers by index_id to ensure deterministic
selection.
### Release note
None
### Check List (For Author)
- Test
- [x] Regression test
- [x] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason
- Behavior changed:
- [x] No.
- [ ] Yes.
- Does this need documentation?
- [x] No.
- [ ] Yes.
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label
---
.../rowset/segment_v2/inverted_index_iterator.cpp | 78 +++++---
.../segment_v2/inverted_index_iterator_test.cpp | 47 ++++-
.../test_build_index_multi_analyzer_order.out | 33 ++++
.../test_build_index_multi_analyzer_order.groovy | 210 +++++++++++++++++++++
4 files changed, 334 insertions(+), 34 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_iterator.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_iterator.cpp
index fa0a7488015..9b3665904cb 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_iterator.cpp
@@ -149,6 +149,36 @@ Status
InvertedIndexIterator::try_read_from_inverted_index(const InvertedIndexRe
return Status::OK();
}
+// When multiple candidates of the preferred type exist, pick the one with
+// the smallest index_id so that the choice is deterministic regardless of
+// the order indexes appear in the rowset schema. Different segments may
+// have different index orderings (e.g. after sequential BUILD INDEX
+// operations), and relying on iteration order would cause inconsistent
+// query results across segments.
+static const ReaderEntry* pick_preferred(const std::vector<const
ReaderEntry*>& candidates,
+ InvertedIndexReaderType
preferred_type) {
+ const ReaderEntry* best = nullptr;
+ for (const auto* entry : candidates) {
+ if (entry->type == preferred_type) {
+ if (best == nullptr || entry->reader->get_index_id() <
best->reader->get_index_id()) {
+ best = entry;
+ }
+ }
+ }
+ return best;
+}
+
+static const ReaderEntry* pick_smallest_index_id(
+ const std::vector<const ReaderEntry*>& candidates) {
+ const ReaderEntry* best = candidates.front();
+ for (const auto* entry : candidates) {
+ if (entry->reader->get_index_id() < best->reader->get_index_id()) {
+ best = entry;
+ }
+ }
+ return best;
+}
+
Result<InvertedIndexReaderPtr> InvertedIndexIterator::select_for_text(
const AnalyzerMatchResult& match, InvertedIndexQueryType query_type,
const std::string& analyzer_key) {
@@ -167,24 +197,20 @@ Result<InvertedIndexReaderPtr>
InvertedIndexIterator::select_for_text(
// MATCH queries prefer FULLTEXT
if (is_match_query(query_type)) {
- for (const auto* entry : match.candidates) {
- if (entry->type == InvertedIndexReaderType::FULLTEXT) {
- return entry->reader;
- }
+ if (auto* best = pick_preferred(match.candidates,
InvertedIndexReaderType::FULLTEXT)) {
+ return best->reader;
}
}
// EQUAL queries prefer STRING_TYPE for exact match
if (is_equal_query(query_type)) {
- for (const auto* entry : match.candidates) {
- if (entry->type == InvertedIndexReaderType::STRING_TYPE) {
- return entry->reader;
- }
+ if (auto* best = pick_preferred(match.candidates,
InvertedIndexReaderType::STRING_TYPE)) {
+ return best->reader;
}
}
- // Default: return first candidate
- return match.candidates.front()->reader;
+ // Default: smallest index_id for deterministic selection
+ return pick_smallest_index_id(match.candidates)->reader;
}
Result<InvertedIndexReaderPtr> InvertedIndexIterator::select_for_numeric(
@@ -196,27 +222,21 @@ Result<InvertedIndexReaderPtr>
InvertedIndexIterator::select_for_numeric(
// RANGE queries prefer BKD
if (is_range_query(query_type)) {
- for (const auto* entry : match.candidates) {
- if (entry->type == InvertedIndexReaderType::BKD) {
- return entry->reader;
- }
+ if (const auto* best = pick_preferred(match.candidates,
InvertedIndexReaderType::BKD)) {
+ return best->reader;
}
}
- // Fallback priority: BKD > STRING_TYPE > others
- for (const auto* entry : match.candidates) {
- if (entry->type == InvertedIndexReaderType::BKD) {
- return entry->reader;
- }
+ // Fallback priority: BKD > STRING_TYPE > smallest index_id
+ if (const auto* best = pick_preferred(match.candidates,
InvertedIndexReaderType::BKD)) {
+ return best->reader;
}
- for (const auto* entry : match.candidates) {
- if (entry->type == InvertedIndexReaderType::STRING_TYPE) {
- return entry->reader;
- }
+ if (const auto* best = pick_preferred(match.candidates,
InvertedIndexReaderType::STRING_TYPE)) {
+ return best->reader;
}
- // Last resort: first available
- return match.candidates.front()->reader;
+ // Last resort: smallest index_id for deterministic selection
+ return pick_smallest_index_id(match.candidates)->reader;
}
Result<InvertedIndexReaderPtr> InvertedIndexIterator::select_best_reader(
@@ -257,12 +277,12 @@ Result<InvertedIndexReaderPtr>
InvertedIndexIterator::select_best_reader(
return select_for_numeric(match, query_type);
}
- // Default: return first candidate or error
+ // Default: return deterministic candidate or error
if (match.empty()) {
return ResultError(Status::Error<ErrorCode::INVERTED_INDEX_NO_TERMS>(
"No available inverted index readers for column type."));
}
- return match.candidates.front()->reader;
+ return pick_smallest_index_id(match.candidates)->reader;
}
Result<InvertedIndexReaderPtr> InvertedIndexIterator::select_best_reader(
@@ -287,7 +307,7 @@ Result<InvertedIndexReaderPtr>
InvertedIndexIterator::select_best_reader(
return entry.reader;
}
- // Match and return first candidate
+ // Match and return deterministic candidate
auto match = AnalyzerKeyMatcher::match(normalized_key, _reader_entries,
_key_to_entries);
if (match.empty()) {
@@ -299,7 +319,7 @@ Result<InvertedIndexReaderPtr>
InvertedIndexIterator::select_best_reader(
"No available inverted index readers."));
}
- return match.candidates.front()->reader;
+ return pick_smallest_index_id(match.candidates)->reader;
}
IndexReaderPtr InvertedIndexIterator::get_reader(IndexReaderType type) const {
diff --git a/be/test/olap/rowset/segment_v2/inverted_index_iterator_test.cpp
b/be/test/olap/rowset/segment_v2/inverted_index_iterator_test.cpp
index 71e29d89407..1dbc513b7ab 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_iterator_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_iterator_test.cpp
@@ -35,12 +35,12 @@ class MockInvertedIndexReader : public InvertedIndexReader {
public:
// Factory method to create instances
static std::shared_ptr<MockInvertedIndexReader> create(
- const std::map<std::string, std::string>& properties) {
+ const std::map<std::string, std::string>& properties, int64_t
index_id = 1) {
auto index = std::make_shared<TabletIndex>();
// Initialize TabletIndex with protobuf to ensure all fields are
properly set
TabletIndexPB pb;
- pb.set_index_id(1);
- pb.set_index_name("test_index");
+ pb.set_index_id(index_id);
+ pb.set_index_name("test_index_" + std::to_string(index_id));
pb.set_index_type(IndexType::INVERTED);
index->init_from_pb(pb);
return std::shared_ptr<MockInvertedIndexReader>(
@@ -85,7 +85,8 @@ class InvertedIndexIteratorTest : public testing::Test {
protected:
std::shared_ptr<MockInvertedIndexReader> create_mock_reader(
const std::string& analyzer_key,
- InvertedIndexReaderType type = InvertedIndexReaderType::FULLTEXT) {
+ InvertedIndexReaderType type = InvertedIndexReaderType::FULLTEXT,
+ int64_t index_id = 1) {
std::map<std::string, std::string> properties;
// New design: empty string means "user did not specify", non-empty
means explicit.
// We only set properties when analyzer_key is non-empty.
@@ -96,7 +97,7 @@ protected:
properties[INVERTED_INDEX_ANALYZER_NAME_KEY] = analyzer_key;
}
}
- auto reader = MockInvertedIndexReader::create(properties);
+ auto reader = MockInvertedIndexReader::create(properties, index_id);
reader->set_type(type);
return reader;
}
@@ -341,4 +342,40 @@ TEST_F(InvertedIndexIteratorTest,
EdgeCase_GetReaderByType) {
EXPECT_EQ(reader3, nullptr);
}
+// Test: select_best_reader picks the smallest index_id when multiple
+// candidates match, regardless of insertion order. This ensures consistent
+// index selection across segments with different schema orderings.
+TEST_F(InvertedIndexIteratorTest, SelectBestReader_DeterministicByIndexId) {
+ // Simulate two segments with the same indexes in different order.
+ // Both should select the reader with the smallest index_id.
+ auto reader_id_100 = create_mock_reader("my_analyzer1",
InvertedIndexReaderType::FULLTEXT, 100);
+ auto reader_id_50 = create_mock_reader("my_analyzer2",
InvertedIndexReaderType::FULLTEXT, 50);
+
+ // Segment 1: add id=100 first, then id=50
+ {
+ InvertedIndexIterator iter;
+ iter.add_reader(InvertedIndexReaderType::FULLTEXT, reader_id_100);
+ iter.add_reader(InvertedIndexReaderType::FULLTEXT, reader_id_50);
+
+ auto col_type = std::make_shared<vectorized::DataTypeString>();
+ auto result =
+ iter.select_best_reader(col_type,
InvertedIndexQueryType::MATCH_REGEXP_QUERY, "");
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result.value()->get_index_id(), 50);
+ }
+
+ // Segment 2: add id=50 first, then id=100 (opposite order)
+ {
+ InvertedIndexIterator iter;
+ iter.add_reader(InvertedIndexReaderType::FULLTEXT, reader_id_50);
+ iter.add_reader(InvertedIndexReaderType::FULLTEXT, reader_id_100);
+
+ auto col_type = std::make_shared<vectorized::DataTypeString>();
+ auto result =
+ iter.select_best_reader(col_type,
InvertedIndexQueryType::MATCH_REGEXP_QUERY, "");
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result.value()->get_index_id(), 50);
+ }
+}
+
} // namespace doris::segment_v2
diff --git
a/regression-test/data/inverted_index_p0/index_change/test_build_index_multi_analyzer_order.out
b/regression-test/data/inverted_index_p0/index_change/test_build_index_multi_analyzer_order.out
new file mode 100644
index 00000000000..d9ca22ea2bc
--- /dev/null
+++
b/regression-test/data/inverted_index_p0/index_change/test_build_index_multi_analyzer_order.out
@@ -0,0 +1,33 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !build_regex --
+11
+
+-- !baseline_regex --
+11
+
+-- !build_batch1 --
+9
+
+-- !baseline_batch1 --
+9
+
+-- !build_specific --
+3
+4
+5
+6
+7
+
+-- !baseline_specific --
+3
+4
+5
+6
+7
+
+-- !build_term --
+1
+
+-- !baseline_term --
+1
+
diff --git
a/regression-test/suites/inverted_index_p0/index_change/test_build_index_multi_analyzer_order.groovy
b/regression-test/suites/inverted_index_p0/index_change/test_build_index_multi_analyzer_order.groovy
new file mode 100644
index 00000000000..72df30031e3
--- /dev/null
+++
b/regression-test/suites/inverted_index_p0/index_change/test_build_index_multi_analyzer_order.groovy
@@ -0,0 +1,210 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_build_index_multi_analyzer_order") {
+ // Test that BUILD INDEX with multiple analyzers on the same column
+ // produces consistent query results regardless of build order.
+ // Regression test for: build index creates rowset schemas with
+ // index order different from tablet schema, causing select_best_reader
+ // to pick different indexes for different segments.
+
+ // Cloud mode does not support BUILD INDEX with specified index name
+ if (isCloudMode()) {
+ return
+ }
+
+ def timeout = 60000
+ def delta_time = 1000
+ def alter_res = "null"
+ def useTime = 0
+
+ def wait_for_latest_op_on_table_finish = { table_name, OpTimeout ->
+ for (int t = delta_time; t <= OpTimeout; t += delta_time) {
+ alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName =
"${table_name}" ORDER BY CreateTime DESC LIMIT 1;"""
+ if (alter_res.size() > 0 && alter_res[0][9] == "FINISHED") {
+ sleep(3000)
+ break
+ }
+ useTime = t
+ sleep(delta_time)
+ }
+ assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish
timeout")
+ }
+
+ def wait_for_build_index_finish = { table_name, OpTimeout ->
+ for (int t = delta_time; t <= OpTimeout; t += delta_time) {
+ alter_res = sql """SHOW BUILD INDEX WHERE TableName =
"${table_name}" ORDER BY CreateTime DESC LIMIT 1;"""
+ if (alter_res.size() > 0 && alter_res[0][7] == "FINISHED") {
+ sleep(3000)
+ break
+ }
+ useTime = t
+ sleep(delta_time)
+ }
+ assertTrue(useTime <= OpTimeout, "wait_for_build_index_finish timeout")
+ }
+
+ // Create custom analyzers
+ sql """ CREATE INVERTED INDEX TOKENIZER IF NOT EXISTS
edge_ngram_test_tokenizer
+ PROPERTIES ("type" = "edge_ngram", "min_gram" = "3", "max_gram" =
"10", "token_chars" = "digit"); """
+ sql """ CREATE INVERTED INDEX ANALYZER IF NOT EXISTS edge_ngram_test
+ PROPERTIES ("tokenizer" = "edge_ngram_test_tokenizer"); """
+ sql """ CREATE INVERTED INDEX ANALYZER IF NOT EXISTS standard_test_default
+ PROPERTIES ("tokenizer" = "standard", "token_filter" =
"word_delimiter, asciifolding, lowercase"); """
+
+ // Table 1: build index path (add idx_ch0, insert, drop, add two new
indexes, build with interleaved inserts)
+ def tableBuild = "test_build_idx_multi_analyzer_build"
+ // Table 2: baseline (indexes at creation time)
+ def tableBaseline = "test_build_idx_multi_analyzer_baseline"
+
+ sql """ DROP TABLE IF EXISTS ${tableBuild} """
+ sql """ DROP TABLE IF EXISTS ${tableBaseline} """
+
+ // === Table 1: Dynamic index lifecycle ===
+ sql """
+ CREATE TABLE ${tableBuild} (
+ a BIGINT NOT NULL AUTO_INCREMENT,
+ ch TEXT NULL
+ ) DUPLICATE KEY(a)
+ DISTRIBUTED BY HASH(a) BUCKETS 1
+ PROPERTIES ("replication_allocation" = "tag.location.default: 1");
+ """
+
+ // Add initial plain inverted index
+ sql """ ALTER TABLE ${tableBuild} ADD INDEX idx_ch0(ch) USING INVERTED; """
+ wait_for_latest_op_on_table_finish(tableBuild, timeout)
+
+ // Insert batch 1
+ sql """ INSERT INTO ${tableBuild} VALUES
+ (1, '1949 West German federal election'),
+ (2, 'Category 1062 deaths'),
+ (3, 'File Swallow Project Gutenberg eBook 11921 jpg'),
+ (4, '80386DX processor'),
+ (5, 'List of minor planets 49001 to 50000'),
+ (6, 'HIP 57087 star'),
+ (7, 'File Paul Verlaine Project Gutenberg eText 15112 png'),
+ (8, 'Simple text without digits'),
+ (9, '1700 in Canada'),
+ (10, 'Category 1649 births');
+ """
+
+ // Drop the initial index
+ sql """ ALTER TABLE ${tableBuild} DROP INDEX idx_ch0; """
+ wait_for_latest_op_on_table_finish(tableBuild, timeout)
+
+ // Add two new indexes with different analyzers (edge_ngram first, then
standard)
+ sql """ ALTER TABLE ${tableBuild} ADD INDEX idx_ch_edge(ch) USING INVERTED
+ PROPERTIES("analyzer" = "edge_ngram_test", "support_phrase" =
"true"); """
+ wait_for_latest_op_on_table_finish(tableBuild, timeout)
+
+ sql """ ALTER TABLE ${tableBuild} ADD INDEX idx_ch_std(ch) USING INVERTED
+ PROPERTIES("analyzer" = "standard_test_default", "support_phrase"
= "true"); """
+ wait_for_latest_op_on_table_finish(tableBuild, timeout)
+
+ // Insert batch 2 (uses new schema with both indexes)
+ sql """ INSERT INTO ${tableBuild} VALUES
+ (11, 'Another text 2024'),
+ (12, 'No digits here');
+ """
+
+ // Build idx_ch_std FIRST (reverse order from ALTER)
+ sql """ BUILD INDEX idx_ch_std ON ${tableBuild}; """
+ wait_for_build_index_finish(tableBuild, timeout)
+
+ // Insert batch 3
+ sql """ INSERT INTO ${tableBuild} VALUES
+ (13, 'Third batch 9999'),
+ (14, 'Also no digits');
+ """
+
+ // Build idx_ch_edge SECOND
+ sql """ BUILD INDEX idx_ch_edge ON ${tableBuild}; """
+ wait_for_build_index_finish(tableBuild, timeout)
+
+ // === Table 2: Baseline with indexes at creation ===
+ sql """
+ CREATE TABLE ${tableBaseline} (
+ a BIGINT NOT NULL AUTO_INCREMENT,
+ ch TEXT NULL,
+ INDEX idx_ch_edge(ch) USING INVERTED PROPERTIES("analyzer" =
"edge_ngram_test", "support_phrase" = "true"),
+ INDEX idx_ch_std(ch) USING INVERTED PROPERTIES("analyzer" =
"standard_test_default", "support_phrase" = "true")
+ ) DUPLICATE KEY(a)
+ DISTRIBUTED BY HASH(a) BUCKETS 1
+ PROPERTIES ("replication_allocation" = "tag.location.default: 1");
+ """
+
+ sql """ INSERT INTO ${tableBaseline} VALUES
+ (1, '1949 West German federal election'),
+ (2, 'Category 1062 deaths'),
+ (3, 'File Swallow Project Gutenberg eBook 11921 jpg'),
+ (4, '80386DX processor'),
+ (5, 'List of minor planets 49001 to 50000'),
+ (6, 'HIP 57087 star'),
+ (7, 'File Paul Verlaine Project Gutenberg eText 15112 png'),
+ (8, 'Simple text without digits'),
+ (9, '1700 in Canada'),
+ (10, 'Category 1649 births'),
+ (11, 'Another text 2024'),
+ (12, 'No digits here'),
+ (13, 'Third batch 9999'),
+ (14, 'Also no digits');
+ """
+
+ sql "sync"
+
+ // Pin fuzzy variable: search() requires common expr pushdown to work
+ sql "SET enable_common_expr_pushdown = true"
+
+ // === Verification: regex /\d\d\d\d/ should match exactly 4-digit tokens
===
+ // With edge_ngram analyzer, "11921" produces tokens: "119", "1192",
"11921"
+ // So "1192" (4-digit) matches /\d\d\d\d/.
+ // With standard_test_default, "11921" stays as "11921" (5-digit), no
4-digit match.
+ // Both tables should use the same index (edge_ngram, added first in ALTER
order)
+ // and return the same results.
+
+ // Test 1: Total count with regex query
+ order_qt_build_regex """ SELECT count(*) FROM ${tableBuild}
+ WHERE search('/\\\\d\\\\d\\\\d\\\\d/',
'{"default_operator":"OR","default_field":"ch","minimum_should_match":0,"mode":"lucene"}');
"""
+
+ order_qt_baseline_regex """ SELECT count(*) FROM ${tableBaseline}
+ WHERE search('/\\\\d\\\\d\\\\d\\\\d/',
'{"default_operator":"OR","default_field":"ch","minimum_should_match":0,"mode":"lucene"}');
"""
+
+ // Test 2: Batch 1 only (the data that went through build index)
+ order_qt_build_batch1 """ SELECT count(*) FROM ${tableBuild}
+ WHERE a <= 10 AND search('/\\\\d\\\\d\\\\d\\\\d/',
'{"default_operator":"OR","default_field":"ch","minimum_should_match":0,"mode":"lucene"}');
"""
+
+ order_qt_baseline_batch1 """ SELECT count(*) FROM ${tableBaseline}
+ WHERE a <= 10 AND search('/\\\\d\\\\d\\\\d\\\\d/',
'{"default_operator":"OR","default_field":"ch","minimum_should_match":0,"mode":"lucene"}');
"""
+
+ // Test 3: Verify specific rows with 5+ digit tokens are found
+ // These rows contain tokens like "11921", "80386", "49001", "50000",
"57087", "15112"
+ // which produce 4-digit edge n-grams matching /\d\d\d\d/
+ order_qt_build_specific """ SELECT a FROM ${tableBuild}
+ WHERE a IN (3, 4, 5, 6, 7) AND search('/\\\\d\\\\d\\\\d\\\\d/',
'{"default_operator":"OR","default_field":"ch","minimum_should_match":0,"mode":"lucene"}')
+ ORDER BY a; """
+
+ order_qt_baseline_specific """ SELECT a FROM ${tableBaseline}
+ WHERE a IN (3, 4, 5, 6, 7) AND search('/\\\\d\\\\d\\\\d\\\\d/',
'{"default_operator":"OR","default_field":"ch","minimum_should_match":0,"mode":"lucene"}')
+ ORDER BY a; """
+
+ // Test 4: Simple term query should work on both
+ order_qt_build_term """ SELECT count(*) FROM ${tableBuild}
+ WHERE search('1949',
'{"default_operator":"OR","default_field":"ch","minimum_should_match":0,"mode":"lucene"}');
"""
+
+ order_qt_baseline_term """ SELECT count(*) FROM ${tableBaseline}
+ WHERE search('1949',
'{"default_operator":"OR","default_field":"ch","minimum_should_match":0,"mode":"lucene"}');
"""
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]