This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 2cbbaa3b07f [Fix](inverted index) fix rename column build index bug (#50056) 2cbbaa3b07f is described below commit 2cbbaa3b07f2605230fcf10862dcfe6c6c6bf178 Author: airborne12 <jiang...@selectdb.com> AuthorDate: Wed Apr 16 13:12:58 2025 +0800 [Fix](inverted index) fix rename column build index bug (#50056) Problem Summary: fix DCHECK error after multiple add/drop rename column when building inverted index ``` F20250408 11:43:45.886974 1066024 index_builder.cpp:423] Check failed: output_rowset_schema->has_inverted_index_with_index_id(index_id) *** Check failure stack trace: *** F20250408 11:43:45.886988 1066022 index_builder.cpp:423] Check failed: output_rowset_schema->has_inverted_index_with_index_id(index_id) *** Check failure stack trace: *** @ 0x590b0f234d26 google::LogMessage::SendToLog() @ 0x590b0f234d26 google::LogMessage::SendToLog() @ 0x590b0f231770 google::LogMessage::Flush() @ 0x590b0f231770 google::LogMessage::Flush() @ 0x590b0f235569 google::LogMessageFatal::~LogMessageFatal() @ 0x590b0f235569 google::LogMessageFatal::~LogMessageFatal() @ 0x590ad85d3459 doris::IndexBuilder::handle_single_rowset() @ 0x590ad85daf25 doris::IndexBuilder::handle_inverted_index_data() @ 0x590ad85d3459 doris::IndexBuilder::handle_single_rowset() @ 0x590ad85ddc31 doris::IndexBuilder::do_build_inverted_index() @ 0x590ad85daf25 doris::IndexBuilder::handle_inverted_index_data() @ 0x590ad711ad86 doris::StorageEngine::_handle_index_change() @ 0x590ad85ddc31 doris::IndexBuilder::do_build_inverted_index() @ 0x590ad711a1da doris::StorageEngine::process_index_change_task() @ 0x590ad711ad86 doris::StorageEngine::_handle_index_change() @ 0x590ad857b89d doris::EngineIndexChangeTask::execute() @ 0x590ad711a1da doris::StorageEngine::process_index_change_task() @ 0x590ad49deea2 doris::alter_inverted_index_callback() @ 0x590ad4a1de9d _ZNSt17_Function_handlerIFvvEZZN5doris14TaskWorkerPool11submit_taskERKNS1_17TAgentTaskRequestEENK3$_0clIS5_EEDaOT_EUlvE_E9_M_invokeERKSt9_Any_data @ 0x590ad857b89d doris::EngineIndexChangeTask::execute() @ 0x590ad936128b doris::ThreadPool::dispatch_thread() @ 0x590ad49deea2 doris::alter_inverted_index_callback() @ 0x590ad4a1de9d _ZNSt17_Function_handlerIFvvEZZN5doris14TaskWorkerPool11submit_taskERKNS1_17TAgentTaskRequestEENK3$_0clIS5_EEDaOT_EUlvE_E9_M_invokeERKSt9_Any_data @ 0x590ad9337828 doris::Thread::supervise_thread() @ 0x740bd7c9caa4 (unknown) @ 0x740bd7d29c3c (unknown) @ (nil) (unknown) *** Query id: 0-0 *** *** is nereids: 0 *** *** tablet id: 0 *** *** Aborted at 1744083830 (unix time) try "date -d @1744083830" if you are using GNU date *** *** Current BE git commitID: 3640f6c240 *** *** SIGABRT unknown detail explain (@0x10417b) received by PID 1065339 (TID 1066024 OR 0x7408c42006c0) from PID 1065339; stack trace: *** @ 0x590ad936128b doris::ThreadPool::dispatch_thread() @ 0x590ad9337828 doris::Thread::supervise_thread() @ 0x740bd7c9caa4 (unknown) @ 0x740bd7d29c3c (unknown) @ (nil) (unknown) 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /home/zcp/repo_center/doris_master/doris/be/src/common/signal_handler.h:421 1# 0x0000740BD7C45330 in /lib/x86_64-linux-gnu/libc.so.6 2# pthread_kill in /lib/x86_64-linux-gnu/libc.so.6 3# gsignal in /lib/x86_64-linux-gnu/libc.so.6 4# abort in /lib/x86_64-linux-gnu/libc.so.6 5# 0x0000590B0F23F5FD in /root/wyx/doris/be/lib/doris_be 6# 0x0000590B0F231C3A in /root/wyx/doris/be/lib/doris_be 7# google::LogMessage::SendToLog() in /root/wyx/doris/be/lib/doris_be 8# google::LogMessage::Flush() in /root/wyx/doris/be/lib/doris_be 9# google::LogMessageFatal::~LogMessageFatal() in /root/wyx/doris/be/lib/doris_be 10# doris::IndexBuilder::handle_single_rowset(std::shared_ptr<doris::RowsetMeta>, std::vector<std::shared_ptr<doris::segment_v2::Segment>, std::allocator<std::shared_ptr<doris::segment_v2::Segment> > >&) at /home/zcp/repo_center/doris_master/doris/be/src/olap/task/index_builder.cpp:423 11# doris::IndexBuilder::handle_inverted_index_data() at /home/zcp/repo_center/doris_master/doris/be/src/olap/task/index_builder.cpp:711 12# doris::IndexBuilder::do_build_inverted_index() in /root/wyx/doris/be/lib/doris_be 13# doris::StorageEngine::_handle_index_change(std::shared_ptr<doris::IndexBuilder>) at /home/zcp/repo_center/doris_master/doris/be/src/olap/olap_server.cpp:1213 14# doris::StorageEngine::process_index_change_task(doris::TAlterInvertedIndexReq const&) at /home/zcp/repo_center/doris_master/doris/be/src/olap/olap_server.cpp:1207 15# doris::EngineIndexChangeTask::execute() in /root/wyx/doris/be/lib/doris_be 16# doris::alter_inverted_index_callback(doris::StorageEngine&, doris::TAgentTaskRequest const&) in /root/wyx/doris/be/lib/doris_be 17# std::_Function_handler<void (), doris::TaskWorkerPool::submit_task(doris::TAgentTaskRequest const&)::$_0::operator()<doris::TAgentTaskRequest const&>(doris::TAgentTaskRequest const&) const::{lambda()#1}>::_M_invoke(std::_Any_data const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291 18# doris::ThreadPool::dispatch_thread() in /root/wyx/doris/be/lib/doris_be 19# doris::Thread::supervise_thread(void*) at /home/zcp/repo_center/doris_master/doris/be/src/util/thread.cpp:497 20# 0x0000740BD7C9CAA4 in /lib/x86_64-linux-gnu/libc.so.6 21# 0x0000740BD7D29C3C in /lib/x86_64-linux-gnu/libc.so.6 ``` --- be/src/olap/tablet_schema.cpp | 7 +- be/src/olap/task/index_builder.cpp | 3 +- be/test/olap/index_builder_test.cpp | 175 ++++++++++++++++++++++++++++++++++++ 3 files changed, 180 insertions(+), 5 deletions(-) diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 601ce807737..0af4a520a80 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -748,10 +748,9 @@ void TabletIndex::init_from_thrift(const TOlapTableIndex& index, col_unique_ids[i] = tablet_schema.column(column_idx).unique_id(); } else { // if column unique id not found by column name, find by column unique id - // column unique id can not bigger than tablet schema column size, if bigger than column size means - // this column is a new column added by light schema change - if (index.__isset.column_unique_ids && - index.column_unique_ids[i] < tablet_schema.num_columns()) { + // column unique id can not found means this column is a new column added by light schema change + if (index.__isset.column_unique_ids && !index.column_unique_ids.empty() && + tablet_schema.has_column_unique_id(index.column_unique_ids[i])) { col_unique_ids[i] = index.column_unique_ids[i]; } else { col_unique_ids[i] = -1; diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 463a00179f7..cf85d7025d0 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -400,7 +400,8 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta auto column_name = inverted_index.columns[0]; auto column_idx = output_rowset_schema->field_index(column_name); if (column_idx < 0) { - if (!inverted_index.column_unique_ids.empty()) { + if (inverted_index.__isset.column_unique_ids && + !inverted_index.column_unique_ids.empty()) { column_idx = output_rowset_schema->field_index( inverted_index.column_unique_ids[0]); } diff --git a/be/test/olap/index_builder_test.cpp b/be/test/olap/index_builder_test.cpp index c2483e95cad..b5ae890f19a 100644 --- a/be/test/olap/index_builder_test.cpp +++ b/be/test/olap/index_builder_test.cpp @@ -1077,6 +1077,181 @@ TEST_F(IndexBuilderTest, NonExistentColumnIndexTest) { // but the file count verification above should be sufficient to confirm behavior } +TEST_F(IndexBuilderTest, RenameColumnIndexTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14679); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + auto schema = std::make_shared<TabletSchema>(); + + schema->_keys_type = KeysType::UNIQUE_KEYS; + schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V2; + + // Create the first key column + TabletColumn column_1; + column_1.set_type(FieldType::OLAP_FIELD_TYPE_INT); + column_1.set_unique_id(1); + column_1.set_name("k1"); + column_1.set_is_key(true); + schema->append_column(column_1); + + // Create the second key column + TabletColumn column_2; + column_2.set_type(FieldType::OLAP_FIELD_TYPE_INT); + // not sequential unique_id + column_2.set_unique_id(3); + column_2.set_name("k2"); + column_2.set_is_key(false); + schema->append_column(column_2); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + schema->append_index(std::move(initial_index)); + + // 3. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15679); + writer_context.tablet_id = 15679; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15679); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 4. Create a rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 5. Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 6. Prepare indexes for building - valid k2 and non-existent k3 + _alter_indexes.clear(); + + // Index for rename column "k2" to "k3" + TOlapTableIndex index2; + index2.index_id = 3; + index2.columns.emplace_back("k3"); // This column doesn't exist in the schema + index2.index_name = "k3_index"; + index2.index_type = TIndexType::INVERTED; + index2.column_unique_ids.push_back(3); + index2.__isset.column_unique_ids = true; + _alter_indexes.push_back(index2); + + // 7. Create IndexBuilder + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, false); + + // 8. Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only k1 is considered for building + + // 9. Build indexes - should only build for existing columns + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 10. Check paths and files + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15679); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14679); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // 11. Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + old_dat_file_count++; + } + } + EXPECT_EQ(old_idx_file_count, 1) + << "Old directory should contain exactly 1 .idx file for the original k1 index"; + EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + // Should have 2 index files: original k1 index and new k2 index (k3 should be skipped) + EXPECT_EQ(new_idx_file_count, 1) + << "New directory should contain exactly 1 .idx files (for k1 and k2, not k3)"; + EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; + + // 12. Verify the tablet schema - would need to examine tablet_schema here + // k1 and k2 indexes should exist, k3 index should not + // Note: In production code, additional verification of schema would be done here +} TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTest) { // 0. prepare tablet path auto tablet_path = _absolute_dir + "/" + std::to_string(14679); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org