This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 2cbbaa3b07f [Fix](inverted index) fix rename column build index bug 
(#50056)
2cbbaa3b07f is described below

commit 2cbbaa3b07f2605230fcf10862dcfe6c6c6bf178
Author: airborne12 <jiang...@selectdb.com>
AuthorDate: Wed Apr 16 13:12:58 2025 +0800

    [Fix](inverted index) fix rename column build index bug (#50056)
    
    Problem Summary:
    fix DCHECK error after multiple add/drop rename column when building
    inverted index
    
    ```
    F20250408 11:43:45.886974 1066024 index_builder.cpp:423] Check failed: 
output_rowset_schema->has_inverted_index_with_index_id(index_id) 
    *** Check failure stack trace: ***
    F20250408 11:43:45.886988 1066022 index_builder.cpp:423] Check failed: 
output_rowset_schema->has_inverted_index_with_index_id(index_id) 
    *** Check failure stack trace: ***
        @     0x590b0f234d26  google::LogMessage::SendToLog()
        @     0x590b0f234d26  google::LogMessage::SendToLog()
        @     0x590b0f231770  google::LogMessage::Flush()
        @     0x590b0f231770  google::LogMessage::Flush()
        @     0x590b0f235569  google::LogMessageFatal::~LogMessageFatal()
        @     0x590b0f235569  google::LogMessageFatal::~LogMessageFatal()
        @     0x590ad85d3459  doris::IndexBuilder::handle_single_rowset()
        @     0x590ad85daf25  doris::IndexBuilder::handle_inverted_index_data()
        @     0x590ad85d3459  doris::IndexBuilder::handle_single_rowset()
        @     0x590ad85ddc31  doris::IndexBuilder::do_build_inverted_index()
        @     0x590ad85daf25  doris::IndexBuilder::handle_inverted_index_data()
        @     0x590ad711ad86  doris::StorageEngine::_handle_index_change()
        @     0x590ad85ddc31  doris::IndexBuilder::do_build_inverted_index()
        @     0x590ad711a1da  doris::StorageEngine::process_index_change_task()
        @     0x590ad711ad86  doris::StorageEngine::_handle_index_change()
        @     0x590ad857b89d  doris::EngineIndexChangeTask::execute()
        @     0x590ad711a1da  doris::StorageEngine::process_index_change_task()
        @     0x590ad49deea2  doris::alter_inverted_index_callback()
        @     0x590ad4a1de9d  
_ZNSt17_Function_handlerIFvvEZZN5doris14TaskWorkerPool11submit_taskERKNS1_17TAgentTaskRequestEENK3$_0clIS5_EEDaOT_EUlvE_E9_M_invokeERKSt9_Any_data
        @     0x590ad857b89d  doris::EngineIndexChangeTask::execute()
        @     0x590ad936128b  doris::ThreadPool::dispatch_thread()
        @     0x590ad49deea2  doris::alter_inverted_index_callback()
        @     0x590ad4a1de9d  
_ZNSt17_Function_handlerIFvvEZZN5doris14TaskWorkerPool11submit_taskERKNS1_17TAgentTaskRequestEENK3$_0clIS5_EEDaOT_EUlvE_E9_M_invokeERKSt9_Any_data
        @     0x590ad9337828  doris::Thread::supervise_thread()
        @     0x740bd7c9caa4  (unknown)
        @     0x740bd7d29c3c  (unknown)
        @              (nil)  (unknown)
    *** Query id: 0-0 ***
    *** is nereids: 0 ***
    *** tablet id: 0 ***
    *** Aborted at 1744083830 (unix time) try "date -d @1744083830" if you are 
using GNU date ***
    *** Current BE git commitID: 3640f6c240 ***
    *** SIGABRT unknown detail explain (@0x10417b) received by PID 1065339 (TID 
1066024 OR 0x7408c42006c0) from PID 1065339; stack trace: ***
        @     0x590ad936128b  doris::ThreadPool::dispatch_thread()
        @     0x590ad9337828  doris::Thread::supervise_thread()
        @     0x740bd7c9caa4  (unknown)
        @     0x740bd7d29c3c  (unknown)
        @              (nil)  (unknown)
     0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at 
/home/zcp/repo_center/doris_master/doris/be/src/common/signal_handler.h:421
     1# 0x0000740BD7C45330 in /lib/x86_64-linux-gnu/libc.so.6
     2# pthread_kill in /lib/x86_64-linux-gnu/libc.so.6
     3# gsignal in /lib/x86_64-linux-gnu/libc.so.6
     4# abort in /lib/x86_64-linux-gnu/libc.so.6
     5# 0x0000590B0F23F5FD in /root/wyx/doris/be/lib/doris_be
     6# 0x0000590B0F231C3A in /root/wyx/doris/be/lib/doris_be
     7# google::LogMessage::SendToLog() in /root/wyx/doris/be/lib/doris_be
     8# google::LogMessage::Flush() in /root/wyx/doris/be/lib/doris_be
     9# google::LogMessageFatal::~LogMessageFatal() in 
/root/wyx/doris/be/lib/doris_be
    10# 
doris::IndexBuilder::handle_single_rowset(std::shared_ptr<doris::RowsetMeta>, 
std::vector<std::shared_ptr<doris::segment_v2::Segment>, 
std::allocator<std::shared_ptr<doris::segment_v2::Segment> > >&) at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/task/index_builder.cpp:423
    11# doris::IndexBuilder::handle_inverted_index_data() at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/task/index_builder.cpp:711
    12# doris::IndexBuilder::do_build_inverted_index() in 
/root/wyx/doris/be/lib/doris_be
    13# 
doris::StorageEngine::_handle_index_change(std::shared_ptr<doris::IndexBuilder>)
 at /home/zcp/repo_center/doris_master/doris/be/src/olap/olap_server.cpp:1213
    14# 
doris::StorageEngine::process_index_change_task(doris::TAlterInvertedIndexReq 
const&) at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/olap_server.cpp:1207
    15# doris::EngineIndexChangeTask::execute() in 
/root/wyx/doris/be/lib/doris_be
    16# doris::alter_inverted_index_callback(doris::StorageEngine&, 
doris::TAgentTaskRequest const&) in /root/wyx/doris/be/lib/doris_be
    17# std::_Function_handler<void (), 
doris::TaskWorkerPool::submit_task(doris::TAgentTaskRequest 
const&)::$_0::operator()<doris::TAgentTaskRequest 
const&>(doris::TAgentTaskRequest const&) 
const::{lambda()#1}>::_M_invoke(std::_Any_data const&) at 
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291
    18# doris::ThreadPool::dispatch_thread() in /root/wyx/doris/be/lib/doris_be
    19# doris::Thread::supervise_thread(void*) at 
/home/zcp/repo_center/doris_master/doris/be/src/util/thread.cpp:497
    20# 0x0000740BD7C9CAA4 in /lib/x86_64-linux-gnu/libc.so.6
    21# 0x0000740BD7D29C3C in /lib/x86_64-linux-gnu/libc.so.6
    ```
---
 be/src/olap/tablet_schema.cpp       |   7 +-
 be/src/olap/task/index_builder.cpp  |   3 +-
 be/test/olap/index_builder_test.cpp | 175 ++++++++++++++++++++++++++++++++++++
 3 files changed, 180 insertions(+), 5 deletions(-)

diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 601ce807737..0af4a520a80 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -748,10 +748,9 @@ void TabletIndex::init_from_thrift(const TOlapTableIndex& 
index,
             col_unique_ids[i] = tablet_schema.column(column_idx).unique_id();
         } else {
             // if column unique id not found by column name, find by column 
unique id
-            // column unique id can not bigger than tablet schema column size, 
if bigger than column size means
-            // this column is a new column added by light schema change
-            if (index.__isset.column_unique_ids &&
-                index.column_unique_ids[i] < tablet_schema.num_columns()) {
+            // column unique id can not found means this column is a new 
column added by light schema change
+            if (index.__isset.column_unique_ids && 
!index.column_unique_ids.empty() &&
+                
tablet_schema.has_column_unique_id(index.column_unique_ids[i])) {
                 col_unique_ids[i] = index.column_unique_ids[i];
             } else {
                 col_unique_ids[i] = -1;
diff --git a/be/src/olap/task/index_builder.cpp 
b/be/src/olap/task/index_builder.cpp
index 463a00179f7..cf85d7025d0 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -400,7 +400,8 @@ Status 
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
                 auto column_name = inverted_index.columns[0];
                 auto column_idx = 
output_rowset_schema->field_index(column_name);
                 if (column_idx < 0) {
-                    if (!inverted_index.column_unique_ids.empty()) {
+                    if (inverted_index.__isset.column_unique_ids &&
+                        !inverted_index.column_unique_ids.empty()) {
                         column_idx = output_rowset_schema->field_index(
                                 inverted_index.column_unique_ids[0]);
                     }
diff --git a/be/test/olap/index_builder_test.cpp 
b/be/test/olap/index_builder_test.cpp
index c2483e95cad..b5ae890f19a 100644
--- a/be/test/olap/index_builder_test.cpp
+++ b/be/test/olap/index_builder_test.cpp
@@ -1077,6 +1077,181 @@ TEST_F(IndexBuilderTest, NonExistentColumnIndexTest) {
     // but the file count verification above should be sufficient to confirm 
behavior
 }
 
+TEST_F(IndexBuilderTest, RenameColumnIndexTest) {
+    // 0. prepare tablet path
+    auto tablet_path = _absolute_dir + "/" + std::to_string(14679);
+    _tablet->_tablet_path = tablet_path;
+    
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+    
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+    auto schema = std::make_shared<TabletSchema>();
+
+    schema->_keys_type = KeysType::UNIQUE_KEYS;
+    schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V2;
+
+    // Create the first key column
+    TabletColumn column_1;
+    column_1.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    column_1.set_unique_id(1);
+    column_1.set_name("k1");
+    column_1.set_is_key(true);
+    schema->append_column(column_1);
+
+    // Create the second key column
+    TabletColumn column_2;
+    column_2.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    // not sequential unique_id
+    column_2.set_unique_id(3);
+    column_2.set_name("k2");
+    column_2.set_is_key(false);
+    schema->append_column(column_2);
+
+    // 1. Prepare data for writing
+    RowsetSharedPtr rowset;
+    const int num_rows = 1000;
+
+    // 2. First add an initial index to the schema (for k1 column)
+    TabletIndex initial_index;
+    initial_index._index_id = 1;
+    initial_index._index_name = "k1_index";
+    initial_index._index_type = IndexType::INVERTED;
+    initial_index._col_unique_ids.push_back(1); // unique_id for k1
+    schema->append_index(std::move(initial_index));
+
+    // 3. Create a rowset writer context
+    RowsetWriterContext writer_context;
+    writer_context.rowset_id.init(15679);
+    writer_context.tablet_id = 15679;
+    writer_context.tablet_schema_hash = 567997577;
+    writer_context.partition_id = 10;
+    writer_context.rowset_type = BETA_ROWSET;
+    writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15679);
+    writer_context.rowset_state = VISIBLE;
+    writer_context.tablet_schema = schema;
+    writer_context.version.first = 10;
+    writer_context.version.second = 10;
+
+    
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+    // 4. Create a rowset writer
+    auto res = RowsetFactory::create_rowset_writer(*_engine_ref, 
writer_context, false);
+    ASSERT_TRUE(res.has_value()) << res.error();
+    auto rowset_writer = std::move(res).value();
+
+    // 5. Write data to the rowset
+    {
+        vectorized::Block block = _tablet_schema->create_block();
+        auto columns = block.mutate_columns();
+
+        // Add data for k1 and k2 columns
+        for (int i = 0; i < num_rows; ++i) {
+            // k1 column (int)
+            int32_t k1 = i * 10;
+            columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+            // k2 column (int)
+            int32_t k2 = i % 100;
+            columns[1]->insert_data((const char*)&k2, sizeof(k2));
+        }
+
+        // Add the block to the rowset
+        Status s = rowset_writer->add_block(&block);
+        ASSERT_TRUE(s.ok()) << s.to_string();
+
+        // Flush the writer
+        s = rowset_writer->flush();
+        ASSERT_TRUE(s.ok()) << s.to_string();
+
+        // Build the rowset
+        ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+        // Add the rowset to the tablet
+        ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+    }
+
+    // 6. Prepare indexes for building - valid k2 and non-existent k3
+    _alter_indexes.clear();
+
+    // Index for rename column "k2" to "k3"
+    TOlapTableIndex index2;
+    index2.index_id = 3;
+    index2.columns.emplace_back("k3"); // This column doesn't exist in the 
schema
+    index2.index_name = "k3_index";
+    index2.index_type = TIndexType::INVERTED;
+    index2.column_unique_ids.push_back(3);
+    index2.__isset.column_unique_ids = true;
+    _alter_indexes.push_back(index2);
+
+    // 7. Create IndexBuilder
+    IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), 
_tablet, _columns,
+                         _alter_indexes, false);
+
+    // 8. Initialize and verify
+    auto status = builder.init();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+    EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only k1 is considered 
for building
+
+    // 9. Build indexes - should only build for existing columns
+    status = builder.do_build_inverted_index();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+
+    // 10. Check paths and files
+    auto old_tablet_path = _absolute_dir + "/" + std::to_string(15679);
+    auto new_tablet_path = _absolute_dir + "/" + std::to_string(14679);
+    bool old_exists = false;
+    bool new_exists = false;
+    EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, 
&old_exists).ok());
+    EXPECT_TRUE(old_exists);
+    EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, 
&new_exists).ok());
+    EXPECT_TRUE(new_exists);
+
+    // 11. Check files in old and new directories
+    std::vector<io::FileInfo> old_files;
+    bool old_dir_exists = false;
+    EXPECT_TRUE(io::global_local_filesystem()
+                        ->list(old_tablet_path, true, &old_files, 
&old_dir_exists)
+                        .ok());
+    EXPECT_TRUE(old_dir_exists);
+    int old_idx_file_count = 0;
+    int old_dat_file_count = 0;
+    for (const auto& file : old_files) {
+        std::string filename = file.file_name;
+        if (filename.find(".idx") != std::string::npos) {
+            old_idx_file_count++;
+        }
+        if (filename.find(".dat") != std::string::npos) {
+            old_dat_file_count++;
+        }
+    }
+    EXPECT_EQ(old_idx_file_count, 1)
+            << "Old directory should contain exactly 1 .idx file for the 
original k1 index";
+    EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 
1 .dat file";
+
+    std::vector<io::FileInfo> new_files;
+    bool new_dir_exists = false;
+    EXPECT_TRUE(io::global_local_filesystem()
+                        ->list(new_tablet_path, true, &new_files, 
&new_dir_exists)
+                        .ok());
+    EXPECT_TRUE(new_dir_exists);
+    int new_idx_file_count = 0;
+    int new_dat_file_count = 0;
+    for (const auto& file : new_files) {
+        std::string filename = file.file_name;
+        if (filename.find(".idx") != std::string::npos) {
+            new_idx_file_count++;
+        }
+        if (filename.find(".dat") != std::string::npos) {
+            new_dat_file_count++;
+        }
+    }
+    // Should have 2 index files: original k1 index and new k2 index (k3 
should be skipped)
+    EXPECT_EQ(new_idx_file_count, 1)
+            << "New directory should contain exactly 1 .idx files (for k1 and 
k2, not k3)";
+    EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 
1 .dat file";
+
+    // 12. Verify the tablet schema - would need to examine tablet_schema here
+    // k1 and k2 indexes should exist, k3 index should not
+    // Note: In production code, additional verification of schema would be 
done here
+}
 TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTest) {
     // 0. prepare tablet path
     auto tablet_path = _absolute_dir + "/" + std::to_string(14679);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to