This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 384165405a6 [fix](index build) Correct inverted index behavior after dynamically adding a column (#48389)
384165405a6 is described below

commit 384165405a688546c220cf20cca605338285ff28
Author: airborne12 <jiang...@selectdb.com>
AuthorDate: Mon Mar 3 15:28:07 2025 +0800

    [fix](index build) Correct inverted index behavior after dynamically adding a column (#48389)

    Problem Summary:

    This pull request includes changes to improve the handling of inverted indexes and to add new test cases for index changes on new columns. The most important changes include fixing the placement of inverted index writers, adding new test cases for inverted index changes, and modifying the test output file to reflect the new test cases.
---
 be/src/olap/task/index_builder.cpp | 11 +-
 be/src/olap/task/index_builder.h | 18 +-
 be/test/olap/index_builder_test.cpp | 2422 ++++++++++++++++++++
 .../test_index_change_on_new_column.out | Bin 245 -> 611 bytes
 .../test_index_change_on_new_column.groovy | 90 +-
 5 files changed, 2526 insertions(+), 15 deletions(-)

diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp
index 85f76651046..792812200bb 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -448,14 +448,13 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
 } } + // DO NOT forget inverted_index_file_writer for the segment, otherwise, original inverted index will be deleted. 
+ _inverted_index_file_writers.emplace(seg_ptr->id(), + std::move(inverted_index_file_writer)); if (return_columns.empty()) { // no columns to read - break; + continue; } - - _inverted_index_file_writers.emplace(seg_ptr->id(), - std::move(inverted_index_file_writer)); - // create iterator for each segment StorageReadOptions read_options; OlapReaderStatistics stats; @@ -840,10 +839,12 @@ Status IndexBuilder::modify_rowsets(const Merger::Statistics* stats) { RETURN_IF_ERROR(_tablet->modify_rowsets(_output_rowsets, _input_rowsets, true)); } +#ifndef BE_TEST { std::shared_lock rlock(_tablet->get_header_lock()); _tablet->save_meta(); } +#endif return Status::OK(); } diff --git a/be/src/olap/task/index_builder.h b/be/src/olap/task/index_builder.h index 8c996bb400d..69ca5f39c4d 100644 --- a/be/src/olap/task/index_builder.h +++ b/be/src/olap/task/index_builder.h @@ -47,16 +47,16 @@ public: IndexBuilder(StorageEngine& engine, TabletSharedPtr tablet, const std::vector<TColumn>& columns, const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, bool is_drop_op = false); - ~IndexBuilder(); + virtual ~IndexBuilder(); - Status init(); - Status do_build_inverted_index(); - Status update_inverted_index_info(); - Status handle_inverted_index_data(); - Status handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta, - std::vector<segment_v2::SegmentSharedPtr>& segments); - Status modify_rowsets(const Merger::Statistics* stats = nullptr); - void gc_output_rowset(); + virtual Status init(); + virtual Status do_build_inverted_index(); + virtual Status update_inverted_index_info(); + virtual Status handle_inverted_index_data(); + virtual Status handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta, + std::vector<segment_v2::SegmentSharedPtr>& segments); + virtual Status modify_rowsets(const Merger::Statistics* stats = nullptr); + virtual void gc_output_rowset(); private: Status _write_inverted_index_data(TabletSchemaSPtr tablet_schema, int32_t segment_idx, diff --git 
a/be/test/olap/index_builder_test.cpp b/be/test/olap/index_builder_test.cpp new file mode 100644 index 00000000000..c2483e95cad --- /dev/null +++ b/be/test/olap/index_builder_test.cpp @@ -0,0 +1,2422 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "olap/task/index_builder.h" + +#include <gmock/gmock.h> +#include <gtest/gtest.h> + +#include "olap/olap_common.h" +#include "olap/rowset/beta_rowset.h" +#include "olap/rowset/rowset_factory.h" +#include "olap/rowset/rowset_writer_context.h" +#include "olap/storage_engine.h" +#include "olap/tablet_fwd.h" +#include "olap/tablet_schema.h" + +namespace doris { +using namespace testing; + +class IndexBuilderTest : public ::testing::Test { +protected: + void SetUp() override { + char buffer[MAX_PATH_LEN]; + ASSERT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); + _current_dir = std::string(buffer); + _absolute_dir = _current_dir + "/" + std::string(dest_dir); + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok()); + + std::vector<StorePath> paths; + paths.emplace_back(config::storage_root_path, -1); + + // tmp dir + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok()); + paths.emplace_back(std::string(tmp_dir), 1024000000); + auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths); + EXPECT_TRUE(tmp_file_dirs->init().ok()); + ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs)); + + // use memory limit + int64_t inverted_index_cache_limit = 0; + _inverted_index_searcher_cache = std::unique_ptr<segment_v2::InvertedIndexSearcherCache>( + InvertedIndexSearcherCache::create_global_instance(inverted_index_cache_limit, + 256)); + + ExecEnv::GetInstance()->set_inverted_index_searcher_cache( + _inverted_index_searcher_cache.get()); + doris::EngineOptions options; + options.store_paths = paths; + + auto engine = std::make_unique<StorageEngine>(options); + _engine_ref = engine.get(); + _data_dir = std::make_unique<DataDir>(*_engine_ref, _absolute_dir); + ASSERT_TRUE(_data_dir->update_capacity().ok()); + 
ExecEnv::GetInstance()->set_storage_engine(std::move(engine)); + + _tablet_meta = create_tablet_meta(); + + // Create tablet meta + // auto* tablet_schema = _tablet_meta->mutable_tablet_schema(); + _tablet_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(_tablet_schema, KeysType::DUP_KEYS); + // Initialize tablet + _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta, _data_dir.get()); + ASSERT_TRUE(_tablet->init().ok()); + } + + void TearDown() override { + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok()); + ExecEnv::GetInstance()->set_storage_engine(nullptr); + _tablet.reset(); + } + + void create_tablet_schema(TabletSchemaSPtr tablet_schema, KeysType keystype, + int num_value_col = 1) { + // Set basic properties of TabletSchema directly + tablet_schema->_keys_type = keystype; + tablet_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V2; + + // Create the first key column + TabletColumn column_1; + column_1.set_type(FieldType::OLAP_FIELD_TYPE_INT); + column_1.set_unique_id(1); + column_1.set_name("k1"); + column_1.set_is_key(true); + tablet_schema->append_column(column_1); + + // Create the second key column + TabletColumn column_2; + column_2.set_type(FieldType::OLAP_FIELD_TYPE_INT); + column_2.set_unique_id(2); + column_2.set_name("k2"); + column_2.set_is_key(false); + tablet_schema->append_column(column_2); + } + + TabletMetaSharedPtr create_tablet_meta() { + TabletMetaPB tablet_meta_pb; + tablet_meta_pb.set_table_id(1); + tablet_meta_pb.set_tablet_id(15673); + tablet_meta_pb.set_schema_hash(567997577); + tablet_meta_pb.set_shard_id(0); + tablet_meta_pb.set_creation_time(1575351212); + + TabletMetaSharedPtr tablet_meta(new TabletMeta()); + tablet_meta->init_from_pb(tablet_meta_pb); + return tablet_meta; + } + + // Helper to create rowset meta + void init_rs_meta(RowsetMetaSharedPtr& rs_meta, TabletSchemaSPtr tablet_schema, int64_t start, + int64_t end) { + RowsetMetaPB 
rowset_meta_pb; + rowset_meta_pb.set_rowset_id(540081); + rowset_meta_pb.set_tablet_id(15673); + rowset_meta_pb.set_tablet_schema_hash(567997577); + rowset_meta_pb.set_rowset_type(RowsetTypePB::BETA_ROWSET); + rowset_meta_pb.set_rowset_state(RowsetStatePB::VISIBLE); + rowset_meta_pb.set_start_version(start); + rowset_meta_pb.set_end_version(end); + rowset_meta_pb.set_num_rows(3929); + rowset_meta_pb.set_total_disk_size(84699); + rowset_meta_pb.set_data_disk_size(84464); + rowset_meta_pb.set_index_disk_size(235); + rowset_meta_pb.set_num_segments(2); + + rs_meta->init_from_pb(rowset_meta_pb); + rs_meta->set_tablet_schema(tablet_schema); + } + + StorageEngine* _engine_ref = nullptr; + TabletSharedPtr _tablet; + TabletMetaSharedPtr _tablet_meta; + TabletSchemaSPtr _tablet_schema; + std::vector<TColumn> _columns; + std::vector<doris::TOlapTableIndex> _alter_indexes; + std::unique_ptr<DataDir> _data_dir = nullptr; + std::string _current_dir; + std::string _absolute_dir; + std::unique_ptr<InvertedIndexSearcherCache> _inverted_index_searcher_cache; + + constexpr static uint32_t MAX_PATH_LEN = 1024; + constexpr static std::string_view dest_dir = "./ut_dir/index_builder_test"; + constexpr static std::string_view tmp_dir = "./ut_dir/index_builder_test"; +}; + +TEST_F(IndexBuilderTest, BasicBuildTest) { + // 1. Prepare test data + TOlapTableIndex index; + index.index_id = 1; + index.columns.emplace_back("col1"); + _alter_indexes.push_back(index); + + // 2. Create IndexBuilder + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, false); + + // 3. Verify initialization + auto status = builder.init(); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(builder._alter_index_ids.size(), 1); +} + +TEST_F(IndexBuilderTest, DropIndexTest) { + // 0. 
prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(15676); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + _tablet_schema->append_index(std::move(initial_index)); + + // 3. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15676); + writer_context.tablet_id = 15676; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = tablet_path; + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 4. Create a rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 5. 
Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 6. Verify index exists before dropping + EXPECT_TRUE(_tablet_schema->has_inverted_index()); + EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(1)); + + // 7. Prepare index for dropping + TOlapTableIndex drop_index; + drop_index.index_id = 1; + drop_index.columns.emplace_back("k1"); + _alter_indexes.push_back(drop_index); + + // 8. Create IndexBuilder with drop operation + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, true); + + // 9. Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + + // 10. Execute drop operation + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 11. 
Verify the index has been removed + // check old tablet path and new tablet path + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok()); + EXPECT_TRUE(exists); + + // Check files in old and new directories + std::vector<io::FileInfo> files; + bool dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->list(tablet_path, true, &files, &dir_exists).ok()); + EXPECT_TRUE(dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : files) { + std::string filename = file.file_name; + if (filename.find("15676_0.idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find("15676_0.dat") != std::string::npos) { + old_dat_file_count++; + } + if (filename.find("020000000000000100000000000000000000000000000000_0.idx") != + std::string::npos) { + new_idx_file_count++; + } + if (filename.find("020000000000000100000000000000000000000000000000_0.dat") != + std::string::npos) { + new_dat_file_count++; + } + } + // The index should have been removed + EXPECT_EQ(old_idx_file_count, 1) << "Tablet path should have 1 .idx file before drop"; + EXPECT_EQ(old_dat_file_count, 1) << "Tablet path should have 1 .dat file before drop"; + EXPECT_EQ(new_idx_file_count, 0) << "Tablet path should have no .idx file after drop"; + EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file after drop"; + + //auto tablet_schema = _tablet->tablet_schema(); + //EXPECT_FALSE(tablet_schema->has_inverted_index_with_index_id(1)); +} + +TEST_F(IndexBuilderTest, BuildIndexAfterWritingDataTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14673); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. 
Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15673); + writer_context.tablet_id = 15673; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15673); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 3. Create a rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 4. Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns according to the schema + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 5. 
Prepare index for building + TOlapTableIndex index1; + index1.index_id = 1; + index1.columns.emplace_back("k1"); + index1.index_name = "k1_index"; + index1.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index1); + + TOlapTableIndex index2; + index2.index_id = 2; + index2.columns.emplace_back("k2"); + index2.index_name = "k2_index"; + index2.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index2); + + // 6. Create IndexBuilder + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, false); + + // 7. Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 2); + + // 8. Build index + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // check old tablet path and new tablet path + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15673); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14673); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int idx_file_count = 0; + int dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + dat_file_count++; + } + } + EXPECT_EQ(idx_file_count, 0) << "Old directory should contain exactly 0 .idx file"; + EXPECT_EQ(dat_file_count, 
1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx files"; + EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; + + // 9. Verify the result (indexes should be built successfully) + //auto tablet_schema = _tablet->tablet_schema(); + //EXPECT_TRUE(tablet_schema->has_inverted_index_with_index_id(1)); + //EXPECT_TRUE(tablet_schema->has_inverted_index_with_index_id(2)); +} + +TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14675); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + _tablet_schema->append_index(std::move(initial_index)); + + // 3. 
Create rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15675); + writer_context.tablet_id = 15675; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15675); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 4. Create rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 5. Write data to rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add block to rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add rowset to tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 6. Prepare new index information (only add for k2 column) + TOlapTableIndex new_index; + new_index.index_id = 2; // New index ID is 2 + new_index.columns.emplace_back("k2"); + new_index.index_name = "k2_index"; + new_index.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(new_index); + + // 7. 
Create IndexBuilder + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, false); + + // 8. Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only one new index needs to be built + + // 9. Build index + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // check old tablet path and new tablet path + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15675); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14675); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int idx_file_count = 0; + int dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + dat_file_count++; + } + } + EXPECT_EQ(idx_file_count, 1) << "Old directory should contain exactly 1 .idx file"; + EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename 
= file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx files"; + EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; + + // 10. Verify results (both indexes should exist) + // Verify initial index (k1) still exists + //EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(1)); + // Verify newly added index (k2) is successfully built + //EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(2)); +} + +TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTestV1) { + // 1. Create new schema using V1 format + auto v1_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(v1_schema, KeysType::DUP_KEYS); + + // 2. Modify to V1 format + v1_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; + + // 3. First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + v1_schema->append_index(std::move(initial_index)); + + // 4. 
Update schema in tablet + TabletMetaPB tablet_meta_pb; + _tablet_meta->to_meta_pb(&tablet_meta_pb); + + TabletSchemaPB v1_schema_pb; + v1_schema->to_schema_pb(&v1_schema_pb); + tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb); + + _tablet_meta->init_from_pb(tablet_meta_pb); + + // Reinitialize tablet to use new schema + _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta, _data_dir.get()); + ASSERT_TRUE(_tablet->init().ok()); + auto tablet_path = _absolute_dir + "/" + std::to_string(14674); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 5. Prepare data + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 6. Create rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15674); + writer_context.tablet_id = 15674; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15674); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = v1_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 7. Create rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 8. 
Write data to rowset + { + vectorized::Block block = v1_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add block to rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add rowset to tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 9. Clear existing index list, prepare new index + _alter_indexes.clear(); + + // 10. Prepare new index information (only add for k2 column) + TOlapTableIndex new_index; + new_index.index_id = 2; // New index ID is 2 + new_index.columns.emplace_back("k2"); + new_index.index_name = "k2_index"; + new_index.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(new_index); + + // 11. Create IndexBuilder + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, false); + + // 12. Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only one new index needs to be built + + // 13. 
Build index + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // check old tablet path and new tablet path + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15674); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14674); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int idx_file_count = 0; + int dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + dat_file_count++; + } + } + EXPECT_EQ(idx_file_count, 1) << "Old directory should contain exactly 1 .idx file"; + EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + EXPECT_EQ(new_idx_file_count, 2) << "New directory should contain exactly 2 .idx files"; + EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 
.dat file"; + // 14. Verify results (both indexes should exist) + // Verify initial index (k1) still exists + //EXPECT_TRUE(v1_schema->has_inverted_index_with_index_id(1)); + // Verify newly added index (k2) is successfully built + //EXPECT_TRUE(_tablet->tablet_schema()->has_inverted_index_with_index_id(2)); + + // 15. Confirm storage format is still V1 + //EXPECT_EQ(v1_schema->_inverted_index_storage_format, InvertedIndexStorageFormatPB::V1); +} + +TEST_F(IndexBuilderTest, MultiSegmentBuildIndexTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14677); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int rows_per_segment = 500; + const int num_segments = 3; + + // 2. Create a rowset writer context with segment size set to trigger multiple segments + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15677); + writer_context.tablet_id = 15677; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15677); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + // Set small segment size to ensure we create multiple segments + writer_context.max_rows_per_segment = rows_per_segment; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 3. Create a rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 4. 
Write data to the rowset in multiple batches to ensure we get multiple segments + for (int segment = 0; segment < num_segments; segment++) { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < rows_per_segment; ++i) { + // k1 column (int) - make values different across segments + int32_t k1 = (segment * rows_per_segment + i) * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = (segment * rows_per_segment + i) % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush to ensure we create a new segment + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + } + + // 5. Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Verify we have the expected number of segments + ASSERT_EQ(rowset->num_segments(), num_segments) + << "Rowset should have " << num_segments << " segments but has " + << rowset->num_segments(); + + // 6. Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + + // 7. Prepare indexes for building + TOlapTableIndex index1; + index1.index_id = 1; + index1.columns.emplace_back("k1"); + index1.index_name = "k1_index"; + index1.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index1); + + TOlapTableIndex index2; + index2.index_id = 2; + index2.columns.emplace_back("k2"); + index2.index_name = "k2_index"; + index2.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index2); + + // 8. Create IndexBuilder + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, false); + + // 9. 
Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 2); + + // 10. Build indexes + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 11. Check paths and files + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15677); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14677); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // 12. Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + old_dat_file_count++; + } + } + EXPECT_EQ(old_idx_file_count, 0) << "Old directory should contain exactly 0 .idx files"; + EXPECT_EQ(old_dat_file_count, num_segments) + << "Old directory should contain exactly " << num_segments << " .dat files"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != 
std::string::npos) { + new_dat_file_count++; + } + } + EXPECT_EQ(new_idx_file_count, num_segments) + << "New directory should contain exactly " << num_segments << " .idx files"; + EXPECT_EQ(new_dat_file_count, num_segments) + << "New directory should contain exactly " << num_segments << " .dat files"; +} + +TEST_F(IndexBuilderTest, NonExistentColumnIndexTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14678); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15678); + writer_context.tablet_id = 15678; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15678); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 3. Create a rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 4. 
Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 5. Prepare indexes for building - including one for a non-existent column + _alter_indexes.clear(); + + // Index for non-existent column "k3" + TOlapTableIndex index2; + index2.index_id = 2; + index2.columns.emplace_back("k3"); // This column doesn't exist in the schema + index2.index_name = "k3_index"; + index2.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index2); + + // 6. Create IndexBuilder + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, false); + + // 7. Initialize and verify + auto status = builder.init(); + // The init should succeed, as we'll skip non-existent columns later + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 8. Build indexes - should only build for existing columns + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 9. 
Check paths and files + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15678); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14678); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // 10. Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + old_dat_file_count++; + } + } + EXPECT_EQ(old_idx_file_count, 0) << "Old directory should contain exactly 0 .idx files"; + EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + // No .idx files are expected: the only requested index targets the non-existent column k3 + EXPECT_EQ(new_idx_file_count, 0) + << "New directory should contain exactly 0 .idx file for the existing column"; + EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; + + // 11.
Verify logs in the output to confirm k3 index was skipped + // This would require examining the log output which isn't easily done in unit tests, + // but the file count verification above should be sufficient to confirm behavior +} + +TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14679); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + _tablet_schema->append_index(std::move(initial_index)); + + // 3. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15679); + writer_context.tablet_id = 15679; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15679); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 4. Create a rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 5. 
Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 6. Prepare indexes for building - only an index on the non-existent column k3 is requested + _alter_indexes.clear(); + + // Index for non-existent column "k3" + TOlapTableIndex index2; + index2.index_id = 3; + index2.columns.emplace_back("k3"); // This column doesn't exist in the schema + index2.index_name = "k3_index"; + index2.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index2); + + // 7. Create IndexBuilder + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, false); + + // 8. Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); // Exactly one alter index (k3_index) was requested + + // 9. Build indexes - should only build for existing columns + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 10.
Check paths and files + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15679); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14679); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // 11. Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + old_dat_file_count++; + } + } + EXPECT_EQ(old_idx_file_count, 1) + << "Old directory should contain exactly 1 .idx file for the original k1 index"; + EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + // Should have 1 index file: the original k1 index must be kept (no k2 index is requested; k3 should be skipped) + EXPECT_EQ(new_idx_file_count, 1) + << "New directory should contain exactly 1 .idx files (for k1 and k2, not k3)"; + EXPECT_EQ(new_dat_file_count, 1) << "New
directory should contain exactly 1 .dat file"; + + // 12. Verify the tablet schema - would need to examine tablet_schema here + // k1 and k2 indexes should exist, k3 index should not + // Note: In production code, additional verification of schema would be done here +} + +TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTestV1) { + // 1. Create new schema using V1 format + auto v1_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(v1_schema, KeysType::DUP_KEYS); + + // 2. Modify to V1 format + v1_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; + + // 3. First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + v1_schema->append_index(std::move(initial_index)); + + // 4. Update schema in tablet + TabletMetaPB tablet_meta_pb; + _tablet_meta->to_meta_pb(&tablet_meta_pb); + + TabletSchemaPB v1_schema_pb; + v1_schema->to_schema_pb(&v1_schema_pb); + tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb); + + _tablet_meta->init_from_pb(tablet_meta_pb); + + // 5. Reinitialize tablet to use new schema + _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta, _data_dir.get()); + ASSERT_TRUE(_tablet->init().ok()); + auto tablet_path = _absolute_dir + "/" + std::to_string(14680); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 6. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 7. 
Create rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15680); + writer_context.tablet_id = 15680; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15680); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = v1_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 8. Create rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 9. Write data to rowset + { + vectorized::Block block = v1_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add block to rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add rowset to tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 10. 
Prepare indexes for building - only an index on the non-existent column k3 is requested + _alter_indexes.clear(); + + // Index for non-existent column "k3" + TOlapTableIndex index2; + index2.index_id = 3; + index2.columns.emplace_back("k3"); // This column doesn't exist in the schema + index2.index_name = "k3_index"; + index2.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index2); + + // Add column information for the non-existent column + _columns.clear(); // Clear previous column info + TColumn non_existent_column; + non_existent_column.column_name = "k3"; + non_existent_column.column_type.type = TPrimitiveType::INT; + _columns.push_back(non_existent_column); + + // 11. Create IndexBuilder + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, false); + + // 12. Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + // 13. Build indexes - should only build for existing columns + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 14. Check paths and files + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15680); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14680); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // 15.
Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + old_dat_file_count++; + } + } + EXPECT_EQ(old_idx_file_count, 1) + << "Old directory should contain exactly 1 .idx file for the original k1 index"; + EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + // Should have 1 index file: the original k1 index must be kept (no k2 index is requested; k3 should be skipped) + EXPECT_EQ(new_idx_file_count, 1) + << "New directory should contain exactly 1 .idx files (for k1 and k2, not k3)"; + EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; + + // 16. Confirm storage format is still V1 + EXPECT_EQ(v1_schema->_inverted_index_storage_format, InvertedIndexStorageFormatPB::V1); +} + +TEST_F(IndexBuilderTest, NonNullIndexDataTest) { + // 0.
prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14681); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15681); + writer_context.tablet_id = 15681; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15681); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 3. Create a rowset writer with non-null values + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 4. 
Write non-null data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns with no null values + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 5. Prepare indexes for building - only use non-nullable fields + TOlapTableIndex index1; + index1.index_id = 1; + index1.columns.emplace_back("k1"); + index1.index_name = "k1_index"; + index1.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index1); + + // 6. Build a separate schema whose second column is non-nullable (NOTE(review): non_null_schema is not referenced again in this test - confirm intent) + TabletSchemaSPtr non_null_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(non_null_schema, KeysType::DUP_KEYS); + // Set the second column to be non-nullable explicitly + TabletColumn& k2_column = non_null_schema->mutable_column(1); + k2_column.set_is_nullable(false); + + // 7. Create IndexBuilder (it receives _tablet, _columns and _alter_indexes, not non_null_schema) + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, false); + + // 8. Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + + // 9. Build index - should trigger _add_data rather than _add_nullable + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 10.
Verify results + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15681); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14681); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // 11. Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + } + EXPECT_EQ(new_idx_file_count, 1) << "Should have created 1 index file"; +} + +TEST_F(IndexBuilderTest, NonExistentColumnUniqueIdTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14682); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. 
Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15682); + writer_context.tablet_id = 15682; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15682); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 3. Create a rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 4. Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 5. First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + _tablet_schema->append_index(std::move(initial_index)); + + // 6. 
Prepare indexes for building - specifying column by unique_id that doesn't exist + _alter_indexes.clear(); + + // Use drop operation to test column_unique_ids path + TOlapTableIndex drop_index; + drop_index.index_id = 1; + drop_index.columns.emplace_back("non_existent_column"); + drop_index.column_unique_ids.push_back(999); // This unique ID doesn't exist + _alter_indexes.push_back(drop_index); + + // 7. Create IndexBuilder with drop operation + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, true); + + // 8. Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + + // 9. Execute drop operation - should handle non-existent column gracefully + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 10. Verify paths exists - operations should complete without errors + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15682); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14682); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); +} + +TEST_F(IndexBuilderTest, DropIndexV1FormatTest) { + // 1. Create new schema using V1 format + auto v1_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(v1_schema, KeysType::DUP_KEYS); + + // 2. Modify to V1 format + v1_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; + + // 3. 
Add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + v1_schema->append_index(std::move(initial_index)); + + // 4. Update schema in tablet + TabletMetaPB tablet_meta_pb; + _tablet_meta->to_meta_pb(&tablet_meta_pb); + + TabletSchemaPB v1_schema_pb; + v1_schema->to_schema_pb(&v1_schema_pb); + tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb); + + _tablet_meta->init_from_pb(tablet_meta_pb); + + // 5. Reinitialize tablet to use new schema + _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta, _data_dir.get()); + ASSERT_TRUE(_tablet->init().ok()); + auto tablet_path = _absolute_dir + "/" + std::to_string(15683); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 6. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 7. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15683); + writer_context.tablet_id = 15683; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = tablet_path; + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = v1_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 8. Create a rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 9. 
Write data to the rowset + { + vectorized::Block block = v1_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 10. Prepare to drop the k1 index + _alter_indexes.clear(); + TOlapTableIndex drop_index; + drop_index.index_id = 1; + drop_index.columns.emplace_back("k1"); + drop_index.index_name = "k1_index"; + drop_index.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(drop_index); + + // 11. Create IndexBuilder with drop operation + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, true); + + // 12. Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + + // 13. Execute drop operation + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 14. Verify paths exists + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok()); + EXPECT_TRUE(exists); + + // 15. 
Verify the index has been removed + std::vector<io::FileInfo> files; + bool dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->list(tablet_path, true, &files, &dir_exists).ok()); + EXPECT_TRUE(dir_exists); + + // Verify no index files in the new directory + int new_idx_file_count = 0; + int new_dat_file_count = 0; + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : files) { + std::string filename = file.file_name; + if (filename.find("15683_0_1.idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find("15683_0.dat") != std::string::npos) { + old_dat_file_count++; + } + if (filename.find("020000000000000100000000000000000000000000000000_0_1.idx") != + std::string::npos) { + new_idx_file_count++; + } + if (filename.find("020000000000000100000000000000000000000000000000_0.dat") != + std::string::npos) { + new_dat_file_count++; + } + } + // The index should have been removed + EXPECT_EQ(old_idx_file_count, 1) << "Tablet path should have 1 .idx file before drop"; + EXPECT_EQ(old_dat_file_count, 1) << "Tablet path should have 1 .dat file before drop"; + EXPECT_EQ(new_idx_file_count, 0) << "Tablet path should have no .idx file after drop"; + EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file after drop"; +} + +TEST_F(IndexBuilderTest, ResourceCleanupTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(15684); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. 
Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15684); + writer_context.tablet_id = 15684; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = tablet_path; + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 3. Create a rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 4. Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 5. 
Prepare indexes for building + TOlapTableIndex index1; + index1.index_id = 1; + index1.columns.emplace_back("k1"); + index1.index_name = "k1_index"; + index1.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index1); + + // Create a custom IndexBuilder with a spy function to test resource cleanup + class TestIndexBuilder : public IndexBuilder { + public: + TestIndexBuilder(StorageEngine& engine, TabletSharedPtr tablet, + const std::vector<TColumn>& columns, + const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, + bool is_drop_op) + : IndexBuilder(engine, tablet, columns, alter_inverted_indexes, is_drop_op) {} + + ~TestIndexBuilder() override = default; + // Override update_inverted_index_info to inject failure + Status update_inverted_index_info() override { + RETURN_IF_ERROR(IndexBuilder::update_inverted_index_info()); + // Create a fake error to trigger cleanup + return Status::Error<ErrorCode::INTERNAL_ERROR>("Simulated error for testing cleanup"); + } + }; + + // 6. Create our test builder + TestIndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, false); + + // 7. Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + + // 8. 
Build index - should fail with our simulated error + status = builder.do_build_inverted_index(); + EXPECT_FALSE(status.ok()) << "Expected failure, but got success"; + EXPECT_TRUE(status.is<ErrorCode::INTERNAL_ERROR>()) << "Expected internal error"; + EXPECT_EQ(status.to_string(), "[INTERNAL_ERROR]Simulated error for testing cleanup") + << "Error message doesn't match expected"; + + // Verify the paths haven't been modified since the operation failed + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok()); + EXPECT_TRUE(exists); + + auto rowset_id = extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat"); + EXPECT_TRUE(_engine_ref->check_rowset_id_in_unused_rowsets(rowset_id)) + << "Rowset id should be in unused rowsets"; +} + +TEST_F(IndexBuilderTest, ArrayTypeIndexTest) { + // 1. Prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14685); + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 2. 
Create tablet schema with array type + auto tablet_schema = std::make_shared<TabletSchema>(); + TabletColumn column_1(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, + FieldType::OLAP_FIELD_TYPE_INT, true); + column_1.set_unique_id(1); + column_1.set_is_key(true); + column_1.set_name("k1"); + tablet_schema->append_column(column_1); + + // Array type column + TabletColumn column_2; + column_2.set_unique_id(2); + column_2.set_is_key(false); + column_2.set_name("array_col"); + column_2.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY); + column_2.set_is_nullable(false); + // Add a primitive type for array items + TabletColumn array_item_column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, + FieldType::OLAP_FIELD_TYPE_VARCHAR, true); + array_item_column.set_is_key(false); + array_item_column.set_length(64); + column_2.add_sub_column(array_item_column); + tablet_schema->append_column(column_2); + + // 3. Create tablet + auto tablet_meta = create_tablet_meta(); + auto tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, _data_dir.get()); + tablet->_tablet_path = tablet_path; + ASSERT_TRUE(tablet->init().ok()); + + // 4. Add inverted index for array column + _columns.clear(); + TColumn tc1; + tc1.column_name = "array_col"; + _columns.push_back(tc1); + + _alter_indexes.clear(); + TOlapTableIndex tt_index; + tt_index.index_id = 1; + tt_index.index_name = "array_index"; + tt_index.columns.emplace_back("array_col"); + tt_index.column_unique_ids.push_back(2); + tt_index.index_type = TIndexType::type::INVERTED; + _alter_indexes.push_back(tt_index); + + // 5. 
Create a rowset writer + RowsetWriterContext writer_context; + writer_context.rowset_id.init(14685); + writer_context.tablet_id = 14685; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = tablet_path; + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 6. Create rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 7. Create data block and write data + { + vectorized::Block block = tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Prepare columns for k1 and array_col + for (int i = 0; i < 1000; i++) { + // k1 column (int) + int32_t k1 = i; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // array_col column + // Create array data with 1-5 elements + int array_size = i % 5 + 1; + + // For array type, we need to create a complex nested column structure + auto& array_col = static_cast<vectorized::ColumnArray&>(*columns[1]); + vectorized::Array arr; + // Add string elements to the array + for (int j = 0; j < array_size; j++) { + std::string val = "item_" + std::to_string(i) + "_" + std::to_string(j); + arr.push_back(vectorized::Field(val)); + } + array_col.insert(arr); + } + + // Add block to rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + } + + // 8. 
Build rowset + RowsetSharedPtr rowset; + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + ASSERT_TRUE(rowset != nullptr); + ASSERT_TRUE(tablet->add_rowset(rowset).ok()); + + // 9. Initialize and build inverted index + IndexBuilder builder(*_engine_ref, tablet, _columns, _alter_indexes, false); + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 10. Verify that the index has been created + std::string segment_path = local_segment_path( + tablet->tablet_path(), + extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat").to_string(), + 0); + + if (tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { + // V1 format + auto index_path = InvertedIndexDescriptor::get_index_file_path_v1( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), 1, ""); + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok()); + EXPECT_TRUE(exists) << "Index file not found: " << index_path; + } else { + // V2+ format + auto index_path = InvertedIndexDescriptor::get_index_file_path_v2( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path)); + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok()); + EXPECT_TRUE(exists) << "Index file not found: " << index_path; + } +} + +TEST_F(IndexBuilderTest, UniqueKeysTableIndexTest) { + // 0. 
prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14688); + _tablet->_tablet_path = tablet_path; + _tablet->_tablet_meta->_schema = _tablet_schema; + _tablet->_tablet_meta->_schema->_keys_type = KeysType::UNIQUE_KEYS; + _tablet->_tablet_meta->_enable_unique_key_merge_on_write = true; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int rows_per_segment = 500; + + // 2. Create a rowset writer context with segment size set to trigger multiple segments + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15677); + writer_context.tablet_id = 15677; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = tablet_path; + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + // Set small segment size to ensure we create multiple segments + writer_context.max_rows_per_segment = rows_per_segment; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < 1000; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + 
ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 6. Create test class that overrides methods to simulate unique key table behavior + class TestIndexBuilder : public IndexBuilder { + public: + TestIndexBuilder(StorageEngine& engine, TabletSharedPtr tablet, + const std::vector<TColumn>& columns, + const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, + bool is_drop_op) + : IndexBuilder(engine, tablet, columns, alter_inverted_indexes, is_drop_op) {} + + ~TestIndexBuilder() override = default; + + // Override to make sure modify_rowsets with UNIQUE_KEYS path is called + Status modify_rowsets(const Merger::Statistics* stats = nullptr) override { + // Call parent method which should use the UNIQUE_KEYS path + return IndexBuilder::modify_rowsets(stats); + } + }; + + _alter_indexes.clear(); + TOlapTableIndex tt_index; + tt_index.index_id = 1; + tt_index.index_name = "k1_index"; + tt_index.columns.emplace_back("k1"); + tt_index.column_unique_ids.push_back(1); + tt_index.index_type = TIndexType::type::INVERTED; + _alter_indexes.push_back(tt_index); + // 7. Initialize and build inverted index + TestIndexBuilder builder(*_engine_ref, _tablet, _columns, _alter_indexes, false); + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 8. Execute build index, which should go through UNIQUE_KEYS path in modify_rowsets + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 9. 
Verify that the index was created successfully + std::string segment_path = local_segment_path( + _tablet->tablet_path(), + extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat").to_string(), + 0); + + if (_tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { + auto index_path = InvertedIndexDescriptor::get_index_file_path_v1( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), 1, ""); + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok()); + EXPECT_TRUE(exists) << "Index file not found: " << index_path; + } else { + auto index_path = InvertedIndexDescriptor::get_index_file_path_v2( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path)); + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok()); + EXPECT_TRUE(exists) << "Index file not found: " << index_path; + } +} + +TEST_F(IndexBuilderTest, HandleSingleRowsetErrorTest) { + // 1. Create a test class that overrides handle_single_rowset to simulate error scenarios + class TestIndexBuilder : public IndexBuilder { + public: + TestIndexBuilder(StorageEngine& engine, TabletSharedPtr tablet, + const std::vector<TColumn>& columns, + const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, + bool is_drop_op, bool simulate_non_local_rowset_error = false) + : IndexBuilder(engine, tablet, columns, alter_inverted_indexes, is_drop_op), + _simulate_non_local_rowset_error(simulate_non_local_rowset_error) {} + + ~TestIndexBuilder() override = default; + + // Override to simulate error conditions + Status handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta, + std::vector<segment_v2::SegmentSharedPtr>& segments) override { + if (_simulate_non_local_rowset_error) { + // Simulate the condition where is_local_rowset is false + return Status::InternalError("should be local rowset. 
tablet_id={} rowset_id={}", + 123, "test_rowset_id"); + } + + // Call parent method for normal processing + return IndexBuilder::handle_single_rowset(output_rowset_meta, segments); + } + + private: + bool _simulate_non_local_rowset_error; + }; + + // 2. Prepare tablet path + std::string tablet_path = _absolute_dir + "/" + std::to_string(14687); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 3. Set up tablet schema and tablet + TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(tablet_schema, KeysType::DUP_KEYS, 2); + + auto tablet_meta = create_tablet_meta(); + tablet_meta->_schema = tablet_schema; + auto tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, _data_dir.get()); + tablet->_tablet_path = tablet_path; + ASSERT_TRUE(tablet->init().ok()); + + // 4. Create inverted index definition + _alter_indexes.clear(); + TOlapTableIndex tt_index; + tt_index.index_id = 1; + tt_index.index_name = "k1_index"; + tt_index.columns.emplace_back("k1"); + tt_index.column_unique_ids.push_back(1); + tt_index.index_type = TIndexType::type::INVERTED; + _alter_indexes.push_back(tt_index); + + // 5. 
Create a rowset + RowsetWriterContext writer_context; + writer_context.rowset_id = _engine_ref->next_rowset_id(); + writer_context.tablet_id = 14687; + writer_context.tablet_path = tablet_path; + writer_context.tablet_schema_hash = 1111; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.segments_overlap = NONOVERLAPPING; + writer_context.tablet_schema = tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + auto result = tablet->create_rowset_writer(writer_context, false); + EXPECT_TRUE(result.has_value()) << result.error(); + auto rowset_writer = std::move(result).value(); + + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < 1000; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + RowsetSharedPtr rowset; + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(tablet->add_rowset(rowset).ok()); + } + + // 6. 
Test error scenario with non-local rowset + TestIndexBuilder builder(*_engine_ref, tablet, _columns, _alter_indexes, false, true); + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // Execute build_index, which should fail due to simulated error + status = builder.do_build_inverted_index(); + EXPECT_FALSE(status.ok()) << "Expected failure but got success"; + EXPECT_TRUE(status.is<ErrorCode::INTERNAL_ERROR>()) + << "Expected internal error but got: " << status.to_string(); + EXPECT_TRUE(status.to_string().find("should be local rowset") != std::string::npos) + << "Error message doesn't match expected: " << status.to_string(); +} + +TEST_F(IndexBuilderTest, UpdateInvertedIndexInfoErrorTest) { + // 1. Create a test class that overrides update_inverted_index_info to simulate error scenarios + class TestIndexBuilder : public IndexBuilder { + public: + TestIndexBuilder(StorageEngine& engine, TabletSharedPtr tablet, + const std::vector<TColumn>& columns, + const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, + bool is_drop_op, int error_type = 0) + : IndexBuilder(engine, tablet, columns, alter_inverted_indexes, is_drop_op), + _error_type(error_type) {} + + ~TestIndexBuilder() override = default; + + // Override update_inverted_index_info to inject errors + Status update_inverted_index_info() override { + if (_error_type == 1) { + // Simulate non-local rowset error in update_inverted_index_info + return Status::InternalError("should be local rowset. tablet_id={} rowset_id={}", + 123, "test_rowset_id"); + } else if (_error_type == 2) { + // Simulate size retrieval error + return Status::Error<ErrorCode::INIT_FAILED>("debug point: get fs failed"); + } + + // Call parent method for normal processing + return IndexBuilder::update_inverted_index_info(); + } + + private: + int _error_type; // 0: no error, 1: non-local rowset error, 2: size retrieval error + }; + + // 2. 
Prepare tablet path + std::string tablet_path = _absolute_dir + "/" + std::to_string(14688); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 3. Set up tablet schema and tablet + TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(tablet_schema, KeysType::DUP_KEYS, 2); + + auto tablet_meta = create_tablet_meta(); + tablet_meta->_schema = tablet_schema; + auto tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, _data_dir.get()); + tablet->_tablet_path = tablet_path; + ASSERT_TRUE(tablet->init().ok()); + + // 4. Create inverted index definition + _alter_indexes.clear(); + TOlapTableIndex tt_index; + tt_index.index_id = 1; + tt_index.index_name = "k1_index"; + tt_index.columns.emplace_back("k1"); + tt_index.column_unique_ids.push_back(1); + tt_index.index_type = TIndexType::type::INVERTED; + _alter_indexes.push_back(tt_index); + + // 5. 
Create a rowset + RowsetWriterContext writer_context; + writer_context.rowset_id = _engine_ref->next_rowset_id(); + writer_context.tablet_id = 14688; + writer_context.tablet_path = tablet_path; + writer_context.tablet_schema_hash = 1111; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.segments_overlap = NONOVERLAPPING; + writer_context.tablet_schema = tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + auto result = tablet->create_rowset_writer(writer_context, false); + EXPECT_TRUE(result.has_value()) << result.error(); + auto rowset_writer = std::move(result).value(); + + // Write data + { + vectorized::Block block = tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < 1000; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + RowsetSharedPtr rowset; + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(tablet->add_rowset(rowset).ok()); + } + // 6. 
Test error scenarios + + // 6.1 Test non-local rowset error + { + TestIndexBuilder builder(*_engine_ref, tablet, _columns, _alter_indexes, false, 1); + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // Execute build_index, which should fail due to simulated error + status = builder.do_build_inverted_index(); + EXPECT_FALSE(status.ok()) << "Expected failure but got success"; + EXPECT_TRUE(status.is<ErrorCode::INTERNAL_ERROR>()) + << "Expected internal error but got: " << status.to_string(); + EXPECT_TRUE(status.to_string().find("should be local rowset") != std::string::npos) + << "Error message doesn't match expected: " << status.to_string(); + } + + // 6.2 Test size retrieval error + { + TestIndexBuilder builder(*_engine_ref, tablet, _columns, _alter_indexes, false, 2); + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // Execute build_index, which should fail due to simulated error + status = builder.do_build_inverted_index(); + EXPECT_FALSE(status.ok()) << "Expected failure but got success"; + EXPECT_TRUE(status.is<ErrorCode::INIT_FAILED>()) + << "Expected INIT_FAILED but got: " << status.to_string(); + EXPECT_TRUE(status.to_string().find("debug point: get fs failed") != std::string::npos) + << "Error message doesn't match expected: " << status.to_string(); + } +} + +} // namespace doris diff --git a/regression-test/data/inverted_index_p0/index_change/test_index_change_on_new_column.out b/regression-test/data/inverted_index_p0/index_change/test_index_change_on_new_column.out index 78a797ead64..d538508a204 100644 Binary files a/regression-test/data/inverted_index_p0/index_change/test_index_change_on_new_column.out and b/regression-test/data/inverted_index_p0/index_change/test_index_change_on_new_column.out differ diff --git a/regression-test/suites/inverted_index_p0/index_change/test_index_change_on_new_column.groovy 
b/regression-test/suites/inverted_index_p0/index_change/test_index_change_on_new_column.groovy index c6b3ec8df2d..170a7e0afaf 100644 --- a/regression-test/suites/inverted_index_p0/index_change/test_index_change_on_new_column.groovy +++ b/regression-test/suites/inverted_index_p0/index_change/test_index_change_on_new_column.groovy @@ -99,5 +99,93 @@ suite("test_index_change_on_new_column") { assertEquals(show_result[0][2], "idx_s1") qt_select2 """ SELECT * FROM ${tableName} order by id; """ - qt_select3 """ SELECT * FROM ${tableName} where s1 match 'welcome'; """ + qt_select3 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s1 match 'welcome'; """ + + tableName = "test_index_change_on_new_column1" + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `id` INT COMMENT "", + `s` STRING COMMENT "", + INDEX idx_s(s) USING INVERTED + ) + DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) + PROPERTIES ( "replication_num" = "1" ); + """ + + sql """ INSERT INTO ${tableName} VALUES + (1, 'hello world') + """ + + // add new column + sql """ alter table ${tableName} add column s1 varchar(50) default null after s; """ + + qt_select1 """ SELECT * FROM ${tableName}; """ + + // create inverted index on new column + sql """ alter table ${tableName} add index idx_s1(s1) USING INVERTED PROPERTIES('parser' = 'english')""" + wait_for_latest_op_on_table_finish(tableName, timeout) + + sql """ INSERT INTO ${tableName} VALUES + (2, 'hello wold', 'welcome to the world') + """ + // build inverted index on new column + if (!isCloudMode()) { + sql """ build index idx_s1 on ${tableName} """ + wait_for_build_index_on_partition_finish(tableName, timeout) + } + + show_result = sql "show index from ${tableName}" + logger.info("show index from " + tableName + " result: " + show_result) + assertEquals(show_result.size(), 2) + assertEquals(show_result[0][2], "idx_s") + assertEquals(show_result[1][2], 
"idx_s1") + qt_select2 """ SELECT * FROM ${tableName} order by id; """ + qt_select3 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s1 match 'welcome'; """ + qt_select4 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s match 'hello world'; """ + + tableName = "test_index_change_on_new_column1_index_v1" + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `id` INT COMMENT "", + `s` STRING COMMENT "", + INDEX idx_s(s) USING INVERTED + ) + DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) + PROPERTIES ( "inverted_index_storage_format" = "v1", "replication_num" = "1" ); + """ + + sql """ INSERT INTO ${tableName} VALUES + (1, 'hello world') + """ + + // add new column + sql """ alter table ${tableName} add column s1 varchar(50) default null after s; """ + + qt_select1 """ SELECT * FROM ${tableName}; """ + + // create inverted index on new column + sql """ alter table ${tableName} add index idx_s1(s1) USING INVERTED PROPERTIES('parser' = 'english')""" + wait_for_latest_op_on_table_finish(tableName, timeout) + + sql """ INSERT INTO ${tableName} VALUES + (2, 'hello wold', 'welcome to the world') + """ + // build inverted index on new column + if (!isCloudMode()) { + sql """ build index idx_s1 on ${tableName} """ + wait_for_build_index_on_partition_finish(tableName, timeout) + } + + show_result = sql "show index from ${tableName}" + logger.info("show index from " + tableName + " result: " + show_result) + assertEquals(show_result.size(), 2) + assertEquals(show_result[0][2], "idx_s") + assertEquals(show_result[1][2], "idx_s1") + qt_select2 """ SELECT * FROM ${tableName} order by id; """ + qt_select3 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s1 match 'welcome'; """ + qt_select4 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM 
${tableName} where s match 'hello world'; """ } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org