This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ded28ab112e [test](inverted index) add ut tests for inverted index 
file reader (#52111)
ded28ab112e is described below

commit ded28ab112e1f4a551e3109b50327a99d4d37a10
Author: airborne12 <jiang...@selectdb.com>
AuthorDate: Mon Jun 23 12:14:53 2025 +0800

    [test](inverted index) add ut tests for inverted index file reader (#52111)
    
    add ut tests for inverted index file reader
---
 .../segment_v2/inverted_index_file_reader_test.cpp | 442 +++++++++++++++++++++
 1 file changed, 442 insertions(+)

diff --git a/be/test/olap/rowset/segment_v2/inverted_index_file_reader_test.cpp 
b/be/test/olap/rowset/segment_v2/inverted_index_file_reader_test.cpp
new file mode 100644
index 00000000000..5a379c2cea8
--- /dev/null
+++ b/be/test/olap/rowset/segment_v2/inverted_index_file_reader_test.cpp
@@ -0,0 +1,442 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/inverted_index_file_reader.h"
+
+#include <CLucene.h>
+#include <gtest/gtest.h>
+#include <unistd.h>
+
+#include <filesystem>
+#include <memory>
+#include <string>
+
+#include "io/fs/local_file_system.h"
+#include "olap/data_dir.h"
+#include "olap/options.h"
+#include "olap/rowset/segment_v2/inverted_index_cache.h"
+#include "olap/rowset/segment_v2/inverted_index_desc.h"
+#include "olap/rowset/segment_v2/inverted_index_file_writer.h"
+#include "olap/rowset/segment_v2/inverted_index_fs_directory.h"
+#include "olap/storage_engine.h"
+#include "olap/tablet_schema.h"
+#include "runtime/exec_env.h"
+
+namespace doris::segment_v2 {
+
+class MockTabletIndex : public TabletIndex {
+public:
+    MockTabletIndex(int64_t index_id, std::string index_suffix) {
+        _index_id = index_id;
+        _escaped_index_suffix_path = std::move(index_suffix);
+        _index_type = IndexType::INVERTED;
+        _index_name = "test_index";
+        _col_unique_ids.push_back(index_id); // Add some dummy column uid
+    }
+
+    int64_t index_id() const { return _index_id; }
+    const std::string& get_index_suffix() const { return 
_escaped_index_suffix_path; }
+};
+
+class InvertedIndexFileReaderTest : public testing::Test {
+public:
+    const std::string kTestDir = "./ut_dir/inverted_index_file_reader_test";
+    constexpr static std::string_view tmp_dir = "./ut_dir/tmp";
+
+    void SetUp() override {
+        char buffer[MAX_PATH_LEN];
+        ASSERT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
+        std::string current_dir = std::string(buffer);
+        std::string absolute_dir = current_dir + "/" + kTestDir;
+
+        auto st = 
io::global_local_filesystem()->delete_directory(absolute_dir);
+        ASSERT_TRUE(st.ok()) << st;
+        st = io::global_local_filesystem()->create_directory(absolute_dir);
+        ASSERT_TRUE(st.ok()) << st;
+
+        // Initialize tmp dir for ExecEnv
+        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
+        
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok());
+        std::vector<StorePath> paths;
+        paths.emplace_back(std::string(tmp_dir), -1);
+        auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths);
+        EXPECT_TRUE(tmp_file_dirs->init().ok());
+        ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
+
+        // Initialize InvertedIndexSearcherCache
+        int64_t inverted_index_cache_limit = 1024 * 1024 * 1024;
+        _inverted_index_searcher_cache = 
std::unique_ptr<segment_v2::InvertedIndexSearcherCache>(
+                
InvertedIndexSearcherCache::create_global_instance(inverted_index_cache_limit, 
1));
+        ExecEnv::GetInstance()->set_inverted_index_searcher_cache(
+                _inverted_index_searcher_cache.get());
+
+        // Initialize StorageEngine and DataDir
+        doris::EngineOptions options;
+        auto engine = std::make_unique<StorageEngine>(options);
+        _engine_ref = engine.get();
+        _data_dir = std::make_unique<DataDir>(*_engine_ref, absolute_dir);
+        ASSERT_TRUE(_data_dir->update_capacity().ok());
+        std::unique_ptr<BaseStorageEngine> base_engine(engine.release());
+        ExecEnv::GetInstance()->set_storage_engine(std::move(base_engine));
+    }
+
+    void TearDown() override {
+        auto st = io::global_local_filesystem()->delete_directory(kTestDir);
+        ASSERT_TRUE(st.ok()) << st;
+        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
+        ExecEnv::GetInstance()->set_storage_engine(nullptr);
+    }
+
+    void create_empty_index_file(const std::string& file_path) {
+        std::filesystem::path parent_path = 
std::filesystem::path(file_path).parent_path();
+        if (!std::filesystem::exists(parent_path)) {
+            std::filesystem::create_directories(parent_path);
+        }
+
+        DorisFSDirectory* dir_ptr = DorisFSDirectoryFactory::getDirectory(
+                io::global_local_filesystem(), parent_path.string().c_str());
+        ASSERT_NE(dir_ptr, nullptr);
+        std::unique_ptr<lucene::store::Directory, DirectoryDeleter> 
directory(dir_ptr);
+
+        lucene::store::IndexOutput* output = directory->createOutput(
+                std::filesystem::path(file_path).filename().string().c_str());
+        ASSERT_NE(output, nullptr);
+        std::unique_ptr<lucene::store::IndexOutput> output_guard(output);
+
+        output_guard->close();
+        directory->close();
+    }
+
+    void create_invalid_version_file(const std::string& file_path, int32_t 
invalid_version) {
+        std::filesystem::path parent_path = 
std::filesystem::path(file_path).parent_path();
+        if (!std::filesystem::exists(parent_path)) {
+            std::filesystem::create_directories(parent_path);
+        }
+
+        DorisFSDirectory* dir_ptr = DorisFSDirectoryFactory::getDirectory(
+                io::global_local_filesystem(), parent_path.string().c_str());
+        ASSERT_NE(dir_ptr, nullptr);
+        std::unique_ptr<lucene::store::Directory, DirectoryDeleter> 
directory(dir_ptr);
+
+        lucene::store::IndexOutput* output = directory->createOutput(
+                std::filesystem::path(file_path).filename().string().c_str());
+        ASSERT_NE(output, nullptr);
+        std::unique_ptr<lucene::store::IndexOutput> output_guard(output);
+
+        output->writeInt(invalid_version);
+        output_guard->close();
+        directory->close();
+    }
+
+    void create_v2_file_with_null_bitmap(const std::string& file_path, int64_t 
index_id,
+                                         const std::string& index_suffix, bool 
has_null_bitmap) {
+        std::filesystem::path parent_path = 
std::filesystem::path(file_path).parent_path();
+        if (!std::filesystem::exists(parent_path)) {
+            std::filesystem::create_directories(parent_path);
+        }
+
+        // Create a mock TabletIndex
+        MockTabletIndex tablet_index(index_id, index_suffix);
+
+        // Get the index path prefix from the file path
+        std::string index_path_prefix = file_path;
+        size_t idx_pos = index_path_prefix.find(".idx");
+        if (idx_pos != std::string::npos) {
+            index_path_prefix = index_path_prefix.substr(0, idx_pos);
+        }
+
+        // Create file writer for V2 format
+        io::FileWriterPtr file_writer;
+        io::FileWriterOptions opts;
+        Status st = io::global_local_filesystem()->create_file(file_path, 
&file_writer, &opts);
+        ASSERT_TRUE(st.ok()) << st.msg();
+
+        // Create InvertedIndexFileWriter
+        auto writer = std::make_unique<InvertedIndexFileWriter>(
+                io::global_local_filesystem(), index_path_prefix, 
"test_rowset", 0,
+                InvertedIndexStorageFormatPB::V2, std::move(file_writer));
+
+        // Open the writer and get directory
+        auto open_result = writer->open(&tablet_index);
+        ASSERT_TRUE(open_result.has_value()) << open_result.error().msg();
+        auto dir = open_result.value();
+
+        // Create test index files
+        auto test_output =
+                
std::unique_ptr<lucene::store::IndexOutput>(dir->createOutput("test_index_file"));
+        test_output->writeString("test_data");
+        test_output->close();
+
+        // Create null bitmap file if needed
+        if (has_null_bitmap) {
+            std::string null_bitmap_file =
+                    
std::string(InvertedIndexDescriptor::get_temporary_null_bitmap_file_name());
+            auto null_output = std::unique_ptr<lucene::store::IndexOutput>(
+                    dir->createOutput(null_bitmap_file.c_str()));
+            // Write enough data to indicate has null (> 5 bytes)
+            null_output->writeBytes(reinterpret_cast<const 
uint8_t*>("null_bitmap_data"), 16);
+            null_output->close();
+        } else {
+            // Create a small null bitmap file to indicate no null
+            std::string null_bitmap_file =
+                    
std::string(InvertedIndexDescriptor::get_temporary_null_bitmap_file_name());
+            auto null_output = std::unique_ptr<lucene::store::IndexOutput>(
+                    dir->createOutput(null_bitmap_file.c_str()));
+            // Write small data to indicate no null (<= 5 bytes)
+            null_output->writeBytes(reinterpret_cast<const uint8_t*>("123"), 
3);
+            null_output->close();
+        }
+
+        dir->close();
+
+        // Write and close the file - only call close(), not write()
+        st = writer->close();
+        ASSERT_TRUE(st.ok()) << st.msg();
+    }
+
+    void create_v2_file_with_small_null_bitmap(const std::string& file_path, 
int64_t index_id,
+                                               const std::string& 
index_suffix) {
+        create_v2_file_with_null_bitmap(file_path, index_id, index_suffix, 
false);
+    }
+
+private:
+    StorageEngine* _engine_ref = nullptr;
+    std::unique_ptr<DataDir> _data_dir = nullptr;
+    std::unique_ptr<InvertedIndexSearcherCache> _inverted_index_searcher_cache;
+    constexpr static uint32_t MAX_PATH_LEN = 1024;
+};
+
+// Test case for CL_ERR_EmptyIndexSegment error handling
+TEST_F(InvertedIndexFileReaderTest, TestEmptyIndexSegmentError) {
+    std::string index_path = kTestDir + "/empty_index_file";
+    create_empty_index_file(index_path + ".idx");
+
+    InvertedIndexFileInfo file_info;
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V2, 
file_info);
+
+    Status status = reader.init(4096);
+    EXPECT_FALSE(status.ok());
+    EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_BYPASS);
+}
+
+// Test case for unknown inverted index format error
+TEST_F(InvertedIndexFileReaderTest, TestUnknownIndexFormatError) {
+    std::string index_path = kTestDir + "/invalid_version_index_file";
+    create_invalid_version_file(index_path + ".idx",
+                                1); // Version < V2 to trigger unknown format 
error
+
+    InvertedIndexFileInfo file_info;
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V2, 
file_info);
+
+    Status status = reader.init(4096);
+    EXPECT_FALSE(status.ok());
+    EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_CLUCENE_ERROR);
+    // The error message could be either "unknown inverted index format" or 
CLucene error
+    // since reading invalid data may trigger CLucene exceptions
+    EXPECT_TRUE(status.msg().find("unknown inverted index format") != 
std::string::npos ||
+                status.msg().find("CLuceneError") != std::string::npos);
+}
+
+// Test case for V1 format file not found error
+TEST_F(InvertedIndexFileReaderTest, TestV1FileNotFoundError) {
+    std::string index_path = kTestDir + "/non_existent";
+
+    InvertedIndexFileInfo file_info;
+    auto* index_info = file_info.add_index_info();
+    index_info->set_index_id(1);
+    index_info->set_index_suffix("test");
+    index_info->set_index_file_size(100);
+
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V1, 
file_info);
+
+    MockTabletIndex tablet_index(1, "test");
+    auto result = reader.open(&tablet_index);
+    EXPECT_FALSE(result.has_value());
+    EXPECT_EQ(result.error().code(), ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND);
+}
+
+// Test case for V2 format stream is nullptr error
+TEST_F(InvertedIndexFileReaderTest, TestV2StreamNullptrError) {
+    std::string index_path = kTestDir + "/null_stream";
+
+    InvertedIndexFileInfo file_info;
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V2, 
file_info);
+
+    MockTabletIndex tablet_index(1, "test");
+    auto result = reader.open(&tablet_index);
+    EXPECT_FALSE(result.has_value());
+    EXPECT_EQ(result.error().code(), ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND);
+    EXPECT_TRUE(result.error().msg().find("stream is nullptr") != 
std::string::npos);
+}
+
+// Test case for index not found in V2 format
+TEST_F(InvertedIndexFileReaderTest, TestV2IndexNotFoundError) {
+    std::string index_path = kTestDir + "/v2_no_index";
+    create_v2_file_with_null_bitmap(index_path + ".idx", 100, "other", false);
+
+    InvertedIndexFileInfo file_info;
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V2, 
file_info);
+
+    Status status = reader.init(4096);
+    EXPECT_TRUE(status.ok());
+
+    MockTabletIndex tablet_index(1, "test");
+    auto result = reader.open(&tablet_index);
+    EXPECT_FALSE(result.has_value());
+    EXPECT_EQ(result.error().code(), ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND);
+    EXPECT_TRUE(result.error().msg().find("No index with id") != 
std::string::npos);
+}
+
+// Test case for index_file_exist method error handling
+TEST_F(InvertedIndexFileReaderTest, TestIndexFileExistV2StreamNullptr) {
+    std::string index_path = kTestDir + "/exist_test";
+
+    InvertedIndexFileInfo file_info;
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V2, 
file_info);
+
+    MockTabletIndex tablet_index(1, "test");
+    bool res = true;
+    Status status = reader.index_file_exist(&tablet_index, &res);
+    EXPECT_FALSE(status.ok());
+    EXPECT_FALSE(res);
+    EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND);
+}
+
+// Test case for has_null method with V1 format
+TEST_F(InvertedIndexFileReaderTest, TestHasNullV1Format) {
+    std::string index_path = kTestDir + "/has_null_v1";
+
+    InvertedIndexFileInfo file_info;
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V1, 
file_info);
+
+    MockTabletIndex tablet_index(1, "test");
+    bool res = false;
+    Status status = reader.has_null(&tablet_index, &res);
+    EXPECT_TRUE(status.ok());
+    EXPECT_TRUE(res); // V1 always returns true
+}
+
+// Test case for has_null method with V2 format and null bitmap
+TEST_F(InvertedIndexFileReaderTest, TestHasNullV2WithNullBitmap) {
+    std::string index_path = kTestDir + "/has_null_v2";
+    create_v2_file_with_null_bitmap(index_path + ".idx", 1, "test", true);
+
+    InvertedIndexFileInfo file_info;
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V2, 
file_info);
+
+    Status init_status = reader.init(4096);
+    EXPECT_TRUE(init_status.ok());
+
+    MockTabletIndex tablet_index(1, "test");
+    bool res = false;
+    Status status = reader.has_null(&tablet_index, &res);
+    EXPECT_TRUE(status.ok());
+    EXPECT_TRUE(res); // Should have null bitmap
+}
+
+// Test case for has_null method with V2 format and small null bitmap
+TEST_F(InvertedIndexFileReaderTest, TestHasNullV2WithSmallNullBitmap) {
+    std::string index_path = kTestDir + "/has_null_v2_small";
+    create_v2_file_with_small_null_bitmap(index_path + ".idx", 1, "test");
+
+    InvertedIndexFileInfo file_info;
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V2, 
file_info);
+
+    Status init_status = reader.init(4096);
+    EXPECT_TRUE(init_status.ok());
+
+    MockTabletIndex tablet_index(1, "test");
+    bool res = true;
+    Status status = reader.has_null(&tablet_index, &res);
+    EXPECT_TRUE(status.ok());
+    EXPECT_FALSE(res); // Small bitmap should return false
+}
+
+// Test case for has_null method with V2 format stream nullptr
+TEST_F(InvertedIndexFileReaderTest, TestHasNullV2StreamNullptr) {
+    std::string index_path = kTestDir + "/has_null_stream_null";
+
+    InvertedIndexFileInfo file_info;
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V2, 
file_info);
+
+    MockTabletIndex tablet_index(1, "test");
+    bool res = true;
+    Status status = reader.has_null(&tablet_index, &res);
+    EXPECT_FALSE(status.ok());
+    EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND);
+}
+
+// Test case for has_null method with index not found
+TEST_F(InvertedIndexFileReaderTest, TestHasNullV2IndexNotFound) {
+    std::string index_path = kTestDir + "/has_null_not_found";
+    create_v2_file_with_null_bitmap(index_path + ".idx", 100, "other", false);
+
+    InvertedIndexFileInfo file_info;
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V2, 
file_info);
+
+    Status init_status = reader.init(4096);
+    EXPECT_TRUE(init_status.ok());
+
+    MockTabletIndex tablet_index(1, "test");
+    bool res = true;
+    Status status = reader.has_null(&tablet_index, &res);
+    EXPECT_TRUE(status.ok());
+    EXPECT_FALSE(res); // Index not found should return false
+}
+
+// Test case for debug_file_entries method
+TEST_F(InvertedIndexFileReaderTest, TestDebugFileEntries) {
+    std::string index_path = kTestDir + "/debug_entries";
+    create_v2_file_with_null_bitmap(index_path + ".idx", 1, "test", true);
+
+    InvertedIndexFileInfo file_info;
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V2, 
file_info);
+
+    Status init_status = reader.init(4096);
+    EXPECT_TRUE(init_status.ok());
+
+    // This should not crash and should log file entries
+    reader.debug_file_entries();
+}
+
+// Test case for get_all_directories error handling
+TEST_F(InvertedIndexFileReaderTest, TestGetAllDirectoriesError) {
+    std::string index_path = kTestDir + "/get_all_dirs";
+
+    InvertedIndexFileInfo file_info;
+    InvertedIndexFileReader reader(io::global_local_filesystem(), index_path,
+                                   InvertedIndexStorageFormatPB::V2, 
file_info);
+
+    // Don't initialize to cause empty result in get_all_directories
+    auto result = reader.get_all_directories();
+    EXPECT_TRUE(result.has_value());
+    EXPECT_TRUE(result.value().empty()); // Should be empty since no init was 
called
+}
+
+} // namespace doris::segment_v2
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to