This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch vector-index-dev
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/vector-index-dev by this push:
     new d6048f27a55 [fix] fix unstable test case (#51159)
d6048f27a55 is described below

commit d6048f27a556b3b5a2a164a679fed2bc7ee4536f
Author: zhiqiang <hezhiqi...@selectdb.com>
AuthorDate: Thu May 22 17:44:35 2025 +0800

    [fix] fix unstable test case (#51159)
    
    Search result of diff faiss index object is not same if we use batch
    insert mode.
    So i refactored the test case to make sure we can compare result of
    native faiss and vector index of doris
---
 be/src/olap/rowset/segment_v2/ann_index_reader.cpp |   4 +-
 be/src/vec/exprs/vann_topn_predicate.cpp           |   5 +-
 .../olap/vector_search/ann_index_reader_test.cpp   |   2 +-
 .../olap/vector_search/ann_index_smoke_test.cpp    |   2 +-
 .../olap/vector_search/ann_range_search_test.cpp   |  14 +-
 .../vector_search/ann_topn_descriptor_test.cpp     |  42 +-
 .../olap/vector_search/faiss_vector_index_test.cpp | 540 +++++++++------------
 be/test/olap/vector_search/native_faiss_test.cpp   |  53 +-
 be/test/olap/vector_search/vector_search_utils.cpp | 249 ++++++++++
 be/test/olap/vector_search/vector_search_utils.h   |  74 ++-
 10 files changed, 653 insertions(+), 332 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/ann_index_reader.cpp 
b/be/src/olap/rowset/segment_v2/ann_index_reader.cpp
index b1fcdee341f..0222597ea32 100644
--- a/be/src/olap/rowset/segment_v2/ann_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/ann_index_reader.cpp
@@ -39,9 +39,7 @@ void AnnIndexReader::update_result(const IndexSearchResult& 
search_result,
     for (size_t i = 0; i < limit; ++i) {
         distance[i] = search_result.distances[i];
     }
-    // Use the search result's row_ids to update the input row_id
-    // by calculating the union in-place
-    roaring &= *search_result.roaring;
+    roaring = *search_result.roaring;
 }
 
 AnnIndexReader::AnnIndexReader(const TabletIndex* index_meta,
diff --git a/be/src/vec/exprs/vann_topn_predicate.cpp 
b/be/src/vec/exprs/vann_topn_predicate.cpp
index 41098e43a07..d68086dcd3d 100644
--- a/be/src/vec/exprs/vann_topn_predicate.cpp
+++ b/be/src/vec/exprs/vann_topn_predicate.cpp
@@ -147,8 +147,9 @@ Status AnnTopNDescriptor::evaluate_vector_ann_search(
     result_column = ColumnFloat64::create();
     ColumnFloat64* result_column_float = 
assert_cast<ColumnFloat64*>(result_column.get());
 
-    result_column_float->resize(ann_query_params.distance->size());
-    for (size_t i = 0; i < ann_query_params.distance->size(); ++i) {
+    size_t num_results = ann_query_params.distance->size();
+    result_column_float->resize(num_results);
+    for (size_t i = 0; i < num_results; ++i) {
         result_column_float->get_data()[i] = (*ann_query_params.distance)[i];
     }
     row_ids = std::move(ann_query_params.row_ids);
diff --git a/be/test/olap/vector_search/ann_index_reader_test.cpp 
b/be/test/olap/vector_search/ann_index_reader_test.cpp
index b384d0c6671..e951a48c743 100644
--- a/be/test/olap/vector_search/ann_index_reader_test.cpp
+++ b/be/test/olap/vector_search/ann_index_reader_test.cpp
@@ -26,7 +26,7 @@
 
 namespace doris::segment_v2 {
 
-using namespace vec_search_mock;
+using namespace vector_search_utils;
 class AnnIndexReaderTest : public testing::Test {};
 
 TEST_F(AnnIndexReaderTest, TestLoadIndex) {
diff --git a/be/test/olap/vector_search/ann_index_smoke_test.cpp 
b/be/test/olap/vector_search/ann_index_smoke_test.cpp
index 03d1d543a27..b106a9bc15e 100644
--- a/be/test/olap/vector_search/ann_index_smoke_test.cpp
+++ b/be/test/olap/vector_search/ann_index_smoke_test.cpp
@@ -31,7 +31,7 @@
 #include "vector_index.h"
 #include "vector_search_utils.h"
 
-using namespace doris::vec_search_mock;
+using namespace doris::vector_search_utils;
 
 namespace doris {
 
diff --git a/be/test/olap/vector_search/ann_range_search_test.cpp 
b/be/test/olap/vector_search/ann_range_search_test.cpp
index 248ae3d4fd7..8b9ad847f46 100644
--- a/be/test/olap/vector_search/ann_range_search_test.cpp
+++ b/be/test/olap/vector_search/ann_range_search_test.cpp
@@ -886,14 +886,15 @@ TEST_F(VectorSearchTest, TestEvaluateAnnRangeSearch) {
     idx_to_cid[3] = 3;
     std::vector<std::unique_ptr<segment_v2::IndexIterator>> 
cid_to_index_iterators;
     cid_to_index_iterators.resize(4);
-    cid_to_index_iterators[1] = 
std::make_unique<doris::vec_search_mock::MockAnnIndexIterator>();
+    cid_to_index_iterators[1] =
+            
std::make_unique<doris::vector_search_utils::MockAnnIndexIterator>();
     std::vector<std::unique_ptr<segment_v2::ColumnIterator>> column_iterators;
     column_iterators.resize(4);
     column_iterators[3] = 
std::make_unique<doris::segment_v2::VirtualColumnIterator>();
 
     roaring::Roaring row_bitmap;
-    doris::vec_search_mock::MockAnnIndexIterator* mock_ann_index_iter =
-            dynamic_cast<doris::vec_search_mock::MockAnnIndexIterator*>(
+    doris::vector_search_utils::MockAnnIndexIterator* mock_ann_index_iter =
+            dynamic_cast<doris::vector_search_utils::MockAnnIndexIterator*>(
                     cid_to_index_iterators[1].get());
 
     // Explain:
@@ -978,14 +979,15 @@ TEST_F(VectorSearchTest, TestEvaluateAnnRangeSearch2) {
     idx_to_cid[3] = 3;
     std::vector<std::unique_ptr<segment_v2::IndexIterator>> 
cid_to_index_iterators;
     cid_to_index_iterators.resize(4);
-    cid_to_index_iterators[1] = 
std::make_unique<doris::vec_search_mock::MockAnnIndexIterator>();
+    cid_to_index_iterators[1] =
+            
std::make_unique<doris::vector_search_utils::MockAnnIndexIterator>();
     std::vector<std::unique_ptr<segment_v2::ColumnIterator>> column_iterators;
     column_iterators.resize(4);
     column_iterators[3] = 
std::make_unique<doris::segment_v2::VirtualColumnIterator>();
 
     roaring::Roaring row_bitmap;
-    doris::vec_search_mock::MockAnnIndexIterator* mock_ann_index_iter =
-            dynamic_cast<doris::vec_search_mock::MockAnnIndexIterator*>(
+    doris::vector_search_utils::MockAnnIndexIterator* mock_ann_index_iter =
+            dynamic_cast<doris::vector_search_utils::MockAnnIndexIterator*>(
                     cid_to_index_iterators[1].get());
 
     // Explain:
diff --git a/be/test/olap/vector_search/ann_topn_descriptor_test.cpp 
b/be/test/olap/vector_search/ann_topn_descriptor_test.cpp
index b08adf73c32..7437bd80411 100644
--- a/be/test/olap/vector_search/ann_topn_descriptor_test.cpp
+++ b/be/test/olap/vector_search/ann_topn_descriptor_test.cpp
@@ -116,11 +116,40 @@ TEST_F(VectorSearchTest, AnnTopNDescriptorEvaluateTopN) {
     ASSERT_TRUE(st.ok()) << fmt::format("st: {}, expr {}", st.to_string(),
                                         
predicate->get_order_by_expr_ctx()->root()->debug_string());
 
-    std::shared_ptr<std::vector<float>> query_value = 
std::make_shared<std::vector<float>>(10, 0.0);
+    const ColumnConst* const_column =
+            assert_cast<const ColumnConst*>(predicate->_query_array.get());
+    const ColumnArray* column_array =
+            assert_cast<const 
ColumnArray*>(const_column->get_data_column_ptr().get());
+    const ColumnNullable* column_nullable =
+            assert_cast<const 
ColumnNullable*>(column_array->get_data_ptr().get());
+    const ColumnFloat64* cf64 =
+            assert_cast<const 
ColumnFloat64*>(column_nullable->get_nested_column_ptr().get());
+
+    const double* query_value = cf64->get_data().data();
+    const size_t query_value_size = cf64->get_data().size();
+    ASSERT_EQ(query_value_size, 8);
+    std::vector<float> query_value_f32;
+    for (size_t i = 0; i < query_value_size; ++i) {
+        query_value_f32.push_back(static_cast<float>(query_value[i]));
+    }
+    ASSERT_FLOAT_EQ(query_value_f32[0], 1.0f) << "query_value_f32[0] = " << 
query_value_f32[0];
+    ASSERT_FLOAT_EQ(query_value_f32[1], 2.0f) << "query_value_f32[1] = " << 
query_value_f32[1];
+    ASSERT_FLOAT_EQ(query_value_f32[2], 3.0f) << "query_value_f32[2] = " << 
query_value_f32[2];
+    ASSERT_FLOAT_EQ(query_value_f32[3], 4.0f) << "query_value_f32[3] = " << 
query_value_f32[3];
+    ASSERT_FLOAT_EQ(query_value_f32[4], 5.0f) << "query_value_f32[4] = " << 
query_value_f32[4];
+    ASSERT_FLOAT_EQ(query_value_f32[5], 6.0f) << "query_value_f32[5] = " << 
query_value_f32[5];
+    ASSERT_FLOAT_EQ(query_value_f32[6], 7.0f) << "query_value_f32[6] = " << 
query_value_f32[6];
+    ASSERT_FLOAT_EQ(query_value_f32[7], 20.0f) << "query_value_f32[7] = " << 
query_value_f32[7];
+
+    std::shared_ptr<std::vector<float>> query_vector =
+            std::make_shared<std::vector<float>>(10, 0.0);
     for (size_t i = 0; i < 10; ++i) {
-        (*query_value)[i] = static_cast<float>(i);
+        (*query_vector)[i] = static_cast<float>(i);
     }
 
+    std::cout << "query_vector: " << fmt::format("[{}]", 
fmt::join(*query_vector, ","))
+              << std::endl;
+
     EXPECT_CALL(*_ann_index_iterator, read_from_index(testing::_))
             .Times(1)
             .WillOnce(testing::Invoke([](const segment_v2::IndexParam& value) {
@@ -136,17 +165,16 @@ TEST_F(VectorSearchTest, AnnTopNDescriptorEvaluateTopN) {
 
     _result_column = ColumnFloat64::create(0, 0);
     std::unique_ptr<std::vector<uint64_t>> row_ids = 
std::make_unique<std::vector<uint64_t>>();
-    // roaring is empry
+
     roaring::Roaring roaring;
     st = predicate->evaluate_vector_ann_search(_ann_index_iterator.get(), 
roaring, _result_column,
                                                row_ids);
     ColumnFloat64* result_column_float = 
assert_cast<ColumnFloat64*>(_result_column.get());
-    for (size_t i = 0; i < query_value->size(); ++i) {
-        EXPECT_EQ(result_column_float->get_data()[i], (*query_value)[i]);
+    for (size_t i = 0; i < query_vector->size(); ++i) {
+        EXPECT_EQ(result_column_float->get_data()[i], (*query_vector)[i]);
     }
     ASSERT_TRUE(st.ok());
-    ASSERT_EQ(row_ids->size(), 0);
-    ASSERT_EQ(row_ids->size(), roaring.cardinality());
+    ASSERT_EQ(row_ids->size(), 10);
 }
 
 } // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/olap/vector_search/faiss_vector_index_test.cpp 
b/be/test/olap/vector_search/faiss_vector_index_test.cpp
index 7c7b42611ec..bae49627ef2 100644
--- a/be/test/olap/vector_search/faiss_vector_index_test.cpp
+++ b/be/test/olap/vector_search/faiss_vector_index_test.cpp
@@ -31,109 +31,10 @@
 #include "vector_index.h"
 #include "vector_search_utils.h"
 
-// Generate random vectors for testing
-std::vector<float> generate_random_vector(int dim) {
-    std::vector<float> vector(dim);
-    std::random_device rd;
-    std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> dis(-1.0f, 1.0f);
-
-    for (int i = 0; i < dim; i++) {
-        vector[i] = dis(gen);
-    }
-    return vector;
-}
-
 using namespace doris::segment_v2;
 
 namespace doris::vectorized {
 
-// Helper function to create and configure a Doris Faiss index
-static std::unique_ptr<FaissVectorIndex> create_doris_index(
-        int dimension, int m,
-        FaissBuildParameter::IndexType index_type = 
FaissBuildParameter::IndexType::HNSW,
-        FaissBuildParameter::Quantilizer quantilizer = 
FaissBuildParameter::Quantilizer::FLAT) {
-    auto index = std::make_unique<FaissVectorIndex>();
-
-    FaissBuildParameter params;
-    params.d = dimension;
-    params.m = m;
-    params.index_type = index_type;
-    params.quantilizer = quantilizer;
-    index->set_build_params(params);
-
-    return index;
-}
-
-// Helper function to create a native Faiss HNSW index
-static std::unique_ptr<faiss::IndexHNSWFlat> create_native_index(int 
dimension, int m) {
-    return std::make_unique<faiss::IndexHNSWFlat>(dimension, m);
-}
-
-// Helper function to generate a batch of random vectors
-static std::vector<std::vector<float>> generate_test_vectors(int num_vectors, 
int dimension) {
-    std::vector<std::vector<float>> vectors;
-    vectors.reserve(num_vectors);
-
-    for (int i = 0; i < num_vectors; i++) {
-        vectors.push_back(generate_random_vector(dimension));
-    }
-
-    return vectors;
-}
-
-// Helper function to add vectors to both Doris and native indexes
-static void add_vectors_to_indexes(FaissVectorIndex* doris_index,
-                                   faiss::IndexHNSWFlat* native_index,
-                                   const std::vector<std::vector<float>>& 
vectors) {
-    for (size_t i = 0; i < vectors.size(); i++) {
-        auto status = doris_index->add(1, vectors[i].data());
-        ASSERT_TRUE(status.ok()) << "Failed to add vector to Doris index: " << 
status.to_string();
-
-        native_index->add(1, vectors[i].data());
-    }
-}
-
-// Helper function to print search results for comparison
-[[maybe_unused]] static void print_search_results(const IndexSearchResult& 
doris_results,
-                                                  const std::vector<float>& 
native_distances,
-                                                  const 
std::vector<faiss::idx_t>& native_indices,
-                                                  int query_idx) {
-    std::cout << "Query vector index: " << query_idx << std::endl;
-
-    std::cout << "Doris Index Results:" << std::endl;
-    for (int i = 0; i < doris_results.roaring->cardinality(); i++) {
-        std::cout << "ID: " << doris_results.roaring->getIndex(i)
-                  << ", Distance: " << doris_results.distances[i] << std::endl;
-    }
-
-    std::cout << "Native Faiss Results:" << std::endl;
-    for (size_t i = 0; i < native_indices.size(); i++) {
-        if (native_indices[i] == -1) continue;
-        std::cout << "ID: " << native_indices[i] << ", Distance: " << 
native_distances[i]
-                  << std::endl;
-    }
-}
-
-// Helper function to compare search results between Doris and native Faiss
-static void compare_search_results(const IndexSearchResult& doris_results,
-                                   const std::vector<float>& native_distances,
-                                   const std::vector<faiss::idx_t>& 
native_indices,
-                                   float epsilon = 1e-5) {
-    EXPECT_EQ(doris_results.roaring->cardinality(),
-              std::count_if(native_indices.begin(), native_indices.end(),
-                            [](faiss::idx_t id) { return id != -1; }));
-
-    for (size_t i = 0; i < native_indices.size(); i++) {
-        if (native_indices[i] == -1) continue;
-
-        EXPECT_TRUE(doris_results.roaring->contains(native_indices[i]))
-                << "ID mismatch at rank " << i;
-        EXPECT_NEAR(doris_results.distances[i], native_distances[i], epsilon)
-                << "Distance mismatch at rank " << i;
-    }
-}
-
 // Test saving and loading an index
 TEST_F(VectorSearchTest, TestSaveAndLoad) {
     // Step 1: Create first index instance
@@ -151,7 +52,7 @@ TEST_F(VectorSearchTest, TestSaveAndLoad) {
     const int num_vectors = 100;
     std::vector<float> vectors;
     for (int i = 0; i < num_vectors; i++) {
-        auto tmp = generate_random_vector(params.d);
+        auto tmp = vector_search_utils::generate_random_vector(params.d);
         vectors.insert(vectors.end(), tmp.begin(), tmp.end());
     }
 
@@ -169,7 +70,7 @@ TEST_F(VectorSearchTest, TestSaveAndLoad) {
     ASSERT_TRUE(load_status.ok()) << "Failed to load index: " << 
load_status.to_string();
 
     // Step 7: Verify the loaded index works by searching
-    auto query_vec = generate_random_vector(params.d);
+    auto query_vec = vector_search_utils::generate_random_vector(params.d);
     const int top_k = 10;
 
     IndexSearchParameters search_params;
@@ -221,132 +122,173 @@ TEST_F(VectorSearchTest, UpdateRoaring) {
 }
 
 TEST_F(VectorSearchTest, CompareResultWithNativeFaiss1) {
-    // Step 1: Create indexes
-    const int dimension = 64;
-    const int max_connections = 16;
-    auto doris_index = create_doris_index(dimension, max_connections);
-    auto native_index = create_native_index(dimension, max_connections);
-
-    // Step 2: Generate vectors and add to indexes
-    const int num_vectors = 200;
-    auto vectors = generate_test_vectors(num_vectors, dimension);
-    add_vectors_to_indexes(doris_index.get(), native_index.get(), vectors);
-
-    // Step 3: Search
-    int query_idx = num_vectors / 2;
-    const float* query_vec = vectors[query_idx].data();
-    const int top_k = 10;
-
-    // Search in Doris index
-    IndexSearchParameters search_params;
-    IndexSearchResult doris_results;
-    auto search_status =
-            doris_index->ann_topn_search(query_vec, top_k, search_params, 
doris_results);
-    ASSERT_EQ(search_status.ok(), true);
-
-    // Search in native Faiss index
-    std::vector<float> native_distances(top_k);
-    std::vector<faiss::idx_t> native_indices(top_k);
-    native_index->search(1, query_vec, top_k, native_distances.data(), 
native_indices.data());
-
-    // Step 4: Print and compare results
-    // print_search_results(doris_results, native_distances, native_indices, 
query_idx);
-    compare_search_results(doris_results, native_distances, native_indices);
+    const size_t iterations = 50;
+    // Create random number generator
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    // Define fixed parameter sets to choose from
+    const std::vector<int> dimensions = {32, 64, 128, 256};
+    const std::vector<int> max_connections = {8, 16, 32, 64};
+    const std::vector<int> vector_counts = {100, 200, 500, 1000};
+
+    for (size_t iter = 0; iter < iterations; ++iter) {
+        // Randomly select parameters from the fixed sets
+        const int dimension =
+                dimensions[std::uniform_int_distribution<>(0, 
dimensions.size() - 1)(gen)];
+        const int max_connection = 
max_connections[std::uniform_int_distribution<>(
+                0, max_connections.size() - 1)(gen)];
+        const int num_vectors =
+                vector_counts[std::uniform_int_distribution<>(0, 
vector_counts.size() - 1)(gen)];
+
+        // Step 1: Create indexes
+        auto doris_index = doris::vector_search_utils::create_doris_index(
+                doris::vector_search_utils::IndexType::HNSW, dimension, 
max_connection);
+        auto native_index = doris::vector_search_utils::create_native_index(
+                doris::vector_search_utils::IndexType::HNSW, dimension, 
max_connection);
+
+        // Step 2: Generate vectors and add to indexes
+        auto vectors =
+                
doris::vector_search_utils::generate_test_vectors_matrix(num_vectors, 
dimension);
+        
doris::vector_search_utils::add_vectors_to_indexes_serial_mode(doris_index.get(),
+                                                                       
native_index.get(), vectors);
+
+        // Step 3: Search
+        int query_idx = num_vectors / 2;
+        const float* query_vec = vectors[query_idx].data();
+        const int top_k = 10;
+
+        // Search in Doris index
+        IndexSearchParameters search_params;
+        IndexSearchResult doris_results;
+        auto search_status =
+                doris_index->ann_topn_search(query_vec, top_k, search_params, 
doris_results);
+        ASSERT_EQ(search_status.ok(), true)
+                << "Search failed with dimension=" << dimension
+                << ", max_connections=" << max_connection << ", num_vectors=" 
<< num_vectors;
+
+        // Search in native Faiss index
+        std::vector<float> native_distances(top_k);
+        std::vector<faiss::idx_t> native_indices(top_k);
+        native_index->search(1, query_vec, top_k, native_distances.data(), 
native_indices.data());
+
+        // Step 4: Compare results
+        vector_search_utils::compare_search_results(doris_results, 
native_distances,
+                                                    native_indices);
+    }
 }
 
 TEST_F(VectorSearchTest, CompareResultWithNativeFaiss2) {
-    // Step 1: Create indexes
-    const int dimension = 64;
-    const int max_connections = 16;
-    auto doris_index = create_doris_index(dimension, max_connections);
-    auto native_index = create_native_index(dimension, max_connections);
-
-    // Step 2: Generate vectors and add to indexes
-    const int num_vectors = 500;
-    auto vectors = generate_test_vectors(num_vectors, dimension);
-    add_vectors_to_indexes(doris_index.get(), native_index.get(), vectors);
-
-    // Step 3: Search
-    int query_idx = num_vectors / 2;
-    const float* query_vec = vectors[query_idx].data();
-    const int top_k = num_vectors;
-
-    // Search in Doris index
-    IndexSearchParameters search_params;
-    IndexSearchResult doris_results;
-    auto search_status =
-            doris_index->ann_topn_search(query_vec, top_k, search_params, 
doris_results);
-    ASSERT_EQ(search_status.ok(), true);
-
-    // Search in native Faiss index
-    std::vector<float> native_distances(top_k, -1);
-    std::vector<faiss::idx_t> native_indices(top_k, -1);
-    native_index->search(1, query_vec, top_k, native_distances.data(), 
native_indices.data());
-
-    // Step 4: Print and compare results
-    // print_search_results(doris_results, native_distances, native_indices, 
query_idx);
-    compare_search_results(doris_results, native_distances, native_indices);
+    const size_t iterations = 50;
+    // Create random number generator
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    // Define fixed parameter sets to choose from
+    const std::vector<int> dimensions = {32, 64, 128, 256};
+    const std::vector<int> max_connections = {8, 16, 32, 64};
+    const std::vector<int> vector_counts = {100, 200, 500, 1000};
+
+    for (size_t i = 0; i < iterations; ++i) {
+        // Randomly select parameters from the fixed sets
+        const int dimension =
+                dimensions[std::uniform_int_distribution<>(0, 
dimensions.size() - 1)(gen)];
+        const int max_connection = 
max_connections[std::uniform_int_distribution<>(
+                0, max_connections.size() - 1)(gen)];
+        const int num_vectors =
+                vector_counts[std::uniform_int_distribution<>(0, 
vector_counts.size() - 1)(gen)];
+
+        // Step 1: Create indexes
+        auto doris_index = doris::vector_search_utils::create_doris_index(
+                doris::vector_search_utils::IndexType::HNSW, dimension, 
max_connection);
+        auto native_index = doris::vector_search_utils::create_native_index(
+                doris::vector_search_utils::IndexType::HNSW, dimension, 
max_connection);
+
+        // Step 2: Generate vectors and add to indexes
+        std::vector<std::vector<float>> vectors =
+                
doris::vector_search_utils::generate_test_vectors_matrix(num_vectors, 
dimension);
+        
doris::vector_search_utils::add_vectors_to_indexes_serial_mode(doris_index.get(),
+                                                                       
native_index.get(), vectors);
+
+        // Step 3: Search
+        int query_idx = num_vectors / 2;
+        const float* query_vec = vectors[query_idx].data();
+        const int top_k = num_vectors;
+        IndexSearchParameters search_params;
+        IndexSearchResult doris_results;
+        std::ignore = doris_index->ann_topn_search(query_vec, top_k, 
search_params, doris_results);
+
+        // Search in native Faiss index
+        std::vector<float> native_distances(top_k, -1);
+        std::vector<faiss::idx_t> native_indices(top_k, -1);
+        native_index->search(1, query_vec, top_k, native_distances.data(), 
native_indices.data());
+
+        // Step 4: Compare results
+        doris::vector_search_utils::compare_search_results(doris_results, 
native_distances,
+                                                           native_indices);
+    }
 }
 
 TEST_F(VectorSearchTest, SearchAllVectors) {
-    // Step 1: Create and build index
-    auto index1 = std::make_unique<FaissVectorIndex>();
+    size_t iterations = 25;
+    for (size_t i = 0; i < iterations; ++i) {
+        // Step 1: Create and build index
+        auto index1 = std::make_unique<FaissVectorIndex>();
 
-    FaissBuildParameter params;
-    params.d = 64;
-    params.m = 32;
-    params.index_type = FaissBuildParameter::IndexType::HNSW;
-    params.quantilizer = FaissBuildParameter::Quantilizer::FLAT;
-    index1->set_build_params(params);
+        FaissBuildParameter params;
+        params.d = 64;
+        params.m = 32;
+        params.index_type = FaissBuildParameter::IndexType::HNSW;
+        params.quantilizer = FaissBuildParameter::Quantilizer::FLAT;
+        index1->set_build_params(params);
 
-    // Add 500 vectors
-    const int num_vectors = 500;
-    std::vector<float> vectors;
-    for (int i = 0; i < num_vectors; i++) {
-        auto vec = generate_random_vector(params.d);
-        vectors.insert(vectors.end(), vec.begin(), vec.end());
-    }
+        // Add 500 vectors
+        const int num_vectors = 500;
+        std::vector<float> vectors;
+        for (int i = 0; i < num_vectors; i++) {
+            auto vec = 
doris::vector_search_utils::generate_random_vector(params.d);
+            vectors.insert(vectors.end(), vec.begin(), vec.end());
+        }
 
-    ASSERT_EQ(index1->add(500, vectors.data()).ok(), true);
+        ASSERT_EQ(index1->add(500, vectors.data()).ok(), true);
 
-    // Save index
-    ASSERT_TRUE(index1->save(_ram_dir.get()).ok());
+        // Save index
+        ASSERT_TRUE(index1->save(_ram_dir.get()).ok());
 
-    // Step 2: Load index
-    auto index2 = std::make_unique<FaissVectorIndex>();
-    ASSERT_TRUE(index2->load(_ram_dir.get()).ok());
+        // Step 2: Load index
+        auto index2 = std::make_unique<FaissVectorIndex>();
+        ASSERT_TRUE(index2->load(_ram_dir.get()).ok());
 
-    // Step 3: Search all vectors
-    IndexSearchParameters search_params;
-    IndexSearchResult search_result;
-
-    // Search for all vectors - use a vector we know is in the index
-    std::vector<float> query_vec {vectors.begin(),
-                                  vectors.begin() + params.d}; // Use the 
first vector we added
-    const int top_k = num_vectors;                             // Get all 
vectors
-
-    ASSERT_EQ(index2->ann_topn_search(query_vec.data(), top_k, search_params, 
search_result).ok(),
-              true);
-    std::cout << "Search returned " << search_result.roaring->cardinality() << 
" results\n";
-    // Step 4: Verify we got all vectors back
-    // Note: In practical ANN search with approximate algorithms like HNSW,
-    // we might not get exactly all vectors due to the nature of approximate 
search.
-    // So we verify we got a reasonable number back.
-    EXPECT_GE(search_result.roaring->cardinality(), num_vectors * 0.60)
-            << "Expected to find at least 60% of all vectors";
-
-    // Also verify the first result is the query vector itself (it should be 
an exact match)
-    ASSERT_EQ(search_result.roaring->isEmpty(), false) << "Search result 
should not be empty";
-    size_t first = search_result.roaring->getIndex(0);
-    std::vector<float> first_result_vec(vectors.begin() + first * params.d,
-                                        vectors.begin() + (first + 1) * 
params.d);
-    std::string query_vec_str = fmt::format("[{}]", fmt::join(query_vec, ","));
-    std::string first_result_vec_str = fmt::format("[{}]", 
fmt::join(first_result_vec, ","));
-    EXPECT_EQ(first_result_vec, query_vec) << "First result should be the 
query vector itself";
+        // Step 3: Search all vectors
+        IndexSearchParameters search_params;
+        IndexSearchResult search_result;
+
+        // Search for all vectors - use a vector we know is in the index
+        std::vector<float> query_vec {vectors.begin(),
+                                      vectors.begin() + params.d}; // Use the 
first vector we added
+        const int top_k = num_vectors;                             // Get all 
vectors
+
+        ASSERT_EQ(
+                index2->ann_topn_search(query_vec.data(), top_k, 
search_params, search_result).ok(),
+                true);
+        // Step 4: Verify we got all vectors back
+        // Note: In practical ANN search with approximate algorithms like HNSW,
+        // we might not get exactly all vectors due to the nature of 
approximate search.
+        // So we verify we got a reasonable number back.
+        EXPECT_GE(search_result.roaring->cardinality(), num_vectors * 0.60)
+                << "Expected to find at least 60% of all vectors";
+
+        // Also verify the first result is the query vector itself (it should 
be an exact match)
+        ASSERT_EQ(search_result.roaring->isEmpty(), false) << "Search result 
should not be empty";
+        size_t first = search_result.roaring->getIndex(0);
+        std::vector<float> first_result_vec(vectors.begin() + first * params.d,
+                                            vectors.begin() + (first + 1) * 
params.d);
+        std::string query_vec_str = fmt::format("[{}]", fmt::join(query_vec, 
","));
+        std::string first_result_vec_str = fmt::format("[{}]", 
fmt::join(first_result_vec, ","));
+        EXPECT_EQ(first_result_vec, query_vec) << "First result should be the 
query vector itself";
+    }
 }
 
 TEST_F(VectorSearchTest, CompRangeSearch) {
-    size_t iterations = 50;
+    size_t iterations = 25;
     for (size_t i = 0; i < iterations; ++i) {
         // Random parameters for each test iteration
         std::random_device rd;
@@ -367,35 +309,36 @@ TEST_F(VectorSearchTest, CompRangeSearch) {
         index1->set_build_params(params);
 
         const int num_vectors = random_n;
-        std::vector<float> vectors;
+        std::vector<std::vector<float>> vectors;
         for (int i = 0; i < num_vectors; i++) {
-            auto vec = generate_random_vector(params.d);
-            vectors.insert(vectors.end(), vec.begin(), vec.end());
+            auto vec = vector_search_utils::generate_random_vector(params.d);
+            vectors.push_back(vec);
         }
         std::unique_ptr<faiss::Index> native_index =
                 std::make_unique<faiss::IndexHNSWFlat>(params.d, params.m);
-        native_index->add(num_vectors, vectors.data());
-        std::ignore = index1->add(num_vectors, vectors.data());
+        
doris::vector_search_utils::add_vectors_to_indexes_serial_mode(index1.get(),
+                                                                       
native_index.get(), vectors);
 
-        std::vector<float> query_vec {vectors.begin(),
-                                      vectors.begin() + params.d}; // Use the 
first vector we added
+        std::vector<float> query_vec = vectors.front();
 
         std::vector<std::pair<size_t, float>> distances(num_vectors);
         for (int i = 0; i < num_vectors; i++) {
             double sum = 0;
+            auto& vec = vectors[i];
             for (int j = 0; j < params.d; j++) {
-                accumulate(vectors[i * params.d + j], query_vec[j], sum);
+                accumulate(vec[j], query_vec[j], sum);
             }
             distances[i] = std::make_pair(i, finalize(sum));
         }
         std::sort(distances.begin(), distances.end(),
                   [](const auto& a, const auto& b) { return a.second < 
b.second; });
-        // Use the median distance as the radius
-        float radius = distances[num_vectors / 2].second;
+
+        float radius = distances[num_vectors / 4].second;
 
         HNSWSearchParameters hnsw_params;
         hnsw_params.ef_search = 16;    // Set efSearch for better accuracy
         hnsw_params.roaring = nullptr; // No selector for this test
+        hnsw_params.is_le_or_lt = true;
         IndexSearchResult doris_result;
         std::ignore = index1->range_search(query_vec.data(), radius, 
hnsw_params, doris_result);
 
@@ -452,30 +395,31 @@ TEST_F(VectorSearchTest, RangeSearchNoSelector1) {
         index1->set_build_params(params);
 
         const int num_vectors = random_n;
-        std::vector<float> vectors;
+        std::vector<std::vector<float>> vectors;
         for (int i = 0; i < num_vectors; i++) {
-            auto vec = generate_random_vector(params.d);
-            vectors.insert(vectors.end(), vec.begin(), vec.end());
+            auto vec = vector_search_utils::generate_random_vector(params.d);
+            vectors.push_back(vec);
         }
+        std::unique_ptr<faiss::Index> native_index =
+                std::make_unique<faiss::IndexHNSWFlat>(params.d, params.m);
+        
doris::vector_search_utils::add_vectors_to_indexes_serial_mode(index1.get(),
+                                                                       
native_index.get(), vectors);
+
+        std::vector<float> query_vec = vectors.front();
 
         std::vector<std::pair<size_t, float>> distances(num_vectors);
         for (int i = 0; i < num_vectors; i++) {
             double sum = 0;
+            auto& vec = vectors[i];
             for (int j = 0; j < params.d; j++) {
-                accumulate(vectors[i * params.d + j], vectors[j], sum);
+                accumulate(vec[j], query_vec[j], sum);
             }
             distances[i] = std::make_pair(i, finalize(sum));
         }
         std::sort(distances.begin(), distances.end(),
                   [](const auto& a, const auto& b) { return a.second < 
b.second; });
-        // Use the median distance as the radius
-        float radius = distances[num_vectors / 2].second;
-
-        std::unique_ptr<faiss::Index> native_index =
-                std::make_unique<faiss::IndexHNSWFlat>(params.d, params.m, 
faiss::METRIC_L2);
-        native_index->add(num_vectors, vectors.data());
-        std::ignore = index1->add(num_vectors, vectors.data());
 
+        float radius = distances[num_vectors / 4].second;
         // Save index
         ASSERT_TRUE(index1->save(_ram_dir.get()).ok());
 
@@ -485,8 +429,6 @@ TEST_F(VectorSearchTest, RangeSearchNoSelector1) {
 
         // Step 3: Range search
         // Use a vector we know is in the index
-        std::vector<float> query_vec {vectors.begin(),
-                                      vectors.begin() + params.d}; // Use the 
first vector we added
 
         faiss::SearchParametersHNSW search_params;
         search_params.efSearch = 16; // Set efSearch for better accuracy
@@ -568,17 +510,20 @@ TEST_F(VectorSearchTest, RangeSearchWithSelector1) {
         index1->set_build_params(params);
 
         const int num_vectors = 1000;
-        std::vector<float> vectors;
+        std::vector<std::vector<float>> vectors;
         for (int i = 0; i < num_vectors; i++) {
-            auto vec = generate_random_vector(params.d);
-            vectors.insert(vectors.end(), vec.begin(), vec.end());
+            auto vec = vector_search_utils::generate_random_vector(params.d);
+            vectors.push_back(vec);
         }
 
+        // Use a vector we know is in the index
+        std::vector<float> query_vec = vectors.front();
         std::vector<std::pair<size_t, float>> distances(num_vectors);
         for (int i = 0; i < num_vectors; i++) {
             double sum = 0;
+            auto& vec = vectors[i];
             for (int j = 0; j < params.d; j++) {
-                accumulate(vectors[i * params.d + j], vectors[j], sum);
+                accumulate(vec[j], query_vec[j], sum);
             }
             distances[i] = std::make_pair(i, finalize(sum));
         }
@@ -589,8 +534,8 @@ TEST_F(VectorSearchTest, RangeSearchWithSelector1) {
 
         std::unique_ptr<faiss::Index> native_index =
                 std::make_unique<faiss::IndexHNSWFlat>(params.d, params.m, 
faiss::METRIC_L2);
-        native_index->add(num_vectors, vectors.data());
-        std::ignore = index1->add(num_vectors, vectors.data());
+        
doris::vector_search_utils::add_vectors_to_indexes_serial_mode(index1.get(),
+                                                                       
native_index.get(), vectors);
 
         std::unique_ptr<roaring::Roaring> all_rows = 
std::make_unique<roaring::Roaring>();
         std::unique_ptr<roaring::Roaring> sel_rows = 
std::make_unique<roaring::Roaring>();
@@ -600,11 +545,8 @@ TEST_F(VectorSearchTest, RangeSearchWithSelector1) {
                 sel_rows->add(i);
             }
         }
-        // Step 3: Range search
-        // Use a vector we know is in the index
-        std::vector<float> query_vec {vectors.begin(),
-                                      vectors.begin() + params.d}; // Use the 
first vector we added
 
+        // Step 3: Range search
         faiss::SearchParametersHNSW search_params;
         search_params.efSearch = 16; // Set efSearch for better accuracy
         auto faiss_selector = 
segment_v2::FaissVectorIndex::roaring_to_faiss_selector(*sel_rows);
@@ -612,7 +554,7 @@ TEST_F(VectorSearchTest, RangeSearchWithSelector1) {
         faiss::RangeSearchResult native_search_result(1, true);
         native_index->range_search(1, query_vec.data(), radius * radius, 
&native_search_result,
                                    &search_params);
-
+        // labels and distance
         std::vector<std::pair<int, float>> native_results;
         size_t begin = native_search_result.lims[0];
         size_t end = native_search_result.lims[1];
@@ -622,7 +564,7 @@ TEST_F(VectorSearchTest, RangeSearchWithSelector1) {
         }
 
         HNSWSearchParameters doris_search_params;
-        doris_search_params.ef_search = 16; // Set efSearch for better accuracy
+        doris_search_params.ef_search = search_params.efSearch;
         doris_search_params.is_le_or_lt = true;
         doris_search_params.roaring = sel_rows.get();
         IndexSearchResult doris_search_result;
@@ -644,11 +586,17 @@ TEST_F(VectorSearchTest, RangeSearchWithSelector1) {
         }
 
         doris_search_params.is_le_or_lt = false;
-        roaring::Roaring ge_rows = *doris_search_params.roaring;
+        IndexSearchResult doris_search_result2;
+        ASSERT_EQ(index1->range_search(query_vec.data(), radius, 
doris_search_params,
+                                       doris_search_result2)
+                          .ok(),
+                  true);
+        roaring::Roaring ge_rows = *doris_search_result2.roaring;
         roaring::Roaring less_rows;
         for (size_t i = 0; i < native_results.size(); ++i) {
             less_rows.add(native_results[i].first);
         }
+        // result2 contains all rows that not included by result1
         roaring::Roaring and_row_id = ge_rows & less_rows;
         roaring::Roaring or_row_id = ge_rows | less_rows;
         ASSERT_NEAR(and_row_id.cardinality(), 0, 1);
@@ -659,14 +607,12 @@ TEST_F(VectorSearchTest, RangeSearchWithSelector1) {
 
 TEST_F(VectorSearchTest, RangeSearchEmptyResult) {
     for (size_t i = 0; i < 10; ++i) {
-        auto index1 = std::make_unique<FaissVectorIndex>();
-        FaissBuildParameter params;
-        params.d = 10;
-        params.m = 32;
-        params.index_type = FaissBuildParameter::IndexType::HNSW;
-        index1->set_build_params(params);
-
+        const size_t d = 10;
+        const size_t m = 32;
         const int num_vectors = 1000;
+        auto index1 =
+                
vector_search_utils::create_doris_index(vector_search_utils::IndexType::HNSW, 
d, m);
+
         std::vector<float> vectors;
         // Create 1000 vectors and make sure their l2_distance with 
[1,2,3,4,5,6,7,8,9,10] is less than 100
         // [1,2,3,4,5,6,7,8,9,10]
@@ -679,73 +625,66 @@ TEST_F(VectorSearchTest, RangeSearchEmptyResult) {
                 rowid = rowid % 10;
             }
 
-            std::vector<float> vec(params.d);
-            for (int colid = 0; colid < params.d; colid++) {
+            std::vector<float> vec(d);
+            for (int colid = 0; colid < d; colid++) {
                 vec[colid] = (rowid + colid) % 10 + 1;
             }
             vectors.insert(vectors.end(), vec.begin(), vec.end());
         }
-        // Find the min
-        float radius = 5.0f;
-
-        std::unique_ptr<faiss::Index> native_index =
-                std::make_unique<faiss::IndexHNSWFlat>(params.d, params.m, 
faiss::METRIC_L2);
-
-        native_index->add(num_vectors, vectors.data());
-        std::ignore = index1->add(num_vectors, vectors.data());
+        std::unique_ptr<faiss::Index> native_index = 
vector_search_utils::create_native_index(
+                vector_search_utils::IndexType::HNSW, d, m);
+        vector_search_utils::add_vectors_to_indexes_batch_mode(index1.get(), 
native_index.get(),
+                                                               num_vectors, 
vectors);
 
         std::vector<float> query_vec;
-        for (int i = 0; i < params.d; i++) {
+        for (int i = 0; i < d; i++) {
             query_vec.push_back(5.0f);
         }
-        // L2 distance between [5,5,5,5,5,5,5,5,5,5] with any vector add added 
is large than 5 and less than 250.
 
-        faiss::SearchParametersHNSW search_params;
-        search_params.efSearch = 1000; // Set efSearch for better accuracy
-        faiss::RangeSearchResult native_search_result(1, true);
-        native_index->range_search(1, query_vec.data(), radius * radius, 
&native_search_result);
+        // L2 distance between [5,5,5,5,5,5,5,5,5,5] with any other vector is 
large than 5 and less than 250.
+        // Find the min
+        float radius = 5.0f;
+        doris::segment_v2::HNSWSearchParameters search_params;
+        search_params.ef_search = 1000; // Set efSearch for better accuracy
+        auto doris_search_result = 
vector_search_utils::perform_doris_index_range_search(
+                index1.get(), query_vec.data(), radius, search_params);
+        auto native_search_result = 
vector_search_utils::perform_native_index_range_search(
+                native_index.get(), query_vec.data(), radius);
 
-        std::vector<std::pair<int, float>> native_results;
-        size_t begin = native_search_result.lims[0];
-        size_t end = native_search_result.lims[1];
-        for (size_t i = begin; i < end; i++) {
-            native_results.push_back(
-                    {native_search_result.labels[i], 
native_search_result.distances[i]});
-        }
+        ASSERT_EQ(doris_search_result->roaring->cardinality(), 0);
+        ASSERT_EQ(native_search_result.size(), 0);
 
-        HNSWSearchParameters doris_search_params;
-        doris_search_params.ef_search = 1000; // Set efSearch for better 
accuracy
-        doris_search_params.is_le_or_lt = true;
+        // Search all rows.
+        doris::segment_v2::HNSWSearchParameters search_params_all_rows;
+        search_params_all_rows.ef_search = 1000; // Set efSearch for better 
accuracy
+        search_params_all_rows.is_le_or_lt = true;
         std::unique_ptr<roaring::Roaring> sel_rows = 
std::make_unique<roaring::Roaring>();
         for (size_t i = 0; i < num_vectors; ++i) {
             sel_rows->add(i);
         }
+        search_params_all_rows.roaring = sel_rows.get();
+        doris_search_result = 
vector_search_utils::perform_doris_index_range_search(
+                index1.get(), query_vec.data(), radius, 
search_params_all_rows);
+        ASSERT_EQ(doris_search_result->distances != nullptr, true);
 
-        // Search all rows.
-        doris_search_params.roaring = sel_rows.get();
-        IndexSearchResult doris_search_result;
-
-        ASSERT_EQ(index1->range_search(query_vec.data(), radius, 
doris_search_params,
-                                       doris_search_result)
-                          .ok(),
-                  true);
+        native_search_result = 
vector_search_utils::perform_native_index_range_search(
+                native_index.get(), query_vec.data(), radius);
 
-        ASSERT_EQ(native_results.size(), 
doris_search_result.roaring->cardinality());
-        ASSERT_EQ(doris_search_result.distances != nullptr, true);
-        for (size_t i = 0; i < native_results.size(); i++) {
-            const size_t rowid = native_results[i].first;
-            const float dis = native_results[i].second;
-            ASSERT_EQ(doris_search_result.roaring->contains(rowid), true)
+        // Make sure result is same
+        for (size_t i = 0; i < native_search_result.size(); i++) {
+            const size_t rowid = native_search_result[i].first;
+            const float dis = native_search_result[i].second;
+            ASSERT_EQ(doris_search_result->roaring->contains(rowid), true)
                     << "Row ID mismatch at rank " << i;
-            ASSERT_FLOAT_EQ(doris_search_result.distances[i], sqrt(dis))
+            ASSERT_FLOAT_EQ(doris_search_result->distances[i], sqrt(dis))
                     << "Distance mismatch at rank " << i;
         }
 
-        doris_search_params.is_le_or_lt = false;
-        roaring::Roaring ge_rows = *doris_search_params.roaring;
+        search_params_all_rows.is_le_or_lt = false;
+        roaring::Roaring ge_rows = *search_params_all_rows.roaring;
         roaring::Roaring less_rows;
-        for (size_t i = 0; i < native_results.size(); ++i) {
-            less_rows.add(native_results[i].first);
+        for (size_t i = 0; i < native_search_result.size(); ++i) {
+            less_rows.add(native_search_result[i].first);
         }
         roaring::Roaring and_row_id = ge_rows & less_rows;
         roaring::Roaring or_row_id = ge_rows | less_rows;
@@ -762,4 +701,5 @@ TEST_F(VectorSearchTest, TestIdSelectorWithEmptyRoaring) {
         ASSERT_EQ(sel->is_member(i), false) << "Selector should be empty";
     }
 }
+
 } // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/olap/vector_search/native_faiss_test.cpp 
b/be/test/olap/vector_search/native_faiss_test.cpp
index dc7b5da3046..1cfdd12019f 100644
--- a/be/test/olap/vector_search/native_faiss_test.cpp
+++ b/be/test/olap/vector_search/native_faiss_test.cpp
@@ -25,9 +25,11 @@
 #include <iomanip>
 #include <roaring/roaring.hh>
 
-#include "vector/faiss_vector_index.h"
+#include "olap/vector_search/vector_search_utils.h"
 
+namespace doris::vector_search_utils {
 std::vector<float> generate_random_vector(int dim);
+}
 
 class NativeFaissTest : public ::testing::Test {
 public:
@@ -233,7 +235,7 @@ TEST_F(NativeFaissTest, TestRangeSearch10000Random) {
     int n = 1000;
     std::vector<float> data_vectors;
     for (int i = 0; i < n; ++i) {
-        auto random_vector = generate_random_vector(dim);
+        auto random_vector = 
doris::vector_search_utils::generate_random_vector(dim);
         data_vectors.insert(data_vectors.end(), random_vector.begin(), 
random_vector.end());
     }
 
@@ -287,7 +289,7 @@ TEST_F(NativeFaissTest, 
TestRangeSearchRandomVectorsSearchMedian) {
                 // Generate random vectors
                 std::vector<float> data_vectors;
                 for (int i = 0; i < n; ++i) {
-                    auto random_vector = generate_random_vector(dim);
+                    auto random_vector = 
doris::vector_search_utils::generate_random_vector(dim);
                     data_vectors.insert(data_vectors.end(), 
random_vector.begin(),
                                         random_vector.end());
                 }
@@ -367,13 +369,15 @@ TEST_F(NativeFaissTest, TestTopNRandomVectorSearchMedian) 
{
                     // Generate random vectors
                     std::vector<float> data_vectors;
                     for (int i = 0; i < n; ++i) {
-                        auto random_vector = generate_random_vector(dim);
+                        auto random_vector =
+                                
doris::vector_search_utils::generate_random_vector(dim);
                         data_vectors.insert(data_vectors.end(), 
random_vector.begin(),
                                             random_vector.end());
                     }
 
                     // Use a random vector as query
-                    std::vector<float> query_vector = 
generate_random_vector(dim);
+                    std::vector<float> query_vector =
+                            
doris::vector_search_utils::generate_random_vector(dim);
 
                     // Brute force search to find ground truth top-k
                     std::vector<std::pair<int, float>> all_distances(n);
@@ -436,7 +440,7 @@ TEST_F(NativeFaissTest, SameTypeIndexDiffObject) {
     int n = 1000;
     std::vector<float> data_vectors;
     for (int i = 0; i < n; ++i) {
-        auto random_vector = generate_random_vector(dim);
+        auto random_vector = 
doris::vector_search_utils::generate_random_vector(dim);
         data_vectors.insert(data_vectors.end(), random_vector.begin(), 
random_vector.end());
     }
 
@@ -475,4 +479,41 @@ TEST_F(NativeFaissTest, SameTypeIndexDiffObject) {
         ASSERT_FLOAT_EQ(actual_results1[i].second, actual_results2[i].second);
         ASSERT_FLOAT_EQ(actual_results1[i].second, actual_results3[i].second);
     }
+}
+
+TEST_F(NativeFaissTest, BatchInsert) {
+    size_t iterations = 5;
+    for (size_t i = 0; i < iterations; ++i) {
+        const size_t d = 100;
+        const size_t m = 32;
+        const int num_vectors = 1000;
+
+        auto index1 = doris::vector_search_utils::create_native_index(
+                doris::vector_search_utils::IndexType::HNSW, d, m);
+
+        auto index2 = doris::vector_search_utils::create_native_index(
+                doris::vector_search_utils::IndexType::HNSW, d, m);
+
+        auto flatten_vectors =
+                
doris::vector_search_utils::generate_test_vectors_flatten(num_vectors, d);
+
+        doris::vector_search_utils::add_vectors_to_indexes_batch_mode(nullptr, 
index1.get(),
+                                                                      
num_vectors, flatten_vectors);
+        doris::vector_search_utils::add_vectors_to_indexes_batch_mode(nullptr, 
index2.get(),
+                                                                      
num_vectors, flatten_vectors);
+
+        // Use a random vector as query
+        std::vector<float> query_vector = 
doris::vector_search_utils::generate_random_vector(d);
+        float radius = 
doris::vector_search_utils::get_radius_from_flatten(query_vector.data(), d,
+                                                                           
flatten_vectors, 0.4);
+
+        // Get index-based results using HNSW index
+        auto res1 = 
doris::vector_search_utils::perform_native_index_range_search(
+                index1.get(), query_vector.data(), radius);
+        auto res2 = 
doris::vector_search_utils::perform_native_index_range_search(
+                index2.get(), query_vector.data(), radius);
+        // Insert vector using batch insert will make result is different.
+        std::cout << "res1 size: " << res1.size() << std::endl;
+        std::cout << "res2 size: " << res2.size() << std::endl;
+    }
 }
\ No newline at end of file
diff --git a/be/test/olap/vector_search/vector_search_utils.cpp 
b/be/test/olap/vector_search/vector_search_utils.cpp
new file mode 100644
index 00000000000..6ee03666bbf
--- /dev/null
+++ b/be/test/olap/vector_search/vector_search_utils.cpp
@@ -0,0 +1,249 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vector_search_utils.h"
+
+#include <faiss/IndexHNSW.h>
+
+#include <cstddef>
+#include <memory>
+
+#include "faiss_vector_index.h"
+#include "vector_index.h"
+
+namespace doris::vector_search_utils {
+static void accumulate(double x, double y, double& sum) {
+    sum += (x - y) * (x - y);
+}
+static double finalize(double sum) {
+    return sqrt(sum);
+}
+
+// Generate random vectors for testing
+std::vector<float> generate_random_vector(int dim) {
+    std::vector<float> vector(dim);
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<float> dis(-1.0f, 1.0f);
+
+    for (int i = 0; i < dim; i++) {
+        vector[i] = dis(gen);
+    }
+    return vector;
+}
+
+// Helper function to create and configure a Doris Vector index
+std::unique_ptr<doris::segment_v2::VectorIndex> create_doris_index(IndexType 
index_type,
+                                                                   int 
dimension, int m) {
+    auto index = std::make_unique<doris::segment_v2::FaissVectorIndex>();
+    segment_v2::FaissBuildParameter params;
+    params.d = dimension;
+    params.m = m;
+    switch (index_type) {
+    case IndexType::FLAT_L2:
+        params.index_type = 
segment_v2::FaissBuildParameter::IndexType::BruteForce;
+        break;
+    case IndexType::HNSW:
+        params.index_type = segment_v2::FaissBuildParameter::IndexType::HNSW;
+        break;
+    default:
+        throw std::invalid_argument("Unsupported index type");
+    }
+    params.quantilizer = segment_v2::FaissBuildParameter::Quantilizer::FLAT;
+    index->set_build_params(params);
+    return std::move(index);
+}
+
+// Helper function to create a native Faiss index
+std::unique_ptr<faiss::Index> create_native_index(IndexType type, int 
dimension, int m) {
+    std::unique_ptr<faiss::Index> index;
+
+    switch (type) {
+    case IndexType::FLAT_L2:
+        index = std::make_unique<faiss::IndexFlatL2>(dimension);
+        break;
+    case IndexType::HNSW:
+        index = std::make_unique<faiss::IndexHNSWFlat>(dimension, m, 
faiss::METRIC_L2);
+        break;
+    default:
+        throw std::invalid_argument("Unsupported index type");
+    }
+
+    return index;
+}
+
+// Helper function to generate a batch of random vectors
+std::vector<std::vector<float>> generate_test_vectors_matrix(int num_vectors, 
int dimension) {
+    std::vector<std::vector<float>> vectors;
+    vectors.reserve(num_vectors);
+
+    for (int i = 0; i < num_vectors; i++) {
+        vectors.push_back(generate_random_vector(dimension));
+    }
+
+    return vectors;
+}
+
+std::vector<float> generate_test_vectors_flatten(int num_vectors, int 
dimension) {
+    std::vector<float> vectors;
+    vectors.reserve(num_vectors * dimension);
+
+    for (int i = 0; i < num_vectors; i++) {
+        auto tmp = generate_random_vector(dimension);
+        vectors.insert(vectors.end(), tmp.begin(), tmp.end());
+    }
+
+    return vectors;
+}
+
+// Helper function to add vectors to both Doris and native indexes
+void add_vectors_to_indexes_serial_mode(segment_v2::VectorIndex* doris_index,
+                                        faiss::Index* native_index,
+                                        const std::vector<std::vector<float>>& 
vectors) {
+    for (size_t i = 0; i < vectors.size(); i++) {
+        if (doris_index) {
+            auto status = doris_index->add(1, vectors[i].data());
+            ASSERT_TRUE(status.ok())
+                    << "Failed to add vector to Doris index: " << 
status.to_string();
+        }
+        if (native_index) {
+            // Add vector to native Faiss index
+            native_index->add(1, vectors[i].data());
+        }
+    }
+}
+
+void add_vectors_to_indexes_batch_mode(segment_v2::VectorIndex* doris_index,
+                                       faiss::Index* native_index, size_t 
num_vectors,
+                                       const std::vector<float>& 
flatten_vectors) {
+    if (doris_index) {
+        auto status = doris_index->add(num_vectors, flatten_vectors.data());
+        ASSERT_TRUE(status.ok()) << "Failed to add vectors to Doris index: " 
<< status.to_string();
+    }
+
+    if (native_index) {
+        // Add vectors to native Faiss index
+        native_index->add(num_vectors, flatten_vectors.data());
+    }
+}
+
+// Helper function to print search results for comparison
+void print_search_results(const segment_v2::IndexSearchResult& doris_results,
+                          const std::vector<float>& native_distances,
+                          const std::vector<faiss::idx_t>& native_indices, int 
query_idx) {
+    std::cout << "Query vector index: " << query_idx << std::endl;
+
+    std::cout << "Doris Index Results:" << std::endl;
+    for (int i = 0; i < doris_results.roaring->cardinality(); i++) {
+        std::cout << "ID: " << doris_results.roaring->getIndex(i)
+                  << ", Distance: " << doris_results.distances[i] << std::endl;
+    }
+
+    std::cout << "Native Faiss Results:" << std::endl;
+    for (size_t i = 0; i < native_indices.size(); i++) {
+        if (native_indices[i] == -1) continue;
+        std::cout << "ID: " << native_indices[i] << ", Distance: " << 
native_distances[i]
+                  << std::endl;
+    }
+}
+
+// Helper function to compare search results between Doris and native Faiss
+void compare_search_results(const segment_v2::IndexSearchResult& doris_results,
+                            const std::vector<float>& native_distances,
+                            const std::vector<faiss::idx_t>& native_indices, 
float abs_error) {
+    EXPECT_EQ(doris_results.roaring->cardinality(),
+              std::count_if(native_indices.begin(), native_indices.end(),
+                            [](faiss::idx_t id) { return id != -1; }));
+
+    for (size_t i = 0; i < native_indices.size(); i++) {
+        if (native_indices[i] == -1) continue;
+
+        EXPECT_TRUE(doris_results.roaring->contains(native_indices[i]))
+                << "ID mismatch at rank " << i;
+        EXPECT_NEAR(doris_results.distances[i], native_distances[i], abs_error)
+                << "Distance mismatch at rank " << i;
+    }
+}
+
+// result is a vector of pairs, where each pair contains the labels and 
distance
+// result is sorted by labels
+std::vector<std::pair<int, float>> 
perform_native_index_range_search(faiss::Index* index,
+                                                                     const 
float* query_vector,
+                                                                     float 
radius) {
+    std::vector<std::pair<int, float>> results;
+    faiss::RangeSearchResult result(1);
+    index->range_search(1, query_vector, radius * radius, &result);
+    size_t begin = result.lims[0];
+    size_t end = result.lims[1];
+    results.reserve(end - begin);
+    for (size_t j = begin; j < end; ++j) {
+        results.push_back({result.labels[j], sqrt(result.distances[j])});
+    }
+    std::sort(results.begin(), results.end(),
+              [](const auto& a, const auto& b) { return a.first < b.first; });
+    return results;
+}
+
+std::unique_ptr<doris::segment_v2::IndexSearchResult> 
perform_doris_index_range_search(
+        segment_v2::VectorIndex* index, const float* query_vector, float 
radius,
+        const segment_v2::IndexSearchParameters& params) {
+    auto result = std::make_unique<doris::segment_v2::IndexSearchResult>();
+    std::ignore = index->range_search(query_vector, radius, params, *result);
+    return result;
+}
+
+float get_radius_from_flatten(const float* vector, int dim,
+                              const std::vector<float>& flatten_vectors, float 
percentile) {
+    size_t n = flatten_vectors.size() / dim;
+    std::vector<std::pair<size_t, float>> distances(n);
+    for (int i = 0; i < n; i++) {
+        double sum = 0;
+        for (int j = 0; j < dim; j++) {
+            accumulate(flatten_vectors[i * dim + j], flatten_vectors[j], sum);
+        }
+        distances[i] = std::make_pair(i, finalize(sum));
+    }
+    std::sort(distances.begin(), distances.end(),
+              [](const auto& a, const auto& b) { return a.second < b.second; 
});
+    // Use the median distance as the radius
+    size_t percentile_index = static_cast<size_t>(n * percentile);
+    float radius = distances[percentile_index].second;
+
+    return radius;
+}
+
+float get_radius_from_matrix(const float* vector, int dim,
+                             const std::vector<std::vector<float>>& 
matrix_vectors,
+                             float percentile) {
+    size_t n = matrix_vectors.size();
+    std::vector<std::pair<size_t, float>> distances(n);
+    for (size_t i = 0; i < n; i++) {
+        double sum = 0;
+        for (int j = 0; j < dim; j++) {
+            accumulate(matrix_vectors[i][j], vector[j], sum);
+        }
+        distances[i] = std::make_pair(i, finalize(sum));
+    }
+    std::sort(distances.begin(), distances.end(),
+              [](const auto& a, const auto& b) { return a.second < b.second; 
});
+    // Use the median distance as the radius
+    size_t percentile_index = static_cast<size_t>(n * percentile);
+    float radius = distances[percentile_index].second;
+
+    return radius;
+}
+} // namespace doris::vector_search_utils
\ No newline at end of file
diff --git a/be/test/olap/vector_search/vector_search_utils.h 
b/be/test/olap/vector_search/vector_search_utils.h
index b22e01b63b5..8fd79997819 100644
--- a/be/test/olap/vector_search/vector_search_utils.h
+++ b/be/test/olap/vector_search/vector_search_utils.h
@@ -20,6 +20,7 @@
 #include <gen_cpp/Descriptors_types.h>
 #include <gen_cpp/Exprs_types.h>
 #include <gen_cpp/Types_types.h>
+#include <gen_cpp/olap_file.pb.h>
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 #include <stdint.h>
@@ -36,12 +37,75 @@
 #include "olap/tablet_schema.h"
 #include "runtime/descriptors.h"
 #include "vec/exprs/vexpr_context.h"
+#include "vector_index.h"
 // Add CLucene RAM Directory header
 #include <CLucene/store/RAMDirectory.h>
+#include <faiss/MetricType.h>
 
 using doris::segment_v2::DorisCompoundReader;
 
-namespace doris::vec_search_mock {
+namespace faiss {
+class Index;
+class IndexHNSWFlat;
+} // namespace faiss
+
+namespace doris::segment_v2 {
+class FaissVectorIndex;
+}
+
+namespace doris::vector_search_utils {
+
+// Generate random vectors for testing
+std::vector<float> generate_random_vector(int dim);
+// Generate random vectors in matrix form
+std::vector<std::vector<float>> generate_test_vectors_matrix(int num_vectors, 
int dimension);
+// Generate random vectors as a flatten vector
+std::vector<float> generate_test_vectors_flatten(int num_vectors, int 
dimension);
+
+// Enum for different index types
+enum class IndexType {
+    FLAT_L2,
+    HNSW,
+    // Add more index types as needed
+};
+std::unique_ptr<faiss::Index> create_native_index(IndexType type, int 
dimension, int m);
+std::unique_ptr<doris::segment_v2::VectorIndex> create_doris_index(IndexType 
index_type,
+                                                                   int 
dimension, int m);
+
+// Helper function to add vectors to both Doris and native indexes
+void add_vectors_to_indexes_serial_mode(segment_v2::VectorIndex* doris_index,
+                                        faiss::Index* native_index,
+                                        const std::vector<std::vector<float>>& 
vectors);
+
+void add_vectors_to_indexes_batch_mode(segment_v2::VectorIndex* doris_index,
+                                       faiss::Index* native_index, size_t 
num_vectors,
+                                       const std::vector<float>& 
flatten_vectors);
+
+void print_search_results(const segment_v2::IndexSearchResult& doris_results,
+                          const std::vector<float>& native_distances,
+                          const std::vector<faiss::idx_t>& native_indices, int 
query_idx);
+
+float get_radius_from_flatten(const float* vector, int dim,
+                              const std::vector<float>& flatten_vectors, float 
percentile);
+float get_radius_from_matrix(const float* vector, int dim,
+                             const std::vector<std::vector<float>>& 
matrix_vectors,
+                             float percentile);
+// Helper function to compare search results between Doris and native Faiss
+void compare_search_results(const segment_v2::IndexSearchResult& doris_results,
+                            const std::vector<float>& native_distances,
+                            const std::vector<faiss::idx_t>& native_indices,
+                            float abs_error = 1e-5);
+
+// result is a vector of pairs, where each pair contains the labels and 
distance
+// result is sorted by labels
+std::vector<std::pair<int, float>> 
perform_native_index_range_search(faiss::Index* index,
+                                                                     const 
float* query_vector,
+                                                                     float 
radius);
+
+std::unique_ptr<doris::segment_v2::IndexSearchResult> 
perform_doris_index_range_search(
+        segment_v2::VectorIndex* index, const float* query_vector, float 
radius,
+        const segment_v2::IndexSearchParameters& params);
+
 class MockIndexFileReader : public ::doris::segment_v2::IndexFileReader {
 public:
     MockIndexFileReader()
@@ -83,8 +147,6 @@ public:
 
     ~MockAnnIndexIterator() override = default;
 
-    IndexType type() override { return IndexType::ANN; }
-
     MOCK_METHOD(Status, read_from_index, (const doris::segment_v2::IndexParam& 
param), (override));
     MOCK_METHOD(Status, range_search,
                 (const segment_v2::RangeSearchParams& params,
@@ -95,7 +157,7 @@ public:
 private:
     io::IOContext _io_ctx_mock;
 };
-} // namespace doris::vec_search_mock
+} // namespace doris::vector_search_utils
 
 namespace doris::vectorized {
 template <typename T>
@@ -189,7 +251,7 @@ protected:
         virtual_slot_ref_node.__isset.opcode = false;
 
         _virtual_slot_ref_expr.nodes.push_back(virtual_slot_ref_node);
-        _ann_index_iterator = 
std::make_unique<vec_search_mock::MockAnnIndexIterator>();
+        _ann_index_iterator = 
std::make_unique<vector_search_utils::MockAnnIndexIterator>();
 
         _row_desc = RowDescriptor(*_desc_tbl, {0}, {false});
 
@@ -210,7 +272,7 @@ protected:
 private:
     doris::ObjectPool obj_pool;
     RowDescriptor _row_desc;
-    std::unique_ptr<vec_search_mock::MockAnnIndexIterator> _ann_index_iterator;
+    std::unique_ptr<vector_search_utils::MockAnnIndexIterator> 
_ann_index_iterator;
     vectorized::IColumn::MutablePtr _result_column;
     doris::TExpr _virtual_slot_ref_expr;
     DescriptorTbl* _desc_tbl;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to