This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 1aa57a3b130 branch-2.1: [fix](array index) Correct null bitmap writing 
for inverted index #47846 (#48214)
1aa57a3b130 is described below

commit 1aa57a3b130795b21def9a122ab00f2ff843cfcd
Author: airborne12 <jiang...@selectdb.com>
AuthorDate: Tue Feb 25 20:31:18 2025 +0800

    branch-2.1: [fix](array index) Correct null bitmap writing for inverted 
index #47846 (#48214)
    
    cherry pick from #47846 #48231
---
 be/src/olap/rowset/segment_v2/column_writer.cpp    |   10 +-
 .../rowset/segment_v2/inverted_index_writer.cpp    |   57 +-
 .../olap/rowset/segment_v2/inverted_index_writer.h |    2 +-
 be/src/olap/task/index_builder.cpp                 |   57 +-
 .../segment_v2/inverted_index_array_test.cpp       | 1005 +++++++++++++++++++-
 .../inverted_index_p0/test_add_index_for_arr.out   |  Bin 0 -> 187 bytes
 .../test_add_index_for_arr.groovy                  |   43 +-
 7 files changed, 1065 insertions(+), 109 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp 
b/be/src/olap/rowset/segment_v2/column_writer.cpp
index bdbfcdc2d41..7c71c55598f 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -507,7 +507,9 @@ Status ScalarColumnWriter::init() {
                         return Status::OK();
                     }
                     Status add_nulls(uint32_t count) override { return 
Status::OK(); }
-                    Status add_array_nulls(uint32_t row_id) override { return 
Status::OK(); }
+                    Status add_array_nulls(const uint8_t* null_map, size_t 
num_rows) override {
+                        return Status::OK();
+                    }
                     Status finish() override { return Status::OK(); }
                     int64_t size() const override { return 0; }
                     int64_t file_size() const override { return 0; }
@@ -1018,11 +1020,7 @@ Status ArrayColumnWriter::append_nullable(const uint8_t* 
null_map, const uint8_t
     RETURN_IF_ERROR(append_data(ptr, num_rows));
     if (is_nullable()) {
         if (_opts.need_inverted_index) {
-            for (int row_id = 0; row_id < num_rows; row_id++) {
-                if (null_map[row_id] == 1) {
-                    
RETURN_IF_ERROR(_inverted_index_builder->add_array_nulls(row_id));
-                }
-            }
+            RETURN_IF_ERROR(_inverted_index_builder->add_array_nulls(null_map, 
num_rows));
         }
         RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows));
     }
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 4e503685e68..64c373db166 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -323,8 +323,26 @@ public:
         return Status::OK();
     }
 
-    Status add_array_nulls(uint32_t row_id) override {
-        _null_bitmap.add(row_id);
+    Status add_array_nulls(const uint8_t* null_map, size_t num_rows) override {
+        DCHECK(_rid >= num_rows);
+        if (num_rows == 0 || null_map == nullptr) {
+            return Status::OK();
+        }
+        std::vector<uint32_t> null_indices;
+        null_indices.reserve(num_rows / 8);
+
+        // Data is added before nulls, so _rid is expected to already count these num_rows rows
+        // (see the DCHECK above); subtract num_rows to map index i to its row id in the segment.
+        for (size_t i = 0; i < num_rows; i++) {
+            if (null_map[i] == 1) {
+                null_indices.push_back(_rid - num_rows + 
static_cast<uint32_t>(i));
+            }
+        }
+
+        if (!null_indices.empty()) {
+            _null_bitmap.addMany(null_indices.size(), null_indices.data());
+        }
+
         return Status::OK();
     }
 
@@ -384,8 +402,11 @@ public:
         return Status::OK();
     }
 
-    Status add_array_values(size_t field_size, const void* value_ptr, const 
uint8_t* null_map,
-                            const uint8_t* offsets_ptr, size_t count) override 
{
+    Status add_array_values(size_t field_size, const void* value_ptr,
+                            const uint8_t* nested_null_map, const uint8_t* 
offsets_ptr,
+                            size_t count) override {
+        
DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_array_values_count_is_zero",
+                        { count = 0; })
         if (count == 0) {
             // no values to add inverted index
             return Status::OK();
@@ -408,7 +429,7 @@ public:
                 lucene::document::Field* new_field = nullptr;
                 CL_NS(analysis)::TokenStream* ts = nullptr;
                 for (auto j = start_off; j < start_off + array_elem_size; ++j) 
{
-                    if (null_map[j] == 1) {
+                    if (nested_null_map && nested_null_map[j] == 1) {
                         continue;
                     }
                     auto* v = (Slice*)((const uint8_t*)value_ptr + j * 
field_size);
@@ -471,7 +492,7 @@ public:
             for (int i = 0; i < count; ++i) {
                 auto array_elem_size = offsets[i + 1] - offsets[i];
                 for (size_t j = start_off; j < start_off + array_elem_size; 
++j) {
-                    if (null_map[j] == 1) {
+                    if (nested_null_map && nested_null_map[j] == 1) {
                         continue;
                     }
                     const CppType* p = &reinterpret_cast<const 
CppType*>(value_ptr)[j];
@@ -488,6 +509,12 @@ public:
     Status add_array_values(size_t field_size, const CollectionValue* values,
                             size_t count) override {
         if constexpr (field_is_slice_type(field_type)) {
+            
DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_array_values_field_is_nullptr",
+                            { _field = nullptr; })
+            DBUG_EXECUTE_IF(
+                    
"InvertedIndexColumnWriterImpl::add_array_values_index_writer_is_"
+                    "nullptr",
+                    { _index_writer = nullptr; })
             if (_field == nullptr || _index_writer == nullptr) {
                 LOG(ERROR) << "field or index writer is null in inverted index 
writer.";
                 return Status::InternalError(
@@ -548,9 +575,10 @@ public:
             std::string new_value;
             size_t value_length = sizeof(CppType);
 
-            
DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_value_bkd_writer_add_throw_error",
 {
-                _CLTHROWA(CL_ERR_IllegalArgument, ("packedValue should be 
length=xxx"));
-            });
+            DBUG_EXECUTE_IF(
+                    
"InvertedIndexColumnWriterImpl::add_value_bkd_writer_add_throw_"
+                    "error",
+                    { _CLTHROWA(CL_ERR_IllegalArgument, ("packedValue should 
be length=xxx")); });
 
             _value_key_coder->full_encode_ascending(&value, &new_value);
             _bkd_writer->add((const uint8_t*)new_value.c_str(), value_length, 
_rid);
@@ -614,8 +642,8 @@ public:
                                 _bkd_writer->finish(data_out.get(), 
index_out.get()),
                                 int(field_type));
                     } else {
-                        LOG(WARNING)
-                                << "Inverted index writer create output error 
occurred: nullptr";
+                        LOG(WARNING) << "Inverted index writer create output 
error "
+                                        "occurred: nullptr";
                         _CLTHROWA(CL_ERR_IO, "Create output error with 
nullptr");
                     }
                     meta_out->close();
@@ -630,9 +658,12 @@ public:
                     write_null_bitmap(null_bitmap_out.get());
                     close();
                     DBUG_EXECUTE_IF(
-                            
"InvertedIndexWriter._throw_clucene_error_in_fulltext_writer_close", {
+                            
"InvertedIndexWriter._throw_clucene_error_in_fulltext_"
+                            "writer_close",
+                            {
                                 _CLTHROWA(CL_ERR_IO,
-                                          "debug point: test throw error in 
fulltext index writer");
+                                          "debug point: test throw error in 
fulltext "
+                                          "index writer");
                             });
                 }
             } catch (CLuceneError& e) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.h 
b/be/src/olap/rowset/segment_v2/inverted_index_writer.h
index 134dc32287c..45b19263bca 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.h
@@ -64,7 +64,7 @@ public:
                                     size_t count) = 0;
 
     virtual Status add_nulls(uint32_t count) = 0;
-    virtual Status add_array_nulls(uint32_t row_id) = 0;
+    virtual Status add_array_nulls(const uint8_t* null_map, size_t num_rows) = 
0;
 
     virtual Status finish() = 0;
 
diff --git a/be/src/olap/task/index_builder.cpp 
b/be/src/olap/task/index_builder.cpp
index 68232326b0b..94feffcc059 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -509,9 +509,9 @@ Status 
IndexBuilder::_write_inverted_index_data(TabletSchemaSPtr tablet_schema,
             return converted_result.first;
         }
         const auto* ptr = (const uint8_t*)converted_result.second->get_data();
-        if (converted_result.second->get_nullmap()) {
-            RETURN_IF_ERROR(_add_nullable(column_name, writer_sign, 
field.get(),
-                                          
converted_result.second->get_nullmap(), &ptr,
+        const auto* null_map = converted_result.second->get_nullmap();
+        if (null_map) {
+            RETURN_IF_ERROR(_add_nullable(column_name, writer_sign, 
field.get(), null_map, &ptr,
                                           block->rows()));
         } else {
             RETURN_IF_ERROR(_add_data(column_name, writer_sign, field.get(), 
&ptr, block->rows()));
@@ -526,49 +526,44 @@ Status IndexBuilder::_add_nullable(const std::string& 
column_name,
                                    const std::pair<int64_t, int64_t>& 
index_writer_sign,
                                    Field* field, const uint8_t* null_map, 
const uint8_t** ptr,
                                    size_t num_rows) {
-    size_t offset = 0;
-    auto next_run_step = [&]() {
-        size_t step = 1;
-        for (auto i = offset + 1; i < num_rows; ++i) {
-            if (null_map[offset] == null_map[i]) {
-                step++;
-            } else {
-                break;
-            }
-        }
-        return step;
-    };
     // TODO: need to process null data for inverted index
     if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
         DCHECK(field->get_sub_field_count() == 1);
         // [size, offset_ptr, item_data_ptr, item_nullmap_ptr]
         const auto* data_ptr = reinterpret_cast<const uint64_t*>(*ptr);
         // total number length
-        auto element_cnt = size_t((unsigned long)(*data_ptr));
         auto offset_data = *(data_ptr + 1);
         const auto* offsets_ptr = (const uint8_t*)offset_data;
         try {
-            if (element_cnt > 0) {
-                auto data = *(data_ptr + 2);
-                auto nested_null_map = *(data_ptr + 3);
-                
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
-                        field->get_sub_field(0)->size(), 
reinterpret_cast<const void*>(data),
-                        reinterpret_cast<const uint8_t*>(nested_null_map), 
offsets_ptr, num_rows));
-            }
+            auto data = *(data_ptr + 2);
+            auto nested_null_map = *(data_ptr + 3);
+            
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
+                    field->get_sub_field(0)->size(), reinterpret_cast<const 
void*>(data),
+                    reinterpret_cast<const uint8_t*>(nested_null_map), 
offsets_ptr, num_rows));
+            
DBUG_EXECUTE_IF("IndexBuilder::_add_nullable_add_array_values_error", {
+                _CLTHROWA(CL_ERR_IO, "debug point: 
_add_nullable_add_array_values_error");
+            })
+            
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_nulls(null_map,
+                                                                               
          num_rows));
         } catch (const std::exception& e) {
             return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
                     "CLuceneError occured: {}", e.what());
         }
-        // we should refresh nullmap for array
-        for (int row_id = 0; row_id < num_rows; row_id++) {
-            if (null_map && null_map[row_id] == 1) {
-                RETURN_IF_ERROR(
-                        
_inverted_index_builders[index_writer_sign]->add_array_nulls(row_id));
-            }
-        }
+
         return Status::OK();
     }
-
+    size_t offset = 0;
+    auto next_run_step = [&]() {
+        size_t step = 1;
+        for (auto i = offset + 1; i < num_rows; ++i) {
+            if (null_map[offset] == null_map[i]) {
+                step++;
+            } else {
+                break;
+            }
+        }
+        return step;
+    };
     try {
         do {
             auto step = next_run_step();
diff --git a/be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp 
b/be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp
index 74e9827db25..aac5f3c1c7d 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp
@@ -18,17 +18,24 @@
 #include <CLucene.h>
 #include <CLucene/config/repl_wchar.h>
 #include <CLucene/index/IndexReader.h>
+#include <gen_cpp/olap_file.pb.h>
 #include <gtest/gtest-message.h>
 #include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
 #include <string.h>
 
+#include <map>
 #include <memory>
 #include <string>
 
 #include "gtest/gtest_pred_impl.h"
 #include "io/fs/file_writer.h"
 #include "io/fs/local_file_system.h"
+#include "io/fs/path.h"
+#include "olap/rowset/beta_rowset.h"
 #include "olap/rowset/segment_v2/inverted_index_compound_reader.h"
+#include "olap/rowset/segment_v2/inverted_index_desc.h"
+#include "olap/rowset/segment_v2/inverted_index_file_reader.h"
 #include "olap/rowset/segment_v2/inverted_index_file_writer.h"
 #include "olap/rowset/segment_v2/inverted_index_fs_directory.h"
 #include "olap/rowset/segment_v2/inverted_index_writer.h"
@@ -36,6 +43,7 @@
 #include "olap/tablet_schema.h"
 #include "olap/tablet_schema_helper.h"
 #include "runtime/exec_env.h"
+#include "util/faststring.h"
 #include "util/slice.h"
 #include "vec/columns/column_array.h"
 #include "vec/columns/column_nullable.h"
@@ -50,20 +58,68 @@
 using namespace lucene::index;
 using doris::segment_v2::InvertedIndexFileWriter;
 
-namespace doris {
-namespace segment_v2 {
+namespace doris::segment_v2 {
 
 class InvertedIndexArrayTest : public testing::Test {
+    using ExpectedDocMap = std::map<std::string, std::vector<int>>;
+
 public:
     const std::string kTestDir = "./ut_dir/inverted_index_array_test";
 
-    void check_terms_stats(string dir_str, string file_str) {
-        auto fs = io::global_local_filesystem();
-        std::unique_ptr<DorisCompoundReader> reader = 
std::make_unique<DorisCompoundReader>(
-                DorisFSDirectoryFactory::getDirectory(fs, dir_str.c_str()), 
file_str.c_str(), 4096);
+    void check_terms_stats(std::string index_prefix, ExpectedDocMap* expected,
+                           std::vector<int> expected_null_bitmap = {},
+                           InvertedIndexStorageFormatPB format = 
InvertedIndexStorageFormatPB::V1,
+                           const TabletIndex* index_meta = nullptr) {
+        std::string file_str;
+        if (format == InvertedIndexStorageFormatPB::V1) {
+            file_str = 
InvertedIndexDescriptor::get_index_file_name(index_prefix,
+                                                                    
index_meta->index_id(), "");
+        } else if (format == InvertedIndexStorageFormatPB::V2) {
+            file_str = 
InvertedIndexDescriptor::get_index_file_name(index_prefix);
+        }
+        io::Path path(index_prefix);
+        std::unique_ptr<InvertedIndexFileReader> reader = 
std::make_unique<InvertedIndexFileReader>(
+                io::global_local_filesystem(), path.parent_path(), 
path.filename(), format);
+        auto st = reader->init();
+        EXPECT_EQ(st, Status::OK());
+        auto result = reader->open(index_meta);
+        EXPECT_TRUE(result.has_value()) << "Failed to open compound reader" << 
result.error();
+        auto compound_reader = std::move(result.value());
+        try {
+            CLuceneError err;
+            CL_NS(store)::IndexInput* index_input = nullptr;
+            auto ok = DorisFSDirectory::FSIndexInput::open(
+                    io::global_local_filesystem(), file_str.c_str(), 
index_input, err, 4096);
+            if (!ok) {
+                throw err;
+            }
+
+            std::shared_ptr<roaring::Roaring> null_bitmap = 
std::make_shared<roaring::Roaring>();
+            auto null_bitmap_file_name =
+                    
InvertedIndexDescriptor::get_temporary_null_bitmap_file_name();
+            if (compound_reader->fileExists(null_bitmap_file_name.c_str())) {
+                std::unique_ptr<lucene::store::IndexInput> null_bitmap_in;
+                
assert(compound_reader->openInput(null_bitmap_file_name.c_str(), null_bitmap_in,
+                                                  err, 4096));
+                size_t null_bitmap_size = null_bitmap_in->length();
+                doris::faststring buf;
+                buf.resize(null_bitmap_size);
+                
null_bitmap_in->readBytes(reinterpret_cast<uint8_t*>(buf.data()), 
null_bitmap_size);
+                *null_bitmap = 
roaring::Roaring::read(reinterpret_cast<char*>(buf.data()), false);
+                EXPECT_TRUE(expected_null_bitmap.size() == 
null_bitmap->cardinality());
+                for (int i : expected_null_bitmap) {
+                    EXPECT_TRUE(null_bitmap->contains(i));
+                }
+            }
+            index_input->close();
+            _CLLDELETE(index_input);
+        } catch (const CLuceneError& e) {
+            EXPECT_TRUE(false) << "CLuceneError: " << e.what();
+        }
+
         std::cout << "Term statistics for " << file_str << std::endl;
         std::cout << "==================================" << std::endl;
-        lucene::store::Directory* dir = reader.get();
+        lucene::store::Directory* dir = compound_reader.get();
 
         IndexReader* r = IndexReader::open(dir);
 
@@ -78,15 +134,31 @@ public:
                     lucene_wcstoutf8string(te->term(false)->text(), 
te->term(false)->textLength());
 
             printf("Term: %s ", token.c_str());
+            if (expected) {
+                auto it = expected->find(token);
+                if (it != expected->end()) {
+                    TermDocs* td = r->termDocs(te->term(false));
+                    std::vector<int> actual_docs;
+                    while (td->next()) {
+                        actual_docs.push_back(td->doc());
+                    }
+                    td->close();
+                    _CLLDELETE(td);
+                    EXPECT_EQ(actual_docs, it->second) << "Term: " << token;
+                }
+            }
             printf("Freq: %d\n", te->docFreq());
         }
         printf("Term count: %d\n\n", nterms);
+        if (expected) {
+            ASSERT_EQ(nterms, expected->size());
+        }
         te->close();
         _CLLDELETE(te);
 
         r->close();
         _CLLDELETE(r);
-        reader->close();
+        compound_reader->close();
     }
 
     void SetUp() override {
@@ -99,7 +171,7 @@ public:
         paths.emplace_back(kTestDir, 1024);
         auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths);
         st = tmp_file_dirs->init();
-        if (!st.OK()) {
+        if (!st.ok()) {
             std::cout << "init tmp file dirs error:" << st.to_string() << 
std::endl;
             return;
         }
@@ -109,16 +181,43 @@ public:
         
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok());
     }
 
-    void test_string(std::string testname, Field* field) {
+    // Create a TabletSchema whose only column is "arr1", a non-nullable ARRAY with a STRING sub-column.
+    TabletSchemaSPtr create_schema_with_array(KeysType keys_type = DUP_KEYS) {
+        TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+        TabletSchemaPB tablet_schema_pb;
+        tablet_schema_pb.set_keys_type(keys_type);
+
+        tablet_schema->init_from_pb(tablet_schema_pb);
+        TabletColumn array;
+        array.set_name("arr1");
+        array.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY);
+        array.set_length(0);
+        array.set_index_length(0);
+        array.set_is_nullable(false);
+        array.set_is_bf_column(false);
+        TabletColumn child;
+        child.set_name("arr_sub_string");
+        child.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+        child.set_length(INT_MAX);
+        array.add_sub_column(child);
+        tablet_schema->append_column(array);
+        return tablet_schema;
+    }
+
+    void test_non_null_string(int64_t rowset_id, int seg_id, Field* field) {
         EXPECT_TRUE(field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY);
-        std::string filename = kTestDir + "/" + testname;
+        RowsetId rowset_id_obj;
+        rowset_id_obj.init(rowset_id);
+        std::string index_path_prefix =
+                BetaRowset::segment_file_path(kTestDir, rowset_id_obj, seg_id);
+        int index_id = 26033;
+        std::string index_path =
+                
InvertedIndexDescriptor::get_index_file_name(index_path_prefix, index_id, "");
         auto fs = io::global_local_filesystem();
 
-        io::FileWriterPtr file_writer;
-        EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
         auto index_meta_pb = std::make_unique<TabletIndexPB>();
         index_meta_pb->set_index_type(IndexType::INVERTED);
-        index_meta_pb->set_index_id(26033);
+        index_meta_pb->set_index_id(index_id);
         index_meta_pb->set_index_name("index_inverted_arr1");
         index_meta_pb->clear_col_unique_id();
         index_meta_pb->add_col_unique_id(0);
@@ -126,16 +225,100 @@ public:
         TabletIndex idx_meta;
         idx_meta.index_type();
         idx_meta.init_from_pb(*index_meta_pb.get());
+        io::Path path(index_path_prefix);
         auto index_file_writer = std::make_unique<InvertedIndexFileWriter>(
-                fs, file_writer->path().parent_path(), 
file_writer->path().filename(),
-                InvertedIndexStorageFormatPB::V1);
+                fs, path.parent_path(), path.filename(), 
InvertedIndexStorageFormatPB::V1);
         std::unique_ptr<segment_v2::InvertedIndexColumnWriter> 
_inverted_index_builder = nullptr;
         EXPECT_EQ(InvertedIndexColumnWriter::create(field, 
&_inverted_index_builder,
                                                     index_file_writer.get(), 
&idx_meta),
                   Status::OK());
-        vectorized::PaddedPODArray<Slice> _slice;
-        _slice.resize(5);
 
+        // Construct two arrays: The first row is ["amory","doris"], and the 
second row is ["amory", "commiter"]
+        vectorized::Array a1, a2;
+        a1.push_back("amory");
+        a1.push_back("doris");
+        a2.push_back("amory");
+        a2.push_back("commiter");
+
+        // Construct array type: DataTypeArray(DataTypeString)
+        vectorized::DataTypePtr s1 = 
std::make_shared<vectorized::DataTypeString>();
+        vectorized::DataTypePtr array_type = 
std::make_shared<vectorized::DataTypeArray>(s1);
+        vectorized::MutableColumnPtr col = array_type->create_column();
+        col->insert(a1);
+        col->insert(a2);
+        vectorized::ColumnPtr column_array = std::move(col);
+        vectorized::ColumnWithTypeAndName type_and_name(column_array, 
array_type, "arr1");
+
+        // Put the array column into the Block (assuming only this column)
+        vectorized::Block block;
+        block.insert(type_and_name);
+        // block.rows() should be 2
+
+        // Use OlapBlockDataConvertor to convert
+        // Note: the convertor needs a TabletSchema, so this test builds a minimal one in which
+        // column 0 is the array column (create_schema_with_array() supplies its TabletColumn).
+        TabletSchemaSPtr tablet_schema = create_schema_with_array();
+        vectorized::OlapBlockDataConvertor convertor(tablet_schema.get(), {0});
+        convertor.set_source_content(&block, 0, block.rows());
+        auto [st, accessor] = convertor.convert_column_data(0);
+        EXPECT_EQ(st, Status::OK());
+        // The conversion result is actually an array of 4 pointers:
+        //   [0]: Total number of elements (elem_cnt)
+        //   [1]: Offsets array pointer
+        //   [2]: Nested item data pointer
+        //   [3]: Nested nullmap pointer
+        const auto* data_ptr = reinterpret_cast<const 
uint64_t*>(accessor->get_data());
+        const auto* offsets_ptr = reinterpret_cast<const 
uint8_t*>(data_ptr[1]);
+        const void* item_data = reinterpret_cast<const void*>(data_ptr[2]);
+        const auto* item_nullmap = reinterpret_cast<const 
uint8_t*>(data_ptr[3]);
+
+        // Get the length of the subfield, used for inverted index writing
+        auto field_size = field->get_sub_field(0)->size();
+        // Call the inverted index writing interface, passing in item_data, 
item_nullmap, offsets_ptr, and the number of rows (the number of array rows in 
the Block)
+        st = _inverted_index_builder->add_array_values(field_size, item_data, 
item_nullmap,
+                                                       offsets_ptr, 
block.rows());
+        EXPECT_EQ(st, Status::OK());
+        const auto* null_map = accessor->get_nullmap();
+        // add nulls
+        st = _inverted_index_builder->add_array_nulls(null_map, block.rows());
+        EXPECT_EQ(st, Status::OK());
+
+        EXPECT_EQ(_inverted_index_builder->finish(), Status::OK());
+        EXPECT_EQ(index_file_writer->close(), Status::OK());
+
+        ExpectedDocMap expected = {{"amory", {0, 1}}, {"doris", {0}}, 
{"commiter", {1}}};
+        check_terms_stats(index_path_prefix, &expected, {}, 
InvertedIndexStorageFormatPB::V1,
+                          &idx_meta);
+    }
+
+    void test_string(int64_t rowset_id, int seg_id, Field* field) {
+        EXPECT_TRUE(field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY);
+        RowsetId rowset_id_obj;
+        rowset_id_obj.init(rowset_id);
+        std::string index_path_prefix =
+                BetaRowset::segment_file_path(kTestDir, rowset_id_obj, seg_id);
+        int index_id = 26033;
+        std::string index_path =
+                
InvertedIndexDescriptor::get_index_file_name(index_path_prefix, index_id, "");
+        auto fs = io::global_local_filesystem();
+        auto index_meta_pb = std::make_unique<TabletIndexPB>();
+        index_meta_pb->set_index_type(IndexType::INVERTED);
+        index_meta_pb->set_index_id(index_id);
+        index_meta_pb->set_index_name("index_inverted_arr1");
+        index_meta_pb->clear_col_unique_id();
+        index_meta_pb->add_col_unique_id(0);
+        TabletIndex idx_meta;
+        idx_meta.index_type();
+        idx_meta.init_from_pb(*index_meta_pb.get());
+        io::Path path(index_path_prefix);
+        auto index_file_writer = std::make_unique<InvertedIndexFileWriter>(
+                fs, path.parent_path(), path.filename(), 
InvertedIndexStorageFormatPB::V1);
+        std::unique_ptr<segment_v2::InvertedIndexColumnWriter> 
_inverted_index_builder = nullptr;
+        EXPECT_EQ(InvertedIndexColumnWriter::create(field, 
&_inverted_index_builder,
+                                                    index_file_writer.get(), 
&idx_meta),
+                  Status::OK());
+
+        // Construct two arrays: The first row is ["amory","doris"], and the 
second row is [NULL, "amory", "commiter"]
         vectorized::Array a1, a2;
         a1.push_back("amory");
         a1.push_back("doris");
@@ -143,36 +326,725 @@ public:
         a2.push_back("amory");
         a2.push_back("commiter");
 
+        // Construct array type: 
DataTypeArray(DataTypeNullable(DataTypeString))
         vectorized::DataTypePtr s1 = 
std::make_shared<vectorized::DataTypeNullable>(
                 std::make_shared<vectorized::DataTypeString>());
-        vectorized::DataTypePtr au = 
std::make_shared<vectorized::DataTypeArray>(s1);
-        vectorized::MutableColumnPtr col = au->create_column();
+        vectorized::DataTypePtr array_type = 
std::make_shared<vectorized::DataTypeArray>(s1);
+        vectorized::MutableColumnPtr col = array_type->create_column();
         col->insert(a1);
         col->insert(a2);
         vectorized::ColumnPtr column_array = std::move(col);
-        vectorized::ColumnWithTypeAndName type_and_name(column_array, au, 
"arr1");
+        vectorized::ColumnWithTypeAndName type_and_name(column_array, 
array_type, "arr1");
+
+        // Put the array column into the Block (assuming only this column)
+        vectorized::Block block;
+        block.insert(type_and_name);
+        // block.rows() should be 2
+
+        // Use OlapBlockDataConvertor to convert
+        // Note: Here we need a TabletSchema object, in this example we 
construct a simple schema,
+        // Assuming that the 0th column in the schema is our array column (the 
actual UT has the corresponding TabletColumn)
+        TabletSchemaSPtr tablet_schema = create_schema_with_array();
+        vectorized::OlapBlockDataConvertor convertor(tablet_schema.get(), {0});
+        convertor.set_source_content(&block, 0, block.rows());
+        auto [st, accessor] = convertor.convert_column_data(0);
+        EXPECT_EQ(st, Status::OK());
+        // The conversion result is an array of 4 uint64_t slots (a count plus 3 pointers):
+        //   [0]: Total number of elements (elem_cnt)
+        //   [1]: Offsets array pointer
+        //   [2]: Nested item data pointer
+        //   [3]: Nested nullmap pointer
+        const auto* data_ptr = reinterpret_cast<const 
uint64_t*>(accessor->get_data());
+        const auto* offsets_ptr = reinterpret_cast<const 
uint8_t*>(data_ptr[1]);
+        const void* item_data = reinterpret_cast<const void*>(data_ptr[2]);
+        const auto* item_nullmap = reinterpret_cast<const 
uint8_t*>(data_ptr[3]);
+
+        // Get the length of the subfield, used for inverted index writing
+        auto field_size = field->get_sub_field(0)->size();
+        // Call the inverted index writing interface, passing in item_data, 
item_nullmap, offsets_ptr, and the number of rows (the number of array rows in 
the Block)
+        st = _inverted_index_builder->add_array_values(field_size, item_data, 
item_nullmap,
+                                                       offsets_ptr, 
block.rows());
+        EXPECT_EQ(st, Status::OK());
+        const auto* null_map = accessor->get_nullmap();
+        // add nulls
+        st = _inverted_index_builder->add_array_nulls(null_map, block.rows());
+        EXPECT_EQ(st, Status::OK());
+        EXPECT_EQ(_inverted_index_builder->finish(), Status::OK());
+        EXPECT_EQ(index_file_writer->close(), Status::OK());
+
+        ExpectedDocMap expected = {{"amory", {0, 1}}, {"doris", {0}}, 
{"commiter", {1}}};
+        check_terms_stats(index_path_prefix, &expected, {}, 
InvertedIndexStorageFormatPB::V1,
+                          &idx_meta);
+    }
+
+    void test_null_write_v2(int64_t rowset_id, int seg_id, Field* field) {
+        EXPECT_TRUE(field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY);
+        RowsetId rowset_id_obj;
+        rowset_id_obj.init(rowset_id);
+        std::string index_path_prefix =
+                BetaRowset::segment_file_path(kTestDir, rowset_id_obj, seg_id);
+        int index_id = 26033;
+        std::string index_path = 
InvertedIndexDescriptor::get_index_file_name(index_path_prefix);
+        auto fs = io::global_local_filesystem();
+
+        auto index_meta_pb = std::make_unique<TabletIndexPB>();
+        index_meta_pb->set_index_type(IndexType::INVERTED);
+        index_meta_pb->set_index_id(index_id);
+        index_meta_pb->set_index_name("index_inverted_arr1");
+        index_meta_pb->clear_col_unique_id();
+        index_meta_pb->add_col_unique_id(0);
+
+        TabletIndex idx_meta;
+        idx_meta.index_type();
+        idx_meta.init_from_pb(*index_meta_pb.get());
+        io::Path path(index_path_prefix);
+        auto index_file_writer = std::make_unique<InvertedIndexFileWriter>(
+                fs, path.parent_path(), path.filename(), 
InvertedIndexStorageFormatPB::V2);
+        std::unique_ptr<segment_v2::InvertedIndexColumnWriter> 
_inverted_index_builder = nullptr;
+        EXPECT_EQ(InvertedIndexColumnWriter::create(field, 
&_inverted_index_builder,
+                                                    index_file_writer.get(), 
&idx_meta),
+                  Status::OK());
+
+        // Outer null layout of the 5 rows built below: {1, 0, 0, 1, 0}, i.e., rows 0 and 3 are null. This vector is illustrative only; the null map passed to the writer comes from accessor->get_nullmap()
+        std::vector<uint8_t> outer_null_map = {1, 0, 0, 1, 0};
+
+        // Construct inner array type: 
DataTypeArray(DataTypeNullable(DataTypeString))
+        vectorized::DataTypePtr inner_string_type = 
std::make_shared<vectorized::DataTypeNullable>(
+                std::make_shared<vectorized::DataTypeString>());
+        vectorized::DataTypePtr array_type =
+                std::make_shared<vectorized::DataTypeArray>(inner_string_type);
+        // To support outer array null values, wrap it in a Nullable type
+        vectorized::DataTypePtr final_type =
+                std::make_shared<vectorized::DataTypeNullable>(array_type);
+
+        // Construct 5 rows of data:
+        // Row 0: null
+        // Row 1: a2 = [Null, "test"]
+        // Row 2: a3 = ["mixed", Null, "data"]
+        // Row 3: null
+        // Row 4: a5 = ["non-null"]
+        vectorized::MutableColumnPtr col = final_type->create_column();
+        // Row 0: insert null
+        col->insert(vectorized::Null());
+        // Row 1: insert a2
+        vectorized::Array a2;
+        a2.push_back(vectorized::Null());
+        a2.push_back("test");
+        col->insert(a2);
+        // Row 2: insert a3
+        vectorized::Array a3;
+        a3.push_back("mixed");
+        a3.push_back(vectorized::Null());
+        a3.push_back("data");
+        col->insert(a3);
+        // Row 3: insert null
+        col->insert(vectorized::Null());
+        // Row 4: insert a5
+        vectorized::Array a5;
+        a5.push_back("non-null");
+        col->insert(a5);
+
+        vectorized::ColumnPtr column_array = std::move(col);
+        vectorized::ColumnWithTypeAndName type_and_name(column_array, 
final_type, "arr1");
+
+        // Construct Block, containing only the array column, with 5 rows
+        vectorized::Block block;
+        block.insert(type_and_name);
+
+        // Construct TabletSchema (containing the array column) - reference 
the existing helper function
+        TabletSchemaSPtr tablet_schema = create_schema_with_array();
+        // The convertor is restricted to column id 0, which is assumed to be the arr1 array column of this schema
+        vectorized::OlapBlockDataConvertor convertor(tablet_schema.get(), {0});
+        convertor.set_source_content(&block, 0, block.rows());
+
+        // Convert array column data
+        auto [st, accessor] = convertor.convert_column_data(0);
+        EXPECT_EQ(st, Status::OK());
+        // OlapColumnDataConvertorArray conversion result is a 4-tuple:
+        //   [0]: element total count (elem_cnt, not used directly)
+        //   [1]: offsets array pointer
+        //   [2]: nested item data conversion result pointer
+        //   [3]: nested nullmap pointer
+        const auto* data_ptr = reinterpret_cast<const 
uint64_t*>(accessor->get_data());
+        const auto* offsets_ptr = reinterpret_cast<const 
uint8_t*>(data_ptr[1]);
+        const void* item_data = reinterpret_cast<const void*>(data_ptr[2]);
+        const auto* item_nullmap = reinterpret_cast<const 
uint8_t*>(data_ptr[3]);
+
+        // Call the inverted index writing interface, passing in the converted 
nested data, nullmap, and offsets
+        auto field_size = field->get_sub_field(0)->size();
+        st = _inverted_index_builder->add_array_values(field_size, item_data, 
item_nullmap,
+                                                       offsets_ptr, 
block.rows());
+        EXPECT_EQ(st, Status::OK());
+        const auto* null_map = accessor->get_nullmap();
+        // add nulls
+        st = _inverted_index_builder->add_array_nulls(null_map, block.rows());
+        EXPECT_EQ(st, Status::OK());
+        EXPECT_EQ(_inverted_index_builder->finish(), Status::OK());
+        EXPECT_EQ(index_file_writer->close(), Status::OK());
+
+        // Expected inverted index result: only index non-null elements
+        // Row 1: non-null in a2 is "test"
+        // Row 2: non-null in a3 is "mixed" and "data"
+        // Row 4: non-null in a5 is "non-null"
+        ExpectedDocMap expected = {{"test", {1}}, {"mixed", {2}}, {"data", 
{2}}, {"non-null", {4}}};
+        std::vector<int> expected_null_bitmap = {0, 3};
+        check_terms_stats(index_path_prefix, &expected, expected_null_bitmap,
+                          InvertedIndexStorageFormatPB::V2, &idx_meta);
+    }
+
+    void test_null_write(int64_t rowset_id, int seg_id, Field* field) {
+        EXPECT_TRUE(field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY);
+        RowsetId rowset_id_obj;
+        rowset_id_obj.init(rowset_id);
+        std::string index_path_prefix =
+                BetaRowset::segment_file_path(kTestDir, rowset_id_obj, seg_id);
+        int index_id = 26033;
+        std::string index_path =
+                
InvertedIndexDescriptor::get_index_file_name(index_path_prefix, index_id, "");
+        auto fs = io::global_local_filesystem();
+
+        auto index_meta_pb = std::make_unique<TabletIndexPB>();
+        index_meta_pb->set_index_type(IndexType::INVERTED);
+        index_meta_pb->set_index_id(index_id);
+        index_meta_pb->set_index_name("index_inverted_arr1");
+        index_meta_pb->clear_col_unique_id();
+        index_meta_pb->add_col_unique_id(0);
 
-        vectorized::PaddedPODArray<vectorized::UInt64> _offsets;
-        _offsets.reserve(3);
-        _offsets.emplace_back(0);
-        _offsets.emplace_back(2);
-        _offsets.emplace_back(5);
-        const uint8_t* offsets_ptr = (const uint8_t*)(_offsets.data());
+        TabletIndex idx_meta;
+        idx_meta.index_type();
+        idx_meta.init_from_pb(*index_meta_pb.get());
+        io::Path path(index_path_prefix);
+        auto index_file_writer = std::make_unique<InvertedIndexFileWriter>(
+                fs, path.parent_path(), path.filename(), 
InvertedIndexStorageFormatPB::V1);
+        std::unique_ptr<segment_v2::InvertedIndexColumnWriter> 
_inverted_index_builder = nullptr;
+        EXPECT_EQ(InvertedIndexColumnWriter::create(field, 
&_inverted_index_builder,
+                                                    index_file_writer.get(), 
&idx_meta),
+                  Status::OK());
 
-        auto* col_arr = assert_cast<const 
vectorized::ColumnArray*>(column_array.get());
+        // Outer null layout of the 5 rows built below: {1, 0, 0, 1, 0}, i.e., rows 0 and 3 are null. This vector is illustrative only; the null map passed to the writer comes from accessor->get_nullmap()
+        std::vector<uint8_t> outer_null_map = {1, 0, 0, 1, 0};
+
+        // Construct inner array type: 
DataTypeArray(DataTypeNullable(DataTypeString))
+        vectorized::DataTypePtr inner_string_type = 
std::make_shared<vectorized::DataTypeNullable>(
+                std::make_shared<vectorized::DataTypeString>());
+        vectorized::DataTypePtr array_type =
+                std::make_shared<vectorized::DataTypeArray>(inner_string_type);
+        // To support outer array null values, wrap it in a Nullable type
+        vectorized::DataTypePtr final_type =
+                std::make_shared<vectorized::DataTypeNullable>(array_type);
+
+        // Construct 5 rows of data:
+        // Row 0: null
+        // Row 1: a2 = [Null, "test"]
+        // Row 2: a3 = ["mixed", Null, "data"]
+        // Row 3: null
+        // Row 4: a5 = ["non-null"]
+        vectorized::MutableColumnPtr col = final_type->create_column();
+        // Row 0: insert null
+        col->insert(vectorized::Null());
+        // Row 1: insert a2
+        vectorized::Array a2;
+        a2.push_back(vectorized::Null());
+        a2.push_back("test");
+        col->insert(a2);
+        // Row 2: insert a3
+        vectorized::Array a3;
+        a3.push_back("mixed");
+        a3.push_back(vectorized::Null());
+        a3.push_back("data");
+        col->insert(a3);
+        // Row 3: insert null
+        col->insert(vectorized::Null());
+        // Row 4: insert a5
+        vectorized::Array a5;
+        a5.push_back("non-null");
+        col->insert(a5);
+
+        vectorized::ColumnPtr column_array = std::move(col);
+        vectorized::ColumnWithTypeAndName type_and_name(column_array, 
final_type, "arr1");
+
+        // Construct Block, containing only the array column, with 5 rows
+        vectorized::Block block;
+        block.insert(type_and_name);
+
+        // Construct TabletSchema (containing the array column) - reference 
the existing helper function
+        TabletSchemaSPtr tablet_schema = create_schema_with_array();
+        // The convertor is restricted to column id 0, which is assumed to be the arr1 array column of this schema
+        vectorized::OlapBlockDataConvertor convertor(tablet_schema.get(), {0});
+        convertor.set_source_content(&block, 0, block.rows());
+
+        // Convert array column data
+        auto [st, accessor] = convertor.convert_column_data(0);
+        EXPECT_EQ(st, Status::OK());
+        // OlapColumnDataConvertorArray conversion result is a 4-tuple:
+        //   [0]: element total count (elem_cnt, not used directly)
+        //   [1]: offsets array pointer
+        //   [2]: nested item data conversion result pointer
+        //   [3]: nested nullmap pointer
+        const auto* data_ptr = reinterpret_cast<const 
uint64_t*>(accessor->get_data());
+        const auto* offsets_ptr = reinterpret_cast<const 
uint8_t*>(data_ptr[1]);
+        const void* item_data = reinterpret_cast<const void*>(data_ptr[2]);
+        const auto* item_nullmap = reinterpret_cast<const 
uint8_t*>(data_ptr[3]);
+
+        // Call the inverted index writing interface, passing in the converted 
nested data, nullmap, and offsets
+        auto field_size = field->get_sub_field(0)->size();
+        st = _inverted_index_builder->add_array_values(field_size, item_data, 
item_nullmap,
+                                                       offsets_ptr, 
block.rows());
+        EXPECT_EQ(st, Status::OK());
+        const auto* null_map = accessor->get_nullmap();
+        // add nulls
+        st = _inverted_index_builder->add_array_nulls(null_map, block.rows());
+        EXPECT_EQ(st, Status::OK());
+        EXPECT_EQ(_inverted_index_builder->finish(), Status::OK());
+        EXPECT_EQ(index_file_writer->close(), Status::OK());
+
+        // Expected inverted index result: only index non-null elements
+        // Row 1: non-null in a2 is "test"
+        // Row 2: non-null in a3 is "mixed" and "data"
+        // Row 4: non-null in a5 is "non-null"
+        ExpectedDocMap expected = {{"test", {1}}, {"mixed", {2}}, {"data", 
{2}}, {"non-null", {4}}};
+        std::vector<int> expected_null_bitmap = {0, 3};
+        check_terms_stats(index_path_prefix, &expected, expected_null_bitmap,
+                          InvertedIndexStorageFormatPB::V1, &idx_meta);
+    }
+
+    void test_multi_block_write(int64_t rowset_id, int seg_id, Field* field) {
+        EXPECT_TRUE(field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY);
+        RowsetId rowset_id_obj;
+        rowset_id_obj.init(rowset_id);
+        std::string index_path_prefix =
+                BetaRowset::segment_file_path(kTestDir, rowset_id_obj, seg_id);
+        int index_id = 26033;
+        std::string index_path =
+                
InvertedIndexDescriptor::get_index_file_name(index_path_prefix, index_id, "");
+        auto fs = io::global_local_filesystem();
+
+        auto index_meta_pb = std::make_unique<TabletIndexPB>();
+        index_meta_pb->set_index_type(IndexType::INVERTED);
+        index_meta_pb->set_index_id(index_id);
+        index_meta_pb->set_index_name("index_inverted_arr1");
+        index_meta_pb->clear_col_unique_id();
+        index_meta_pb->add_col_unique_id(0);
+
+        TabletIndex idx_meta;
+        idx_meta.init_from_pb(*index_meta_pb.get());
+        io::Path path(index_path_prefix);
+        auto index_file_writer = std::make_unique<InvertedIndexFileWriter>(
+                fs, path.parent_path(), path.filename(), 
InvertedIndexStorageFormatPB::V1);
+        std::unique_ptr<segment_v2::InvertedIndexColumnWriter> 
_inverted_index_builder = nullptr;
+        EXPECT_EQ(InvertedIndexColumnWriter::create(field, 
&_inverted_index_builder,
+                                                    index_file_writer.get(), 
&idx_meta),
+                  Status::OK());
+
+        ExpectedDocMap merged_expected;
+
+        // --- Block 1 ---
+        {
+            const int row_num = 4;
+            // construct data type: Nullable( Array( Nullable(String) ) )
+            vectorized::DataTypePtr inner_string = 
std::make_shared<vectorized::DataTypeNullable>(
+                    std::make_shared<vectorized::DataTypeString>());
+            vectorized::DataTypePtr array_type =
+                    std::make_shared<vectorized::DataTypeArray>(inner_string);
+            vectorized::DataTypePtr final_type =
+                    std::make_shared<vectorized::DataTypeNullable>(array_type);
+
+            // construct MutableColumn
+            vectorized::MutableColumnPtr col = final_type->create_column();
+            // simulate outer null: row0 and row3 are null, the rest are 
non-null
+            col->insert(vectorized::Null()); // row0: null
+            {
+                // row1: non-null, array with 1 element: "block1_data1"
+                vectorized::Array arr;
+                arr.push_back("block1_data1");
+                col->insert(arr);
+            }
+            {
+                // row2: non-null, array with 1 element: "block1_data2"
+                vectorized::Array arr;
+                arr.push_back("block1_data2");
+                col->insert(arr);
+            }
+            col->insert(vectorized::Null()); // row3: null
+
+            vectorized::ColumnPtr column_array = std::move(col);
+            vectorized::ColumnWithTypeAndName type_and_name(column_array, 
final_type, "arr1");
+
+            // construct Block (containing only the arr1 column)
+            vectorized::Block block;
+            block.insert(type_and_name);
+
+            // use a TabletSchema containing the array column (the convertor below selects column id 0, assumed to be arr1)
+            TabletSchemaSPtr tablet_schema = create_schema_with_array();
+            vectorized::OlapBlockDataConvertor convertor(tablet_schema.get(), 
{0});
+            convertor.set_source_content(&block, 0, block.rows());
+
+            // convert the arr1 column in the block
+            auto [st, accessor] = convertor.convert_column_data(0);
+            EXPECT_EQ(st, Status::OK());
+            // the conversion result is a 4-tuple: [0]: element count, [1]: 
offsets pointer, [2]: item data, [3]: item nullmap
+            const auto* data_ptr = reinterpret_cast<const 
uint64_t*>(accessor->get_data());
+            const auto* offsets_ptr = reinterpret_cast<const 
uint8_t*>(data_ptr[1]);
+            const void* item_data = reinterpret_cast<const void*>(data_ptr[2]);
+            const auto* item_nullmap = reinterpret_cast<const 
uint8_t*>(data_ptr[3]);
+            auto field_size = field->get_sub_field(0)->size();
+            st = _inverted_index_builder->add_array_values(field_size, 
item_data, item_nullmap,
+                                                           offsets_ptr, 
row_num);
+            EXPECT_EQ(st, Status::OK());
+            const auto* null_map = accessor->get_nullmap();
+            // add nulls
+            st = _inverted_index_builder->add_array_nulls(null_map, row_num);
+            EXPECT_EQ(st, Status::OK());
+
+            // for Block1, the non-null rows are row1 and row2, so only they contribute terms
+            ExpectedDocMap expected = {{"block1_data1", {1}}, {"block1_data2", 
{2}}};
+            merged_expected.insert(expected.begin(), expected.end());
+        }
+
+        // --- Block 2 ---
+        {
+            const int row_num = 2;
+            vectorized::DataTypePtr inner_string = 
std::make_shared<vectorized::DataTypeNullable>(
+                    std::make_shared<vectorized::DataTypeString>());
+            vectorized::DataTypePtr array_type =
+                    std::make_shared<vectorized::DataTypeArray>(inner_string);
+            vectorized::DataTypePtr final_type =
+                    std::make_shared<vectorized::DataTypeNullable>(array_type);
+
+            vectorized::MutableColumnPtr col = final_type->create_column();
+            // row0: non-null, array with 1 element: "block2_data1"
+            {
+                vectorized::Array arr;
+                arr.push_back("block2_data1");
+                col->insert(arr);
+            }
+            // row1: null
+            col->insert(vectorized::Null());
+
+            vectorized::ColumnPtr column_array = std::move(col);
+            vectorized::ColumnWithTypeAndName type_and_name(column_array, 
final_type, "arr1");
+
+            vectorized::Block block;
+            block.insert(type_and_name);
+
+            TabletSchemaSPtr tablet_schema = create_schema_with_array();
+            vectorized::OlapBlockDataConvertor convertor(tablet_schema.get(), 
{0});
+            convertor.set_source_content(&block, 0, block.rows());
+
+            auto [st, accessor] = convertor.convert_column_data(0);
+            EXPECT_EQ(st, Status::OK());
+            const auto* data_ptr = reinterpret_cast<const 
uint64_t*>(accessor->get_data());
+            const auto* offsets_ptr = reinterpret_cast<const 
uint8_t*>(data_ptr[1]);
+            const void* item_data = reinterpret_cast<const void*>(data_ptr[2]);
+            const auto* item_nullmap = reinterpret_cast<const 
uint8_t*>(data_ptr[3]);
+
+            auto field_size = field->get_sub_field(0)->size();
+            st = _inverted_index_builder->add_array_values(field_size, 
item_data, item_nullmap,
+                                                           offsets_ptr, 
row_num);
+            EXPECT_EQ(st, Status::OK());
+            const auto* null_map = accessor->get_nullmap();
+            // add nulls
+            st = _inverted_index_builder->add_array_nulls(null_map, row_num);
+            EXPECT_EQ(st, Status::OK());
+
+            ExpectedDocMap expected = {{"block2_data1", {4}}};
+            merged_expected.insert(expected.begin(), expected.end());
+        }
+
+        // --- Block 3 ---
+        {
+            const int row_num = 2;
+            vectorized::DataTypePtr inner_string = 
std::make_shared<vectorized::DataTypeNullable>(
+                    std::make_shared<vectorized::DataTypeString>());
+            vectorized::DataTypePtr array_type =
+                    std::make_shared<vectorized::DataTypeArray>(inner_string);
+            vectorized::DataTypePtr final_type =
+                    std::make_shared<vectorized::DataTypeNullable>(array_type);
+
+            vectorized::MutableColumnPtr col = final_type->create_column();
+            // row0: non-null, array with 1 element: "block3_data1"
+            {
+                vectorized::Array arr;
+                arr.push_back("block3_data1");
+                col->insert(arr);
+            }
+            // row1: null
+            col->insert(vectorized::Null());
+
+            vectorized::ColumnPtr column_array = std::move(col);
+            vectorized::ColumnWithTypeAndName type_and_name(column_array, 
final_type, "arr1");
+
+            vectorized::Block block;
+            block.insert(type_and_name);
+
+            TabletSchemaSPtr tablet_schema = create_schema_with_array();
+            vectorized::OlapBlockDataConvertor convertor(tablet_schema.get(), 
{0});
+            convertor.set_source_content(&block, 0, block.rows());
+
+            auto [st, accessor] = convertor.convert_column_data(0);
+            EXPECT_EQ(st, Status::OK());
+            const auto* data_ptr = reinterpret_cast<const 
uint64_t*>(accessor->get_data());
+            const auto* offsets_ptr = reinterpret_cast<const 
uint8_t*>(data_ptr[1]);
+            const void* item_data = reinterpret_cast<const void*>(data_ptr[2]);
+            const auto* item_nullmap = reinterpret_cast<const 
uint8_t*>(data_ptr[3]);
+            auto field_size = field->get_sub_field(0)->size();
+            st = _inverted_index_builder->add_array_values(field_size, 
item_data, item_nullmap,
+                                                           offsets_ptr, 
row_num);
+            EXPECT_EQ(st, Status::OK());
+            const auto* null_map = accessor->get_nullmap();
+            // add nulls
+            st = _inverted_index_builder->add_array_nulls(null_map, row_num);
+            EXPECT_EQ(st, Status::OK());
+
+            ExpectedDocMap expected = {{"block3_data1", {6}}};
+            merged_expected.insert(expected.begin(), expected.end());
+        }
+
+        EXPECT_EQ(_inverted_index_builder->finish(), Status::OK());
+        EXPECT_EQ(index_file_writer->close(), Status::OK());
+
+        std::vector<int> expected_null_bitmap = {0, 3, 5, 7};
+        check_terms_stats(index_path_prefix, &merged_expected, 
expected_null_bitmap,
+                          InvertedIndexStorageFormatPB::V1, &idx_meta);
+    }
+
+    void test_array_all_null(int64_t rowset_id, int seg_id, Field* field) {
+        EXPECT_TRUE(field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY);
+        RowsetId rowset_id_obj;
+        rowset_id_obj.init(rowset_id);
+        std::string index_path_prefix =
+                BetaRowset::segment_file_path(kTestDir, rowset_id_obj, seg_id);
+        int index_id = 26034;
+        std::string index_path =
+                
InvertedIndexDescriptor::get_index_file_name(index_path_prefix, index_id, "");
+        auto fs = io::global_local_filesystem();
+
+        auto index_meta_pb = std::make_unique<TabletIndexPB>();
+        index_meta_pb->set_index_type(IndexType::INVERTED);
+        index_meta_pb->set_index_id(index_id);
+        index_meta_pb->set_index_name("index_inverted_arr_all_null");
+        index_meta_pb->clear_col_unique_id();
+        index_meta_pb->add_col_unique_id(0);
+
+        TabletIndex idx_meta;
+        idx_meta.init_from_pb(*index_meta_pb.get());
+        io::Path path(index_path_prefix);
+        auto index_file_writer = std::make_unique<InvertedIndexFileWriter>(
+                fs, path.parent_path(), path.filename(), 
InvertedIndexStorageFormatPB::V1);
+        std::unique_ptr<segment_v2::InvertedIndexColumnWriter> 
_inverted_index_builder = nullptr;
+        EXPECT_EQ(InvertedIndexColumnWriter::create(field, 
&_inverted_index_builder,
+                                                    index_file_writer.get(), 
&idx_meta),
+                  Status::OK());
+
+        // Construct inner array type: 
DataTypeArray(DataTypeNullable(DataTypeString))
+        vectorized::DataTypePtr inner_string_type = 
std::make_shared<vectorized::DataTypeNullable>(
+                std::make_shared<vectorized::DataTypeString>());
+        vectorized::DataTypePtr array_type =
+                std::make_shared<vectorized::DataTypeArray>(inner_string_type);
+        // To support outer array null values, wrap it in a Nullable type
+        vectorized::DataTypePtr final_type =
+                std::make_shared<vectorized::DataTypeNullable>(array_type);
+
+        vectorized::MutableColumnPtr col = final_type->create_column();
+        col->insert(vectorized::Null());
+        col->insert(vectorized::Null());
+
+        vectorized::ColumnPtr column_array = std::move(col);
+        vectorized::ColumnWithTypeAndName type_and_name(column_array, 
final_type, "arr1");
+
+        vectorized::Block block;
+        block.insert(type_and_name);
+
+        TabletSchemaSPtr tablet_schema = create_schema_with_array();
+        vectorized::OlapBlockDataConvertor convertor(tablet_schema.get(), {0});
+        convertor.set_source_content(&block, 0, block.rows());
+
+        auto [st, accessor] = convertor.convert_column_data(0);
+        EXPECT_EQ(st, Status::OK());
+        const auto* data_ptr = reinterpret_cast<const 
uint64_t*>(accessor->get_data());
+        const auto* offsets_ptr = reinterpret_cast<const 
uint8_t*>(data_ptr[1]);
+        const void* item_data = reinterpret_cast<const void*>(data_ptr[2]);
+        const auto* item_nullmap = reinterpret_cast<const 
uint8_t*>(data_ptr[3]);
+        const auto* null_map = accessor->get_nullmap();
+
+        auto field_size = field->get_sub_field(0)->size();
+        st = _inverted_index_builder->add_array_values(field_size, item_data, 
item_nullmap,
+                                                       offsets_ptr, 
block.rows());
+        EXPECT_EQ(st, Status::OK());
+        st = _inverted_index_builder->add_array_nulls(null_map, block.rows());
+        EXPECT_EQ(st, Status::OK());
+
+        EXPECT_EQ(_inverted_index_builder->finish(), Status::OK());
+        EXPECT_EQ(index_file_writer->close(), Status::OK());
+
+        std::vector<int> expected_null_bitmap = {0, 1};
+        ExpectedDocMap expected {};
+        check_terms_stats(index_path_prefix, &expected, expected_null_bitmap,
+                          InvertedIndexStorageFormatPB::V1, &idx_meta);
+    }
+
+    /// Writes a 3-row nullable ARRAY<INT> column (row1 is NULL) through the inverted index
+    /// column writer and verifies the null bitmap stored in the resulting V1 index file.
+    ///
+    /// @param rowset_id  numeric id used to build the segment file path under kTestDir
+    /// @param seg_id     segment id used to build the segment file path
+    /// @param field      ARRAY field; its sub field 0 provides the element size
+    void test_array_numeric(int64_t rowset_id, int seg_id, Field* field) {
+        EXPECT_TRUE(field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY);
+        RowsetId rowset_id_obj;
+        rowset_id_obj.init(rowset_id);
+        std::string index_path_prefix =
+                BetaRowset::segment_file_path(kTestDir, rowset_id_obj, seg_id);
+        int index_id = 26033;
+        std::string index_path =
+                InvertedIndexDescriptor::get_index_file_name(index_path_prefix, index_id, "");
+        auto fs = io::global_local_filesystem();
+
+        auto index_meta_pb = std::make_unique<TabletIndexPB>();
+        index_meta_pb->set_index_type(IndexType::INVERTED);
+        index_meta_pb->set_index_id(index_id);
+        index_meta_pb->set_index_name("index_inverted_arr_numeric");
+        index_meta_pb->clear_col_unique_id();
+        index_meta_pb->add_col_unique_id(0);
+
+        TabletIndex idx_meta;
+        idx_meta.init_from_pb(*index_meta_pb);
+        io::Path path(index_path_prefix);
+        auto index_file_writer = std::make_unique<InvertedIndexFileWriter>(
+                fs, path.parent_path(), path.filename(), InvertedIndexStorageFormatPB::V1);
+        std::unique_ptr<segment_v2::InvertedIndexColumnWriter> _inverted_index_builder = nullptr;
+        // Fatal on failure: the builder is dereferenced unconditionally below.
+        ASSERT_EQ(InvertedIndexColumnWriter::create(field, &_inverted_index_builder,
+                                                    index_file_writer.get(), &idx_meta),
+                  Status::OK());
+
+        vectorized::DataTypePtr inner_int = std::make_shared<vectorized::DataTypeInt32>();
+        vectorized::DataTypePtr array_type = std::make_shared<vectorized::DataTypeArray>(inner_int);
+        vectorized::DataTypePtr final_type =
+                std::make_shared<vectorized::DataTypeNullable>(array_type);
+
+        // create a MutableColumnPtr
+        vectorized::MutableColumnPtr col = final_type->create_column();
+        // row0: non-null, array [123, 456]
+        {
+            vectorized::Array arr;
+            arr.push_back(123);
+            arr.push_back(456);
+            col->insert(arr);
+        }
+        // row1: null
+        col->insert(vectorized::Null());
+        // row2: non-null, array [789, 101112]
+        {
+            vectorized::Array arr;
+            arr.push_back(789);
+            arr.push_back(101112);
+            col->insert(arr);
+        }
+        // wrap the constructed column into a ColumnWithTypeAndName
+        vectorized::ColumnPtr column_array = std::move(col);
+        vectorized::ColumnWithTypeAndName type_and_name(column_array, final_type, "arr_num");
+
+        // construct Block (containing only this column), with 3 rows
+        vectorized::Block block;
+        block.insert(type_and_name);
+
+        TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+        TabletSchemaPB tablet_schema_pb;
+        tablet_schema_pb.set_keys_type(KeysType::DUP_KEYS);
+
+        tablet_schema->init_from_pb(tablet_schema_pb);
+        TabletColumn array;
+        array.set_name("arr1");
+        array.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY);
+        array.set_length(0);
+        array.set_index_length(0);
+        array.set_is_nullable(false);
+        array.set_is_bf_column(false);
+        TabletColumn child;
+        child.set_name("arr_sub_int");
+        child.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+        child.set_length(INT_MAX);
+        array.add_sub_column(child);
+        tablet_schema->append_column(array);
+
+        vectorized::OlapBlockDataConvertor convertor(tablet_schema.get(), {0});
+        convertor.set_source_content(&block, 0, block.rows());
+        auto [st, accessor] = convertor.convert_column_data(0);
+        // Fatal on failure: the accessor is dereferenced right after.
+        ASSERT_EQ(st, Status::OK());
+        // the conversion result is a 4-tuple:
+        // [0]: element total count, [1]: offsets pointer, [2]: item data, [3]: item nullmap
+        const auto* data_ptr = reinterpret_cast<const uint64_t*>(accessor->get_data());
+        const auto* offsets_ptr = reinterpret_cast<const uint8_t*>(data_ptr[1]);
+        const void* item_data = reinterpret_cast<const void*>(data_ptr[2]);
+        const auto* item_nullmap = reinterpret_cast<const uint8_t*>(data_ptr[3]);
+
+        // get the size of the sub field (4 bytes for INT type)
+        auto field_size = field->get_sub_field(0)->size();
+        st = _inverted_index_builder->add_array_values(field_size, item_data, item_nullmap,
+                                                       offsets_ptr, block.rows());
+        EXPECT_EQ(st, Status::OK());
+        const auto* null_map = accessor->get_nullmap();
+        // add nulls
+        st = _inverted_index_builder->add_array_nulls(null_map, block.rows());
+        EXPECT_EQ(st, Status::OK());
+        EXPECT_EQ(_inverted_index_builder->finish(), Status::OK());
+        EXPECT_EQ(index_file_writer->close(), Status::OK());
+
+        // expected inverted index: row0 contains "123" and "456" (doc id 0), row1 is null,
+        // row2 contains "789" and "101112" (doc id 2).
+        // Only the null bitmap is verified here; the term postings are not read back.
+        const std::vector<int> expected_null_bitmap = {1};
+
+        auto reader = std::make_unique<InvertedIndexFileReader>(
+                io::global_local_filesystem(), path.parent_path(), path.filename(),
+                InvertedIndexStorageFormatPB::V1);
+        ASSERT_EQ(reader->init(), Status::OK());
+        auto result = reader->open(&idx_meta);
+        // Fatal on failure: result.value() is taken right after.
+        ASSERT_TRUE(result.has_value()) << "Failed to open compound reader: " << result.error();
+        auto compound_reader = std::move(result.value());
+
+        // Declared outside the try block so it is released even when a CLuceneError is thrown.
+        CL_NS(store)::IndexInput* index_input = nullptr;
+        try {
+            CLuceneError err;
+            // The index file itself must be openable.
+            if (!DorisFSDirectory::FSIndexInput::open(io::global_local_filesystem(),
+                                                      index_path.c_str(), index_input, err,
+                                                      4096)) {
+                throw err;
+            }
+
+            const auto null_bitmap_file_name =
+                    InvertedIndexDescriptor::get_temporary_null_bitmap_file_name();
+            const bool has_null_bitmap =
+                    compound_reader->fileExists(null_bitmap_file_name.c_str());
+            // row1 is null, so a missing bitmap file must not let the test pass vacuously.
+            EXPECT_TRUE(has_null_bitmap) << "null bitmap file is missing although row1 is null";
+            if (has_null_bitmap) {
+                std::unique_ptr<lucene::store::IndexInput> null_bitmap_in;
+                // Never wrap this call in assert(): with NDEBUG it would not run at all and
+                // null_bitmap_in would be dereferenced while still null.
+                const bool opened = compound_reader->openInput(null_bitmap_file_name.c_str(),
+                                                               null_bitmap_in, err, 4096);
+                EXPECT_TRUE(opened) << "Failed to open null bitmap file";
+                if (opened) {
+                    size_t null_bitmap_size = null_bitmap_in->length();
+                    doris::faststring buf;
+                    buf.resize(null_bitmap_size);
+                    null_bitmap_in->readBytes(reinterpret_cast<uint8_t*>(buf.data()),
+                                              null_bitmap_size);
+                    const roaring::Roaring null_bitmap =
+                            roaring::Roaring::read(reinterpret_cast<char*>(buf.data()), false);
+                    EXPECT_EQ(expected_null_bitmap.size(), null_bitmap.cardinality());
+                    for (int row : expected_null_bitmap) {
+                        EXPECT_TRUE(null_bitmap.contains(row));
+                    }
+                }
+            }
+        } catch (const CLuceneError& e) {
+            ADD_FAILURE() << "CLuceneError: " << e.what();
+        }
+        if (index_input != nullptr) {
+            index_input->close();
+            _CLLDELETE(index_input);
+        }
+    }
+
+private:
+    static void build_slices(vectorized::PaddedPODArray<Slice>& slices,
+                             const vectorized::ColumnPtr& column_array, size_t 
num_strings) {
+        const auto* col_arr = assert_cast<const 
vectorized::ColumnArray*>(column_array.get());
         const vectorized::UInt8* nested_null_map =
                 assert_cast<const 
vectorized::ColumnNullable*>(col_arr->get_data_ptr().get())
-                        ->get_null_map_data()
+                        ->get_null_map_column()
+                        .get_data()
                         .data();
-        auto* col_arr_str = assert_cast<const vectorized::ColumnString*>(
+        const auto* col_arr_str = assert_cast<const vectorized::ColumnString*>(
                 assert_cast<const 
vectorized::ColumnNullable*>(col_arr->get_data_ptr().get())
                         ->get_nested_column_ptr()
                         .get());
         const char* char_data = (const char*)(col_arr_str->get_chars().data());
         const vectorized::ColumnString::Offset* offset_cur = 
col_arr_str->get_offsets().data();
-        const vectorized::ColumnString::Offset* offset_end = offset_cur + 5;
-
-        Slice* slice = _slice.data();
+        const vectorized::ColumnString::Offset* offset_end = offset_cur + 
num_strings;
+        Slice* slice = slices.data();
         size_t string_offset = *(offset_cur - 1);
         const vectorized::UInt8* nullmap_cur = nested_null_map;
         while (offset_cur != offset_end) {
@@ -188,21 +1060,6 @@ public:
             ++slice;
             ++offset_cur;
         }
-
-        auto field_size = field->get_sub_field(0)->size();
-        Status st = _inverted_index_builder->add_array_values(
-                field_size, reinterpret_cast<const void*>(_slice.data()),
-                reinterpret_cast<const uint8_t*>(nested_null_map), 
offsets_ptr, 2);
-        EXPECT_EQ(st, Status::OK());
-        EXPECT_EQ(_inverted_index_builder->finish(), Status::OK());
-        EXPECT_EQ(index_file_writer->close(), Status::OK());
-
-        {
-            std::cout << "dir: " << file_writer->path().parent_path().string() 
<< std::endl;
-            string idx_file_name = file_writer->path().filename().string() + 
"_26033.idx";
-            std::cout << "file: " << file_writer->path().filename().string() 
<< std::endl;
-            check_terms_stats(file_writer->path().parent_path().string(), 
idx_file_name);
-        }
     }
 };
 
@@ -217,9 +1074,55 @@ TEST_F(InvertedIndexArrayTest, ArrayString) {
     arraySubColumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
     arrayTabletColumn.add_sub_column(arraySubColumn);
     Field* field = FieldFactory::create(arrayTabletColumn);
-    test_string("InvertedIndexArray", field);
+    test_string(0, 0, field);
+    test_non_null_string(1, 0, field);
+    delete field;
+}
+
+// Builds an ARRAY column (unique id 0) with a STRING sub column (unique id 1) and runs
+// the three null-handling helpers against the same field.
+TEST_F(InvertedIndexArrayTest, ComplexNullCases) {
+    TabletColumn arrayTabletColumn;
+    arrayTabletColumn.set_unique_id(0);
+    arrayTabletColumn.set_name("arr1");
+    arrayTabletColumn.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY);
+    TabletColumn arraySubColumn;
+    arraySubColumn.set_unique_id(1);
+    arraySubColumn.set_name("arr_sub_string");
+    arraySubColumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    arrayTabletColumn.add_sub_column(arraySubColumn);
+    Field* field = FieldFactory::create(arrayTabletColumn);
+    // Each helper gets a distinct first argument (2, 3, 4) - presumably the rowset id, so
+    // the helpers write to separate index files; confirm against the helper signatures.
+    test_null_write(2, 0, field);
+    test_null_write_v2(3, 0, field);
+    test_array_all_null(4, 0, field);
     delete field;
 }
 
-} // namespace segment_v2
-} // namespace doris
+// Builds an ARRAY column (unique id 0) with a STRING sub column (unique id 1) and runs
+// the multi-block write helper against it.
+TEST_F(InvertedIndexArrayTest, MultiBlockWrite) {
+    TabletColumn arrayTabletColumn;
+    arrayTabletColumn.set_unique_id(0);
+    arrayTabletColumn.set_name("arr1");
+    arrayTabletColumn.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY);
+    TabletColumn arraySubColumn;
+    arraySubColumn.set_unique_id(1);
+    arraySubColumn.set_name("arr_sub_string");
+    arraySubColumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    arrayTabletColumn.add_sub_column(arraySubColumn);
+    // Own the field through unique_ptr so it is released even if the helper throws.
+    std::unique_ptr<Field> field(FieldFactory::create(arrayTabletColumn));
+    test_multi_block_write(5, 0, field.get());
+}
+
+// Builds an ARRAY column (unique id 0) with an INT sub column (unique id 1) and runs
+// test_array_numeric with rowset id 6 and segment id 0.
+TEST_F(InvertedIndexArrayTest, ArrayInt) {
+    TabletColumn arrayTabletColumn;
+    arrayTabletColumn.set_unique_id(0);
+    arrayTabletColumn.set_name("arr1");
+    arrayTabletColumn.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY);
+    TabletColumn arraySubColumn;
+    arraySubColumn.set_unique_id(1);
+    arraySubColumn.set_name("arr_sub_int");
+    arraySubColumn.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    arrayTabletColumn.add_sub_column(arraySubColumn);
+    // Own the field through unique_ptr so it is released even if the helper throws.
+    std::unique_ptr<Field> field(FieldFactory::create(arrayTabletColumn));
+    test_array_numeric(6, 0, field.get());
+}
+} // namespace doris::segment_v2
diff --git a/regression-test/data/inverted_index_p0/test_add_index_for_arr.out 
b/regression-test/data/inverted_index_p0/test_add_index_for_arr.out
new file mode 100644
index 00000000000..9bb146c0df5
Binary files /dev/null and 
b/regression-test/data/inverted_index_p0/test_add_index_for_arr.out differ
diff --git 
a/regression-test/suites/inverted_index_p0/test_add_index_for_arr.groovy 
b/regression-test/suites/inverted_index_p0/test_add_index_for_arr.groovy
index 6f3e772dd08..78bec2d11b0 100644
--- a/regression-test/suites/inverted_index_p0/test_add_index_for_arr.groovy
+++ b/regression-test/suites/inverted_index_p0/test_add_index_for_arr.groovy
@@ -106,9 +106,9 @@ suite("test_add_index_for_arr") {
 
     // query without inverted index
     // query rows with array_contains
-    def sql_query_name1 = sql "select id, name[1], description[1] from 
my_test_array where array_contains(name,'text7')"
+    def sql_query_name1 = sql "select id, name[1], description[1] from 
my_test_array where array_contains(name,'text7') order by id"
     // query rows with !array_contains
-    def sql_query_name2 = sql "select id, name[1], description[1] from 
my_test_array where !array_contains(name,'text7')"
+    def sql_query_name2 = sql "select id, name[1], description[1] from 
my_test_array where !array_contains(name,'text7') order by id"
 
     // add index for name
     sql "ALTER TABLE my_test_array ADD INDEX name_idx (name) USING INVERTED;"
@@ -122,9 +122,9 @@ suite("test_add_index_for_arr") {
     // query with inverted index
     sql "set enable_inverted_index_query=true"
     // query rows with array_contains
-    def sql_query_name1_inverted = sql "select id, name[1], description[1] 
from my_test_array where array_contains(name,'text7')"
+    def sql_query_name1_inverted = sql "select id, name[1], description[1] 
from my_test_array where array_contains(name,'text7') order by id"
     // query rows with !array_contains
-    def sql_query_name2_inverted = sql "select id, name[1], description[1] 
from my_test_array where !array_contains(name,'text7')"
+    def sql_query_name2_inverted = sql "select id, name[1], description[1] 
from my_test_array where !array_contains(name,'text7') order by id"
 
     // check result for query without inverted index and with inverted index
     def size1 = sql_query_name1.size();
@@ -147,9 +147,38 @@ suite("test_add_index_for_arr") {
     sql "drop index name_idx on my_test_array"
     wait_for_latest_op_on_table_finish("my_test_array", timeout)
 
-    def sql_query_name1_without_inverted = sql "select id, name[1], 
description[1] from my_test_array where array_contains(name,'text7')"
-    def sql_query_name2_without_inverted = sql "select id, name[1], 
description[1] from my_test_array where !array_contains(name,'text7')"
+    def sql_query_name1_without_inverted = sql "select id, name[1], 
description[1] from my_test_array where array_contains(name,'text7') order by 
id"
+    def sql_query_name2_without_inverted = sql "select id, name[1], 
description[1] from my_test_array where !array_contains(name,'text7') order by 
id"
 
     assertEquals(sql_query_name1.size(), 
sql_query_name1_without_inverted.size())
     assertEquals(sql_query_name2.size(), 
sql_query_name2_without_inverted.size())
-}
+
+    def table_name = "test_add_index_for_arr_all_null"
+    sql "DROP TABLE IF EXISTS ${table_name}"
+    sql """
+            CREATE TABLE IF NOT EXISTS ${table_name} (
+                `id` int(11) NULL,
+                `name` ARRAY<text> NULL,
+            )
+            DUPLICATE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            properties("replication_num" = "1");
+    """
+
+    sql "insert into ${table_name} values (1, null), (2, null)"
+    sql "ALTER TABLE ${table_name} ADD INDEX name_idx (name) USING INVERTED;"
+    wait_for_latest_op_on_table_finish("${table_name}", timeout)
+    // build the index on name so that the existing name data can use the inverted index
+    if (!isCloudMode()) {
+        sql "BUILD INDEX name_idx ON ${table_name}"
+        wait_for_build_index_on_partition_finish("${table_name}", timeout)
+    }
+
+    qt_sql "select /*+SET_VAR(enable_inverted_index_query=true)*/ * from 
${table_name} where array_contains(name, 'text7') order by id"
+    qt_sql "select /*+SET_VAR(enable_inverted_index_query=true)*/ * from 
${table_name} where !array_contains(name, 'text7') order by id"
+    qt_sql "select /*+SET_VAR(enable_inverted_index_query=true)*/ * from 
${table_name} where name is null order by id"
+
+    qt_sql "select /*+SET_VAR(enable_inverted_index_query=false)*/ * from 
${table_name} where array_contains(name, 'text7') order by id"
+    qt_sql "select /*+SET_VAR(enable_inverted_index_query=false)*/ * from 
${table_name} where !array_contains(name, 'text7') order by id"
+    qt_sql "select /*+SET_VAR(enable_inverted_index_query=false)*/ * from 
${table_name} where name is null order by id"
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to