This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 4d49ccb1cc8 branch-4.0: [feat](Ann Index)Support create index and build index for ANN #55586 (#58651)
4d49ccb1cc8 is described below

commit 4d49ccb1cc8c71af49ea6bcb99a8f5b378ba5630
Author: Jack <[email protected]>
AuthorDate: Wed Dec 3 17:42:03 2025 +0800

    branch-4.0: [feat](Ann Index)Support create index and build index for ANN #55586 (#58651)
    
    cherry pick from #55586
---
 be/src/olap/rowset/segment_v2/column_writer.cpp    |   8 +-
 be/src/olap/rowset/segment_v2/column_writer.h      |   2 +-
 be/src/olap/rowset/segment_v2/index_writer.cpp     | 114 ++++---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  42 ++-
 be/src/olap/rowset/segment_v2/segment_iterator.h   |   5 +-
 be/src/olap/tablet_schema.cpp                      |   4 +-
 be/src/olap/tablet_schema.h                        |   2 +
 be/src/olap/task/index_builder.cpp                 | 166 ++++++---
 be/src/olap/task/index_builder.h                   |   2 +-
 be/test/olap/index_builder_test.cpp                | 369 ++++++++++++++++++++-
 .../olap/vector_search/ann_index_writer_test.cpp   |   2 +
 .../apache/doris/alter/SchemaChangeHandler.java    |   6 +-
 .../main/java/org/apache/doris/catalog/Index.java  |   9 +-
 .../trees/plans/commands/info/BuildIndexOp.java    |  14 +-
 .../trees/plans/commands/info/CreateIndexOp.java   |   6 -
 .../ann_index_p0/build_ann_index_test.groovy       | 112 +++++++
 .../ann_index_p0/create_ann_index_test.groovy      |  23 +-
 17 files changed, 731 insertions(+), 155 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp
index 461a5046e71..2707525e26a 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -919,7 +919,7 @@ Status ArrayColumnWriter::init() {
     if (_opts.need_inverted_index) {
         auto* writer = dynamic_cast<ScalarColumnWriter*>(_item_writer.get());
         if (writer != nullptr) {
-            RETURN_IF_ERROR(IndexColumnWriter::create(get_field(), 
&_inverted_index_builder,
+            RETURN_IF_ERROR(IndexColumnWriter::create(get_field(), 
&_inverted_index_writer,
                                                       _opts.index_file_writer,
                                                       
_opts.inverted_indexes[0]));
         }
@@ -937,7 +937,7 @@ Status ArrayColumnWriter::init() {
 
 Status ArrayColumnWriter::write_inverted_index() {
     if (_opts.need_inverted_index) {
-        return _inverted_index_builder->finish();
+        return _inverted_index_writer->finish();
     }
     return Status::OK();
 }
@@ -969,7 +969,7 @@ Status ArrayColumnWriter::append_data(const uint8_t** ptr, 
size_t num_rows) {
         // now only support nested type is scala
         if (writer != nullptr) {
             //NOTE: use array field name as index field, but item_writer size 
should be used when moving item_data_ptr
-            RETURN_IF_ERROR(_inverted_index_builder->add_array_values(
+            RETURN_IF_ERROR(_inverted_index_writer->add_array_values(
                     _item_writer->get_field()->size(), reinterpret_cast<const 
void*>(data),
                     reinterpret_cast<const uint8_t*>(nested_null_map), 
offsets_ptr, num_rows));
         }
@@ -1005,7 +1005,7 @@ Status ArrayColumnWriter::append_nullable(const uint8_t* 
null_map, const uint8_t
     RETURN_IF_ERROR(append_data(ptr, num_rows));
     if (is_nullable()) {
         if (_opts.need_inverted_index) {
-            RETURN_IF_ERROR(_inverted_index_builder->add_array_nulls(null_map, 
num_rows));
+            RETURN_IF_ERROR(_inverted_index_writer->add_array_nulls(null_map, 
num_rows));
         }
         RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows));
     }
diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h
index 4f42d6bb750..0315b6bb4af 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.h
+++ b/be/src/olap/rowset/segment_v2/column_writer.h
@@ -426,7 +426,7 @@ private:
     std::unique_ptr<OffsetColumnWriter> _offset_writer;
     std::unique_ptr<ScalarColumnWriter> _null_writer;
     std::unique_ptr<ColumnWriter> _item_writer;
-    std::unique_ptr<IndexColumnWriter> _inverted_index_builder;
+    std::unique_ptr<IndexColumnWriter> _inverted_index_writer;
     std::unique_ptr<AnnIndexColumnWriter> _ann_index_writer;
     ColumnWriterOptions _opts;
 };
diff --git a/be/src/olap/rowset/segment_v2/index_writer.cpp b/be/src/olap/rowset/segment_v2/index_writer.cpp
index d5cf7f11b03..70c51b7be13 100644
--- a/be/src/olap/rowset/segment_v2/index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/index_writer.cpp
@@ -17,6 +17,7 @@
 
 #include "common/exception.h"
 #include "olap/field.h"
+#include "olap/rowset/segment_v2/ann_index/ann_index_writer.h"
 #include "olap/rowset/segment_v2/inverted_index_writer.h"
 
 namespace doris::segment_v2 {
@@ -40,10 +41,11 @@ bool IndexColumnWriter::check_support_inverted_index(const 
TabletColumn& column)
 }
 
 bool IndexColumnWriter::check_support_ann_index(const TabletColumn& column) {
-    // bellow types are not supported in inverted index for extracted columns
+    // only array are supported in ann index
     return column.is_array_type();
 }
 
+// create index writer
 Status IndexColumnWriter::create(const Field* field, 
std::unique_ptr<IndexColumnWriter>* res,
                                  IndexFileWriter* index_file_writer,
                                  const TabletIndex* index_meta) {
@@ -62,64 +64,78 @@ Status IndexColumnWriter::create(const Field* field, 
std::unique_ptr<IndexColumn
             field_name = std::to_string(field->unique_id());
         }
     }
-    bool single_field = true;
-    if (type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
-        const auto* array_typeinfo = dynamic_cast<const 
ArrayTypeInfo*>(typeinfo);
-        
DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_array_typeinfo_is_nullptr",
-                        { array_typeinfo = nullptr; })
-        if (array_typeinfo != nullptr) {
-            typeinfo = array_typeinfo->item_type_info();
-            type = typeinfo->type();
-            single_field = false;
-        } else {
-            return Status::NotSupported("unsupported array type for inverted 
index: " +
-                                        std::to_string(int(type)));
+
+    if (index_meta->is_inverted_index()) {
+        bool single_field = true;
+        if (type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
+            const auto* array_typeinfo = dynamic_cast<const 
ArrayTypeInfo*>(typeinfo);
+            
DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_array_typeinfo_is_nullptr",
+                            { array_typeinfo = nullptr; })
+            if (array_typeinfo != nullptr) {
+                typeinfo = array_typeinfo->item_type_info();
+                type = typeinfo->type();
+                single_field = false;
+            } else {
+                return Status::NotSupported("unsupported array type for 
inverted index: " +
+                                            std::to_string(int(type)));
+            }
         }
-    }
 
-    
DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_unsupported_type_for_inverted_index",
-                    { type = FieldType::OLAP_FIELD_TYPE_JSONB; })
-    switch (type) {
+        
DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_unsupported_type_for_inverted_index",
+                        { type = FieldType::OLAP_FIELD_TYPE_JSONB; })
+        switch (type) {
 #define M(TYPE)                                                                
                 \
     case TYPE:                                                                 
                 \
         *res = std::make_unique<InvertedIndexColumnWriter<TYPE>>(field_name, 
index_file_writer, \
                                                                  index_meta, 
single_field);     \
         break;
-        M(FieldType::OLAP_FIELD_TYPE_TINYINT)
-        M(FieldType::OLAP_FIELD_TYPE_SMALLINT)
-        M(FieldType::OLAP_FIELD_TYPE_INT)
-        M(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT)
-        M(FieldType::OLAP_FIELD_TYPE_BIGINT)
-        M(FieldType::OLAP_FIELD_TYPE_LARGEINT)
-        M(FieldType::OLAP_FIELD_TYPE_CHAR)
-        M(FieldType::OLAP_FIELD_TYPE_VARCHAR)
-        M(FieldType::OLAP_FIELD_TYPE_STRING)
-        M(FieldType::OLAP_FIELD_TYPE_DATE)
-        M(FieldType::OLAP_FIELD_TYPE_DATETIME)
-        M(FieldType::OLAP_FIELD_TYPE_DECIMAL)
-        M(FieldType::OLAP_FIELD_TYPE_DATEV2)
-        M(FieldType::OLAP_FIELD_TYPE_DATETIMEV2)
-        M(FieldType::OLAP_FIELD_TYPE_DECIMAL32)
-        M(FieldType::OLAP_FIELD_TYPE_DECIMAL64)
-        M(FieldType::OLAP_FIELD_TYPE_DECIMAL128I)
-        M(FieldType::OLAP_FIELD_TYPE_DECIMAL256)
-        M(FieldType::OLAP_FIELD_TYPE_BOOL)
-        M(FieldType::OLAP_FIELD_TYPE_IPV4)
-        M(FieldType::OLAP_FIELD_TYPE_IPV6)
-        M(FieldType::OLAP_FIELD_TYPE_FLOAT)
-        M(FieldType::OLAP_FIELD_TYPE_DOUBLE)
+            M(FieldType::OLAP_FIELD_TYPE_TINYINT)
+            M(FieldType::OLAP_FIELD_TYPE_SMALLINT)
+            M(FieldType::OLAP_FIELD_TYPE_INT)
+            M(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT)
+            M(FieldType::OLAP_FIELD_TYPE_BIGINT)
+            M(FieldType::OLAP_FIELD_TYPE_LARGEINT)
+            M(FieldType::OLAP_FIELD_TYPE_CHAR)
+            M(FieldType::OLAP_FIELD_TYPE_VARCHAR)
+            M(FieldType::OLAP_FIELD_TYPE_STRING)
+            M(FieldType::OLAP_FIELD_TYPE_DATE)
+            M(FieldType::OLAP_FIELD_TYPE_DATETIME)
+            M(FieldType::OLAP_FIELD_TYPE_DECIMAL)
+            M(FieldType::OLAP_FIELD_TYPE_DATEV2)
+            M(FieldType::OLAP_FIELD_TYPE_DATETIMEV2)
+            M(FieldType::OLAP_FIELD_TYPE_DECIMAL32)
+            M(FieldType::OLAP_FIELD_TYPE_DECIMAL64)
+            M(FieldType::OLAP_FIELD_TYPE_DECIMAL128I)
+            M(FieldType::OLAP_FIELD_TYPE_DECIMAL256)
+            M(FieldType::OLAP_FIELD_TYPE_BOOL)
+            M(FieldType::OLAP_FIELD_TYPE_IPV4)
+            M(FieldType::OLAP_FIELD_TYPE_IPV6)
+            M(FieldType::OLAP_FIELD_TYPE_FLOAT)
+            M(FieldType::OLAP_FIELD_TYPE_DOUBLE)
 #undef M
-    default:
-        return Status::NotSupported("unsupported type for inverted index: " +
-                                    std::to_string(int(type)));
-    }
-    if (*res != nullptr) {
-        auto st = (*res)->init();
-        if (!st.ok()) {
-            (*res)->close_on_error();
-            return st;
+        default:
+            return Status::NotSupported("unsupported type for inverted index: 
" +
+                                        std::to_string(int(type)));
+        }
+        if (*res != nullptr) {
+            auto st = (*res)->init();
+            if (!st.ok()) {
+                (*res)->close_on_error();
+                return st;
+            }
+        }
+    } else if (index_meta->is_ann_index()) {
+        DCHECK(type == FieldType::OLAP_FIELD_TYPE_ARRAY);
+        *res = std ::make_unique<AnnIndexColumnWriter>(index_file_writer, 
index_meta);
+        if (*res != nullptr) {
+            auto st = (*res)->init();
+            if (!st.ok()) {
+                (*res)->close_on_error();
+                return st;
+            }
         }
     }
+
     return Status::OK();
 }
 
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index d0301c7677f..1dc1101d6df 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -683,6 +683,13 @@ Status 
SegmentIterator::_get_row_ranges_by_column_conditions() {
     return Status::OK();
 }
 
+bool SegmentIterator::_column_has_ann_index(int32_t cid) {
+    bool has_ann_index = _index_iterators[cid] != nullptr &&
+                         
_index_iterators[cid]->get_reader(AnnIndexReaderType::ANN);
+
+    return has_ann_index;
+}
+
 Status SegmentIterator::_apply_ann_topn_predicate() {
     if (_ann_topn_runtime == nullptr) {
         return Status::OK();
@@ -692,7 +699,7 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
     size_t src_col_idx = _ann_topn_runtime->get_src_column_idx();
     ColumnId src_cid = _schema->column_id(src_col_idx);
     IndexIterator* ann_index_iterator = _index_iterators[src_cid].get();
-    bool has_ann_index = ann_index_iterator != nullptr;
+    bool has_ann_index = _column_has_ann_index(src_cid);
     bool has_common_expr_push_down = !_common_expr_ctxs_push_down.empty();
     bool has_column_predicate = std::any_of(_is_pred_column.begin(), 
_is_pred_column.end(),
                                             [](bool is_pred) { return is_pred; 
});
@@ -1368,6 +1375,13 @@ Status SegmentIterator::_init_bitmap_index_iterators() {
         return Status::OK();
     }
     for (auto cid : _schema->column_ids()) {
+        const auto& col = _opts.tablet_schema->column(cid);
+        int col_uid = col.unique_id() >= 0 ? col.unique_id() : 
col.parent_unique_id();
+        // The column is not in this segment
+        if (!_segment->_tablet_schema->has_column_unique_id(col_uid)) {
+            continue;
+        }
+
         if (_bitmap_index_iterators[cid] == nullptr) {
             RETURN_IF_ERROR(_segment->new_bitmap_index_iterator(
                     _opts.tablet_schema->column(cid), _opts, 
&_bitmap_index_iterators[cid]));
@@ -1430,14 +1444,14 @@ Status SegmentIterator::_init_index_iterators() {
     for (auto cid : _schema->column_ids()) {
         if (_index_iterators[cid] == nullptr) {
             const auto& column = _opts.tablet_schema->column(cid);
-            int32_t col_unique_id =
-                    column.is_extracted_column() ? column.parent_unique_id() : 
column.unique_id();
-            RETURN_IF_ERROR(_segment->new_index_iterator(
-                    column,
-                    _segment->_tablet_schema->ann_index(col_unique_id, 
column.suffix_path()), _opts,
-                    &_index_iterators[cid]));
-            if (_index_iterators[cid] != nullptr) {
-                _index_iterators[cid]->set_context(_index_query_context);
+            const auto* index_meta = 
_segment->_tablet_schema->ann_index(column);
+            if (index_meta) {
+                RETURN_IF_ERROR(_segment->new_index_iterator(column, 
index_meta, _opts,
+                                                             
&_index_iterators[cid]));
+
+                if (_index_iterators[cid] != nullptr) {
+                    _index_iterators[cid]->set_context(_index_query_context);
+                }
             }
         }
     }
@@ -1631,22 +1645,22 @@ Status SegmentIterator::_seek_columns(const 
std::vector<ColumnId>& column_ids, r
  *   This is an estimate, if we want more precise cost, statistics collection 
is necessary(this is a todo).
  *   In short, when returned non-pred columns contains string/hll/bitmap, we 
using Lazy Materialization.
  *   Otherwise, we disable it.
- *    
+ *
  *   When Lazy Materialization enable, we need to read column at least two 
times.
  *   First time to read Pred col, second time to read non-pred.
  *   Here's an interesting question to research, whether read Pred col once is 
the best plan.
  *   (why not read Pred col twice or more?)
  *
  *   When Lazy Materialization disable, we just need to read once.
- *   
- * 
+ *
+ *
  *  2 Whether the predicate type can be evaluate in a fast way(using SIMD to 
eval pred)
  *    Such as integer type and float type, they can be eval fast.
  *    But for BloomFilter/string/date, they eval slow.
  *    If a type can be eval fast, we use vectorization to eval it.
  *    Otherwise, we use short-circuit to eval it.
- * 
- *  
+ *
+ *
  */
 
 // todo(wb) need a UT here
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 7f765a5f4b4..92925a75ba7 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -174,7 +174,6 @@ private:
     [[nodiscard]] Status _init_bitmap_index_iterators();
     [[nodiscard]] Status _init_index_iterators();
 
-    Status _apply_ann_topn_predicate();
     // calculate row ranges that fall into requested key ranges using short 
key index
     [[nodiscard]] Status _get_row_ranges_by_keys();
     [[nodiscard]] Status _prepare_seek(const StorageReadOptions::KeyRange& 
key_range);
@@ -192,13 +191,17 @@ private:
     // calculate row ranges that satisfy requested column conditions using 
various column index
     [[nodiscard]] Status _get_row_ranges_by_column_conditions();
     [[nodiscard]] Status _get_row_ranges_from_conditions(RowRanges* 
condition_row_ranges);
+
     [[nodiscard]] Status _apply_bitmap_index();
     [[nodiscard]] Status _apply_inverted_index();
     [[nodiscard]] Status _apply_inverted_index_on_column_predicate(
             ColumnPredicate* pred, std::vector<ColumnPredicate*>& 
remaining_predicates,
             bool* continue_apply);
+    [[nodiscard]] Status _apply_ann_topn_predicate();
     [[nodiscard]] Status _apply_index_expr();
+
     bool _column_has_fulltext_index(int32_t cid);
+    bool _column_has_ann_index(int32_t cid);
     bool _downgrade_without_index(Status res, bool need_remaining = false);
     inline bool _inverted_index_not_support_pred_type(const PredicateType& 
type);
     bool _is_literal_node(const TExprNodeType::type& node_type);
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 079cf9278ff..b3331cdfb18 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -1544,7 +1544,8 @@ void TabletSchema::update_tablet_columns(const 
TabletSchema& tablet_schema,
 
 bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
     for (size_t i = 0; i < _indexes.size(); i++) {
-        if (_indexes[i]->index_type() == IndexType::INVERTED &&
+        if ((_indexes[i]->index_type() == IndexType::INVERTED ||
+             _indexes[i]->index_type() == IndexType::ANN) &&
             _indexes[i]->index_id() == index_id) {
             return true;
         }
@@ -1645,7 +1646,6 @@ const TabletIndex* TabletSchema::ann_index(int32_t 
col_unique_id,
 }
 
 const TabletIndex* TabletSchema::ann_index(const TabletColumn& col) const {
-    // Some columns(Float, Double, JSONB ...) from the variant do not support 
inverted index
     if (!segment_v2::IndexColumnWriter::check_support_ann_index(col)) {
         return nullptr;
     }
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index b839231464b..429e102c9a2 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -341,6 +341,8 @@ public:
 
     bool is_inverted_index() const { return _index_type == 
IndexType::INVERTED; }
 
+    bool is_ann_index() const { return _index_type == IndexType::ANN; }
+
     void remove_parser_and_analyzer() {
         _properties.erase(INVERTED_INDEX_PARSER_KEY);
         _properties.erase(INVERTED_INDEX_PARSER_KEY_ALIAS);
diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp
index 9dd418f4ace..ecfbd2e007b 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -19,6 +19,7 @@
 
 #include <mutex>
 
+#include "common/logging.h"
 #include "common/status.h"
 #include "olap/olap_define.h"
 #include "olap/rowset/beta_rowset.h"
@@ -50,7 +51,7 @@ IndexBuilder::IndexBuilder(StorageEngine& engine, 
TabletSharedPtr tablet,
 
 IndexBuilder::~IndexBuilder() {
     _olap_data_convertor.reset();
-    _inverted_index_builders.clear();
+    _index_column_writers.clear();
 }
 
 Status IndexBuilder::init() {
@@ -113,12 +114,9 @@ Status IndexBuilder::update_inverted_index_info() {
                     }
                 }
                 auto column = output_rs_tablet_schema->column(column_idx);
+
+                // inverted index
                 auto index_metas = 
output_rs_tablet_schema->inverted_indexs(column);
-                if (index_metas.empty()) {
-                    LOG(ERROR) << "failed to find column: " << column_name
-                               << " index_id: " << t_inverted_index.index_id;
-                    continue;
-                }
                 for (const auto& index_meta : index_metas) {
                     if 
(output_rs_tablet_schema->get_inverted_index_storage_format() ==
                         InvertedIndexStorageFormatPB::V1) {
@@ -143,7 +141,20 @@ Status IndexBuilder::update_inverted_index_info() {
                     // remove dropped index_meta from output rowset tablet 
schema
                     
output_rs_tablet_schema->remove_index(index_meta->index_id());
                 }
+
+                // ann index
+                const auto* ann_index = 
output_rs_tablet_schema->ann_index(column);
+                if (!ann_index) {
+                    continue;
+                }
+                
DCHECK(output_rs_tablet_schema->get_inverted_index_storage_format() !=
+                       InvertedIndexStorageFormatPB::V1);
+                _dropped_inverted_indexes.push_back(*ann_index);
+                // ATTN: DO NOT REMOVE INDEX AFTER OUTPUT_ROWSET_WRITER 
CREATED.
+                // remove dropped index_meta from output rowset tablet schema
+                output_rs_tablet_schema->remove_index(ann_index->index_id());
             }
+
             
DBUG_EXECUTE_IF("index_builder.update_inverted_index_info.drop_index", {
                 auto indexes_count = 
DebugPoints::instance()->get_debug_param_or_default<int32_t>(
                         "index_builder.update_inverted_index_info.drop_index", 
"indexes_count", 0);
@@ -172,6 +183,8 @@ Status IndexBuilder::update_inverted_index_info() {
                     continue;
                 }
                 const TabletColumn& col = 
output_rs_tablet_schema->column_by_uid(column_uid);
+
+                // inverted index
                 auto exist_indexs = 
output_rs_tablet_schema->inverted_indexs(col);
                 for (const auto& exist_index : exist_indexs) {
                     if (exist_index->index_id() != index.index_id()) {
@@ -188,6 +201,21 @@ Status IndexBuilder::update_inverted_index_info() {
                         }
                     }
                 }
+
+                // ann index
+                const auto* exist_index = 
output_rs_tablet_schema->ann_index(col);
+                if (exist_index && exist_index->index_id() != 
index.index_id()) {
+                    if (exist_index->is_same_except_id(&index)) {
+                        LOG(WARNING) << fmt::format(
+                                "column: {} has a exist ann index, but the 
index id not "
+                                "equal request's index id, , exist index id: 
{}, request's index "
+                                "id: {}, remove exist index in new 
output_rs_tablet_schema",
+                                column_uid, exist_index->index_id(), 
index.index_id());
+                        without_index_uids.insert(exist_index->index_id());
+                        
output_rs_tablet_schema->remove_index(exist_index->index_id());
+                    }
+                }
+
                 output_rs_tablet_schema->append_index(std::move(index));
             }
         }
@@ -357,7 +385,7 @@ Status 
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
         LOG(INFO) << "all row nums. source_rows=" << 
output_rowset_meta->num_rows();
         return Status::OK();
     } else {
-        // create inverted index writer
+        // create inverted or ann index writer
         const auto& fs = output_rowset_meta->fs();
         auto output_rowset_schema = output_rowset_meta->tablet_schema();
         size_t inverted_index_size = 0;
@@ -402,8 +430,10 @@ Status 
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
                         fs, index_path_prefix, 
output_rowset_meta->rowset_id().to_string(),
                         seg_ptr->id(), 
output_rowset_schema->get_inverted_index_storage_format());
             }
-            // create inverted index writer
+            // create inverted index writer, or ann index writer
             for (auto inverted_index : _alter_inverted_indexes) {
+                DCHECK(inverted_index.index_type == TIndexType::INVERTED ||
+                       inverted_index.index_type == TIndexType::ANN);
                 DCHECK_EQ(inverted_index.columns.size(), 1);
                 auto index_id = inverted_index.index_id;
                 auto column_name = inverted_index.columns[0];
@@ -425,44 +455,79 @@ Status 
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
                 // variant column is not support for building index
                 auto is_support_inverted_index =
                         
IndexColumnWriter::check_support_inverted_index(column);
+                auto is_support_ann_index = 
IndexColumnWriter::check_support_ann_index(column);
                 
DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset_support_inverted_index",
                                 { is_support_inverted_index = false; })
-                if (!is_support_inverted_index) {
+                if (!is_support_inverted_index && !is_support_ann_index) {
                     continue;
                 }
                 
DCHECK(output_rowset_schema->has_inverted_index_with_index_id(index_id));
                 _olap_data_convertor->add_column_data_convertor(column);
                 return_columns.emplace_back(column_idx);
                 std::unique_ptr<Field> field(FieldFactory::create(column));
-                auto index_metas = 
output_rowset_schema->inverted_indexs(column);
-                for (const auto& index_meta : index_metas) {
-                    if (index_meta->index_id() != index_id) {
-                        continue;
-                    }
-                    std::unique_ptr<segment_v2::IndexColumnWriter> 
inverted_index_builder;
-                    try {
-                        RETURN_IF_ERROR(segment_v2::IndexColumnWriter::create(
-                                field.get(), &inverted_index_builder, 
index_file_writer.get(),
-                                index_meta));
-                        DBUG_EXECUTE_IF(
-                                
"IndexBuilder::handle_single_rowset_index_column_writer_create_"
-                                "error",
-                                {
-                                    _CLTHROWA(CL_ERR_IO,
-                                              "debug point: "
-                                              
"handle_single_rowset_index_column_writer_create_"
-                                              "error");
-                                })
-                    } catch (const std::exception& e) {
-                        return 
Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
-                                "CLuceneError occured: {}", e.what());
+
+                if (inverted_index.index_type == TIndexType::INVERTED) {
+                    // inverted index
+                    auto index_metas = 
output_rowset_schema->inverted_indexs(column);
+                    for (const auto& index_meta : index_metas) {
+                        if (index_meta->index_id() != index_id) {
+                            continue;
+                        }
+                        std::unique_ptr<segment_v2::IndexColumnWriter> 
inverted_index_builder;
+                        try {
+                            
RETURN_IF_ERROR(segment_v2::IndexColumnWriter::create(
+                                    field.get(), &inverted_index_builder, 
index_file_writer.get(),
+                                    index_meta));
+                            DBUG_EXECUTE_IF(
+                                    
"IndexBuilder::handle_single_rowset_index_column_writer_create_"
+                                    "error",
+                                    {
+                                        _CLTHROWA(CL_ERR_IO,
+                                                  "debug point: "
+                                                  
"handle_single_rowset_index_column_writer_create_"
+                                                  "error");
+                                    })
+                        } catch (const std::exception& e) {
+                            return 
Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+                                    "CLuceneError occured: {}", e.what());
+                        }
+
+                        if (inverted_index_builder) {
+                            auto writer_sign = std::make_pair(seg_ptr->id(), 
index_id);
+                            _index_column_writers.insert(
+                                    std::make_pair(writer_sign, 
std::move(inverted_index_builder)));
+                            
inverted_index_writer_signs.emplace_back(writer_sign);
+                        }
                     }
+                } else if (inverted_index.index_type == TIndexType::ANN) {
+                    // ann index
+                    const auto* index_meta = 
output_rowset_schema->ann_index(column);
+                    if (index_meta && index_meta->index_id() == index_id) {
+                        std::unique_ptr<segment_v2::IndexColumnWriter> 
index_writer;
+                        try {
+                            
RETURN_IF_ERROR(segment_v2::IndexColumnWriter::create(
+                                    field.get(), &index_writer, 
index_file_writer.get(),
+                                    index_meta));
+                            DBUG_EXECUTE_IF(
+                                    
"IndexBuilder::handle_single_rowset_index_column_writer_create_"
+                                    "error",
+                                    {
+                                        _CLTHROWA(CL_ERR_IO,
+                                                  "debug point: "
+                                                  
"handle_single_rowset_index_column_writer_create_"
+                                                  "error");
+                                    })
+                        } catch (const std::exception& e) {
+                            return 
Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+                                    "CLuceneError occured: {}", e.what());
+                        }
 
-                    if (inverted_index_builder) {
-                        auto writer_sign = std::make_pair(seg_ptr->id(), 
index_id);
-                        _inverted_index_builders.insert(
-                                std::make_pair(writer_sign, 
std::move(inverted_index_builder)));
-                        inverted_index_writer_signs.emplace_back(writer_sign);
+                        if (index_writer) {
+                            auto writer_sign = std::make_pair(seg_ptr->id(), 
index_id);
+                            _index_column_writers.insert(
+                                    std::make_pair(writer_sign, 
std::move(index_writer)));
+                            
inverted_index_writer_signs.emplace_back(writer_sign);
+                        }
                     }
                 }
             }
@@ -510,7 +575,7 @@ Status 
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
                     return status;
                 }
 
-                // write inverted index data
+                // write inverted index data, or ann index data
                 status = _write_inverted_index_data(output_rowset_schema, 
iter->data_id(),
                                                     block.get());
                 DBUG_EXECUTE_IF(
@@ -529,8 +594,8 @@ Status 
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
             // finish write inverted index, flush data to compound file
             for (auto& writer_sign : inverted_index_writer_signs) {
                 try {
-                    if (_inverted_index_builders[writer_sign]) {
-                        
RETURN_IF_ERROR(_inverted_index_builders[writer_sign]->finish());
+                    if (_index_column_writers[writer_sign]) {
+                        
RETURN_IF_ERROR(_index_column_writers[writer_sign]->finish());
                     }
                     
DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset_index_build_finish_error", {
                         _CLTHROWA(CL_ERR_IO,
@@ -556,7 +621,7 @@ Status 
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
             }
             inverted_index_size += 
index_file_writer->get_index_file_total_size();
         }
-        _inverted_index_builders.clear();
+        _index_column_writers.clear();
         _index_file_writers.clear();
         
output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size());
         
output_rowset_meta->set_total_disk_size(output_rowset_meta->total_disk_size() +
@@ -571,7 +636,7 @@ Status 
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
 
 Status IndexBuilder::_write_inverted_index_data(TabletSchemaSPtr 
tablet_schema, int64_t segment_idx,
                                                 vectorized::Block* block) {
-    VLOG_DEBUG << "begin to write inverted index";
+    VLOG_DEBUG << "begin to write inverted/ann index";
     // converter block data
     _olap_data_convertor->set_source_content(block, 0, block->rows());
     for (auto i = 0; i < _alter_inverted_indexes.size(); ++i) {
@@ -634,14 +699,14 @@ Status IndexBuilder::_add_nullable(const std::string& 
column_name,
         try {
             auto data = *(data_ptr + 2);
             auto nested_null_map = *(data_ptr + 3);
-            
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
+            
RETURN_IF_ERROR(_index_column_writers[index_writer_sign]->add_array_values(
                     field->get_sub_field(0)->size(), reinterpret_cast<const 
void*>(data),
                     reinterpret_cast<const uint8_t*>(nested_null_map), 
offsets_ptr, num_rows));
             
DBUG_EXECUTE_IF("IndexBuilder::_add_nullable_add_array_values_error", {
                 _CLTHROWA(CL_ERR_IO, "debug point: 
_add_nullable_add_array_values_error");
             })
-            
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_nulls(null_map,
-                                                                               
          num_rows));
+            RETURN_IF_ERROR(
+                    
_index_column_writers[index_writer_sign]->add_array_nulls(null_map, num_rows));
         } catch (const std::exception& e) {
             return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
                     "CLuceneError occured: {}", e.what());
@@ -665,11 +730,11 @@ Status IndexBuilder::_add_nullable(const std::string& 
column_name,
         do {
             auto step = next_run_step();
             if (null_map[offset]) {
-                
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_nulls(
+                
RETURN_IF_ERROR(_index_column_writers[index_writer_sign]->add_nulls(
                         static_cast<uint32_t>(step)));
             } else {
-                
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values(
-                        column_name, *ptr, step));
+                
RETURN_IF_ERROR(_index_column_writers[index_writer_sign]->add_values(column_name,
+                                                                               
      *ptr, step));
             }
             *ptr += field->size() * step;
             offset += step;
@@ -699,13 +764,13 @@ Status IndexBuilder::_add_data(const std::string& 
column_name,
             if (element_cnt > 0) {
                 auto data = *(data_ptr + 2);
                 auto nested_null_map = *(data_ptr + 3);
-                
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
+                
RETURN_IF_ERROR(_index_column_writers[index_writer_sign]->add_array_values(
                         field->get_sub_field(0)->size(), 
reinterpret_cast<const void*>(data),
                         reinterpret_cast<const uint8_t*>(nested_null_map), 
offsets_ptr, num_rows));
             }
         } else {
-            
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values(
-                    column_name, *ptr, num_rows));
+            
RETURN_IF_ERROR(_index_column_writers[index_writer_sign]->add_values(column_name,
 *ptr,
+                                                                               
  num_rows));
         }
         DBUG_EXECUTE_IF("IndexBuilder::_add_data_throw_exception",
                         { _CLTHROWA(CL_ERR_IO, "debug point: 
_add_data_throw_exception"); })
@@ -785,6 +850,7 @@ Status IndexBuilder::do_build_inverted_index() {
                                         _tablet->tablet_id());
     }
 
+    DCHECK(!_alter_index_ids.empty());
     _input_rowsets =
             
_tablet->pick_candidate_rowsets_to_build_inverted_index(_alter_index_ids, 
_is_drop_op);
     if (_input_rowsets.empty()) {
diff --git a/be/src/olap/task/index_builder.h b/be/src/olap/task/index_builder.h
index 478e6557b93..d87d88c5e76 100644
--- a/be/src/olap/task/index_builder.h
+++ b/be/src/olap/task/index_builder.h
@@ -83,7 +83,7 @@ private:
     std::unique_ptr<vectorized::OlapBlockDataConvertor> _olap_data_convertor;
     // "<segment_id, index_id>" -> IndexColumnWriter
     std::unordered_map<std::pair<int64_t, int64_t>, 
std::unique_ptr<segment_v2::IndexColumnWriter>>
-            _inverted_index_builders;
+            _index_column_writers;
     std::unordered_map<int64_t, std::unique_ptr<IndexFileWriter>> 
_index_file_writers;
     // <rowset_id, segment_id>
     std::unordered_map<std::pair<std::string, int64_t>, 
std::unique_ptr<IndexFileReader>>
diff --git a/be/test/olap/index_builder_test.cpp 
b/be/test/olap/index_builder_test.cpp
index afe3a4b8e4d..b0af40ff1f0 100644
--- a/be/test/olap/index_builder_test.cpp
+++ b/be/test/olap/index_builder_test.cpp
@@ -110,6 +110,31 @@ protected:
         tablet_schema->append_column(column_2);
     }
 
+    TabletSchemaSPtr create_ann_tablet_schema() { // DUP_KEYS schema with a single non-nullable ARRAY<FLOAT> column "arr1" (unique id 1)
+        TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+        TabletSchemaPB tablet_schema_pb;
+        tablet_schema_pb.set_keys_type(DUP_KEYS);
+        tablet_schema->init_from_pb(tablet_schema_pb);
+        // Select the V2 index storage format by writing the schema member directly
+        tablet_schema->_inverted_index_storage_format = 
InvertedIndexStorageFormatPB::V2;
+
+        TabletColumn array_column; // the vector column the ANN tests index
+        array_column.set_name("arr1");
+        array_column.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY);
+        array_column.set_unique_id(1);
+        array_column.set_length(0);
+        array_column.set_index_length(0);
+        array_column.set_is_nullable(false);
+
+        TabletColumn child_column; // FLOAT element type of the array
+        child_column.set_name("arr_sub_float");
+        child_column.set_type(FieldType::OLAP_FIELD_TYPE_FLOAT);
+        child_column.set_length(INT_MAX);
+        array_column.add_sub_column(child_column);
+        tablet_schema->append_column(array_column);
+        return tablet_schema;
+    }
+
     TabletMetaSharedPtr create_tablet_meta() {
         TabletMetaPB tablet_meta_pb;
         tablet_meta_pb.set_table_id(1);
@@ -177,7 +202,7 @@ TEST_F(IndexBuilderTest, BasicBuildTest) {
     EXPECT_EQ(builder._alter_index_ids.size(), 1);
 }
 
-TEST_F(IndexBuilderTest, DropIndexTest) {
+TEST_F(IndexBuilderTest, DropInvertedIndexTest) {
     // 0. prepare tablet path
     auto tablet_path = _absolute_dir + "/" + std::to_string(15676);
     _tablet->_tablet_path = tablet_path;
@@ -253,6 +278,7 @@ TEST_F(IndexBuilderTest, DropIndexTest) {
 
     // 7. Prepare index for dropping
     TOlapTableIndex drop_index;
+    drop_index.index_type = TIndexType::INVERTED;
     drop_index.index_id = 1;
     drop_index.columns.emplace_back("k1");
     _alter_indexes.push_back(drop_index);
@@ -312,7 +338,168 @@ TEST_F(IndexBuilderTest, DropIndexTest) {
     //EXPECT_FALSE(tablet_schema->has_inverted_index_with_index_id(1));
 }
 
-TEST_F(IndexBuilderTest, BuildIndexAfterWritingDataTest) {
+TEST_F(IndexBuilderTest, DropAnnIndexTest) {
+    // Prepare an empty tablet directory (remove leftovers, then recreate it)
+    auto tablet_path = _absolute_dir + "/" + std::to_string(15676);
+    _tablet->_tablet_path = tablet_path;
+    
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+    
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+    RowsetSharedPtr rowset;
+
+    // ANN index properties: hnsw index, l2_distance metric, dim 4, max_degree 16
+    std::map<std::string, std::string> properties;
+    properties["index_type"] = "hnsw";
+    properties["metric_type"] = "l2_distance";
+    properties["dim"] = "4";
+    properties["max_degree"] = "16";
+
+    // Register the ANN index (id 1) on column arr1 in the schema before any data is written
+    TabletIndex initial_index;
+    initial_index._index_id = 1;
+    initial_index._index_name = "arr1_index";
+    initial_index._index_type = IndexType::ANN;
+    initial_index._col_unique_ids.push_back(1); // unique_id for arr1
+    initial_index._properties = properties;
+
+    _tablet_schema = create_ann_tablet_schema();
+    _tablet_schema->append_index(std::move(initial_index));
+
+    // Create a rowset writer context (rowset id and tablet id are both 15676)
+    RowsetWriterContext writer_context;
+    writer_context.rowset_id.init(15676);
+    writer_context.tablet_id = 15676;
+    writer_context.tablet_schema_hash = 567997577;
+    writer_context.partition_id = 10;
+    writer_context.rowset_type = BETA_ROWSET;
+    writer_context.tablet_path = tablet_path;
+    writer_context.rowset_state = VISIBLE;
+    writer_context.tablet_schema = _tablet_schema;
+    writer_context.version.first = 10;
+    writer_context.version.second = 10;
+
+    
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+    // Create a rowset writer
+    auto res = RowsetFactory::create_rowset_writer(*_engine_ref, 
writer_context, false);
+    ASSERT_TRUE(res.has_value()) << res.error();
+    auto rowset_writer = std::move(res).value();
+
+    // Write data to the rowset
+    {
+        vectorized::DataTypePtr inner_float = 
std::make_shared<vectorized::DataTypeFloat32>();
+        vectorized::DataTypePtr array_type =
+                std::make_shared<vectorized::DataTypeArray>(inner_float);
+
+        // create a MutableColumnPtr
+        vectorized::MutableColumnPtr col = array_type->create_column();
+        // row0
+        {
+            vectorized::Array arr;
+            arr.push_back(vectorized::Field::create_field<TYPE_FLOAT>(1.0F));
+            arr.push_back(vectorized::Field::create_field<TYPE_FLOAT>(2.0F));
+            arr.push_back(vectorized::Field::create_field<TYPE_FLOAT>(3.0F));
+            arr.push_back(vectorized::Field::create_field<TYPE_FLOAT>(4.0F));
+            col->insert(vectorized::Field::create_field<TYPE_ARRAY>(arr));
+        }
+        // row1
+        {
+            vectorized::Array arr;
+            arr.push_back(vectorized::Field::create_field<TYPE_FLOAT>(5.0F));
+            arr.push_back(vectorized::Field::create_field<TYPE_FLOAT>(6.0F));
+            arr.push_back(vectorized::Field::create_field<TYPE_FLOAT>(7.0F));
+            arr.push_back(vectorized::Field::create_field<TYPE_FLOAT>(8.0F));
+            col->insert(vectorized::Field::create_field<TYPE_ARRAY>(arr));
+        }
+        // wrap the constructed column into a ColumnWithTypeAndName
+        vectorized::ColumnPtr column_array = std::move(col);
+        vectorized::ColumnWithTypeAndName type_and_name(column_array, 
array_type, "arr1");
+
+        // construct Block (containing only this column), with 2 rows
+        vectorized::Block block;
+        block.insert(type_and_name);
+
+        // Add the block to the rowset
+        Status s = rowset_writer->add_block(&block);
+        ASSERT_TRUE(s.ok()) << s.to_string();
+
+        // Flush the writer
+        s = rowset_writer->flush();
+        ASSERT_TRUE(s.ok()) << s.to_string();
+
+        // Build the rowset
+        ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+        // Add the rowset to the tablet
+        ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+    }
+
+    // Verify index exists before dropping
+    EXPECT_TRUE(_tablet_schema->has_ann_index());
+    EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(1));
+
+    // Prepare index for dropping
+    TOlapTableIndex drop_index;
+    drop_index.index_type = TIndexType::type::ANN;
+    drop_index.index_id = 1;
+    drop_index.index_name = "arr1_index";
+    drop_index.columns.emplace_back("arr1");
+    _alter_indexes.clear();
+    _alter_indexes.push_back(drop_index);
+
+    // Create IndexBuilder with drop operation
+    IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), 
_tablet, _columns,
+                         _alter_indexes, true);
+
+    // Initialize and verify
+    auto status = builder.init();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+    EXPECT_EQ(builder._alter_index_ids.size(), 1);
+
+    // Execute drop operation
+    status = builder.do_build_inverted_index();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+
+    // Verify the result on disk after the drop
+    // the tablet directory itself must still exist
+    bool exists = false;
+    EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, 
&exists).ok());
+    EXPECT_TRUE(exists);
+
+    // Count files in the single tablet directory: "15676_0.*" names feed the old_* counters, "0200...0_0.*" names feed the new_* counters
+    std::vector<io::FileInfo> files;
+    bool dir_exists = false;
+    EXPECT_TRUE(io::global_local_filesystem()->list(tablet_path, true, &files, 
&dir_exists).ok());
+    EXPECT_TRUE(dir_exists);
+    int new_idx_file_count = 0;
+    int new_dat_file_count = 0;
+    int old_idx_file_count = 0;
+    int old_dat_file_count = 0;
+    for (const auto& file : files) {
+        std::string filename = file.file_name;
+        if (filename.find("15676_0.idx") != std::string::npos) {
+            old_idx_file_count++;
+        }
+        if (filename.find("15676_0.dat") != std::string::npos) {
+            old_dat_file_count++;
+        }
+        if 
(filename.find("020000000000000100000000000000000000000000000000_0.idx") !=
+            std::string::npos) {
+            new_idx_file_count++;
+        }
+        if 
(filename.find("020000000000000100000000000000000000000000000000_0.dat") !=
+            std::string::npos) {
+            new_dat_file_count++;
+        }
+    }
+    // The old-named files keep both .idx and .dat; the new-named files have a .dat but no .idx
+    EXPECT_EQ(old_idx_file_count, 1) << "Tablet path should have 1 .idx file 
before drop";
+    EXPECT_EQ(old_dat_file_count, 1) << "Tablet path should have 1 .dat file 
before drop";
+    EXPECT_EQ(new_idx_file_count, 0) << "Tablet path should have no .idx file 
after drop";
+    EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file 
after drop";
+}
+
+TEST_F(IndexBuilderTest, BuildInvertedIndexAfterWritingDataTest) {
     // 0. prepare tablet path
     auto tablet_path = _absolute_dir + "/" + std::to_string(14673);
     _tablet->_tablet_path = tablet_path;
@@ -459,6 +646,184 @@ TEST_F(IndexBuilderTest, BuildIndexAfterWritingDataTest) {
     //EXPECT_TRUE(tablet_schema->has_inverted_index_with_index_id(2));
 }
 
+TEST_F(IndexBuilderTest, BuildAnnIndexAfterWritingDataTest) {
+    // 0. prepare tablet path
+    auto tablet_path = _absolute_dir + "/" + std::to_string(14686);
+    
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+    
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+    // 1. Prepare data for writing
+    RowsetSharedPtr rowset;
+    const int num_rows = 100;
+
+    // 2. Use ANN schema with array<float> column
+    auto ann_schema = create_ann_tablet_schema();
+
+    // 3. Update schema in tablet meta
+    TabletMetaPB tablet_meta_pb;
+    _tablet_meta->to_meta_pb(&tablet_meta_pb, false);
+
+    TabletSchemaPB ann_schema_pb;
+    ann_schema->to_schema_pb(&ann_schema_pb);
+    tablet_meta_pb.mutable_schema()->CopyFrom(ann_schema_pb);
+
+    _tablet_meta->init_from_pb(tablet_meta_pb);
+
+    // 4. Reinitialize tablet to use new schema
+    _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta, 
_data_dir.get());
+    _tablet->_tablet_path = tablet_path;
+    ASSERT_TRUE(_tablet->init().ok());
+
+    _tablet_schema = ann_schema;
+
+    // 5. Create a rowset writer context (writes under directory 15686, not the tablet path 14686)
+    RowsetWriterContext writer_context;
+    writer_context.rowset_id.init(15686);
+    writer_context.tablet_id = 15686;
+    writer_context.tablet_schema_hash = 567997577;
+    writer_context.partition_id = 10;
+    writer_context.rowset_type = BETA_ROWSET;
+    writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15686);
+    writer_context.rowset_state = VISIBLE;
+    writer_context.tablet_schema = _tablet_schema;
+    writer_context.version.first = 10;
+    writer_context.version.second = 10;
+
+    
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+    // 6. Create a rowset writer
+    auto res = RowsetFactory::create_rowset_writer(*_engine_ref, 
writer_context, false);
+    ASSERT_TRUE(res.has_value()) << res.error();
+    auto rowset_writer = std::move(res).value();
+
+    // 7. Write data to the rowset with float arrays
+    {
+        vectorized::DataTypePtr inner_float = 
std::make_shared<vectorized::DataTypeFloat32>();
+        vectorized::DataTypePtr array_type =
+                std::make_shared<vectorized::DataTypeArray>(inner_float);
+
+        // create a MutableColumnPtr
+        vectorized::MutableColumnPtr col = array_type->create_column();
+
+        // Add data for each row - arrays of 4 floats (matching dim=4 in 
properties)
+        for (int i = 0; i < num_rows; ++i) {
+            vectorized::Array arr;
+            // Create 4-dimensional float vectors
+            
arr.push_back(vectorized::Field::create_field<TYPE_FLOAT>(static_cast<float>(i 
% 10)));
+            arr.push_back(
+                    
vectorized::Field::create_field<TYPE_FLOAT>(static_cast<float>((i + 1) % 10)));
+            arr.push_back(
+                    
vectorized::Field::create_field<TYPE_FLOAT>(static_cast<float>((i + 2) % 10)));
+            arr.push_back(
+                    
vectorized::Field::create_field<TYPE_FLOAT>(static_cast<float>((i + 3) % 10)));
+            col->insert(vectorized::Field::create_field<TYPE_ARRAY>(arr));
+        }
+
+        // wrap the constructed column into a ColumnWithTypeAndName
+        vectorized::ColumnPtr column_array = std::move(col);
+        vectorized::ColumnWithTypeAndName type_and_name(column_array, 
array_type, "arr1");
+
+        // construct Block (containing only this column), with num_rows rows
+        vectorized::Block block;
+        block.insert(type_and_name);
+
+        // Add the block to the rowset
+        Status s = rowset_writer->add_block(&block);
+        ASSERT_TRUE(s.ok()) << s.to_string();
+
+        // Flush the writer
+        s = rowset_writer->flush();
+        ASSERT_TRUE(s.ok()) << s.to_string();
+
+        // Build the rowset
+        ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+        // Add the rowset to the tablet
+        ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+    }
+
+    // 8. Prepare ANN index for building
+    std::map<std::string, std::string> properties;
+    properties["index_type"] = "hnsw";
+    properties["metric_type"] = "l2_distance";
+    properties["dim"] = "4";
+    properties["max_degree"] = "16";
+
+    TOlapTableIndex ann_index;
+    ann_index.__set_index_id(1);
+    ann_index.__set_columns({"arr1"});
+    ann_index.__set_index_name("arr1_ann_index");
+    ann_index.__set_index_type(TIndexType::ANN);
+    // NOTE: wrong way, it doesn't set __isset.properties flag
+    // ann_index.properties = properties;
+    ann_index.__set_properties(properties);
+    _alter_indexes.push_back(ann_index);
+
+    // 9. Create IndexBuilder
+    IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), 
_tablet, _columns,
+                         _alter_indexes, false);
+
+    // 10. Initialize and verify
+    auto status = builder.init();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+    EXPECT_EQ(builder._alter_index_ids.size(), 1);
+
+    // 11. Build ANN index
+    status = builder.do_build_inverted_index();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+
+    // 12. Check paths and files
+    auto old_tablet_path = _absolute_dir + "/" + std::to_string(15686);
+    auto new_tablet_path = _absolute_dir + "/" + std::to_string(14686);
+    bool old_exists = false;
+    bool new_exists = false;
+    EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, 
&old_exists).ok());
+    EXPECT_TRUE(old_exists);
+    EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, 
&new_exists).ok());
+    EXPECT_TRUE(new_exists);
+
+    // Old directory (15686, where the rowset was written): expect data but no index file
+    std::vector<io::FileInfo> old_files;
+    bool old_dir_exists = false;
+    EXPECT_TRUE(io::global_local_filesystem()
+                        ->list(old_tablet_path, true, &old_files, 
&old_dir_exists)
+                        .ok());
+    EXPECT_TRUE(old_dir_exists);
+    int idx_file_count = 0;
+    int dat_file_count = 0;
+    for (const auto& file : old_files) {
+        std::string filename = file.file_name;
+        if (filename.find(".idx") != std::string::npos) {
+            idx_file_count++;
+        }
+        if (filename.find(".dat") != std::string::npos) {
+            dat_file_count++;
+        }
+    }
+    EXPECT_EQ(idx_file_count, 0) << "Old directory should contain exactly 0 
.idx file";
+    EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 
.dat file";
+
+    std::vector<io::FileInfo> new_files; // new directory (14686, the tablet path): expect one .idx and one .dat
+    bool new_dir_exists = false;
+    EXPECT_TRUE(io::global_local_filesystem()
+                        ->list(new_tablet_path, true, &new_files, 
&new_dir_exists)
+                        .ok());
+    EXPECT_TRUE(new_dir_exists);
+    int new_idx_file_count = 0;
+    int new_dat_file_count = 0;
+    for (const auto& file : new_files) {
+        std::string filename = file.file_name;
+        if (filename.find(".idx") != std::string::npos) {
+            new_idx_file_count++;
+        }
+        if (filename.find(".dat") != std::string::npos) {
+            new_dat_file_count++;
+        }
+    }
+    EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 
1 .idx files";
+    EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 
1 .dat file";
+}
+
 TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTest) {
     // 0. prepare tablet path
     auto tablet_path = _absolute_dir + "/" + std::to_string(14675);
diff --git a/be/test/olap/vector_search/ann_index_writer_test.cpp 
b/be/test/olap/vector_search/ann_index_writer_test.cpp
index ad4b6881c42..6a6af3b761f 100644
--- a/be/test/olap/vector_search/ann_index_writer_test.cpp
+++ b/be/test/olap/vector_search/ann_index_writer_test.cpp
@@ -89,6 +89,7 @@ protected:
 
         // Create tablet index
         _tablet_index = std::make_unique<TabletIndex>();
+        _tablet_index->_index_type = IndexType::ANN;
         _tablet_index->_properties = _properties;
         _tablet_index->_index_id = 1;
         _tablet_index->_index_name = "test_ann_index";
@@ -416,6 +417,7 @@ TEST_F(AnnIndexWriterTest, TestInvalidMetricType) {
     properties["metric_type"] = "invalid_metric";
 
     auto tablet_index = std::make_unique<TabletIndex>();
+    tablet_index->_index_type = IndexType::ANN;
     tablet_index->_properties = properties;
     tablet_index->_index_id = 1;
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java 
b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java
index b8af31e5049..ae6c399c318 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java
@@ -2116,7 +2116,7 @@ public class SchemaChangeHandler extends AlterHandler {
                     lightSchemaChange = false;
 
                     // ngram_bf index can do light_schema_change in both local 
and cloud mode
-                    // inverted index can only do light_schema_change in local 
mode
+                    // inverted index and ann index can only do 
light_schema_change in local mode
                     if 
(index.isLightAddIndexSupported(enableAddIndexForNewData)) {
                         alterIndexes.add(index);
                         isDropIndex = false;
@@ -2887,7 +2887,7 @@ public class SchemaChangeHandler extends AlterHandler {
     public void addIndexChangeJob(IndexChangeJob indexChangeJob) {
         indexChangeJobs.put(indexChangeJob.getJobId(), indexChangeJob);
         runnableIndexChangeJob.put(indexChangeJob.getJobId(), indexChangeJob);
-        LOG.info("add inverted index job {}", indexChangeJob.getJobId());
+        LOG.info("add inverted/ann index change job {}", 
indexChangeJob.getJobId());
     }
 
     private void clearFinishedOrCancelledSchemaChangeJobV2() {
@@ -2906,7 +2906,7 @@ public class SchemaChangeHandler extends AlterHandler {
             IndexChangeJob indexChangeJob = iterator.next().getValue();
             if (indexChangeJob.isExpire()) {
                 iterator.remove();
-                LOG.info("remove expired inverted index job {}. finish at {}",
+                LOG.info("remove expired inverted/ann index change job {}. 
finish at {}",
                         indexChangeJob.getJobId(), 
TimeUtils.longToTimeString(indexChangeJob.getFinishedTimeMs()));
             }
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java
index 44200ea5a59..3c86e5bc454 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java
@@ -18,7 +18,6 @@
 package org.apache.doris.catalog;
 
 import org.apache.doris.analysis.IndexDef;
-import org.apache.doris.analysis.IndexDef.IndexType;
 import org.apache.doris.analysis.InvertedIndexUtil;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.Config;
@@ -189,12 +188,14 @@ public class Index implements Writable {
 
     // Whether the index can be changed in light mode
     public boolean isLightIndexChangeSupported() {
-        return indexType == IndexDef.IndexType.INVERTED || indexType == 
IndexType.NGRAM_BF;
+        return indexType == IndexDef.IndexType.INVERTED
+                || indexType == IndexDef.IndexType.NGRAM_BF
+                || indexType == IndexDef.IndexType.ANN;
     }
 
     // Whether the index can be added in light mode
     // cloud mode supports light add for ngram_bf index and non-tokenized 
inverted index (parser="none")
-    // local mode supports light add for both inverted index and ngram_bf index
+    // local mode supports light add for inverted index, ann index and 
ngram_bf index
     // the rest of the index types do not support light add
     public boolean isLightAddIndexSupported(boolean enableAddIndexForNewData) {
         if (Config.isCloudMode()) {
@@ -206,7 +207,7 @@ public class Index implements Writable {
             return false;
         }
         return (indexType == IndexDef.IndexType.NGRAM_BF && 
enableAddIndexForNewData)
-                || (indexType == IndexDef.IndexType.INVERTED);
+                || (indexType == IndexDef.IndexType.INVERTED) || (indexType == 
IndexDef.IndexType.ANN);
     }
 
     public String getInvertedIndexCustomAnalyzer() {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BuildIndexOp.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BuildIndexOp.java
index 8b83f690125..d34c24feb63 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BuildIndexOp.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BuildIndexOp.java
@@ -142,7 +142,15 @@ public class BuildIndexOp extends AlterTableOp {
             throw new AnalysisException(indexType + " index is not needed to 
build.");
         }
 
-        indexDef = new IndexDefinition(indexName, partitionNamesInfo, 
indexType);
+        if (indexType == IndexDef.IndexType.ANN) {
+            List<String> columns = existedIdx.getColumns();
+            Map<String, String> properties = existedIdx.getProperties();
+            String comment = existedIdx.getComment();
+            indexDef = new IndexDefinition(indexName, false, columns, "ANN", 
properties, comment);
+        } else {
+            indexDef = new IndexDefinition(indexName, partitionNamesInfo, 
indexType);
+        }
+
         if (!table.isPartitionedTable()) {
             List<String> specifiedPartitions = indexDef.getPartitionNames();
             if (!specifiedPartitions.isEmpty()) {
@@ -150,10 +158,6 @@ public class BuildIndexOp extends AlterTableOp {
                     + " is not partitioned, cannot build index with 
partitions.");
             }
         }
-        if (indexDef.getIndexType() == IndexDef.IndexType.ANN) {
-            throw new AnalysisException(
-                "ANN index can only be created during table creation, not 
through BUILD INDEX.");
-        }
         indexDef.validate();
         this.index = existedIdx.clone();
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateIndexOp.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateIndexOp.java
index 332859b536d..7e41b6f5e18 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateIndexOp.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateIndexOp.java
@@ -20,7 +20,6 @@ package org.apache.doris.nereids.trees.plans.commands.info;
 import org.apache.doris.alter.AlterOpType;
 import org.apache.doris.analysis.AlterTableClause;
 import org.apache.doris.analysis.CreateIndexClause;
-import org.apache.doris.analysis.IndexDef;
 import org.apache.doris.catalog.Index;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.UserException;
@@ -79,11 +78,6 @@ public class CreateIndexOp extends AlterTableOp {
             tableName.analyze(ctx);
         }
 
-        if (indexDef.getIndexType() == IndexDef.IndexType.ANN) {
-            throw new AnalysisException(
-                "ANN index can only be created during table creation, not 
through CREATE INDEX.");
-        }
-
         indexDef.validate();
         index = indexDef.translateToCatalogStyle();
     }
diff --git a/regression-test/suites/ann_index_p0/build_ann_index_test.groovy 
b/regression-test/suites/ann_index_p0/build_ann_index_test.groovy
new file mode 100644
index 00000000000..e8de0d3d2d1
--- /dev/null
+++ b/regression-test/suites/ann_index_p0/build_ann_index_test.groovy
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("build_ann_index_test") {
+    if (isCloudMode()) {
+        return // TODO enable this case after enable light index in cloud mode
+    }
+
+    // polling settings and shared state used by the wait helpers below
+    def timeout = 30000
+    def delta_time = 1000
+    def alter_res = "null"
+    def useTime = 0
+
+    // Polls SHOW ALTER TABLE COLUMN every delta_time ms until the newest job on tableName reports FINISHED; fails the suite after opTimeout ms.
+    def wait_for_latest_op_on_table_finish = { tableName, opTimeout ->
+        for(int t = delta_time; t <= opTimeout; t += delta_time){
+            alter_res = sql("""SHOW ALTER TABLE COLUMN WHERE TableName = "${tableName}" ORDER BY CreateTime DESC LIMIT 1;""").toString()
+            if(alter_res.contains("FINISHED")) {
+                sleep(3000) // wait change table state to normal
+                logger.info(tableName + " latest alter job finished, detail: " + alter_res)
+                return
+            }
+            sleep(delta_time)
+        }
+        // Reaching this point means FINISHED was never observed. The old `useTime <= opTimeout` check could not fail, because useTime never exceeded opTimeout.
+        assertTrue(false, "wait_for_latest_op_on_table_finish timeout")
+    }
+
+    // Polls SHOW BUILD INDEX every delta_time ms until the last listed job (rows ordered by JobId) for tableName is FINISHED or CANCELLED.
+    // Returns "SKIPPED" when no job is listed, otherwise the terminal state; fails the suite if neither happens within opTimeout ms.
+    def wait_for_last_build_index_on_table_finish = { tableName, opTimeout ->
+        for(int t = delta_time; t <= opTimeout; t += delta_time){
+            alter_res = sql """SHOW BUILD INDEX WHERE TableName = "${tableName}" ORDER BY JobId """
+
+            if (alter_res.size() == 0) {
+                logger.info(tableName + " last index job finished")
+                return "SKIPPED"
+            }
+            // Rows exist past this point; column 7 of the last row is read as the job state.
+            def last_job_state = alter_res[alter_res.size()-1][7];
+            if (last_job_state == "FINISHED" || last_job_state == "CANCELLED") {
+                sleep(3000) // wait change table state to normal
+                logger.info(tableName + " last index job finished, state: " + last_job_state + ", detail: " + alter_res)
+                return last_job_state;
+            }
+            sleep(delta_time)
+        }
+        logger.info("wait_for_last_build_index_on_table_finish debug: " + alter_res)
+        // No terminal state was seen. The old `useTime <= opTimeout` check could never fail, so a timeout used to return "wait_timeout" unnoticed.
+        assertTrue(false, "wait_for_last_build_index_on_table_finish timeout")
+    }
+
+    sql "set enable_common_expr_pushdown=true;"
+    sql "drop table if exists table_build_ann_index_test;"
+    def tableName = "table_build_ann_index_test"
+
+    // case 1: create table -- insert data -- create index -- build index
+    sql """
+    CREATE TABLE `table_build_ann_index_test` (
+      `id` int NOT NULL COMMENT "",
+      `embedding` array<float> NOT NULL COMMENT ""
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`id`) COMMENT "OLAP"
+    DISTRIBUTED BY HASH(`id`) BUCKETS 2
+    PROPERTIES (
+      "replication_num" = "1"
+    );
+    """
+
+    sql """
+    INSERT INTO table_build_ann_index_test (id, embedding) VALUES
+        (0, [39.906116, 10.495334, 54.08394, 88.67262, 55.243687, 10.162686, 36.335983, 38.684258]),
+        (1, [62.759315, 97.15586, 25.832521, 39.604908, 88.76715, 72.64085, 9.688437, 17.721428]),
+        (2, [15.447449, 59.7771, 65.54516, 12.973712, 99.685135, 72.080734, 85.71118, 99.35976]),
+        (3, [72.26747, 46.42257, 32.368374, 80.50209, 5.777631, 98.803314, 7.0915947, 68.62693]),
+        (4, [22.098177, 74.10027, 63.634556, 4.710955, 12.405106, 79.39356, 63.014366, 68.67834]),
+        (5, [27.53003, 72.1106, 50.891026, 38.459953, 68.30715, 20.610682, 94.806274, 45.181377]),
+        (6, [77.73215, 64.42907, 71.50025, 43.85641, 94.42648, 50.04773, 65.12575, 68.58207]),
+        (7, [2.1537063, 82.667885, 16.171143, 71.126656, 5.335274, 40.286068, 11.943586, 3.69409]),
+        (8, [54.435013, 56.800594, 59.335514, 55.829235, 85.46627, 33.388138, 11.076194, 20.480877]),
+        (9, [76.197945, 60.623528, 84.229805, 31.652937, 71.82595, 48.04684, 71.29212, 30.282396]);
+    """
+
+    // CREATE INDEX
+    sql """
+    CREATE INDEX idx_test_ann ON table_build_ann_index_test(`embedding`) USING ANN PROPERTIES(
+        "index_type"="hnsw",
+        "metric_type"="l2_distance",
+        "dim"="8"
+    );
+    """
+    wait_for_latest_op_on_table_finish(tableName, timeout)
+
+    // BUILD INDEX must end in FINISHED; the wait helper also returns non-success states (e.g. CANCELLED), which previously went unchecked.
+    sql "BUILD INDEX idx_test_ann ON table_build_ann_index_test;"
+    assertTrue(wait_for_last_build_index_on_table_finish(tableName, timeout) == "FINISHED", "BUILD INDEX idx_test_ann did not finish successfully")
+}
diff --git a/regression-test/suites/ann_index_p0/create_ann_index_test.groovy b/regression-test/suites/ann_index_p0/create_ann_index_test.groovy
index e2fd6f75a25..2ceaf61f6c7 100644
--- a/regression-test/suites/ann_index_p0/create_ann_index_test.groovy
+++ b/regression-test/suites/ann_index_p0/create_ann_index_test.groovy
@@ -17,7 +17,6 @@
 
 suite("create_ann_index_test") {
     sql "set enable_common_expr_pushdown=true;"
-    // Test that CREATE INDEX for ANN is not supported
     sql "drop table if exists tbl_not_null"
     sql """
     CREATE TABLE `tbl_not_null` (
@@ -31,16 +30,14 @@ suite("create_ann_index_test") {
     );
     """
 
-    test {
-        sql """
-            CREATE INDEX idx_test_ann ON tbl_not_null(`embedding`) USING ANN PROPERTIES(
-                "index_type"="hnsw",
-                "metric_type"="l2_distance",
-                "dim"="1"
-            );
-        """
-        exception "ANN index can only be created during table creation, not through CREATE INDEX"
-    }
+    // Test that CREATE INDEX for ANN is supported
+    sql """
+    CREATE INDEX idx_test_ann ON tbl_not_null(`embedding`) USING ANN PROPERTIES(
+        "index_type"="hnsw",
+        "metric_type"="l2_distance",
+        "dim"="1"
+    );
+    """
 
     // Test cases for creating tables with ANN indexes
 
@@ -343,7 +340,7 @@ suite("create_ann_index_test") {
     """
 
     sql "drop table if exists tbl_efconstruction"
-    
+
     test {
         sql """
             CREATE TABLE tbl_efconstruction (
@@ -389,4 +386,4 @@ suite("create_ann_index_test") {
 
         exception "ANN index is not supported in index format V1"
     }
-}
\ No newline at end of file
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to