This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 96c8bf418b6 branch-3.1: [feat](inverted index) Adding Storage Format 
V3 for Inverted Index #44414 #45805 (#52206)
96c8bf418b6 is described below

commit 96c8bf418b6eb6f7faa738668256416dc93e9910
Author: zzzxl <[email protected]>
AuthorDate: Tue Jun 24 23:50:25 2025 +0800

    branch-3.1: [feat](inverted index) Adding Storage Format V3 for Inverted 
Index #44414 #45805 (#52206)
    
    Cherry-pick from
    #44414
    #45805
---
 be/src/olap/inverted_index_parser.cpp              |   9 +
 be/src/olap/inverted_index_parser.h                |   5 +
 be/src/olap/rowset/beta_rowset.cpp                 |  18 +-
 .../char_filter/char_replace_char_filter.h         |   2 +-
 .../segment_v2/inverted_index_file_reader.cpp      |   2 +-
 .../segment_v2/inverted_index_file_writer.cpp      |  22 +--
 .../rowset/segment_v2/inverted_index_file_writer.h |  10 +-
 .../rowset/segment_v2/inverted_index_writer.cpp    |  22 +++
 be/src/olap/tablet_meta.cpp                        |   3 +
 .../segment_v2/inverted_index_file_writer_test.cpp |   2 +-
 .../segment_v2/inverted_index_writer_test.cpp      |  12 +-
 .../java/org/apache/doris/analysis/IndexDef.java   |   3 +-
 .../apache/doris/analysis/InvertedIndexUtil.java   |  33 +++-
 .../cloud/datasource/CloudInternalCatalog.java     |   6 +-
 .../apache/doris/common/util/PropertyAnalyzer.java |   6 +
 .../trees/plans/commands/info/IndexDefinition.java |   3 +-
 .../doris/analysis/InvertedIndexUtilTest.java      | 193 +++++++++++++++++++++
 .../apache/doris/common/PropertyAnalyzerTest.java  |  50 ++++++
 .../trees/plans/commands/IndexDefinitionTest.java  |   3 +-
 gensrc/proto/olap_file.proto                       |   1 +
 gensrc/thrift/Types.thrift                         |   7 +-
 .../inverted_index_p0/test_inverted_index_v3.out   | Bin 0 -> 223 bytes
 .../test_inverted_index_v3_fault_injection.groovy  |  60 +++++++
 .../test_inverted_index_v3.groovy                  | 117 +++++++++++++
 24 files changed, 551 insertions(+), 38 deletions(-)

diff --git a/be/src/olap/inverted_index_parser.cpp 
b/be/src/olap/inverted_index_parser.cpp
index b80539b1b4e..eeee8699981 100644
--- a/be/src/olap/inverted_index_parser.cpp
+++ b/be/src/olap/inverted_index_parser.cpp
@@ -152,4 +152,13 @@ std::string get_parser_stopwords_from_properties(
     }
 }
 
+std::string get_parser_dict_compression_from_properties(
+        const std::map<std::string, std::string>& properties) {
+    if (properties.find(INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY) != 
properties.end()) {
+        return properties.at(INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY);
+    } else {
+        return "";
+    }
+}
+
 } // namespace doris
diff --git a/be/src/olap/inverted_index_parser.h 
b/be/src/olap/inverted_index_parser.h
index ec7cf0c8e70..a48157bef14 100644
--- a/be/src/olap/inverted_index_parser.h
+++ b/be/src/olap/inverted_index_parser.h
@@ -91,6 +91,8 @@ const std::string INVERTED_INDEX_PARSER_LOWERCASE_KEY = 
"lower_case";
 
 const std::string INVERTED_INDEX_PARSER_STOPWORDS_KEY = "stopwords";
 
+const std::string INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY = 
"dict_compression";
+
 std::string inverted_index_parser_type_to_string(InvertedIndexParserType 
parser_type);
 
 InvertedIndexParserType get_inverted_index_parser_type_from_string(const 
std::string& parser_str);
@@ -127,4 +129,7 @@ std::string get_parser_lowercase_from_properties(
 std::string get_parser_stopwords_from_properties(
         const std::map<std::string, std::string>& properties);
 
+std::string get_parser_dict_compression_from_properties(
+        const std::map<std::string, std::string>& properties);
+
 } // namespace doris
diff --git a/be/src/olap/rowset/beta_rowset.cpp 
b/be/src/olap/rowset/beta_rowset.cpp
index df936f5f4ae..6b3648f78a9 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -719,10 +719,24 @@ Status 
BetaRowset::show_nested_index_file(rapidjson::Value* rowset_value,
                                           rapidjson::Document::AllocatorType& 
allocator) {
     const auto& fs = _rowset_meta->fs();
     auto storage_format = _schema->get_inverted_index_storage_format();
-    auto format_str = storage_format == InvertedIndexStorageFormatPB::V1 ? 
"V1" : "V2";
+    std::string format_str;
+    switch (storage_format) {
+    case InvertedIndexStorageFormatPB::V1:
+        format_str = "V1";
+        break;
+    case InvertedIndexStorageFormatPB::V2:
+        format_str = "V2";
+        break;
+    case InvertedIndexStorageFormatPB::V3:
+        format_str = "V3";
+        break;
+    default:
+        return Status::InternalError("inverted index storage format error");
+        break;
+    }
     auto rs_id = rowset_id().to_string();
     rowset_value->AddMember("rowset_id", rapidjson::Value(rs_id.c_str(), 
allocator), allocator);
-    rowset_value->AddMember("index_storage_format", 
rapidjson::Value(format_str, allocator),
+    rowset_value->AddMember("index_storage_format", 
rapidjson::Value(format_str.c_str(), allocator),
                             allocator);
     rapidjson::Value segments(rapidjson::kArrayType);
     for (int seg_id = 0; seg_id < num_segments(); ++seg_id) {
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h
 
b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h
index d9e5080d2d5..1e5e6f5d5ce 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h
@@ -17,7 +17,7 @@
 
 #pragma once
 
-#include <CLucene.h>
+#include <CLucene.h> // IWYU pragma: keep
 #include <CLucene/analysis/CharFilter.h>
 
 #include <bitset>
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
index 1b9440ae14b..fc08e4e8a41 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
@@ -81,7 +81,7 @@ Status InvertedIndexFileReader::_init_from(int32_t 
read_buffer_size, const io::I
 
         // 3. read file
         int32_t version = _stream->readInt(); // Read version number
-        if (version == InvertedIndexStorageFormatPB::V2) {
+        if (version >= InvertedIndexStorageFormatPB::V2) {
             DCHECK(version == _storage_format);
             int32_t numIndices = _stream->readInt(); // Read number of indices
             ReaderFileEntry* entry = nullptr;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
index 6a2f15b4476..30cf7a73cf3 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
@@ -156,7 +156,7 @@ Status InvertedIndexFileWriter::close() {
         }
     } else {
         try {
-            RETURN_IF_ERROR(write_v2());
+            RETURN_IF_ERROR(write());
             for (const auto& entry : _indices_dirs) {
                 const auto& dir = entry.second;
                 // delete index path, which contains separated inverted index 
files
@@ -296,7 +296,7 @@ Status InvertedIndexFileWriter::write_v1() {
     return Status::OK();
 }
 
-Status InvertedIndexFileWriter::write_v2() {
+Status InvertedIndexFileWriter::write() {
     std::unique_ptr<lucene::store::Directory, DirectoryDeleter> out_dir = 
nullptr;
     std::unique_ptr<lucene::store::IndexOutput> compound_file_output = nullptr;
     ErrorContext error_context;
@@ -304,10 +304,10 @@ Status InvertedIndexFileWriter::write_v2() {
         // Calculate header length and initialize offset
         int64_t current_offset = headerLength();
         // Prepare file metadata
-        auto file_metadata = prepare_file_metadata_v2(current_offset);
+        auto file_metadata = prepare_file_metadata(current_offset);
 
         // Create output stream
-        auto result = create_output_stream_v2();
+        auto result = create_output_stream();
         out_dir = std::move(result.first);
         compound_file_output = std::move(result.second);
 
@@ -318,7 +318,7 @@ Status InvertedIndexFileWriter::write_v2() {
         write_index_headers_and_metadata(compound_file_output.get(), 
file_metadata);
 
         // Copy file data
-        copy_files_data_v2(compound_file_output.get(), file_metadata);
+        copy_files_data(compound_file_output.get(), file_metadata);
 
         _total_file_size = compound_file_output->getFilePointer();
         _file_info.set_index_size(_total_file_size);
@@ -473,7 +473,7 @@ void 
InvertedIndexFileWriter::write_header_and_data_v1(lucene::store::IndexOutpu
 
 std::pair<std::unique_ptr<lucene::store::Directory, DirectoryDeleter>,
           std::unique_ptr<lucene::store::IndexOutput>>
-InvertedIndexFileWriter::create_output_stream_v2() {
+InvertedIndexFileWriter::create_output_stream() {
     io::Path index_path 
{InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix)};
 
     auto* out_dir = DorisFSDirectoryFactory::getDirectory(_fs, 
index_path.parent_path().c_str());
@@ -489,15 +489,15 @@ InvertedIndexFileWriter::create_output_stream_v2() {
 
 void 
InvertedIndexFileWriter::write_version_and_indices_count(lucene::store::IndexOutput*
 output) {
     // Write the version number
-    output->writeInt(InvertedIndexStorageFormatPB::V2);
+    output->writeInt(_storage_format);
 
     // Write the number of indices
     const auto num_indices = static_cast<uint32_t>(_indices_dirs.size());
     output->writeInt(num_indices);
 }
 
-std::vector<InvertedIndexFileWriter::FileMetadata>
-InvertedIndexFileWriter::prepare_file_metadata_v2(int64_t& current_offset) {
+std::vector<InvertedIndexFileWriter::FileMetadata> 
InvertedIndexFileWriter::prepare_file_metadata(
+        int64_t& current_offset) {
     std::vector<FileMetadata> file_metadata;
     std::vector<FileMetadata> meta_files;
     std::vector<FileMetadata> normal_files;
@@ -598,8 +598,8 @@ void 
InvertedIndexFileWriter::write_index_headers_and_metadata(
     }
 }
 
-void InvertedIndexFileWriter::copy_files_data_v2(lucene::store::IndexOutput* 
output,
-                                                 const 
std::vector<FileMetadata>& file_metadata) {
+void InvertedIndexFileWriter::copy_files_data(lucene::store::IndexOutput* 
output,
+                                              const std::vector<FileMetadata>& 
file_metadata) {
     const int64_t buffer_length = 16384;
     uint8_t buffer[buffer_length];
 
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h 
b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h
index ba42ffdceb1..ab7cdbff152 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h
@@ -71,7 +71,7 @@ public:
     Status delete_index(const TabletIndex* index_meta);
     Status initialize(InvertedIndexDirectoryMap& indices_dirs);
     virtual ~InvertedIndexFileWriter() = default;
-    Status write_v2();
+    Status write();
     Status write_v1();
     Status close();
     const InvertedIndexFileInfo* get_index_file_info() const {
@@ -122,7 +122,7 @@ private:
     // Helper functions specific to write_v2
     virtual std::pair<std::unique_ptr<lucene::store::Directory, 
DirectoryDeleter>,
                       std::unique_ptr<lucene::store::IndexOutput>>
-    create_output_stream_v2();
+    create_output_stream();
     void write_version_and_indices_count(lucene::store::IndexOutput* output);
     struct FileMetadata {
         int64_t index_id;
@@ -141,11 +141,11 @@ private:
                   length(len),
                   directory(dir) {}
     };
-    std::vector<FileMetadata> prepare_file_metadata_v2(int64_t& 
current_offset);
+    std::vector<FileMetadata> prepare_file_metadata(int64_t& current_offset);
     virtual void write_index_headers_and_metadata(lucene::store::IndexOutput* 
output,
                                                   const 
std::vector<FileMetadata>& file_metadata);
-    void copy_files_data_v2(lucene::store::IndexOutput* output,
-                            const std::vector<FileMetadata>& file_metadata);
+    void copy_files_data(lucene::store::IndexOutput* output,
+                         const std::vector<FileMetadata>& file_metadata);
     Status _insert_directory_into_map(int64_t index_id, const std::string& 
index_suffix,
                                       std::shared_ptr<DorisFSDirectory> dir);
     // Member variables...
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index d8688a34acc..633483e7ae3 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -219,6 +219,28 @@ public:
         (*field)->setOmitTermFreqAndPositions(
                 
!(get_parser_phrase_support_string_from_properties(_index_meta->properties()) ==
                   INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES));
+        DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::create_field_v3", {
+            if (_index_file_writer->get_storage_format() != 
InvertedIndexStorageFormatPB::V3) {
+                return 
Status::Error<doris::ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+                        "debug point: 
InvertedIndexColumnWriterImpl::create_field_v3 error");
+            }
+        })
+        if (_index_file_writer->get_storage_format() >= 
InvertedIndexStorageFormatPB::V3) {
+            (*field)->setIndexVersion(IndexVersion::kV3);
+            // Only effective in v3
+            std::string dict_compression =
+                    
get_parser_dict_compression_from_properties(_index_meta->properties());
+            
DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::create_field_dic_compression", {
+                if (dict_compression != INVERTED_INDEX_PARSER_TRUE) {
+                    return 
Status::Error<doris::ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+                            "debug point: "
+                            
"InvertedIndexColumnWriterImpl::create_field_dic_compression error");
+                }
+            })
+            if (dict_compression == INVERTED_INDEX_PARSER_TRUE) {
+                (*field)->updateFlag(FlagBits::DICT_COMPRESS);
+            }
+        }
         return Status::OK();
     }
 
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index 81113f593e5..ba20216e0c3 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -214,6 +214,9 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t 
partition_id, int64_t tablet_id
     case TInvertedIndexFileStorageFormat::V2:
         
schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
         break;
+    case TInvertedIndexFileStorageFormat::V3:
+        
schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V3);
+        break;
     default:
         
schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
         break;
diff --git a/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp 
b/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
index 8fd66f47c24..49d4ce7c07b 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
@@ -596,7 +596,7 @@ TEST_F(InvertedIndexFileWriterTest, 
WriteV2ExceptionHandlingTest) {
     EXPECT_CALL(writer_mock, write_index_headers_and_metadata(::testing::_, 
::testing::_))
             .WillOnce(::testing::Throw(CLuceneError(CL_ERR_IO, "Simulated 
exception", false)));
 
-    Status status = writer_mock.write_v2();
+    Status status = writer_mock.write();
     ASSERT_FALSE(status.ok());
     ASSERT_EQ(status.code(), ErrorCode::INVERTED_INDEX_CLUCENE_ERROR);
 }
diff --git a/be/test/olap/rowset/segment_v2/inverted_index_writer_test.cpp 
b/be/test/olap/rowset/segment_v2/inverted_index_writer_test.cpp
index d4b52e749c4..4a2eec8d85f 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_writer_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_writer_test.cpp
@@ -350,7 +350,7 @@ public:
         status = column_writer->finish();
         EXPECT_TRUE(status.ok()) << status;
 
-        status = index_file_writer->write_v2();
+        status = index_file_writer->write();
         EXPECT_TRUE(status.ok()) << status;
 
         // Verify the terms stats
@@ -418,7 +418,7 @@ public:
         status = column_writer->finish();
         EXPECT_TRUE(status.ok()) << status;
 
-        status = index_file_writer->write_v2();
+        status = index_file_writer->write();
         EXPECT_TRUE(status.ok()) << status;
 
         // Verify the terms stats
@@ -484,7 +484,7 @@ public:
         status = column_writer->finish();
         EXPECT_TRUE(status.ok()) << status;
 
-        status = index_file_writer->write_v2();
+        status = index_file_writer->write();
         EXPECT_TRUE(status.ok()) << status;
 
         // For BKD index, we need to verify using BkdIndexReader instead of 
check_terms_stats
@@ -560,7 +560,7 @@ public:
         status = column_writer->finish();
         EXPECT_TRUE(status.ok()) << status;
 
-        status = index_file_writer->write_v2();
+        status = index_file_writer->write();
         EXPECT_TRUE(status.ok()) << status;
 
         // Restore original config value
@@ -745,12 +745,12 @@ TEST_F(InvertedIndexWriterTest, 
CompareUnicodeStringWriteResults) {
     // Finish and close both writers
     status = column_writer_enabled->finish();
     EXPECT_TRUE(status.ok()) << status;
-    status = index_file_writer_enabled->write_v2();
+    status = index_file_writer_enabled->write();
     EXPECT_TRUE(status.ok()) << status;
 
     status = column_writer_disabled->finish();
     EXPECT_TRUE(status.ok()) << status;
-    status = index_file_writer_disabled->write_v2();
+    status = index_file_writer_disabled->write();
     EXPECT_TRUE(status.ok()) << status;
 
     // Restore original config value
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
index edf62f44d52..143c9f09d2a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
@@ -273,7 +273,8 @@ public class IndexDef {
             }
 
             if (indexType == IndexType.INVERTED) {
-                InvertedIndexUtil.checkInvertedIndexParser(indexColName, 
colType, properties);
+                InvertedIndexUtil.checkInvertedIndexParser(indexColName, 
colType, properties,
+                        invertedIndexFileStorageFormat);
             } else if (indexType == IndexType.NGRAM_BF) {
                 if (colType != PrimitiveType.CHAR && colType != 
PrimitiveType.VARCHAR
                         && colType != PrimitiveType.STRING) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
index 8c050cb8dc3..988416610e5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
@@ -19,6 +19,7 @@ package org.apache.doris.analysis;
 
 import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.common.AnalysisException;
+import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
 
 import java.util.Arrays;
 import java.util.HashMap;
@@ -59,6 +60,8 @@ public class InvertedIndexUtil {
 
     public static String INVERTED_INDEX_PARSER_STOPWORDS_KEY = "stopwords";
 
+    public static String INVERTED_INDEX_DICT_COMPRESSION_KEY = 
"dict_compression";
+
     public static String getInvertedIndexParser(Map<String, String> 
properties) {
         String parser = properties == null ? null : 
properties.get(INVERTED_INDEX_PARSER_KEY);
         // default is "none" if not set
@@ -122,11 +125,12 @@ public class InvertedIndexUtil {
     }
 
     public static void checkInvertedIndexParser(String indexColName, 
PrimitiveType colType,
-            Map<String, String> properties) throws AnalysisException {
+            Map<String, String> properties,
+            TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat) 
throws AnalysisException {
         String parser = null;
         if (properties != null) {
             parser = properties.get(INVERTED_INDEX_PARSER_KEY);
-            checkInvertedIndexProperties(properties);
+            checkInvertedIndexProperties(properties, colType, 
invertedIndexFileStorageFormat);
         }
 
         // default is "none" if not set
@@ -158,7 +162,8 @@ public class InvertedIndexUtil {
         }
     }
 
-    public static void checkInvertedIndexProperties(Map<String, String> 
properties) throws AnalysisException {
+    public static void checkInvertedIndexProperties(Map<String, String> 
properties, PrimitiveType colType,
+            TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat) 
throws AnalysisException {
         Set<String> allowedKeys = new HashSet<>(Arrays.asList(
                 INVERTED_INDEX_PARSER_KEY,
                 INVERTED_INDEX_PARSER_MODE_KEY,
@@ -168,7 +173,8 @@ public class InvertedIndexUtil {
                 INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT,
                 INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY,
                 INVERTED_INDEX_PARSER_LOWERCASE_KEY,
-                INVERTED_INDEX_PARSER_STOPWORDS_KEY
+                INVERTED_INDEX_PARSER_STOPWORDS_KEY,
+                INVERTED_INDEX_DICT_COMPRESSION_KEY
         ));
 
         for (String key : properties.keySet()) {
@@ -185,6 +191,7 @@ public class InvertedIndexUtil {
         String ignoreAbove = 
properties.get(INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY);
         String lowerCase = properties.get(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
         String stopWords = properties.get(INVERTED_INDEX_PARSER_STOPWORDS_KEY);
+        String dictCompression = 
properties.get(INVERTED_INDEX_DICT_COMPRESSION_KEY);
 
         if (parser != null && 
!parser.matches("none|english|unicode|chinese|standard|icu|basic|ik")) {
             throw new AnalysisException("Invalid inverted index 'parser' 
value: " + parser
@@ -239,5 +246,23 @@ public class InvertedIndexUtil {
             throw new AnalysisException("Invalid inverted index 'stopWords' 
value: " + stopWords
                     + ", stopWords must be none");
         }
+
+        if (dictCompression != null) {
+            if (!colType.isStringType()) {
+                throw new AnalysisException("dict_compression can only be set 
for StringType columns. type: "
+                        + colType);
+            }
+
+            if (!dictCompression.matches("true|false")) {
+                throw new AnalysisException(
+                        "Invalid inverted index 'dict_compression' value: "
+                                + dictCompression + ", dict_compression must 
be true or false");
+            }
+
+            if (invertedIndexFileStorageFormat != 
TInvertedIndexFileStorageFormat.V3) {
+                throw new AnalysisException(
+                        "dict_compression can only be set when storage format 
is V3");
+            }
+        }
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java
 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java
index aa3286a74ae..bcb17d3c039 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java
@@ -335,8 +335,12 @@ public class CloudInternalCatalog extends InternalCatalog {
         if (invertedIndexFileStorageFormat != null) {
             if (invertedIndexFileStorageFormat == 
TInvertedIndexFileStorageFormat.V1) {
                 
schemaBuilder.setInvertedIndexStorageFormat(OlapFile.InvertedIndexStorageFormatPB.V1);
-            } else {
+            } else if (invertedIndexFileStorageFormat == 
TInvertedIndexFileStorageFormat.V2) {
                 
schemaBuilder.setInvertedIndexStorageFormat(OlapFile.InvertedIndexStorageFormatPB.V2);
+            } else if (invertedIndexFileStorageFormat == 
TInvertedIndexFileStorageFormat.V3) {
+                
schemaBuilder.setInvertedIndexStorageFormat(OlapFile.InvertedIndexStorageFormatPB.V3);
+            } else {
+                throw new DdlException("invalid inverted index storage 
format");
             }
         }
         schemaBuilder.setRowStorePageSize(pageSize);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
index b873cd8e7cf..68b1535c18d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
@@ -1146,6 +1146,8 @@ public class PropertyAnalyzer {
         } else {
             if (Config.inverted_index_storage_format.equalsIgnoreCase("V1")) {
                 return TInvertedIndexFileStorageFormat.V1;
+            } else if 
(Config.inverted_index_storage_format.equalsIgnoreCase("V3")) {
+                return TInvertedIndexFileStorageFormat.V3;
             } else {
                 return TInvertedIndexFileStorageFormat.V2;
             }
@@ -1155,9 +1157,13 @@ public class PropertyAnalyzer {
             return TInvertedIndexFileStorageFormat.V1;
         } else if (invertedIndexFileStorageFormat.equalsIgnoreCase("v2")) {
             return TInvertedIndexFileStorageFormat.V2;
+        } else if (invertedIndexFileStorageFormat.equalsIgnoreCase("v3")) {
+            return TInvertedIndexFileStorageFormat.V3;
         } else if (invertedIndexFileStorageFormat.equalsIgnoreCase("default")) 
{
             if (Config.inverted_index_storage_format.equalsIgnoreCase("V1")) {
                 return TInvertedIndexFileStorageFormat.V1;
+            } else if 
(Config.inverted_index_storage_format.equalsIgnoreCase("V3")) {
+                return TInvertedIndexFileStorageFormat.V3;
             } else {
                 return TInvertedIndexFileStorageFormat.V2;
             }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
index b0f97e2fc27..da63e4fb5c4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
@@ -156,7 +156,8 @@ public class IndexDefinition {
             if (indexType == IndexType.INVERTED) {
                 try {
                     InvertedIndexUtil.checkInvertedIndexParser(indexColName,
-                            colType.toCatalogDataType().getPrimitiveType(), 
properties);
+                            colType.toCatalogDataType().getPrimitiveType(), 
properties,
+                            invertedIndexFileStorageFormat);
                 } catch (Exception ex) {
                     throw new AnalysisException("invalid INVERTED index:" + 
ex.getMessage(), ex);
                 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/analysis/InvertedIndexUtilTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/InvertedIndexUtilTest.java
new file mode 100644
index 00000000000..6f1880c5bb6
--- /dev/null
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/InvertedIndexUtilTest.java
@@ -0,0 +1,193 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.analysis;
+
+import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Unit tests for the {@code dict_compression} validation done by
+ * {@code InvertedIndexUtil.checkInvertedIndexProperties}.
+ */
+public class InvertedIndexUtilTest {
+    private static final String NOT_V3_MESSAGE =
+            "errCode = 2, detailMessage = dict_compression can only be set when storage format is V3";
+
+    /** Returns a fresh mutable property map that only carries the dict_compression key. */
+    private static Map<String, String> dictCompression(String value) {
+        Map<String, String> props = new HashMap<>();
+        props.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY, value);
+        return props;
+    }
+
+    /** Message expected when the dict_compression value is rejected as malformed. */
+    private static String invalidValueMessage(String value) {
+        return "errCode = 2, detailMessage = Invalid inverted index 'dict_compression' value: " + value
+                + ", dict_compression must be true or false";
+    }
+
+    /** Expects the check to throw an AnalysisException carrying exactly {@code expectedMessage}. */
+    private static void assertRejected(Map<String, String> props, PrimitiveType columnType,
+            TInvertedIndexFileStorageFormat format, String expectedMessage) {
+        AnalysisException failure = Assertions.assertThrows(AnalysisException.class,
+                () -> InvertedIndexUtil.checkInvertedIndexProperties(props, columnType, format));
+        Assertions.assertEquals(expectedMessage, failure.getMessage());
+    }
+
+    @Test
+    public void testCheckInvertedIndexProperties() throws AnalysisException {
+        Map<String, String> props = dictCompression("true");
+
+        // A valid value on a STRING column with V3 must pass without throwing.
+        InvertedIndexUtil.checkInvertedIndexProperties(props, PrimitiveType.STRING,
+                TInvertedIndexFileStorageFormat.V3);
+
+        // Re-using the same map with a malformed value must now be rejected.
+        props.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY, "invalid_value");
+        assertRejected(props, PrimitiveType.STRING, TInvertedIndexFileStorageFormat.V3,
+                invalidValueMessage("invalid_value"));
+    }
+
+    @Test
+    public void testDictCompressionValidTrue() throws AnalysisException {
+        InvertedIndexUtil.checkInvertedIndexProperties(dictCompression("true"), PrimitiveType.STRING,
+                TInvertedIndexFileStorageFormat.V3);
+    }
+
+    @Test
+    public void testDictCompressionValidFalse() throws AnalysisException {
+        InvertedIndexUtil.checkInvertedIndexProperties(dictCompression("false"), PrimitiveType.STRING,
+                TInvertedIndexFileStorageFormat.V3);
+    }
+
+    @Test
+    public void testDictCompressionNonStringType() {
+        assertRejected(dictCompression("true"), PrimitiveType.INT, TInvertedIndexFileStorageFormat.V3,
+                "errCode = 2, detailMessage = dict_compression can only be set for StringType columns. type: INT");
+    }
+
+    @Test
+    public void testDictCompressionInvalidStorageFormat() {
+        assertRejected(dictCompression("true"), PrimitiveType.STRING, TInvertedIndexFileStorageFormat.V2,
+                NOT_V3_MESSAGE);
+    }
+
+    @Test
+    public void testDictCompressionInvalidValue() {
+        assertRejected(dictCompression("invalid_value"), PrimitiveType.STRING,
+                TInvertedIndexFileStorageFormat.V3, invalidValueMessage("invalid_value"));
+    }
+
+    @Test
+    public void testDictCompressionCaseSensitivity() throws AnalysisException {
+        // Only the exact lower-case spellings are accepted, so "True" is expected to fail.
+        assertRejected(dictCompression("True"), PrimitiveType.STRING, TInvertedIndexFileStorageFormat.V3,
+                invalidValueMessage("True"));
+    }
+
+    @Test
+    public void testDictCompressionNullValue() throws AnalysisException {
+        // The key is absent altogether: the check must simply pass.
+        InvertedIndexUtil.checkInvertedIndexProperties(new HashMap<>(), PrimitiveType.STRING,
+                TInvertedIndexFileStorageFormat.V3);
+    }
+
+    @Test
+    public void testDictCompressionWhenStorageFormatIsNull() {
+        assertRejected(dictCompression("true"), PrimitiveType.STRING, null, NOT_V3_MESSAGE);
+    }
+
+    @Test
+    public void testDictCompressionWithNonV3AndValidValue() {
+        // Even an explicit "false" is refused when the storage format is not V3.
+        assertRejected(dictCompression("false"), PrimitiveType.STRING, TInvertedIndexFileStorageFormat.V2,
+                NOT_V3_MESSAGE);
+    }
+}
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
index 041ca89bfc5..6d708aa0826 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
@@ -28,6 +28,7 @@ import org.apache.doris.catalog.Type;
 import org.apache.doris.common.util.PropertyAnalyzer;
 import org.apache.doris.common.util.TimeUtils;
 import org.apache.doris.resource.Tag;
+import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
 import org.apache.doris.thrift.TStorageFormat;
 import org.apache.doris.thrift.TStorageMedium;
 
@@ -37,6 +38,7 @@ import com.google.common.collect.Sets;
 import org.junit.Assert;
 import org.junit.Rule;
 import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
 import org.junit.rules.ExpectedException;
 
 import java.time.Instant;
@@ -236,4 +238,52 @@ public class PropertyAnalyzerTest {
             Assert.assertTrue(e.getMessage().contains("Storage page size must 
be between 4KB and 10MB"));
         }
     }
+
+    /**
+     * Checks how PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat resolves the
+     * PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT property: explicit v1/v2/v3 values, the
+     * "default" value (which follows Config.inverted_index_storage_format) and an
+     * unrecognised value.
+     */
+    @Test
+    public void testAnalyzeInvertedIndexFileStorageFormat() throws AnalysisException {
+        // Config.inverted_index_storage_format is a static field that this test overwrites;
+        // remember the incoming value so it can be put back for whichever test runs next.
+        final String originalFormat = Config.inverted_index_storage_format;
+        try {
+            // Null properties, configuration as found on entry.
+            TInvertedIndexFileStorageFormat result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(null);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V2, result);
+
+            // Property absent: the configured format decides.
+            Config.inverted_index_storage_format = "V1";
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(new HashMap<>());
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V1, result);
+
+            // Explicit values win over the configured "V1". A new map is built for every
+            // call, as in the original test, in case the analyzer modifies its argument.
+            Map<String, String> propertiesWithV1 = new HashMap<>();
+            propertiesWithV1.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "v1");
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithV1);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V1, result);
+
+            Map<String, String> propertiesWithV2 = new HashMap<>();
+            propertiesWithV2.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "v2");
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithV2);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V2, result);
+
+            Map<String, String> propertiesWithV3 = new HashMap<>();
+            propertiesWithV3.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "v3");
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithV3);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V3, result);
+
+            // "default" defers to the configured format, for each supported setting.
+            Config.inverted_index_storage_format = "V1";
+            Map<String, String> propertiesWithDefaultV1 = new HashMap<>();
+            propertiesWithDefaultV1.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "default");
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithDefaultV1);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V1, result);
+
+            Config.inverted_index_storage_format = "V2";
+            Map<String, String> propertiesWithDefaultV2 = new HashMap<>();
+            propertiesWithDefaultV2.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "default");
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithDefaultV2);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V2, result);
+
+            // Covers the "default" + configured V3 branch introduced by this change.
+            Config.inverted_index_storage_format = "V3";
+            Map<String, String> propertiesWithDefaultV3 = new HashMap<>();
+            propertiesWithDefaultV3.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "default");
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithDefaultV3);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V3, result);
+
+            // Anything else is rejected with a message naming the offending value.
+            Map<String, String> propertiesWithUnknown = new HashMap<>();
+            propertiesWithUnknown.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "unknown_format");
+            AnalysisException thrown = Assertions.assertThrows(AnalysisException.class,
+                    () -> PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithUnknown));
+            Assertions.assertEquals(
+                    "errCode = 2, detailMessage = unknown inverted index storage format: unknown_format",
+                    thrown.getMessage());
+        } finally {
+            Config.inverted_index_storage_format = originalFormat;
+        }
+    }
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
index 93caa559cc2..a02cfe1338e 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
@@ -47,7 +47,8 @@ public class IndexDefinitionTest {
                                         null, "comment");
         try {
             def.checkColumn(new ColumnDefinition("col1", VariantType.INSTANCE, 
false, AggregateType.NONE, true,
-                                                 null, "comment"), 
KeysType.UNIQUE_KEYS, true, TInvertedIndexFileStorageFormat.V1);
+                                                 null, "comment"), 
KeysType.UNIQUE_KEYS, true,
+                                                        
TInvertedIndexFileStorageFormat.V1);
             Assertions.fail("No exception throws.");
         } catch (AnalysisException e) {
             org.junit.jupiter.api.Assertions.assertInstanceOf(
diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto
index 92d6a9fe3b8..d62c9df5073 100644
--- a/gensrc/proto/olap_file.proto
+++ b/gensrc/proto/olap_file.proto
@@ -356,6 +356,7 @@ enum IndexType {
 enum InvertedIndexStorageFormatPB {
     V1 = 0;
     V2 = 1;
+    // Numbering here is one lower than in the Thrift enum TInvertedIndexFileStorageFormat
+    // (which reserves 0 for DEFAULT), so map between the two by name, never by raw value.
+    V3 = 2;
 }
 
 message TabletIndexPB {
diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift
index 58716f51f3f..887031611d7 100644
--- a/gensrc/thrift/Types.thrift
+++ b/gensrc/thrift/Types.thrift
@@ -122,9 +122,10 @@ enum TStorageBackendType {
 // This enum is used to distinguish between different organizational methods
 // of inverted index data, affecting how the index is stored and accessed.
 enum TInvertedIndexFileStorageFormat {
-    DEFAULT, // Default format, unspecified storage method.
-    V1,      // Index per idx: Each index is stored separately based on its identifier.
-    V2       // Segment id per idx: Indexes are organized based on segment identifiers, grouping indexes by their associated segment.
+    DEFAULT = 0, // Default format, unspecified storage method.
+    V1 = 1,      // Index per idx: Each index is stored separately based on its identifier.
+    V2 = 2,      // Segment id per idx: Indexes are organized based on segment identifiers, grouping indexes by their associated segment.
+    V3 = 3       // Position and dictionary compression
 }
 
 struct TScalarType {
diff --git a/regression-test/data/inverted_index_p0/test_inverted_index_v3.out 
b/regression-test/data/inverted_index_p0/test_inverted_index_v3.out
new file mode 100644
index 00000000000..9dc20f3e0e0
Binary files /dev/null and 
b/regression-test/data/inverted_index_p0/test_inverted_index_v3.out differ
diff --git 
a/regression-test/suites/fault_injection_p0/test_inverted_index_v3_fault_injection.groovy
 
b/regression-test/suites/fault_injection_p0/test_inverted_index_v3_fault_injection.groovy
new file mode 100644
index 00000000000..98c0e110964
--- /dev/null
+++ 
b/regression-test/suites/fault_injection_p0/test_inverted_index_v3_fault_injection.groovy
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+// Inserts rows into a table using inverted index storage format V3 while BE debug points
+// are switched on. What each debug point does is defined on the BE side, not in this file.
+suite("test_inverted_index_v3_fault_injection", "nonConcurrent") {
+    def tableName = "test_inverted_index_v3_fault_injection"
+    def createFieldV3 = "InvertedIndexColumnWriterImpl::create_field_v3"
+    def createFieldDicCompression = "InvertedIndexColumnWriterImpl::create_field_dic_compression"
+
+    sql "DROP TABLE IF EXISTS ${tableName}"
+
+    sql """
+      CREATE TABLE ${tableName} (
+      `@timestamp` int(11) NULL COMMENT "",
+      `clientip` varchar(20) NULL COMMENT "",
+      `request` text NULL COMMENT "",
+      `status` int(11) NULL COMMENT "",
+      `size` int(11) NULL COMMENT "",
+      INDEX clientip_idx (`clientip`) COMMENT '',
+      INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`@timestamp`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+      "replication_allocation" = "tag.location.default: 1",
+      "inverted_index_storage_format" = "V3"
+      );
+    """
+
+    // Runs one statement with the listed debug points enabled on all BEs; the points are
+    // always disabled again, in the same order, even when enabling or the statement fails.
+    def runWithDebugPoints = { debugPoints, statement ->
+        try {
+            for (point in debugPoints) {
+                GetDebugPoint().enableDebugPointForAllBEs(point)
+            }
+            sql statement
+        } finally {
+            for (point in debugPoints) {
+                GetDebugPoint().disableDebugPointForAllBEs(point)
+            }
+        }
+    }
+
+    runWithDebugPoints([createFieldV3],
+        """ INSERT INTO ${tableName} VALUES (1, '40.135.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 24736); """)
+
+    runWithDebugPoints([createFieldV3, createFieldDicCompression],
+        """ INSERT INTO ${tableName} VALUES (2, '40.135.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 24736); """)
+}
\ No newline at end of file
diff --git 
a/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy 
b/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy
new file mode 100644
index 00000000000..ea7dd0b595f
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy
@@ -0,0 +1,117 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+// Loads the same JSON documents into a V2-format and a V3-format table and runs the same
+// four MATCH queries against each (presumably compared with the recorded .out file).
+suite("test_inverted_index_v3", "p0") {
+    def v2Table = "test_inverted_index_v3_1"
+    def v3Table = "test_inverted_index_v3_2"
+    def allTables = [v2Table, v3Table]
+
+    for (tableName in allTables) {
+        sql "DROP TABLE IF EXISTS ${tableName}"
+    }
+
+    // Both tables share one schema and differ only in inverted_index_storage_format.
+    def createHttpLogsTable = { tableName, storageFormat ->
+        sql """
+      CREATE TABLE ${tableName} (
+      `@timestamp` int(11) NULL COMMENT "",
+      `clientip` varchar(20) NULL COMMENT "",
+      `request` text NULL COMMENT "",
+      `status` int(11) NULL COMMENT "",
+      `size` int(11) NULL COMMENT "",
+      INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`@timestamp`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+      "replication_allocation" = "tag.location.default: 1",
+      "inverted_index_storage_format" = "${storageFormat}"
+      );
+    """
+    }
+    createHttpLogsTable(v2Table, "V2")
+    createHttpLogsTable(v3Table, "V3")
+
+    // Stream-loads a JSON file into table_name and validates the load response.
+    // load_to_single_tablet is accepted but not used anywhere in the body.
+    def loadHttpLogs = { table_name, label, read_flag, format_flag, file_name, ignore_failure = false,
+                         expected_succ_rows = -1, load_to_single_tablet = 'true' ->
+        streamLoad {
+            table "${table_name}"
+
+            // HTTP request header parameters; the random suffix keeps the label unique per run.
+            set 'label', label + "_" + UUID.randomUUID().toString()
+            set 'read_json_by_line', read_flag
+            set 'format', format_flag
+            file file_name // JSON file to import
+            time 10000 // original note: limit inflight 10s
+            if (expected_succ_rows >= 0) {
+                set 'max_filter_ratio', '1'
+            }
+
+            // Declaring a check callback replaces the default checks, so every condition
+            // that matters has to be verified here.
+            check { result, exception, startTime, endTime ->
+                if (ignore_failure && expected_succ_rows < 0) {
+                    return
+                }
+                if (exception != null) {
+                    throw exception
+                }
+                log.info("Stream load result: ${result}".toString())
+                def response = parseJson(result)
+                assertEquals("success", response.Status.toLowerCase())
+                if (expected_succ_rows >= 0) {
+                    assertEquals(response.NumberLoadedRows, expected_succ_rows)
+                } else {
+                    assertEquals(response.NumberTotalRows,
+                            response.NumberLoadedRows + response.NumberUnselectedRows)
+                    assertTrue(response.NumberLoadedRows > 0 && response.LoadBytes > 0)
+                }
+            }
+        }
+    }
+
+    for (tableName in allTables) {
+        loadHttpLogs.call(tableName, tableName, 'true', 'json', 'documents-1000.json')
+    }
+
+    sql "sync"
+
+    // Same query order as before: all four operators on the V2 table, then on the V3 table.
+    def matchOperators = ['match_any', 'match_all', 'match_phrase', 'match_phrase_prefix']
+    for (tableName in allTables) {
+        for (matchOperator in matchOperators) {
+            qt_sql """ select count() from ${tableName} where request ${matchOperator} 'hm bg'; """
+        }
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to