This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 96c8bf418b6 branch-3.1: [feat](inverted index) Adding Storage Format
V3 for Inverted Index #44414 #45805 (#52206)
96c8bf418b6 is described below
commit 96c8bf418b6eb6f7faa738668256416dc93e9910
Author: zzzxl <[email protected]>
AuthorDate: Tue Jun 24 23:50:25 2025 +0800
branch-3.1: [feat](inverted index) Adding Storage Format V3 for Inverted
Index #44414 #45805 (#52206)
Cherry-pick from
#44414
#45805
---
be/src/olap/inverted_index_parser.cpp | 9 +
be/src/olap/inverted_index_parser.h | 5 +
be/src/olap/rowset/beta_rowset.cpp | 18 +-
.../char_filter/char_replace_char_filter.h | 2 +-
.../segment_v2/inverted_index_file_reader.cpp | 2 +-
.../segment_v2/inverted_index_file_writer.cpp | 22 +--
.../rowset/segment_v2/inverted_index_file_writer.h | 10 +-
.../rowset/segment_v2/inverted_index_writer.cpp | 22 +++
be/src/olap/tablet_meta.cpp | 3 +
.../segment_v2/inverted_index_file_writer_test.cpp | 2 +-
.../segment_v2/inverted_index_writer_test.cpp | 12 +-
.../java/org/apache/doris/analysis/IndexDef.java | 3 +-
.../apache/doris/analysis/InvertedIndexUtil.java | 33 +++-
.../cloud/datasource/CloudInternalCatalog.java | 6 +-
.../apache/doris/common/util/PropertyAnalyzer.java | 6 +
.../trees/plans/commands/info/IndexDefinition.java | 3 +-
.../doris/analysis/InvertedIndexUtilTest.java | 193 +++++++++++++++++++++
.../apache/doris/common/PropertyAnalyzerTest.java | 50 ++++++
.../trees/plans/commands/IndexDefinitionTest.java | 3 +-
gensrc/proto/olap_file.proto | 1 +
gensrc/thrift/Types.thrift | 7 +-
.../inverted_index_p0/test_inverted_index_v3.out | Bin 0 -> 223 bytes
.../test_inverted_index_v3_fault_injection.groovy | 60 +++++++
.../test_inverted_index_v3.groovy | 117 +++++++++++++
24 files changed, 551 insertions(+), 38 deletions(-)
diff --git a/be/src/olap/inverted_index_parser.cpp
b/be/src/olap/inverted_index_parser.cpp
index b80539b1b4e..eeee8699981 100644
--- a/be/src/olap/inverted_index_parser.cpp
+++ b/be/src/olap/inverted_index_parser.cpp
@@ -152,4 +152,13 @@ std::string get_parser_stopwords_from_properties(
}
}
+std::string get_parser_dict_compression_from_properties(
+ const std::map<std::string, std::string>& properties) {
+ if (properties.find(INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY) !=
properties.end()) {
+ return properties.at(INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY);
+ } else {
+ return "";
+ }
+}
+
} // namespace doris
diff --git a/be/src/olap/inverted_index_parser.h
b/be/src/olap/inverted_index_parser.h
index ec7cf0c8e70..a48157bef14 100644
--- a/be/src/olap/inverted_index_parser.h
+++ b/be/src/olap/inverted_index_parser.h
@@ -91,6 +91,8 @@ const std::string INVERTED_INDEX_PARSER_LOWERCASE_KEY =
"lower_case";
const std::string INVERTED_INDEX_PARSER_STOPWORDS_KEY = "stopwords";
+const std::string INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY =
"dict_compression";
+
std::string inverted_index_parser_type_to_string(InvertedIndexParserType
parser_type);
InvertedIndexParserType get_inverted_index_parser_type_from_string(const
std::string& parser_str);
@@ -127,4 +129,7 @@ std::string get_parser_lowercase_from_properties(
std::string get_parser_stopwords_from_properties(
const std::map<std::string, std::string>& properties);
+std::string get_parser_dict_compression_from_properties(
+ const std::map<std::string, std::string>& properties);
+
} // namespace doris
diff --git a/be/src/olap/rowset/beta_rowset.cpp
b/be/src/olap/rowset/beta_rowset.cpp
index df936f5f4ae..6b3648f78a9 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -719,10 +719,24 @@ Status
BetaRowset::show_nested_index_file(rapidjson::Value* rowset_value,
rapidjson::Document::AllocatorType&
allocator) {
const auto& fs = _rowset_meta->fs();
auto storage_format = _schema->get_inverted_index_storage_format();
- auto format_str = storage_format == InvertedIndexStorageFormatPB::V1 ?
"V1" : "V2";
+ std::string format_str;
+ switch (storage_format) {
+ case InvertedIndexStorageFormatPB::V1:
+ format_str = "V1";
+ break;
+ case InvertedIndexStorageFormatPB::V2:
+ format_str = "V2";
+ break;
+ case InvertedIndexStorageFormatPB::V3:
+ format_str = "V3";
+ break;
+ default:
+ return Status::InternalError("inverted index storage format error");
+ break;
+ }
auto rs_id = rowset_id().to_string();
rowset_value->AddMember("rowset_id", rapidjson::Value(rs_id.c_str(),
allocator), allocator);
- rowset_value->AddMember("index_storage_format",
rapidjson::Value(format_str, allocator),
+ rowset_value->AddMember("index_storage_format",
rapidjson::Value(format_str.c_str(), allocator),
allocator);
rapidjson::Value segments(rapidjson::kArrayType);
for (int seg_id = 0; seg_id < num_segments(); ++seg_id) {
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h
b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h
index d9e5080d2d5..1e5e6f5d5ce 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h
@@ -17,7 +17,7 @@
#pragma once
-#include <CLucene.h>
+#include <CLucene.h> // IWYU pragma: keep
#include <CLucene/analysis/CharFilter.h>
#include <bitset>
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
index 1b9440ae14b..fc08e4e8a41 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
@@ -81,7 +81,7 @@ Status InvertedIndexFileReader::_init_from(int32_t
read_buffer_size, const io::I
// 3. read file
int32_t version = _stream->readInt(); // Read version number
- if (version == InvertedIndexStorageFormatPB::V2) {
+ if (version >= InvertedIndexStorageFormatPB::V2) {
DCHECK(version == _storage_format);
int32_t numIndices = _stream->readInt(); // Read number of indices
ReaderFileEntry* entry = nullptr;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
index 6a2f15b4476..30cf7a73cf3 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
@@ -156,7 +156,7 @@ Status InvertedIndexFileWriter::close() {
}
} else {
try {
- RETURN_IF_ERROR(write_v2());
+ RETURN_IF_ERROR(write());
for (const auto& entry : _indices_dirs) {
const auto& dir = entry.second;
// delete index path, which contains separated inverted index
files
@@ -296,7 +296,7 @@ Status InvertedIndexFileWriter::write_v1() {
return Status::OK();
}
-Status InvertedIndexFileWriter::write_v2() {
+Status InvertedIndexFileWriter::write() {
std::unique_ptr<lucene::store::Directory, DirectoryDeleter> out_dir =
nullptr;
std::unique_ptr<lucene::store::IndexOutput> compound_file_output = nullptr;
ErrorContext error_context;
@@ -304,10 +304,10 @@ Status InvertedIndexFileWriter::write_v2() {
// Calculate header length and initialize offset
int64_t current_offset = headerLength();
// Prepare file metadata
- auto file_metadata = prepare_file_metadata_v2(current_offset);
+ auto file_metadata = prepare_file_metadata(current_offset);
// Create output stream
- auto result = create_output_stream_v2();
+ auto result = create_output_stream();
out_dir = std::move(result.first);
compound_file_output = std::move(result.second);
@@ -318,7 +318,7 @@ Status InvertedIndexFileWriter::write_v2() {
write_index_headers_and_metadata(compound_file_output.get(),
file_metadata);
// Copy file data
- copy_files_data_v2(compound_file_output.get(), file_metadata);
+ copy_files_data(compound_file_output.get(), file_metadata);
_total_file_size = compound_file_output->getFilePointer();
_file_info.set_index_size(_total_file_size);
@@ -473,7 +473,7 @@ void
InvertedIndexFileWriter::write_header_and_data_v1(lucene::store::IndexOutpu
std::pair<std::unique_ptr<lucene::store::Directory, DirectoryDeleter>,
std::unique_ptr<lucene::store::IndexOutput>>
-InvertedIndexFileWriter::create_output_stream_v2() {
+InvertedIndexFileWriter::create_output_stream() {
io::Path index_path
{InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix)};
auto* out_dir = DorisFSDirectoryFactory::getDirectory(_fs,
index_path.parent_path().c_str());
@@ -489,15 +489,15 @@ InvertedIndexFileWriter::create_output_stream_v2() {
void
InvertedIndexFileWriter::write_version_and_indices_count(lucene::store::IndexOutput*
output) {
// Write the version number
- output->writeInt(InvertedIndexStorageFormatPB::V2);
+ output->writeInt(_storage_format);
// Write the number of indices
const auto num_indices = static_cast<uint32_t>(_indices_dirs.size());
output->writeInt(num_indices);
}
-std::vector<InvertedIndexFileWriter::FileMetadata>
-InvertedIndexFileWriter::prepare_file_metadata_v2(int64_t& current_offset) {
+std::vector<InvertedIndexFileWriter::FileMetadata>
InvertedIndexFileWriter::prepare_file_metadata(
+ int64_t& current_offset) {
std::vector<FileMetadata> file_metadata;
std::vector<FileMetadata> meta_files;
std::vector<FileMetadata> normal_files;
@@ -598,8 +598,8 @@ void
InvertedIndexFileWriter::write_index_headers_and_metadata(
}
}
-void InvertedIndexFileWriter::copy_files_data_v2(lucene::store::IndexOutput*
output,
- const
std::vector<FileMetadata>& file_metadata) {
+void InvertedIndexFileWriter::copy_files_data(lucene::store::IndexOutput*
output,
+ const std::vector<FileMetadata>&
file_metadata) {
const int64_t buffer_length = 16384;
uint8_t buffer[buffer_length];
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h
b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h
index ba42ffdceb1..ab7cdbff152 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h
@@ -71,7 +71,7 @@ public:
Status delete_index(const TabletIndex* index_meta);
Status initialize(InvertedIndexDirectoryMap& indices_dirs);
virtual ~InvertedIndexFileWriter() = default;
- Status write_v2();
+ Status write();
Status write_v1();
Status close();
const InvertedIndexFileInfo* get_index_file_info() const {
@@ -122,7 +122,7 @@ private:
// Helper functions specific to write_v2
virtual std::pair<std::unique_ptr<lucene::store::Directory,
DirectoryDeleter>,
std::unique_ptr<lucene::store::IndexOutput>>
- create_output_stream_v2();
+ create_output_stream();
void write_version_and_indices_count(lucene::store::IndexOutput* output);
struct FileMetadata {
int64_t index_id;
@@ -141,11 +141,11 @@ private:
length(len),
directory(dir) {}
};
- std::vector<FileMetadata> prepare_file_metadata_v2(int64_t&
current_offset);
+ std::vector<FileMetadata> prepare_file_metadata(int64_t& current_offset);
virtual void write_index_headers_and_metadata(lucene::store::IndexOutput*
output,
const
std::vector<FileMetadata>& file_metadata);
- void copy_files_data_v2(lucene::store::IndexOutput* output,
- const std::vector<FileMetadata>& file_metadata);
+ void copy_files_data(lucene::store::IndexOutput* output,
+ const std::vector<FileMetadata>& file_metadata);
Status _insert_directory_into_map(int64_t index_id, const std::string&
index_suffix,
std::shared_ptr<DorisFSDirectory> dir);
// Member variables...
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index d8688a34acc..633483e7ae3 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -219,6 +219,28 @@ public:
(*field)->setOmitTermFreqAndPositions(
!(get_parser_phrase_support_string_from_properties(_index_meta->properties()) ==
INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES));
+ DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::create_field_v3", {
+ if (_index_file_writer->get_storage_format() !=
InvertedIndexStorageFormatPB::V3) {
+ return
Status::Error<doris::ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+ "debug point:
InvertedIndexColumnWriterImpl::create_field_v3 error");
+ }
+ })
+ if (_index_file_writer->get_storage_format() >=
InvertedIndexStorageFormatPB::V3) {
+ (*field)->setIndexVersion(IndexVersion::kV3);
+ // Only effective in v3
+ std::string dict_compression =
+
get_parser_dict_compression_from_properties(_index_meta->properties());
+
DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::create_field_dic_compression", {
+ if (dict_compression != INVERTED_INDEX_PARSER_TRUE) {
+ return
Status::Error<doris::ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+ "debug point: "
+
"InvertedIndexColumnWriterImpl::create_field_dic_compression error");
+ }
+ })
+ if (dict_compression == INVERTED_INDEX_PARSER_TRUE) {
+ (*field)->updateFlag(FlagBits::DICT_COMPRESS);
+ }
+ }
return Status::OK();
}
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index 81113f593e5..ba20216e0c3 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -214,6 +214,9 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t
partition_id, int64_t tablet_id
case TInvertedIndexFileStorageFormat::V2:
schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
break;
+ case TInvertedIndexFileStorageFormat::V3:
+
schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V3);
+ break;
default:
schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
break;
diff --git a/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
b/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
index 8fd66f47c24..49d4ce7c07b 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
@@ -596,7 +596,7 @@ TEST_F(InvertedIndexFileWriterTest,
WriteV2ExceptionHandlingTest) {
EXPECT_CALL(writer_mock, write_index_headers_and_metadata(::testing::_,
::testing::_))
.WillOnce(::testing::Throw(CLuceneError(CL_ERR_IO, "Simulated
exception", false)));
- Status status = writer_mock.write_v2();
+ Status status = writer_mock.write();
ASSERT_FALSE(status.ok());
ASSERT_EQ(status.code(), ErrorCode::INVERTED_INDEX_CLUCENE_ERROR);
}
diff --git a/be/test/olap/rowset/segment_v2/inverted_index_writer_test.cpp
b/be/test/olap/rowset/segment_v2/inverted_index_writer_test.cpp
index d4b52e749c4..4a2eec8d85f 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_writer_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_writer_test.cpp
@@ -350,7 +350,7 @@ public:
status = column_writer->finish();
EXPECT_TRUE(status.ok()) << status;
- status = index_file_writer->write_v2();
+ status = index_file_writer->write();
EXPECT_TRUE(status.ok()) << status;
// Verify the terms stats
@@ -418,7 +418,7 @@ public:
status = column_writer->finish();
EXPECT_TRUE(status.ok()) << status;
- status = index_file_writer->write_v2();
+ status = index_file_writer->write();
EXPECT_TRUE(status.ok()) << status;
// Verify the terms stats
@@ -484,7 +484,7 @@ public:
status = column_writer->finish();
EXPECT_TRUE(status.ok()) << status;
- status = index_file_writer->write_v2();
+ status = index_file_writer->write();
EXPECT_TRUE(status.ok()) << status;
// For BKD index, we need to verify using BkdIndexReader instead of
check_terms_stats
@@ -560,7 +560,7 @@ public:
status = column_writer->finish();
EXPECT_TRUE(status.ok()) << status;
- status = index_file_writer->write_v2();
+ status = index_file_writer->write();
EXPECT_TRUE(status.ok()) << status;
// Restore original config value
@@ -745,12 +745,12 @@ TEST_F(InvertedIndexWriterTest,
CompareUnicodeStringWriteResults) {
// Finish and close both writers
status = column_writer_enabled->finish();
EXPECT_TRUE(status.ok()) << status;
- status = index_file_writer_enabled->write_v2();
+ status = index_file_writer_enabled->write();
EXPECT_TRUE(status.ok()) << status;
status = column_writer_disabled->finish();
EXPECT_TRUE(status.ok()) << status;
- status = index_file_writer_disabled->write_v2();
+ status = index_file_writer_disabled->write();
EXPECT_TRUE(status.ok()) << status;
// Restore original config value
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
index edf62f44d52..143c9f09d2a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
@@ -273,7 +273,8 @@ public class IndexDef {
}
if (indexType == IndexType.INVERTED) {
- InvertedIndexUtil.checkInvertedIndexParser(indexColName,
colType, properties);
+ InvertedIndexUtil.checkInvertedIndexParser(indexColName,
colType, properties,
+ invertedIndexFileStorageFormat);
} else if (indexType == IndexType.NGRAM_BF) {
if (colType != PrimitiveType.CHAR && colType !=
PrimitiveType.VARCHAR
&& colType != PrimitiveType.STRING) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
index 8c050cb8dc3..988416610e5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
@@ -19,6 +19,7 @@ package org.apache.doris.analysis;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
+import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
import java.util.Arrays;
import java.util.HashMap;
@@ -59,6 +60,8 @@ public class InvertedIndexUtil {
public static String INVERTED_INDEX_PARSER_STOPWORDS_KEY = "stopwords";
+ public static String INVERTED_INDEX_DICT_COMPRESSION_KEY =
"dict_compression";
+
public static String getInvertedIndexParser(Map<String, String>
properties) {
String parser = properties == null ? null :
properties.get(INVERTED_INDEX_PARSER_KEY);
// default is "none" if not set
@@ -122,11 +125,12 @@ public class InvertedIndexUtil {
}
public static void checkInvertedIndexParser(String indexColName,
PrimitiveType colType,
- Map<String, String> properties) throws AnalysisException {
+ Map<String, String> properties,
+ TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat)
throws AnalysisException {
String parser = null;
if (properties != null) {
parser = properties.get(INVERTED_INDEX_PARSER_KEY);
- checkInvertedIndexProperties(properties);
+ checkInvertedIndexProperties(properties, colType,
invertedIndexFileStorageFormat);
}
// default is "none" if not set
@@ -158,7 +162,8 @@ public class InvertedIndexUtil {
}
}
- public static void checkInvertedIndexProperties(Map<String, String>
properties) throws AnalysisException {
+ public static void checkInvertedIndexProperties(Map<String, String>
properties, PrimitiveType colType,
+ TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat)
throws AnalysisException {
Set<String> allowedKeys = new HashSet<>(Arrays.asList(
INVERTED_INDEX_PARSER_KEY,
INVERTED_INDEX_PARSER_MODE_KEY,
@@ -168,7 +173,8 @@ public class InvertedIndexUtil {
INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT,
INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY,
INVERTED_INDEX_PARSER_LOWERCASE_KEY,
- INVERTED_INDEX_PARSER_STOPWORDS_KEY
+ INVERTED_INDEX_PARSER_STOPWORDS_KEY,
+ INVERTED_INDEX_DICT_COMPRESSION_KEY
));
for (String key : properties.keySet()) {
@@ -185,6 +191,7 @@ public class InvertedIndexUtil {
String ignoreAbove =
properties.get(INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY);
String lowerCase = properties.get(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
String stopWords = properties.get(INVERTED_INDEX_PARSER_STOPWORDS_KEY);
+ String dictCompression =
properties.get(INVERTED_INDEX_DICT_COMPRESSION_KEY);
if (parser != null &&
!parser.matches("none|english|unicode|chinese|standard|icu|basic|ik")) {
throw new AnalysisException("Invalid inverted index 'parser'
value: " + parser
@@ -239,5 +246,23 @@ public class InvertedIndexUtil {
throw new AnalysisException("Invalid inverted index 'stopWords'
value: " + stopWords
+ ", stopWords must be none");
}
+
+ if (dictCompression != null) {
+ if (!colType.isStringType()) {
+ throw new AnalysisException("dict_compression can only be set
for StringType columns. type: "
+ + colType);
+ }
+
+ if (!dictCompression.matches("true|false")) {
+ throw new AnalysisException(
+ "Invalid inverted index 'dict_compression' value: "
+ + dictCompression + ", dict_compression must
be true or false");
+ }
+
+ if (invertedIndexFileStorageFormat !=
TInvertedIndexFileStorageFormat.V3) {
+ throw new AnalysisException(
+ "dict_compression can only be set when storage format
is V3");
+ }
+ }
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java
b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java
index aa3286a74ae..bcb17d3c039 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java
@@ -335,8 +335,12 @@ public class CloudInternalCatalog extends InternalCatalog {
if (invertedIndexFileStorageFormat != null) {
if (invertedIndexFileStorageFormat ==
TInvertedIndexFileStorageFormat.V1) {
schemaBuilder.setInvertedIndexStorageFormat(OlapFile.InvertedIndexStorageFormatPB.V1);
- } else {
+ } else if (invertedIndexFileStorageFormat ==
TInvertedIndexFileStorageFormat.V2) {
schemaBuilder.setInvertedIndexStorageFormat(OlapFile.InvertedIndexStorageFormatPB.V2);
+ } else if (invertedIndexFileStorageFormat ==
TInvertedIndexFileStorageFormat.V3) {
+
schemaBuilder.setInvertedIndexStorageFormat(OlapFile.InvertedIndexStorageFormatPB.V3);
+ } else {
+ throw new DdlException("invalid inverted index storage
format");
}
}
schemaBuilder.setRowStorePageSize(pageSize);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
index b873cd8e7cf..68b1535c18d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
@@ -1146,6 +1146,8 @@ public class PropertyAnalyzer {
} else {
if (Config.inverted_index_storage_format.equalsIgnoreCase("V1")) {
return TInvertedIndexFileStorageFormat.V1;
+ } else if
(Config.inverted_index_storage_format.equalsIgnoreCase("V3")) {
+ return TInvertedIndexFileStorageFormat.V3;
} else {
return TInvertedIndexFileStorageFormat.V2;
}
@@ -1155,9 +1157,13 @@ public class PropertyAnalyzer {
return TInvertedIndexFileStorageFormat.V1;
} else if (invertedIndexFileStorageFormat.equalsIgnoreCase("v2")) {
return TInvertedIndexFileStorageFormat.V2;
+ } else if (invertedIndexFileStorageFormat.equalsIgnoreCase("v3")) {
+ return TInvertedIndexFileStorageFormat.V3;
} else if (invertedIndexFileStorageFormat.equalsIgnoreCase("default"))
{
if (Config.inverted_index_storage_format.equalsIgnoreCase("V1")) {
return TInvertedIndexFileStorageFormat.V1;
+ } else if
(Config.inverted_index_storage_format.equalsIgnoreCase("V3")) {
+ return TInvertedIndexFileStorageFormat.V3;
} else {
return TInvertedIndexFileStorageFormat.V2;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
index b0f97e2fc27..da63e4fb5c4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
@@ -156,7 +156,8 @@ public class IndexDefinition {
if (indexType == IndexType.INVERTED) {
try {
InvertedIndexUtil.checkInvertedIndexParser(indexColName,
- colType.toCatalogDataType().getPrimitiveType(),
properties);
+ colType.toCatalogDataType().getPrimitiveType(),
properties,
+ invertedIndexFileStorageFormat);
} catch (Exception ex) {
throw new AnalysisException("invalid INVERTED index:" +
ex.getMessage(), ex);
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/analysis/InvertedIndexUtilTest.java
b/fe/fe-core/src/test/java/org/apache/doris/analysis/InvertedIndexUtilTest.java
new file mode 100644
index 00000000000..6f1880c5bb6
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/analysis/InvertedIndexUtilTest.java
@@ -0,0 +1,193 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.analysis;
+
+import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class InvertedIndexUtilTest {
+ @Test
+ public void testCheckInvertedIndexProperties() throws AnalysisException {
+ Map<String, String> properties = new HashMap<>();
+ properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY,
"true");
+
+ PrimitiveType colType = PrimitiveType.STRING;
+ TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat =
TInvertedIndexFileStorageFormat.V3;
+ InvertedIndexUtil.checkInvertedIndexProperties(properties, colType,
invertedIndexFileStorageFormat);
+
+ properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY,
"invalid_value");
+ try {
+ InvertedIndexUtil.checkInvertedIndexProperties(properties,
colType, invertedIndexFileStorageFormat);
+ Assertions.fail("Expected AnalysisException was not thrown");
+ } catch (AnalysisException e) {
+ Assertions.assertEquals(
+ "errCode = 2, detailMessage = Invalid inverted index
'dict_compression' value: invalid_value, "
+ + "dict_compression must be true or false",
+ e.getMessage());
+ }
+ }
+
+ @Test
+ public void testDictCompressionValidTrue() throws AnalysisException {
+ Map<String, String> properties = new HashMap<>();
+ properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY,
"true");
+
+ PrimitiveType colType = PrimitiveType.STRING;
+ TInvertedIndexFileStorageFormat storageFormat =
TInvertedIndexFileStorageFormat.V3;
+
+ InvertedIndexUtil.checkInvertedIndexProperties(properties, colType,
storageFormat);
+ }
+
+ @Test
+ public void testDictCompressionValidFalse() throws AnalysisException {
+ Map<String, String> properties = new HashMap<>();
+ properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY,
"false");
+
+ PrimitiveType colType = PrimitiveType.STRING;
+ TInvertedIndexFileStorageFormat storageFormat =
TInvertedIndexFileStorageFormat.V3;
+
+ InvertedIndexUtil.checkInvertedIndexProperties(properties, colType,
storageFormat);
+ }
+
+ @Test
+ public void testDictCompressionNonStringType() {
+ Map<String, String> properties = new HashMap<>();
+ properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY,
"true");
+
+ PrimitiveType colType = PrimitiveType.INT;
+ TInvertedIndexFileStorageFormat storageFormat =
TInvertedIndexFileStorageFormat.V3;
+
+ AnalysisException thrown =
Assertions.assertThrows(AnalysisException.class, () -> {
+ InvertedIndexUtil.checkInvertedIndexProperties(properties,
colType, storageFormat);
+ });
+
+ Assertions.assertEquals(
+ "errCode = 2, detailMessage = dict_compression can only be set
for StringType columns. type: INT",
+ thrown.getMessage()
+ );
+ }
+
+ @Test
+ public void testDictCompressionInvalidStorageFormat() {
+ Map<String, String> properties = new HashMap<>();
+ properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY,
"true");
+
+ PrimitiveType colType = PrimitiveType.STRING;
+ TInvertedIndexFileStorageFormat storageFormat =
TInvertedIndexFileStorageFormat.V2;
+
+ AnalysisException thrown =
Assertions.assertThrows(AnalysisException.class, () -> {
+ InvertedIndexUtil.checkInvertedIndexProperties(properties,
colType, storageFormat);
+ });
+
+ Assertions.assertEquals(
+ "errCode = 2, detailMessage = dict_compression can only be set
when storage format is V3",
+ thrown.getMessage()
+ );
+ }
+
+ @Test
+ public void testDictCompressionInvalidValue() {
+ Map<String, String> properties = new HashMap<>();
+ properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY,
"invalid_value");
+
+ PrimitiveType colType = PrimitiveType.STRING;
+ TInvertedIndexFileStorageFormat storageFormat =
TInvertedIndexFileStorageFormat.V3;
+
+ AnalysisException thrown =
Assertions.assertThrows(AnalysisException.class, () -> {
+ InvertedIndexUtil.checkInvertedIndexProperties(properties,
colType, storageFormat);
+ });
+
+ Assertions.assertEquals(
+ "errCode = 2, detailMessage = Invalid inverted index
'dict_compression' value: invalid_value, "
+ + "dict_compression must be true or false",
+ thrown.getMessage()
+ );
+ }
+
+ @Test
+ public void testDictCompressionCaseSensitivity() throws AnalysisException {
+ Map<String, String> properties = new HashMap<>();
+ properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY,
"True");
+
+ PrimitiveType colType = PrimitiveType.STRING;
+ TInvertedIndexFileStorageFormat storageFormat =
TInvertedIndexFileStorageFormat.V3;
+
+ AnalysisException thrown =
Assertions.assertThrows(AnalysisException.class, () -> {
+ InvertedIndexUtil.checkInvertedIndexProperties(properties,
colType, storageFormat);
+ });
+
+ Assertions.assertEquals(
+ "errCode = 2, detailMessage = Invalid inverted index
'dict_compression' value: True, "
+ + "dict_compression must be true or false",
+ thrown.getMessage()
+ );
+ }
+
+ @Test
+ public void testDictCompressionNullValue() throws AnalysisException {
+ Map<String, String> properties = new HashMap<>();
+
+ PrimitiveType colType = PrimitiveType.STRING;
+ TInvertedIndexFileStorageFormat storageFormat =
TInvertedIndexFileStorageFormat.V3;
+
+ InvertedIndexUtil.checkInvertedIndexProperties(properties, colType,
storageFormat);
+ }
+
+ @Test
+ public void testDictCompressionWhenStorageFormatIsNull() {
+ Map<String, String> properties = new HashMap<>();
+ properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY,
"true");
+
+ PrimitiveType colType = PrimitiveType.STRING;
+ TInvertedIndexFileStorageFormat storageFormat = null;
+
+ AnalysisException thrown =
Assertions.assertThrows(AnalysisException.class, () -> {
+ InvertedIndexUtil.checkInvertedIndexProperties(properties,
colType, storageFormat);
+ });
+
+ Assertions.assertEquals(
+ "errCode = 2, detailMessage = dict_compression can only be set
when storage format is V3",
+ thrown.getMessage()
+ );
+ }
+
+ @Test
+ public void testDictCompressionWithNonV3AndValidValue() {
+ Map<String, String> properties = new HashMap<>();
+ properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY,
"false");
+
+ PrimitiveType colType = PrimitiveType.STRING;
+ TInvertedIndexFileStorageFormat storageFormat =
TInvertedIndexFileStorageFormat.V2;
+
+ AnalysisException thrown =
Assertions.assertThrows(AnalysisException.class, () -> {
+ InvertedIndexUtil.checkInvertedIndexProperties(properties,
colType, storageFormat);
+ });
+
+ Assertions.assertEquals(
+ "errCode = 2, detailMessage = dict_compression can only be set
when storage format is V3",
+ thrown.getMessage()
+ );
+ }
+}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
b/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
index 041ca89bfc5..6d708aa0826 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
@@ -28,6 +28,7 @@ import org.apache.doris.catalog.Type;
import org.apache.doris.common.util.PropertyAnalyzer;
import org.apache.doris.common.util.TimeUtils;
import org.apache.doris.resource.Tag;
+import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
import org.apache.doris.thrift.TStorageFormat;
import org.apache.doris.thrift.TStorageMedium;
@@ -37,6 +38,7 @@ import com.google.common.collect.Sets;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
import org.junit.rules.ExpectedException;
import java.time.Instant;
@@ -236,4 +238,52 @@ public class PropertyAnalyzerTest {
Assert.assertTrue(e.getMessage().contains("Storage page size must
be between 4KB and 10MB"));
}
}
+
+    /**
+     * Exercises PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat with null/empty
+     * properties, explicit "v1"/"v2"/"v3", "default" (expected to follow
+     * Config.inverted_index_storage_format) and an unrecognised value.
+     */
+    @Test
+    public void testAnalyzeInvertedIndexFileStorageFormat() throws AnalysisException {
+        // Config.inverted_index_storage_format is a static field that this test overwrites.
+        // Save it and restore it in finally so a failed assertion cannot leave a modified
+        // value behind for tests that run afterwards.
+        final String savedStorageFormat = Config.inverted_index_storage_format;
+        try {
+            // NOTE(review): this expectation presumably relies on the config default being V2 - confirm.
+            TInvertedIndexFileStorageFormat result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(null);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V2, result);
+
+            // Empty properties: the configured format is expected to be used.
+            Config.inverted_index_storage_format = "V1";
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(new HashMap<>());
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V1, result);
+
+            // Explicit property values (lower case) map to the matching enum constant.
+            Map<String, String> propertiesWithV1 = new HashMap<>();
+            propertiesWithV1.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "v1");
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithV1);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V1, result);
+
+            Map<String, String> propertiesWithV2 = new HashMap<>();
+            propertiesWithV2.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "v2");
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithV2);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V2, result);
+
+            Map<String, String> propertiesWithV3 = new HashMap<>();
+            propertiesWithV3.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "v3");
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithV3);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V3, result);
+
+            // "default" is expected to resolve to whatever the config currently holds.
+            Config.inverted_index_storage_format = "V1";
+            Map<String, String> propertiesWithDefaultV1 = new HashMap<>();
+            propertiesWithDefaultV1.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "default");
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithDefaultV1);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V1, result);
+
+            Config.inverted_index_storage_format = "V2";
+            Map<String, String> propertiesWithDefaultV2 = new HashMap<>();
+            propertiesWithDefaultV2.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "default");
+            result = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithDefaultV2);
+            Assertions.assertEquals(TInvertedIndexFileStorageFormat.V2, result);
+
+            // An unrecognised value must be rejected with a descriptive AnalysisException.
+            Map<String, String> propertiesWithUnknown = new HashMap<>();
+            propertiesWithUnknown.put(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT, "unknown_format");
+            AnalysisException thrown = Assertions.assertThrows(AnalysisException.class,
+                    () -> PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(propertiesWithUnknown));
+            Assertions.assertEquals(
+                    "errCode = 2, detailMessage = unknown inverted index storage format: unknown_format",
+                    thrown.getMessage());
+        } finally {
+            Config.inverted_index_storage_format = savedStorageFormat;
+        }
+    }
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
index 93caa559cc2..a02cfe1338e 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
@@ -47,7 +47,8 @@ public class IndexDefinitionTest {
null, "comment");
try {
def.checkColumn(new ColumnDefinition("col1", VariantType.INSTANCE,
false, AggregateType.NONE, true,
- null, "comment"),
KeysType.UNIQUE_KEYS, true, TInvertedIndexFileStorageFormat.V1);
+ null, "comment"),
KeysType.UNIQUE_KEYS, true,
+
TInvertedIndexFileStorageFormat.V1);
Assertions.fail("No exception throws.");
} catch (AnalysisException e) {
org.junit.jupiter.api.Assertions.assertInstanceOf(
diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto
index 92d6a9fe3b8..d62c9df5073 100644
--- a/gensrc/proto/olap_file.proto
+++ b/gensrc/proto/olap_file.proto
@@ -356,6 +356,7 @@ enum IndexType {
enum InvertedIndexStorageFormatPB {
V1 = 0;
V2 = 1;
+ V3 = 2; // NOTE(review): thrift TInvertedIndexFileStorageFormat.V3 is 3 (0 is DEFAULT there); presumably converted by name, not by number - confirm
}
message TabletIndexPB {
diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift
index 58716f51f3f..887031611d7 100644
--- a/gensrc/thrift/Types.thrift
+++ b/gensrc/thrift/Types.thrift
@@ -122,9 +122,10 @@ enum TStorageBackendType {
// This enum is used to distinguish between different organizational methods
// of inverted index data, affecting how the index is stored and accessed.
enum TInvertedIndexFileStorageFormat {
- DEFAULT, // Default format, unspecified storage method.
- V1, // Index per idx: Each index is stored separately based on its
identifier.
- V2 // Segment id per idx: Indexes are organized based on segment
identifiers, grouping indexes by their associated segment.
+ DEFAULT = 0, // Default format, unspecified storage method.
+ V1 = 1, // Index per idx: Each index is stored separately based on
its identifier.
+ V2 = 2, // Segment id per idx: Indexes are organized based on segment
identifiers, grouping indexes by their associated segment.
+ V3 = 3 // Position and dictionary compression
}
struct TScalarType {
diff --git a/regression-test/data/inverted_index_p0/test_inverted_index_v3.out
b/regression-test/data/inverted_index_p0/test_inverted_index_v3.out
new file mode 100644
index 00000000000..9dc20f3e0e0
Binary files /dev/null and
b/regression-test/data/inverted_index_p0/test_inverted_index_v3.out differ
diff --git
a/regression-test/suites/fault_injection_p0/test_inverted_index_v3_fault_injection.groovy
b/regression-test/suites/fault_injection_p0/test_inverted_index_v3_fault_injection.groovy
new file mode 100644
index 00000000000..98c0e110964
--- /dev/null
+++
b/regression-test/suites/fault_injection_p0/test_inverted_index_v3_fault_injection.groovy
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+// Writes rows into a table created with inverted_index_storage_format = V3 while
+// backend debug points are enabled. The suite contains no explicit assertions;
+// presumably a failing `sql` call is what fails the suite - TODO confirm.
+suite("test_inverted_index_v3_fault_injection", "nonConcurrent"){
+ def indexTbName1 = "test_inverted_index_v3_fault_injection"
+
+ sql "DROP TABLE IF EXISTS ${indexTbName1}"
+
+ // NOTE(review): clientip_idx is declared without USING INVERTED while request_idx
+ // has it - confirm that this difference is intended.
+ sql """
+ CREATE TABLE ${indexTbName1} (
+ `@timestamp` int(11) NULL COMMENT "",
+ `clientip` varchar(20) NULL COMMENT "",
+ `request` text NULL COMMENT "",
+ `status` int(11) NULL COMMENT "",
+ `size` int(11) NULL COMMENT "",
+ INDEX clientip_idx (`clientip`) COMMENT '',
+ INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`@timestamp`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "inverted_index_storage_format" = "V3"
+ );
+ """
+
+ // Case 1: insert one row with only the create_field_v3 debug point enabled.
+ try {
+
GetDebugPoint().enableDebugPointForAllBEs("InvertedIndexColumnWriterImpl::create_field_v3")
+
+ sql """ INSERT INTO ${indexTbName1} VALUES (1, '40.135.0.0', 'GET
/images/hm_bg.jpg HTTP/1.0', 200, 24736); """
+ } finally {
+ // Disable the debug point even if the insert threw.
+
GetDebugPoint().disableDebugPointForAllBEs("InvertedIndexColumnWriterImpl::create_field_v3")
+ }
+
+ // Case 2: the same insert (id 2) with both create_field_v3 and
+ // create_field_dic_compression enabled.
+ try {
+
GetDebugPoint().enableDebugPointForAllBEs("InvertedIndexColumnWriterImpl::create_field_v3")
+
GetDebugPoint().enableDebugPointForAllBEs("InvertedIndexColumnWriterImpl::create_field_dic_compression")
+
+ sql """ INSERT INTO ${indexTbName1} VALUES (2, '40.135.0.0', 'GET
/images/hm_bg.jpg HTTP/1.0', 200, 24736); """
+ } finally {
+ // Disable both debug points even if the insert threw.
+
GetDebugPoint().disableDebugPointForAllBEs("InvertedIndexColumnWriterImpl::create_field_v3")
+
GetDebugPoint().disableDebugPointForAllBEs("InvertedIndexColumnWriterImpl::create_field_dic_compression")
+ }
+}
\ No newline at end of file
diff --git
a/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy
b/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy
new file mode 100644
index 00000000000..ea7dd0b595f
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy
@@ -0,0 +1,117 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+// Creates two tables with the same columns and index that differ only in
+// inverted_index_storage_format (V2 vs V3), stream-loads the same JSON file into
+// both, and runs the same four match queries against each table.
+suite("test_inverted_index_v3", "p0"){
+ def indexTbName1 = "test_inverted_index_v3_1"
+ def indexTbName2 = "test_inverted_index_v3_2"
+
+ sql "DROP TABLE IF EXISTS ${indexTbName1}"
+ sql "DROP TABLE IF EXISTS ${indexTbName2}"
+
+ // Table 1: storage format V2.
+ sql """
+ CREATE TABLE ${indexTbName1} (
+ `@timestamp` int(11) NULL COMMENT "",
+ `clientip` varchar(20) NULL COMMENT "",
+ `request` text NULL COMMENT "",
+ `status` int(11) NULL COMMENT "",
+ `size` int(11) NULL COMMENT "",
+ INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`@timestamp`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "inverted_index_storage_format" = "V2"
+ );
+ """
+
+ // Table 2: same definition, storage format V3.
+ sql """
+ CREATE TABLE ${indexTbName2} (
+ `@timestamp` int(11) NULL COMMENT "",
+ `clientip` varchar(20) NULL COMMENT "",
+ `request` text NULL COMMENT "",
+ `status` int(11) NULL COMMENT "",
+ `size` int(11) NULL COMMENT "",
+ INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`@timestamp`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "inverted_index_storage_format" = "V3"
+ );
+ """
+
+ // Stream-loads file_name into table_name and validates the load result.
+ // label gets a random UUID suffix; read_flag and format_flag are passed through
+ // as the 'read_json_by_line' and 'format' headers.
+ // expected_succ_rows >= 0 sets max_filter_ratio to '1' and asserts that exact
+ // loaded-row count; otherwise the total/loaded/unselected counts are checked.
+ // ignore_failure skips the check only when expected_succ_rows < 0.
+ // NOTE(review): load_to_single_tablet is declared but never used in the body.
+ def load_httplogs_data = {table_name, label, read_flag, format_flag,
file_name, ignore_failure=false,
+ expected_succ_rows = -1, load_to_single_tablet =
'true' ->
+
+ // load the json data
+ streamLoad {
+ table "${table_name}"
+
+ // set http request header params
+ set 'label', label + "_" + UUID.randomUUID().toString()
+ set 'read_json_by_line', read_flag
+ set 'format', format_flag
+ file file_name // import json file
+ time 10000 // limit inflight 10s
+ if (expected_succ_rows >= 0) {
+ set 'max_filter_ratio', '1'
+ }
+
+ // if declared a check callback, the default check condition will
ignore.
+ // So you must check all condition
+ check { result, exception, startTime, endTime ->
+ if (ignore_failure && expected_succ_rows < 0) { return }
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ if (expected_succ_rows >= 0) {
+ assertEquals(json.NumberLoadedRows, expected_succ_rows)
+ } else {
+ assertEquals(json.NumberTotalRows,
json.NumberLoadedRows + json.NumberUnselectedRows)
+ assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes
> 0)
+ }
+ }
+ }
+ }
+
+ // NOTE(review): the finally block below is empty, so the try adds no cleanup.
+ try {
+ load_httplogs_data.call(indexTbName1, indexTbName1, 'true', 'json',
'documents-1000.json')
+ load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json',
'documents-1000.json')
+
+ sql "sync"
+
+ // Four match queries against the V2 table; presumably the qt_ results are
+ // compared with the recorded .out file - TODO confirm.
+ qt_sql """ select count() from ${indexTbName1} where request match_any
'hm bg'; """
+ qt_sql """ select count() from ${indexTbName1} where request match_all
'hm bg'; """
+ qt_sql """ select count() from ${indexTbName1} where request
match_phrase 'hm bg'; """
+ qt_sql """ select count() from ${indexTbName1} where request
match_phrase_prefix 'hm bg'; """
+
+ // The same four queries against the V3 table.
+ qt_sql """ select count() from ${indexTbName2} where request match_any
'hm bg'; """
+ qt_sql """ select count() from ${indexTbName2} where request match_all
'hm bg'; """
+ qt_sql """ select count() from ${indexTbName2} where request
match_phrase 'hm bg'; """
+ qt_sql """ select count() from ${indexTbName2} where request
match_phrase_prefix 'hm bg'; """
+
+ } finally {
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]