This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 78f318d9fa0 [test](inverted index) add ut for index parser (#52001)
78f318d9fa0 is described below
commit 78f318d9fa083239f49fff5ec2b051683b3fda01
Author: airborne12 <[email protected]>
AuthorDate: Fri Jun 20 13:02:14 2025 +0800
[test](inverted index) add ut for index parser (#52001)
add ut case for inverted index parser
---
be/test/olap/inverted_index_parser_test.cpp | 320 ++++++++++++++++++++++++++++
1 file changed, 320 insertions(+)
diff --git a/be/test/olap/inverted_index_parser_test.cpp b/be/test/olap/inverted_index_parser_test.cpp
new file mode 100644
index 00000000000..5b62b8fc4b3
--- /dev/null
+++ b/be/test/olap/inverted_index_parser_test.cpp
@@ -0,0 +1,320 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/inverted_index_parser.h"
+
+#include <gtest/gtest.h>
+
+#include <map>
+#include <string>
+
+namespace doris {
+
+class InvertedIndexParserTest : public testing::Test { // fixture used by every TEST_F in this file
+public:
+ void SetUp() override {} // empty body: no per-test setup is performed
+ void TearDown() override {} // empty body: no per-test cleanup is performed
+};
+
+// inverted_index_parser_type_to_string: each listed enumerator maps to its INVERTED_INDEX_PARSER_* name
+TEST_F(InvertedIndexParserTest, TestParserTypeToString) {
+ EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_NONE),
+ INVERTED_INDEX_PARSER_NONE);
+ EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_STANDARD),
+ INVERTED_INDEX_PARSER_STANDARD);
+ EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_UNICODE),
+ INVERTED_INDEX_PARSER_UNICODE);
+ EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_ENGLISH),
+ INVERTED_INDEX_PARSER_ENGLISH);
+ EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_CHINESE),
+ INVERTED_INDEX_PARSER_CHINESE);
+ EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_ICU),
+ INVERTED_INDEX_PARSER_ICU);
+ EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_BASIC),
+ INVERTED_INDEX_PARSER_BASIC);
+ EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_IK),
+ INVERTED_INDEX_PARSER_IK);
+ EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_UNKNOWN),
+ INVERTED_INDEX_PARSER_UNKNOWN);
+}
+
+// get_inverted_index_parser_type_from_string: known names resolve; other input expects PARSER_UNKNOWN
+TEST_F(InvertedIndexParserTest, TestGetParserTypeFromString) {
+ // Test all valid parser types (case insensitive)
+ EXPECT_EQ(get_inverted_index_parser_type_from_string("none"),
+ InvertedIndexParserType::PARSER_NONE);
+ EXPECT_EQ(get_inverted_index_parser_type_from_string("NONE"),
+ InvertedIndexParserType::PARSER_NONE);
+ EXPECT_EQ(get_inverted_index_parser_type_from_string("standard"),
+ InvertedIndexParserType::PARSER_STANDARD);
+ EXPECT_EQ(get_inverted_index_parser_type_from_string("Standard"),
+ InvertedIndexParserType::PARSER_STANDARD);
+ EXPECT_EQ(get_inverted_index_parser_type_from_string("unicode"),
+ InvertedIndexParserType::PARSER_UNICODE);
+ EXPECT_EQ(get_inverted_index_parser_type_from_string("english"),
+ InvertedIndexParserType::PARSER_ENGLISH);
+ EXPECT_EQ(get_inverted_index_parser_type_from_string("chinese"),
+ InvertedIndexParserType::PARSER_CHINESE);
+ EXPECT_EQ(get_inverted_index_parser_type_from_string("icu"),
+ InvertedIndexParserType::PARSER_ICU);
+ EXPECT_EQ(get_inverted_index_parser_type_from_string("basic"),
+ InvertedIndexParserType::PARSER_BASIC);
+ EXPECT_EQ(get_inverted_index_parser_type_from_string("ik"), InvertedIndexParserType::PARSER_IK);
+
+ // Unrecognized and empty names are both expected to yield PARSER_UNKNOWN
+ EXPECT_EQ(get_inverted_index_parser_type_from_string("invalid"),
+ InvertedIndexParserType::PARSER_UNKNOWN);
+ EXPECT_EQ(get_inverted_index_parser_type_from_string(""),
+ InvertedIndexParserType::PARSER_UNKNOWN);
+}
+
+// get_parser_string_from_properties: expects the NONE constant without the key, else the stored value
+TEST_F(InvertedIndexParserTest, TestGetParserStringFromProperties) {
+ std::map<std::string, std::string> properties;
+
+ // Test with empty properties
+ EXPECT_EQ(get_parser_string_from_properties(properties), INVERTED_INDEX_PARSER_NONE);
+
+ // Test with parser key present
+ properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_ENGLISH;
+ EXPECT_EQ(get_parser_string_from_properties(properties), INVERTED_INDEX_PARSER_ENGLISH);
+
+ // Overwriting the key must be reflected by the next lookup
+ properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_CHINESE;
+ EXPECT_EQ(get_parser_string_from_properties(properties), INVERTED_INDEX_PARSER_CHINESE);
+}
+
+// get_parser_mode_string_from_properties: explicit mode wins; the expected default depends on parser
+TEST_F(InvertedIndexParserTest, TestGetParserModeStringFromProperties) {
+ std::map<std::string, std::string> properties;
+
+ // Test with empty properties
+ EXPECT_EQ(get_parser_mode_string_from_properties(properties),
+ INVERTED_INDEX_PARSER_COARSE_GRANULARITY);
+
+ // Test with parser_mode key present
+ properties[INVERTED_INDEX_PARSER_MODE_KEY] = INVERTED_INDEX_PARSER_FINE_GRANULARITY;
+ EXPECT_EQ(get_parser_mode_string_from_properties(properties),
+ INVERTED_INDEX_PARSER_FINE_GRANULARITY);
+
+ // Test with IK parser (should return smart mode when no mode specified)
+ properties.clear();
+ properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_IK;
+ EXPECT_EQ(get_parser_mode_string_from_properties(properties), INVERTED_INDEX_PARSER_SMART);
+
+ // Test with non-IK parser (should return coarse granularity)
+ properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_ENGLISH;
+ EXPECT_EQ(get_parser_mode_string_from_properties(properties),
+ INVERTED_INDEX_PARSER_COARSE_GRANULARITY);
+}
+
+// get_parser_phrase_support_string_from_properties: expects NO without the key, else the stored value
+TEST_F(InvertedIndexParserTest, TestGetParserPhraseSupportStringFromProperties) {
+ std::map<std::string, std::string> properties;
+
+ // Test with empty properties
+ EXPECT_EQ(get_parser_phrase_support_string_from_properties(properties),
+ INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO);
+
+ // Test with phrase support key present
+ properties[INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY] = INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES;
+ EXPECT_EQ(get_parser_phrase_support_string_from_properties(properties),
+ INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES);
+
+ properties[INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY] = INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO;
+ EXPECT_EQ(get_parser_phrase_support_string_from_properties(properties),
+ INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO);
+}
+
+// get_parser_char_filter_map_from_properties: non-empty only for char_replace with a pattern present
+TEST_F(InvertedIndexParserTest, TestGetParserCharFilterMapFromProperties) {
+ std::map<std::string, std::string> properties;
+
+ // Test with empty properties
+ CharFilterMap result = get_parser_char_filter_map_from_properties(properties);
+ EXPECT_TRUE(result.empty());
+
+ // Test with missing char_filter_type
+ properties["some_key"] = "some_value";
+ result = get_parser_char_filter_map_from_properties(properties);
+ EXPECT_TRUE(result.empty());
+
+ // Test with valid char_replace filter but missing pattern
+ properties.clear();
+ properties[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE] = "char_replace";
+ result = get_parser_char_filter_map_from_properties(properties);
+ EXPECT_TRUE(result.empty());
+
+ // Test with valid char_replace filter and pattern
+ properties[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN] = "._";
+ result = get_parser_char_filter_map_from_properties(properties);
+ EXPECT_EQ(result.size(), 3);
+ EXPECT_EQ(result[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE], "char_replace");
+ EXPECT_EQ(result[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN], "._");
+ EXPECT_EQ(result[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT], " "); // default replacement
+
+ // Test with custom replacement
+ properties[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT] = "-";
+ result = get_parser_char_filter_map_from_properties(properties);
+ EXPECT_EQ(result.size(), 3);
+ EXPECT_EQ(result[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT], "-");
+
+ // Test with invalid filter type
+ properties.clear();
+ properties[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE] = "invalid_type";
+ result = get_parser_char_filter_map_from_properties(properties);
+ EXPECT_TRUE(result.empty());
+}
+
+// get_parser_ignore_above_value_from_properties: expects the default constant, else the stored value
+TEST_F(InvertedIndexParserTest, TestGetParserIgnoreAboveValueFromProperties) {
+ std::map<std::string, std::string> properties;
+
+ // Test with empty properties
+ EXPECT_EQ(get_parser_ignore_above_value_from_properties(properties),
+ INVERTED_INDEX_PARSER_IGNORE_ABOVE_VALUE);
+
+ // Test with ignore_above key present
+ properties[INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY] = "512";
+ EXPECT_EQ(get_parser_ignore_above_value_from_properties(properties), "512");
+
+ properties[INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY] = "1024";
+ EXPECT_EQ(get_parser_ignore_above_value_from_properties(properties), "1024");
+}
+
+// get_parser_lowercase_from_properties (template): a stored value is expected to override the default
+TEST_F(InvertedIndexParserTest, TestGetParserLowercaseFromProperties) {
+ std::map<std::string, std::string> properties;
+
+ // Test with empty properties (default template parameter false)
+ EXPECT_EQ(get_parser_lowercase_from_properties(properties), "");
+
+ // Test with empty properties (template parameter true)
+ EXPECT_EQ(get_parser_lowercase_from_properties<true>(properties), INVERTED_INDEX_PARSER_TRUE);
+
+ // Test with lower_case key present
+ properties[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = INVERTED_INDEX_PARSER_TRUE;
+ EXPECT_EQ(get_parser_lowercase_from_properties(properties), INVERTED_INDEX_PARSER_TRUE);
+ EXPECT_EQ(get_parser_lowercase_from_properties<true>(properties), INVERTED_INDEX_PARSER_TRUE);
+
+ properties[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = INVERTED_INDEX_PARSER_FALSE;
+ EXPECT_EQ(get_parser_lowercase_from_properties(properties), INVERTED_INDEX_PARSER_FALSE);
+ EXPECT_EQ(get_parser_lowercase_from_properties<true>(properties), INVERTED_INDEX_PARSER_FALSE);
+}
+
+// get_parser_stopwords_from_properties: expects "" without the key, else the stored value verbatim
+TEST_F(InvertedIndexParserTest, TestGetParserStopwordsFromProperties) {
+ std::map<std::string, std::string> properties;
+
+ // Key absent: the expected result is the empty string
+ EXPECT_EQ(get_parser_stopwords_from_properties(properties), "");
+
+ // Key present: the expected result is the value stored under INVERTED_INDEX_PARSER_STOPWORDS_KEY
+ properties[INVERTED_INDEX_PARSER_STOPWORDS_KEY] = "a,an,the";
+ EXPECT_EQ(get_parser_stopwords_from_properties(properties), "a,an,the");
+
+ properties[INVERTED_INDEX_PARSER_STOPWORDS_KEY] = ""; // explicitly empty value, expected back as-is
+ EXPECT_EQ(get_parser_stopwords_from_properties(properties), "");
+}
+
+// get_parser_dict_compression_from_properties: expects "" without the key, else the stored value
+TEST_F(InvertedIndexParserTest, TestGetParserDictCompressionFromProperties) {
+ std::map<std::string, std::string> properties;
+
+ // Test with empty properties
+ EXPECT_EQ(get_parser_dict_compression_from_properties(properties), "");
+
+ // Test with dict_compression key present
+ properties[INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY] = "true";
+ EXPECT_EQ(get_parser_dict_compression_from_properties(properties), "true");
+
+ properties[INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY] = "false";
+ EXPECT_EQ(get_parser_dict_compression_from_properties(properties), "false");
+}
+
+// InvertedIndexCtx: values written to its fields and to char_filter_map must read back unchanged
+TEST_F(InvertedIndexParserTest, TestInvertedIndexCtxStructure) {
+ InvertedIndexCtx ctx;
+
+ // Assign each scalar field explicitly (this does not exercise default initialization)
+ ctx.parser_type = InvertedIndexParserType::PARSER_ENGLISH;
+ ctx.parser_mode = INVERTED_INDEX_PARSER_FINE_GRANULARITY;
+ ctx.lower_case = INVERTED_INDEX_PARSER_TRUE;
+ ctx.stop_words = "a,an,the";
+ ctx.analyzer = nullptr;
+
+ EXPECT_EQ(ctx.parser_type, InvertedIndexParserType::PARSER_ENGLISH);
+ EXPECT_EQ(ctx.parser_mode, INVERTED_INDEX_PARSER_FINE_GRANULARITY);
+ EXPECT_EQ(ctx.lower_case, INVERTED_INDEX_PARSER_TRUE);
+ EXPECT_EQ(ctx.stop_words, "a,an,the");
+ EXPECT_EQ(ctx.analyzer, nullptr);
+
+ // Test char_filter_map
+ ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE] = "char_replace";
+ ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN] = "._";
+ ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT] = " ";
+
+ EXPECT_EQ(ctx.char_filter_map.size(), 3);
+ EXPECT_EQ(ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE], "char_replace");
+ EXPECT_EQ(ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN], "._");
+ EXPECT_EQ(ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT], " ");
+}
+
+// Pins the literal value of each INVERTED_INDEX_PARSER_* constant checked below
+TEST_F(InvertedIndexParserTest, TestConstants) {
+ // Test parser constants
+ EXPECT_EQ(INVERTED_INDEX_PARSER_UNKNOWN, "unknown");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_NONE, "none");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_STANDARD, "standard");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_UNICODE, "unicode");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_ENGLISH, "english");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_CHINESE, "chinese");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_ICU, "icu");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_BASIC, "basic");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_IK, "ik");
+
+ // Test mode constants
+ EXPECT_EQ(INVERTED_INDEX_PARSER_FINE_GRANULARITY, "fine_grained");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_COARSE_GRANULARITY, "coarse_grained");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_MAX_WORD, "ik_max_word");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_SMART, "ik_smart");
+
+ // Test boolean constants
+ EXPECT_EQ(INVERTED_INDEX_PARSER_TRUE, "true");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_FALSE, "false");
+
+ // Test phrase support constants
+ EXPECT_EQ(INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES, "true");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO, "false");
+
+ // Test key constants
+ EXPECT_EQ(INVERTED_INDEX_PARSER_KEY, "parser");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_MODE_KEY, "parser_mode");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY, "support_phrase");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_LOWERCASE_KEY, "lower_case");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_STOPWORDS_KEY, "stopwords");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY, "dict_compression");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY, "ignore_above");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_IGNORE_ABOVE_VALUE, "256");
+
+ // Test char filter constants
+ EXPECT_EQ(INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE, "char_filter_type");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN, "char_filter_pattern");
+ EXPECT_EQ(INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT, "char_filter_replacement");
+}
+
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]