This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 78f318d9fa0 [test](inverted index) add ut for index parser (#52001)
78f318d9fa0 is described below

commit 78f318d9fa083239f49fff5ec2b051683b3fda01
Author: airborne12 <[email protected]>
AuthorDate: Fri Jun 20 13:02:14 2025 +0800

    [test](inverted index) add ut for index parser (#52001)
    
    add ut case for inverted index parser
---
 be/test/olap/inverted_index_parser_test.cpp | 320 ++++++++++++++++++++++++++++
 1 file changed, 320 insertions(+)

diff --git a/be/test/olap/inverted_index_parser_test.cpp b/be/test/olap/inverted_index_parser_test.cpp
new file mode 100644
index 00000000000..5b62b8fc4b3
--- /dev/null
+++ b/be/test/olap/inverted_index_parser_test.cpp
@@ -0,0 +1,320 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/inverted_index_parser.h"
+
+#include <gtest/gtest.h>
+
+#include <map>
+#include <string>
+
+namespace doris {
+
+class InvertedIndexParserTest : public testing::Test { // gtest fixture named by the TEST_F cases in this patch
+public:
+    void SetUp() override {} // empty body: no per-test state is prepared
+    void TearDown() override {} // empty body: nothing is released after a test
+};
+
+// Test inverted_index_parser_type_to_string: each parser type maps to its string constant
+TEST_F(InvertedIndexParserTest, TestParserTypeToString) {
+    EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_NONE),
+              INVERTED_INDEX_PARSER_NONE);
+    EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_STANDARD),
+              INVERTED_INDEX_PARSER_STANDARD);
+    EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_UNICODE),
+              INVERTED_INDEX_PARSER_UNICODE);
+    EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_ENGLISH),
+              INVERTED_INDEX_PARSER_ENGLISH);
+    EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_CHINESE),
+              INVERTED_INDEX_PARSER_CHINESE);
+    EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_ICU),
+              INVERTED_INDEX_PARSER_ICU);
+    EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_BASIC),
+              INVERTED_INDEX_PARSER_BASIC);
+    EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_IK),
+              INVERTED_INDEX_PARSER_IK);
+    EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_UNKNOWN),
+              INVERTED_INDEX_PARSER_UNKNOWN);
+}
+
+// Test get_inverted_index_parser_type_from_string: parser name -> InvertedIndexParserType
+TEST_F(InvertedIndexParserTest, TestGetParserTypeFromString) {
+    // Valid parser names; "NONE" and "Standard" also check that letter case is ignored
+    EXPECT_EQ(get_inverted_index_parser_type_from_string("none"),
+              InvertedIndexParserType::PARSER_NONE);
+    EXPECT_EQ(get_inverted_index_parser_type_from_string("NONE"),
+              InvertedIndexParserType::PARSER_NONE);
+    EXPECT_EQ(get_inverted_index_parser_type_from_string("standard"),
+              InvertedIndexParserType::PARSER_STANDARD);
+    EXPECT_EQ(get_inverted_index_parser_type_from_string("Standard"),
+              InvertedIndexParserType::PARSER_STANDARD);
+    EXPECT_EQ(get_inverted_index_parser_type_from_string("unicode"),
+              InvertedIndexParserType::PARSER_UNICODE);
+    EXPECT_EQ(get_inverted_index_parser_type_from_string("english"),
+              InvertedIndexParserType::PARSER_ENGLISH);
+    EXPECT_EQ(get_inverted_index_parser_type_from_string("chinese"),
+              InvertedIndexParserType::PARSER_CHINESE);
+    EXPECT_EQ(get_inverted_index_parser_type_from_string("icu"),
+              InvertedIndexParserType::PARSER_ICU);
+    EXPECT_EQ(get_inverted_index_parser_type_from_string("basic"),
+              InvertedIndexParserType::PARSER_BASIC);
+    EXPECT_EQ(get_inverted_index_parser_type_from_string("ik"), InvertedIndexParserType::PARSER_IK);
+
+    // Unrecognized and empty names are expected to yield PARSER_UNKNOWN
+    EXPECT_EQ(get_inverted_index_parser_type_from_string("invalid"),
+              InvertedIndexParserType::PARSER_UNKNOWN);
+    EXPECT_EQ(get_inverted_index_parser_type_from_string(""),
+              InvertedIndexParserType::PARSER_UNKNOWN);
+}
+
+// Test get_parser_string_from_properties: reads the parser name out of an index property map
+TEST_F(InvertedIndexParserTest, TestGetParserStringFromProperties) {
+    std::map<std::string, std::string> properties;
+
+    // No parser key: the "none" parser constant is expected
+    EXPECT_EQ(get_parser_string_from_properties(properties), INVERTED_INDEX_PARSER_NONE);
+
+    // Parser key present: the stored value is returned
+    properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_ENGLISH;
+    EXPECT_EQ(get_parser_string_from_properties(properties), INVERTED_INDEX_PARSER_ENGLISH);
+
+    // Overwriting the key changes the result accordingly
+    properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_CHINESE;
+    EXPECT_EQ(get_parser_string_from_properties(properties), INVERTED_INDEX_PARSER_CHINESE);
+}
+
+// Test get_parser_mode_string_from_properties: explicit mode, and the fallback per parser
+TEST_F(InvertedIndexParserTest, TestGetParserModeStringFromProperties) {
+    std::map<std::string, std::string> properties;
+
+    // Empty properties: coarse granularity is expected
+    EXPECT_EQ(get_parser_mode_string_from_properties(properties),
+              INVERTED_INDEX_PARSER_COARSE_GRANULARITY);
+
+    // parser_mode key present: the stored value is returned
+    properties[INVERTED_INDEX_PARSER_MODE_KEY] = INVERTED_INDEX_PARSER_FINE_GRANULARITY;
+    EXPECT_EQ(get_parser_mode_string_from_properties(properties),
+              INVERTED_INDEX_PARSER_FINE_GRANULARITY);
+
+    // IK parser with no mode specified: smart mode is expected
+    properties.clear();
+    properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_IK;
+    EXPECT_EQ(get_parser_mode_string_from_properties(properties), INVERTED_INDEX_PARSER_SMART);
+
+    // Non-IK parser with no mode specified: coarse granularity is expected
+    properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_ENGLISH;
+    EXPECT_EQ(get_parser_mode_string_from_properties(properties),
+              INVERTED_INDEX_PARSER_COARSE_GRANULARITY);
+}
+
+// Test get_parser_phrase_support_string_from_properties: phrase-support flag lookup
+TEST_F(InvertedIndexParserTest, TestGetParserPhraseSupportStringFromProperties) {
+    std::map<std::string, std::string> properties;
+
+    // Empty properties: the "no phrase support" constant is expected
+    EXPECT_EQ(get_parser_phrase_support_string_from_properties(properties),
+              INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO);
+
+    // Phrase support key present: the stored value is returned
+    properties[INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY] = INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES;
+    EXPECT_EQ(get_parser_phrase_support_string_from_properties(properties),
+              INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES);
+
+    properties[INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY] = INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO;
+    EXPECT_EQ(get_parser_phrase_support_string_from_properties(properties),
+              INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO);
+}
+
+// Test get_parser_char_filter_map_from_properties: builds the char-filter map from properties
+TEST_F(InvertedIndexParserTest, TestGetParserCharFilterMapFromProperties) {
+    std::map<std::string, std::string> properties;
+
+    // Empty properties: an empty map is expected
+    CharFilterMap result = get_parser_char_filter_map_from_properties(properties);
+    EXPECT_TRUE(result.empty());
+
+    // Unrelated key only (no char_filter_type): still empty
+    properties["some_key"] = "some_value";
+    result = get_parser_char_filter_map_from_properties(properties);
+    EXPECT_TRUE(result.empty());
+
+    // char_replace filter type without a pattern: still empty
+    properties.clear();
+    properties[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE] = "char_replace";
+    result = get_parser_char_filter_map_from_properties(properties);
+    EXPECT_TRUE(result.empty());
+
+    // char_replace filter type with a pattern: type, pattern and replacement are returned
+    properties[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN] = "._";
+    result = get_parser_char_filter_map_from_properties(properties);
+    EXPECT_EQ(result.size(), 3);
+    EXPECT_EQ(result[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE], "char_replace");
+    EXPECT_EQ(result[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN], "._");
+    EXPECT_EQ(result[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT], " "); // default replacement
+
+    // Explicit replacement overrides the single-space value seen above
+    properties[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT] = "-";
+    result = get_parser_char_filter_map_from_properties(properties);
+    EXPECT_EQ(result.size(), 3);
+    EXPECT_EQ(result[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT], "-");
+
+    // Filter type other than char_replace: an empty map is expected
+    properties.clear();
+    properties[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE] = "invalid_type";
+    result = get_parser_char_filter_map_from_properties(properties);
+    EXPECT_TRUE(result.empty());
+}
+
+// Test get_parser_ignore_above_value_from_properties: ignore_above lookup with fallback
+TEST_F(InvertedIndexParserTest, TestGetParserIgnoreAboveValueFromProperties) {
+    std::map<std::string, std::string> properties;
+
+    // Empty properties: the INVERTED_INDEX_PARSER_IGNORE_ABOVE_VALUE constant is expected
+    EXPECT_EQ(get_parser_ignore_above_value_from_properties(properties),
+              INVERTED_INDEX_PARSER_IGNORE_ABOVE_VALUE);
+
+    // ignore_above key present: the stored string is returned
+    properties[INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY] = "512";
+    EXPECT_EQ(get_parser_ignore_above_value_from_properties(properties), "512");
+
+    properties[INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY] = "1024";
+    EXPECT_EQ(get_parser_ignore_above_value_from_properties(properties), "1024");
+}
+
+// Test get_parser_lowercase_from_properties function (template function)
+TEST_F(InvertedIndexParserTest, TestGetParserLowercaseFromProperties) {
+    std::map<std::string, std::string> properties;
+
+    // Empty properties, no explicit template argument: an empty string is expected
+    EXPECT_EQ(get_parser_lowercase_from_properties(properties), "");
+
+    // Empty properties, template argument true: the "true" constant is expected
+    EXPECT_EQ(get_parser_lowercase_from_properties<true>(properties), INVERTED_INDEX_PARSER_TRUE);
+
+    // lower_case key present: the stored value is returned for both instantiations
+    properties[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = INVERTED_INDEX_PARSER_TRUE;
+    EXPECT_EQ(get_parser_lowercase_from_properties(properties), INVERTED_INDEX_PARSER_TRUE);
+    EXPECT_EQ(get_parser_lowercase_from_properties<true>(properties), INVERTED_INDEX_PARSER_TRUE);
+
+    properties[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = INVERTED_INDEX_PARSER_FALSE;
+    EXPECT_EQ(get_parser_lowercase_from_properties(properties), INVERTED_INDEX_PARSER_FALSE);
+    EXPECT_EQ(get_parser_lowercase_from_properties<true>(properties), INVERTED_INDEX_PARSER_FALSE);
+}
+
+// Test get_parser_stopwords_from_properties: stopwords lookup in an index property map
+TEST_F(InvertedIndexParserTest, TestGetParserStopwordsFromProperties) {
+    std::map<std::string, std::string> properties;
+
+    // No stopwords key: an empty string is expected
+    EXPECT_EQ(get_parser_stopwords_from_properties(properties), "");
+
+    // Stopwords key present: the stored value is expected back unchanged
+    properties[INVERTED_INDEX_PARSER_STOPWORDS_KEY] = "a,an,the";
+    EXPECT_EQ(get_parser_stopwords_from_properties(properties), "a,an,the");
+
+    properties[INVERTED_INDEX_PARSER_STOPWORDS_KEY] = "";
+    EXPECT_EQ(get_parser_stopwords_from_properties(properties), "");
+}
+
+// Test get_parser_dict_compression_from_properties: dict_compression lookup
+TEST_F(InvertedIndexParserTest, TestGetParserDictCompressionFromProperties) {
+    std::map<std::string, std::string> properties;
+
+    // No dict_compression key: an empty string is expected
+    EXPECT_EQ(get_parser_dict_compression_from_properties(properties), "");
+
+    // dict_compression key present: the stored value is returned
+    properties[INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY] = "true";
+    EXPECT_EQ(get_parser_dict_compression_from_properties(properties), "true");
+
+    properties[INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY] = "false";
+    EXPECT_EQ(get_parser_dict_compression_from_properties(properties), "false");
+}
+
+// Test InvertedIndexCtx structure: fields can be assigned and read back
+TEST_F(InvertedIndexParserTest, TestInvertedIndexCtxStructure) {
+    InvertedIndexCtx ctx;
+
+    // Assign each field explicitly (this does not exercise default initialization)
+    ctx.parser_type = InvertedIndexParserType::PARSER_ENGLISH;
+    ctx.parser_mode = INVERTED_INDEX_PARSER_FINE_GRANULARITY;
+    ctx.lower_case = INVERTED_INDEX_PARSER_TRUE;
+    ctx.stop_words = "a,an,the";
+    ctx.analyzer = nullptr;
+
+    EXPECT_EQ(ctx.parser_type, InvertedIndexParserType::PARSER_ENGLISH);
+    EXPECT_EQ(ctx.parser_mode, INVERTED_INDEX_PARSER_FINE_GRANULARITY);
+    EXPECT_EQ(ctx.lower_case, INVERTED_INDEX_PARSER_TRUE);
+    EXPECT_EQ(ctx.stop_words, "a,an,the");
+    EXPECT_EQ(ctx.analyzer, nullptr);
+
+    // char_filter_map accepts the three char-filter keys and returns them unchanged
+    ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE] = "char_replace";
+    ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN] = "._";
+    ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT] = " ";
+
+    EXPECT_EQ(ctx.char_filter_map.size(), 3);
+    EXPECT_EQ(ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE], "char_replace");
+    EXPECT_EQ(ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN], "._");
+    EXPECT_EQ(ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT], " ");
+}
+
+// Test constants: pin the literal string value of each parser-related constant
+TEST_F(InvertedIndexParserTest, TestConstants) {
+    // Test parser constants
+    EXPECT_EQ(INVERTED_INDEX_PARSER_UNKNOWN, "unknown");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_NONE, "none");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_STANDARD, "standard");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_UNICODE, "unicode");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_ENGLISH, "english");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_CHINESE, "chinese");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_ICU, "icu");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_BASIC, "basic");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_IK, "ik");
+
+    // Test mode constants
+    EXPECT_EQ(INVERTED_INDEX_PARSER_FINE_GRANULARITY, "fine_grained");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_COARSE_GRANULARITY, "coarse_grained");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_MAX_WORD, "ik_max_word");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_SMART, "ik_smart");
+
+    // Test boolean constants
+    EXPECT_EQ(INVERTED_INDEX_PARSER_TRUE, "true");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_FALSE, "false");
+
+    // Test phrase support constants
+    EXPECT_EQ(INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES, "true");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO, "false");
+
+    // Test key constants
+    EXPECT_EQ(INVERTED_INDEX_PARSER_KEY, "parser");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_MODE_KEY, "parser_mode");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY, "support_phrase");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_LOWERCASE_KEY, "lower_case");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_STOPWORDS_KEY, "stopwords");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY, "dict_compression");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY, "ignore_above");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_IGNORE_ABOVE_VALUE, "256");
+
+    // Test char filter constants
+    EXPECT_EQ(INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE, "char_filter_type");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN, "char_filter_pattern");
+    EXPECT_EQ(INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT, "char_filter_replacement");
+}
+
+} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to