This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 39f48833014 [improve](ip-function)improve is_ip_address_in_range for 
inverted index speed (#41768)
39f48833014 is described below

commit 39f48833014fdc0b268000fc648544e469a900cd
Author: amory <wangqian...@selectdb.com>
AuthorDate: Wed Oct 16 09:44:26 2024 +0800

    [improve](ip-function)improve is_ip_address_in_range for inverted index 
speed (#41768)
    
    ## Proposed changes
    speed up is_ip_address_in_range with inverted index
---
 .../olap/rowset/segment_v2/inverted_index_reader.h |   2 +
 be/src/vec/functions/function_ip.h                 | 252 ++++++++++++---------
 be/src/vec/runtime/ip_address_cidr.h               |  27 +++
 .../functions/scalar/IsIpAddressInRange.java       |   6 +
 .../test_ip_cidr_search_with_inverted_index.out    |  70 ++++++
 .../test_ip_cidr_search_with_inverted_index.groovy | 134 +++++++++++
 6 files changed, 385 insertions(+), 106 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h 
b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index 3756988ba7d..d3a0ff3cf11 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -410,6 +410,8 @@ public:
             M(PrimitiveType::TYPE_STRING)
             M(PrimitiveType::TYPE_DATEV2)
             M(PrimitiveType::TYPE_DATETIMEV2)
+            M(PrimitiveType::TYPE_IPV4)
+            M(PrimitiveType::TYPE_IPV6)
 #undef M
         default:
             return Status::NotSupported("Unsupported primitive type {} for 
inverted index reader",
diff --git a/be/src/vec/functions/function_ip.h 
b/be/src/vec/functions/function_ip.h
index 30b901b4459..ddb99d80a1b 100644
--- a/be/src/vec/functions/function_ip.h
+++ b/be/src/vec/functions/function_ip.h
@@ -606,110 +606,177 @@ public:
         return std::make_shared<DataTypeUInt8>();
     }
 
-    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
-                        size_t result, size_t input_rows_count) const override 
{
-        const auto& addr_column_with_type_and_name = 
block.get_by_position(arguments[0]);
-        const auto& cidr_column_with_type_and_name = 
block.get_by_position(arguments[1]);
-        WhichDataType addr_type(addr_column_with_type_and_name.type);
-        WhichDataType cidr_type(cidr_column_with_type_and_name.type);
-        const auto& [addr_column, addr_const] =
-                unpack_if_const(addr_column_with_type_and_name.column);
-        const auto& [cidr_column, cidr_const] =
-                unpack_if_const(cidr_column_with_type_and_name.column);
-        const auto* str_addr_column = assert_cast<const 
ColumnString*>(addr_column.get());
-        const auto* str_cidr_column = assert_cast<const 
ColumnString*>(cidr_column.get());
-
-        auto col_res = ColumnUInt8::create(input_rows_count, 0);
+    template <PrimitiveType PT, typename ColumnType>
+    void execute_impl_with_ip(size_t input_rows_count, bool addr_const, bool 
cidr_const,
+                              const ColumnString* str_cidr_column, const 
ColumnPtr addr_column,
+                              ColumnUInt8* col_res) const {
         auto& col_res_data = col_res->get_data();
-
+        const auto& ip_data = assert_cast<const 
ColumnType*>(addr_column.get())->get_data();
         for (size_t i = 0; i < input_rows_count; ++i) {
             auto addr_idx = index_check_const(i, addr_const);
             auto cidr_idx = index_check_const(i, cidr_const);
-
-            const auto addr =
-                    
IPAddressVariant(str_addr_column->get_data_at(addr_idx).to_string_view());
             const auto cidr =
                     
parse_ip_with_cidr(str_cidr_column->get_data_at(cidr_idx).to_string_view());
-            col_res_data[i] = is_address_in_range(addr, cidr) ? 1 : 0;
+            if constexpr (PT == PrimitiveType::TYPE_IPV4) {
+                if (cidr._address.as_v4()) {
+                    col_res_data[i] = match_ipv4_subnet(ip_data[addr_idx], 
cidr._address.as_v4(),
+                                                        cidr._prefix)
+                                              ? 1
+                                              : 0;
+                } else {
+                    col_res_data[i] = 0;
+                }
+            } else if constexpr (PT == PrimitiveType::TYPE_IPV6) {
+                if (cidr._address.as_v6()) {
+                    col_res_data[i] = 
match_ipv6_subnet((uint8*)(&ip_data[addr_idx]),
+                                                        cidr._address.as_v6(), 
cidr._prefix)
+                                              ? 1
+                                              : 0;
+                } else {
+                    col_res_data[i] = 0;
+                }
+            }
         }
-
-        block.replace_by_position(result, std::move(col_res));
-        return Status::OK();
     }
-};
-
-// old version throw exception when meet null value
-class FunctionIsIPAddressInRangeOld : public IFunction {
-public:
-    static constexpr auto name = "is_ip_address_in_range";
-    static FunctionPtr create() { return 
std::make_shared<FunctionIsIPAddressInRange>(); }
-
-    String get_name() const override { return name; }
 
-    size_t get_number_of_arguments() const override { return 2; }
+    Status evaluate_inverted_index(
+            const ColumnsWithTypeAndName& arguments,
+            const std::vector<vectorized::IndexFieldNameAndTypePair>& 
data_type_with_names,
+            std::vector<segment_v2::InvertedIndexIterator*> iterators, 
uint32_t num_rows,
+            segment_v2::InvertedIndexResultBitmap& bitmap_result) const 
override {
+        DCHECK(arguments.size() == 1);
+        DCHECK(data_type_with_names.size() == 1);
+        DCHECK(iterators.size() == 1);
+        auto* iter = iterators[0];
+        auto data_type_with_name = data_type_with_names[0];
+        if (iter == nullptr) {
+            return Status::OK();
+        }
 
-    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
-        return std::make_shared<DataTypeUInt8>();
+        if (iter->get_inverted_index_reader_type() != 
segment_v2::InvertedIndexReaderType::BKD) {
+            // Not support only bkd index
+            return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>(
+                    "Inverted index evaluate skipped, ip range reader can only 
support by bkd "
+                    "reader");
+        }
+        // Get the is_ip_address_in_range from the arguments: cidr
+        const auto& cidr_column_with_type_and_name = arguments[0];
+        // in is_ip_address_in_range param is const Field
+        ColumnPtr arg_column = cidr_column_with_type_and_name.column;
+        DataTypePtr arg_type = cidr_column_with_type_and_name.type;
+        if ((is_column_nullable(*arg_column) && 
!is_column_const(*remove_nullable(arg_column))) ||
+            (!is_column_nullable(*arg_column) && 
!is_column_const(*arg_column))) {
+            // if not we should skip inverted index and evaluate in expression
+            return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>(
+                    "Inverted index evaluate skipped, is_ip_address_in_range 
only support const "
+                    "value");
+        }
+        // check param type is string
+        if (!WhichDataType(*arg_type).is_string()) {
+            return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>(
+                    "Inverted index evaluate skipped, is_ip_address_in_range 
only support string "
+                    "type");
+        }
+        // min && max ip address
+        Field min_ip, max_ip;
+        IPAddressCIDR cidr = parse_ip_with_cidr(arg_column->get_data_at(0));
+        if 
(WhichDataType(remove_nullable(data_type_with_name.second)).is_ipv4() &&
+            cidr._address.as_v4()) {
+            auto range = apply_cidr_mask(cidr._address.as_v4(), cidr._prefix);
+            min_ip = range.first;
+            max_ip = range.second;
+        } else if 
(WhichDataType(remove_nullable(data_type_with_name.second)).is_ipv6() &&
+                   cidr._address.as_v6()) {
+            auto cidr_range_ipv6_col = ColumnIPv6::create(2, 0);
+            auto& cidr_range_ipv6_data = cidr_range_ipv6_col->get_data();
+            apply_cidr_mask(reinterpret_cast<const 
char*>(cidr._address.as_v6()),
+                            reinterpret_cast<char*>(&cidr_range_ipv6_data[0]),
+                            reinterpret_cast<char*>(&cidr_range_ipv6_data[1]), 
cidr._prefix);
+            min_ip = cidr_range_ipv6_data[0];
+            max_ip = cidr_range_ipv6_data[1];
+        }
+        // apply for inverted index
+        std::shared_ptr<roaring::Roaring> res_roaring = 
std::make_shared<roaring::Roaring>();
+        std::shared_ptr<roaring::Roaring> max_roaring = 
std::make_shared<roaring::Roaring>();
+        std::shared_ptr<roaring::Roaring> null_bitmap = 
std::make_shared<roaring::Roaring>();
+
+        auto param_type = 
data_type_with_name.second->get_type_as_type_descriptor().type;
+        std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> 
query_param = nullptr;
+        // >= min ip
+        
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
+                param_type, &min_ip, query_param));
+        RETURN_IF_ERROR(iter->read_from_inverted_index(
+                data_type_with_name.first, query_param->get_value(),
+                segment_v2::InvertedIndexQueryType::GREATER_EQUAL_QUERY, 
num_rows, res_roaring));
+        // <= max ip
+        
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
+                param_type, &max_ip, query_param));
+        RETURN_IF_ERROR(iter->read_from_inverted_index(
+                data_type_with_name.first, query_param->get_value(),
+                segment_v2::InvertedIndexQueryType::LESS_EQUAL_QUERY, 
num_rows, max_roaring));
+
+        DBUG_EXECUTE_IF("ip.inverted_index_filtered", {
+            auto req_id = 
DebugPoints::instance()->get_debug_param_or_default<int32_t>(
+                    "ip.inverted_index_filtered", "req_id", 0);
+            LOG(INFO) << "execute inverted index req_id: " << req_id
+                      << " min: " << res_roaring->cardinality();
+        });
+        *res_roaring &= *max_roaring;
+        DBUG_EXECUTE_IF("ip.inverted_index_filtered", {
+            auto req_id = 
DebugPoints::instance()->get_debug_param_or_default<int32_t>(
+                    "ip.inverted_index_filtered", "req_id", 0);
+            LOG(INFO) << "execute inverted index req_id: " << req_id
+                      << " max: " << max_roaring->cardinality()
+                      << " result: " << res_roaring->cardinality();
+        });
+        segment_v2::InvertedIndexResultBitmap result(res_roaring, null_bitmap);
+        bitmap_result = result;
+        bitmap_result.mask_out_null();
+        return Status::OK();
     }
 
-    bool use_default_implementation_for_nulls() const override { return false; 
}
-
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         size_t result, size_t input_rows_count) const override 
{
+        DBUG_EXECUTE_IF("ip.inverted_index_filtered", {
+            auto req_id = 
DebugPoints::instance()->get_debug_param_or_default<int32_t>(
+                    "ip.inverted_index_filtered", "req_id", 0);
+            return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                    "{} has already execute inverted index req_id {} , should 
not execute expr "
+                    "with rows: {}",
+                    get_name(), req_id, input_rows_count);
+        });
         const auto& addr_column_with_type_and_name = 
block.get_by_position(arguments[0]);
         const auto& cidr_column_with_type_and_name = 
block.get_by_position(arguments[1]);
-        WhichDataType addr_type(addr_column_with_type_and_name.type);
-        WhichDataType cidr_type(cidr_column_with_type_and_name.type);
         const auto& [addr_column, addr_const] =
                 unpack_if_const(addr_column_with_type_and_name.column);
         const auto& [cidr_column, cidr_const] =
                 unpack_if_const(cidr_column_with_type_and_name.column);
-        const ColumnString* str_addr_column = nullptr;
-        const ColumnString* str_cidr_column = nullptr;
-        const NullMap* null_map_addr = nullptr;
-        const NullMap* null_map_cidr = nullptr;
-
-        if (addr_type.is_nullable()) {
-            const auto* addr_column_nullable =
-                    assert_cast<const ColumnNullable*>(addr_column.get());
-            str_addr_column = assert_cast<const ColumnString*>(
-                    addr_column_nullable->get_nested_column_ptr().get());
-            null_map_addr = &addr_column_nullable->get_null_map_data();
-        } else {
-            str_addr_column = assert_cast<const 
ColumnString*>(addr_column.get());
-        }
-
-        if (cidr_type.is_nullable()) {
-            const auto* cidr_column_nullable =
-                    assert_cast<const ColumnNullable*>(cidr_column.get());
-            str_cidr_column = assert_cast<const ColumnString*>(
-                    cidr_column_nullable->get_nested_column_ptr().get());
-            null_map_cidr = &cidr_column_nullable->get_null_map_data();
-        } else {
-            str_cidr_column = assert_cast<const 
ColumnString*>(cidr_column.get());
-        }
 
         auto col_res = ColumnUInt8::create(input_rows_count, 0);
         auto& col_res_data = col_res->get_data();
 
-        for (size_t i = 0; i < input_rows_count; ++i) {
-            auto addr_idx = index_check_const(i, addr_const);
-            auto cidr_idx = index_check_const(i, cidr_const);
-            if (null_map_addr && (*null_map_addr)[addr_idx]) [[unlikely]] {
-                throw Exception(ErrorCode::INVALID_ARGUMENT,
-                                "The arguments of function {} must be String, 
not NULL",
-                                get_name());
-            }
-            if (null_map_cidr && (*null_map_cidr)[cidr_idx]) [[unlikely]] {
-                throw Exception(ErrorCode::INVALID_ARGUMENT,
-                                "The arguments of function {} must be String, 
not NULL",
-                                get_name());
+        if (is_ipv4(addr_column_with_type_and_name.type)) {
+            execute_impl_with_ip<PrimitiveType::TYPE_IPV4, ColumnIPv4>(
+                    input_rows_count, addr_const, cidr_const,
+                    assert_cast<const ColumnString*>(cidr_column.get()), 
addr_column, col_res);
+        } else if (is_ipv6(addr_column_with_type_and_name.type)) {
+            execute_impl_with_ip<PrimitiveType::TYPE_IPV6, ColumnIPv6>(
+                    input_rows_count, addr_const, cidr_const,
+                    assert_cast<const ColumnString*>(cidr_column.get()), 
addr_column, col_res);
+        } else {
+            const auto* str_addr_column = assert_cast<const 
ColumnString*>(addr_column.get());
+            const auto* str_cidr_column = assert_cast<const 
ColumnString*>(cidr_column.get());
+
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                auto addr_idx = index_check_const(i, addr_const);
+                auto cidr_idx = index_check_const(i, cidr_const);
+
+                const auto addr =
+                        
IPAddressVariant(str_addr_column->get_data_at(addr_idx).to_string_view());
+                const auto cidr =
+                        
parse_ip_with_cidr(str_cidr_column->get_data_at(cidr_idx).to_string_view());
+                col_res_data[i] = is_address_in_range(addr, cidr) ? 1 : 0;
             }
-            const auto addr =
-                    
IPAddressVariant(str_addr_column->get_data_at(addr_idx).to_string_view());
-            const auto cidr =
-                    
parse_ip_with_cidr(str_cidr_column->get_data_at(cidr_idx).to_string_view());
-            col_res_data[i] = is_address_in_range(addr, cidr) ? 1 : 0;
         }
 
         block.replace_by_position(result, std::move(col_res));
@@ -797,21 +864,6 @@ public:
                                                       
std::move(col_upper_range_output)}));
         return Status::OK();
     }
-
-private:
-    static inline std::pair<UInt32, UInt32> apply_cidr_mask(UInt32 src, UInt8 
bits_to_keep) {
-        if (bits_to_keep >= 8 * sizeof(UInt32)) {
-            return {src, src};
-        }
-        if (bits_to_keep == 0) {
-            return {static_cast<UInt32>(0), static_cast<UInt32>(-1)};
-        }
-        UInt32 mask = static_cast<UInt32>(-1) << (8 * sizeof(UInt32) - 
bits_to_keep);
-        UInt32 lower = src & mask;
-        UInt32 upper = lower | ~mask;
-
-        return {lower, upper};
-    }
 };
 
 class FunctionIPv6CIDRToRange : public IFunction {
@@ -936,18 +988,6 @@ public:
         return ColumnStruct::create(
                 Columns {std::move(col_res_lower_range), 
std::move(col_res_upper_range)});
     }
-
-private:
-    static void apply_cidr_mask(const char* __restrict src, char* __restrict 
dst_lower,
-                                char* __restrict dst_upper, UInt8 
bits_to_keep) {
-        // little-endian mask
-        const auto& mask = get_cidr_mask_ipv6(bits_to_keep);
-
-        for (int8_t i = IPV6_BINARY_LENGTH - 1; i >= 0; --i) {
-            dst_lower[i] = src[i] & mask[i];
-            dst_upper[i] = dst_lower[i] | ~mask[i];
-        }
-    }
 };
 
 class FunctionIsIPv4Compat : public IFunction {
diff --git a/be/src/vec/runtime/ip_address_cidr.h 
b/be/src/vec/runtime/ip_address_cidr.h
index de4c0050982..9e6ac1fd57b 100644
--- a/be/src/vec/runtime/ip_address_cidr.h
+++ b/be/src/vec/runtime/ip_address_cidr.h
@@ -24,6 +24,33 @@
 #include "vec/common/format_ip.h"
 namespace doris {
 
+namespace vectorized {
+static inline std::pair<UInt32, UInt32> apply_cidr_mask(UInt32 src, UInt8 
bits_to_keep) {
+    if (bits_to_keep >= 8 * sizeof(UInt32)) {
+        return {src, src};
+    }
+    if (bits_to_keep == 0) {
+        return {static_cast<UInt32>(0), static_cast<UInt32>(-1)};
+    }
+    UInt32 mask = static_cast<UInt32>(-1) << (8 * sizeof(UInt32) - 
bits_to_keep);
+    UInt32 lower = src & mask;
+    UInt32 upper = lower | ~mask;
+
+    return {lower, upper};
+}
+
+static inline void apply_cidr_mask(const char* __restrict src, char* 
__restrict dst_lower,
+                                   char* __restrict dst_upper, UInt8 
bits_to_keep) {
+    // little-endian mask
+    const auto& mask = get_cidr_mask_ipv6(bits_to_keep);
+
+    for (int8_t i = IPV6_BINARY_LENGTH - 1; i >= 0; --i) {
+        dst_lower[i] = src[i] & mask[i];
+        dst_upper[i] = dst_lower[i] | ~mask[i];
+    }
+}
+} // namespace vectorized
+
 class IPAddressVariant {
 public:
     explicit IPAddressVariant(std::string_view address_str) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsIpAddressInRange.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsIpAddressInRange.java
index 85fe4dcab78..a29c1502223 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsIpAddressInRange.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsIpAddressInRange.java
@@ -24,6 +24,8 @@ import 
org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
 import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.BooleanType;
+import org.apache.doris.nereids.types.IPv4Type;
+import org.apache.doris.nereids.types.IPv6Type;
 import org.apache.doris.nereids.types.StringType;
 import org.apache.doris.nereids.types.VarcharType;
 
@@ -39,6 +41,10 @@ public class IsIpAddressInRange extends ScalarFunction
         implements BinaryExpression, ExplicitlyCastableSignature, 
PropagateNullable {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            
FunctionSignature.ret(BooleanType.INSTANCE).args(IPv4Type.INSTANCE, 
VarcharType.SYSTEM_DEFAULT),
+            
FunctionSignature.ret(BooleanType.INSTANCE).args(IPv4Type.INSTANCE, 
StringType.INSTANCE),
+            
FunctionSignature.ret(BooleanType.INSTANCE).args(IPv6Type.INSTANCE, 
VarcharType.SYSTEM_DEFAULT),
+            
FunctionSignature.ret(BooleanType.INSTANCE).args(IPv6Type.INSTANCE, 
StringType.INSTANCE),
             
FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, 
VarcharType.SYSTEM_DEFAULT),
             
FunctionSignature.ret(BooleanType.INSTANCE).args(StringType.INSTANCE, 
StringType.INSTANCE));
 
diff --git 
a/regression-test/data/inverted_index_p0/test_ip_cidr_search_with_inverted_index.out
 
b/regression-test/data/inverted_index_p0/test_ip_cidr_search_with_inverted_index.out
new file mode 100644
index 00000000000..f365449b9e5
--- /dev/null
+++ 
b/regression-test/data/inverted_index_p0/test_ip_cidr_search_with_inverted_index.out
@@ -0,0 +1,70 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+5
+
+-- !sql_without_ii_0 --
+
+-- !sql_without_ii_1 --
+
+-- !sql_without_ii_2 --
+
+-- !sql_without_ii_3 --
+2      42.117.228.166  2001:16a0:2:200a::2     true
+
+-- !sql_without_ii_4 --
+
+-- !sql_without_ii_5 --
+5      255.255.255.255 \N      \N
+
+-- !sql_without_ii_6 --
+
+-- !sql_without_ii_7 --
+
+-- !sql_without_ii_8 --
+
+-- !sql_without_ii_9 --
+
+-- !sql_with_ii_0 --
+
+-- !sql_with_ii_1 --
+
+-- !sql_with_ii_2 --
+
+-- !sql_with_ii_3 --
+2      42.117.228.166  2001:16a0:2:200a::2     true
+
+-- !sql_with_ii_4 --
+
+-- !sql_with_ii_5 --
+5      255.255.255.255 \N      \N
+
+-- !sql_with_ii_6 --
+
+-- !sql_with_ii_7 --
+
+-- !sql_with_ii_8 --
+
+-- !sql_with_ii_9 --
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+2      42.117.228.166  2001:16a0:2:200a::2
+
+-- !sql --
+
+-- !sql --
+5      255.255.255.255 \N
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+
diff --git 
a/regression-test/suites/inverted_index_p0/test_ip_cidr_search_with_inverted_index.groovy
 
b/regression-test/suites/inverted_index_p0/test_ip_cidr_search_with_inverted_index.groovy
new file mode 100644
index 00000000000..61751f2404f
--- /dev/null
+++ 
b/regression-test/suites/inverted_index_p0/test_ip_cidr_search_with_inverted_index.groovy
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_ip_cidr_search_with_inverted_index", "nonConcurrent"){
+    // prepare test table
+    sql "DROP TABLE IF EXISTS tc_ip_cidr_search_with_inverted_index"
+    // create 1 replica table
+    sql """
+       CREATE TABLE IF NOT EXISTS `tc_ip_cidr_search_with_inverted_index` (
+        `id` INT NULL,
+          `ipv4` IPV4 NULL,
+          `ipv6` IPV6 NULL,
+          `cidr` TEXT NULL,
+          INDEX v4_index (`ipv4`) USING INVERTED,
+          INDEX v6_index (`ipv6`) USING INVERTED
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`id`)
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`id`) BUCKETS 4
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1",
+    "is_being_synced" = "false",
+    "storage_format" = "V2",
+    "light_schema_change" = "true",
+    "disable_auto_compaction" = "false",
+    "enable_single_replica_compaction" = "false"
+    );
+    """
+    sql """ set enable_profile = true;"""
+
+    sql """ insert into tc_ip_cidr_search_with_inverted_index values(1, 
'59.50.185.152', '2a02:e980:83:5b09:ecb8:c669:b336:650e', '127.0.0.0/8'),(3, 
'119.36.22.147', '2001:4888:1f:e891:161:26::', '127.0.0.0/8'),(2, 
'42.117.228.166', '2001:16a0:2:200a::2', null); """
+    sql """ insert into tc_ip_cidr_search_with_inverted_index values(4, '.', 
'2001:1b70:a1:610::b102:2', null); """
+    sql """ insert into tc_ip_cidr_search_with_inverted_index values(5, 
'255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffffg', null) """
+
+    qt_sql """ select count() from tc_ip_cidr_search_with_inverted_index"""
+    // without inverted index query
+    sql """ set enable_common_expr_pushdown = false; """
+    sql """ set enable_inverted_index_query=false; """
+    // select ipv6 in ipv4 cidr
+    qt_sql_without_ii_0 """  select id, ipv4, ipv6, 
is_ip_address_in_range(ipv6, '255.255.255.255/12') from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6, 
'255.255.255.255/12') order by id; """
+    // select ipv6 in ipv6 cidr
+    qt_sql_without_ii_1 """  select id, ipv4, ipv6, 
is_ip_address_in_range(ipv6, 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6, 
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') order by id; """
+    qt_sql_without_ii_2 """  select id, ipv4, ipv6, 
is_ip_address_in_range(ipv6, '::ffff:192.168.0.4/128') from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6, 
'::ffff:192.168.0.4/128') order by id; """
+    qt_sql_without_ii_3 """  select id, ipv4, ipv6, 
is_ip_address_in_range(ipv6, '2001:16a0:2:200a::2/64') from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6, 
'2001:16a0:2:200a::2/64') order by id; """
+
+    // select ipv4 in ipv6 cidr
+    qt_sql_without_ii_4 """  select id, ipv4, ipv6, 
is_ip_address_in_range(ipv4, 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4, 
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') order by id; """
+    // select ipv4 in ipv4 cidr
+    qt_sql_without_ii_5 """  select id, ipv4, ipv6, 
is_ip_address_in_range(ipv6, '255.255.255.255/12') from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4, 
'255.255.255.255/12') order by id; """
+    qt_sql_without_ii_6 """  select id, ipv4, ipv6, 
is_ip_address_in_range(ipv4, '127.0.0.0/8') from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4, 
'127.0.0.0/8') order by id; """
+    qt_sql_without_ii_7 """  select id, ipv4, ipv6, 
is_ip_address_in_range(ipv4, '192.168.100.0/24') from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4, 
'192.168.100.0/24') order by id; """
+
+    // select in null cidr
+    qt_sql_without_ii_8 """  select id, ipv4, ipv6, 
is_ip_address_in_range(ipv4, null) from tc_ip_cidr_search_with_inverted_index 
where is_ip_address_in_range(ipv4, null) order by id; """
+    qt_sql_without_ii_9 """  select id, ipv4, ipv6, 
is_ip_address_in_range(ipv6, null) from tc_ip_cidr_search_with_inverted_index 
where is_ip_address_in_range(ipv6, null) order by id; """
+
+    // with inverted index query
+    // If we use common expr pass to inverted index , we should set 
enable_common_expr_pushdown = true
+    sql """ set enable_common_expr_pushdown = true; """
+    sql """ set enable_inverted_index_query=true; """
+    sql """ set inverted_index_skip_threshold = 0; """ // set skip threshold 
to 0
+
+    // select ipv6 in ipv4 cidr
+    qt_sql_with_ii_0 """  select id, ipv4, ipv6, is_ip_address_in_range(ipv6, 
'255.255.255.255/12') from tc_ip_cidr_search_with_inverted_index where 
is_ip_address_in_range(ipv6, '255.255.255.255/12') order by id; """
+    // select ipv6 in ipv6 cidr
+    qt_sql_with_ii_1 """  select id, ipv4, ipv6, is_ip_address_in_range(ipv6, 
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6, 
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') order by id; """
+    qt_sql_with_ii_2 """  select id, ipv4, ipv6, is_ip_address_in_range(ipv6, 
'::ffff:192.168.0.4/128') from tc_ip_cidr_search_with_inverted_index where 
is_ip_address_in_range(ipv6, '::ffff:192.168.0.4/128') order by id; """
+    qt_sql_with_ii_3 """  select id, ipv4, ipv6, is_ip_address_in_range(ipv6, 
'2001:16a0:2:200a::2/64') from tc_ip_cidr_search_with_inverted_index where 
is_ip_address_in_range(ipv6, '2001:16a0:2:200a::2/64') order by id; """
+
+    // select ipv4 in ipv6 cidr
+    qt_sql_with_ii_4 """  select id, ipv4, ipv6, is_ip_address_in_range(ipv4, 
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4, 
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') order by id; """
+    // select ipv4 in ipv4 cidr
+    qt_sql_with_ii_5 """  select id, ipv4, ipv6, is_ip_address_in_range(ipv6, 
'255.255.255.255/12') from tc_ip_cidr_search_with_inverted_index where 
is_ip_address_in_range(ipv4, '255.255.255.255/12') order by id; """
+    qt_sql_with_ii_6 """  select id, ipv4, ipv6, is_ip_address_in_range(ipv4, 
'127.0.0.0/8') from tc_ip_cidr_search_with_inverted_index where 
is_ip_address_in_range(ipv4, '127.0.0.0/8') order by id; """
+    qt_sql_with_ii_7 """  select id, ipv4, ipv6, is_ip_address_in_range(ipv4, 
'192.168.100.0/24') from tc_ip_cidr_search_with_inverted_index where 
is_ip_address_in_range(ipv4, '192.168.100.0/24') order by id; """
+
+    // select in null cidr
+    qt_sql_with_ii_8 """  select id, ipv4, ipv6, is_ip_address_in_range(ipv4, 
null) from tc_ip_cidr_search_with_inverted_index where 
is_ip_address_in_range(ipv4, null) order by id; """
+    qt_sql_with_ii_9 """  select id, ipv4, ipv6, is_ip_address_in_range(ipv6, 
null) from tc_ip_cidr_search_with_inverted_index where 
is_ip_address_in_range(ipv6, null) order by id; """
+
+
+
+    def create_sql = {
+        List<String> list = new ArrayList<>()
+        // select ipv6 in ipv4 cidr
+        list.add("select id, ipv4, ipv6 from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6, 
'255.255.255.255/12') order by id;")
+        // select ipv6 in ipv6 cidr
+        list.add("select id, ipv4, ipv6 from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6, 
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') order by id;")
+        list.add("select id, ipv4, ipv6 from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6, 
'::ffff:192.168.0.4/128') order by id;")
+        list.add("select id, ipv4, ipv6 from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6, 
'2001:16a0:2:200a::2/64') order by id;")
+        // select ipv4 in ipv6 cidr
+        list.add("select id, ipv4, ipv6 from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4, 
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') order by id;")
+        // select ipv4 in ipv4 cidr
+        list.add("select id, ipv4, ipv6 from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4, 
'255.255.255.255/12') order by id;")
+        list.add("select id, ipv4, ipv6 from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4, 
'127.0.0.0/8') order by id;")
+        list.add("select id, ipv4, ipv6 from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4, 
'192.168.100.0/24') order by id;")
+        // select in null cidr
+        list.add("select id, ipv4, ipv6 from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4, null) 
order by id;")
+        list.add("select id, ipv4, ipv6 from 
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6, null) 
order by id;")
+        return list;
+    }
+
+    def checkpoints_name = "ip.inverted_index_filtered"
+    def execute_sql = { sqlList ->
+        def i = 0
+        for (sqlStr in sqlList) {
+            try {
+                log.info("execute sql: i")
+                GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name, 
[req_id: i])
+                order_qt_sql """ ${sqlStr} """
+            } finally {
+                GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name)
+            }
+            ++i
+        }
+    }
+
+    execute_sql.call(create_sql.call())
+
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to