This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit cc00666be6439e1bc28f530b7e0d81bb03a25d3e
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Tue May 7 10:25:13 2024 +0800

    [opt](inverted index) add inlist condition handling to compound (#34134)
    
    1. Previously, compound predicates did not support the IN-list condition, which
could impact performance when an inverted index was created.
---
 be/src/exec/olap_common.h                          |  25 ++--
 be/src/exec/olap_utils.h                           |   4 +
 be/src/olap/column_predicate.h                     |   2 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  30 +++--
 be/src/olap/rowset/segment_v2/segment_iterator.h   |  10 +-
 be/src/olap/tablet_reader.cpp                      |   4 +-
 be/src/vec/exec/scan/vscan_node.cpp                | 106 ++++++++++------
 be/src/vec/exec/scan/vscan_node.h                  |  18 ++-
 .../inverted_index_p0/test_compound_inlist.out     |  61 +++++++++
 .../inverted_index_p0/test_compound_inlist.groovy  | 140 +++++++++++++++++++++
 10 files changed, 333 insertions(+), 67 deletions(-)

diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index 634f9128553..5d6377b9c7c 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -275,24 +275,26 @@ public:
     }
 
     void to_condition_in_compound(std::vector<TCondition>& filters) {
-        for (const auto& value : _compound_values) {
+        for (const auto& compound_value : _compound_values) {
             TCondition condition;
             condition.__set_column_name(_column_name);
-            if (value.first == FILTER_LARGER) {
+            if (compound_value.first == FILTER_LARGER) {
                 condition.__set_condition_op(">>");
-            } else if (value.first == FILTER_LARGER_OR_EQUAL) {
+            } else if (compound_value.first == FILTER_LARGER_OR_EQUAL) {
                 condition.__set_condition_op(">=");
-            } else if (value.first == FILTER_LESS) {
+            } else if (compound_value.first == FILTER_LESS) {
                 condition.__set_condition_op("<<");
-            } else if (value.first == FILTER_LESS_OR_EQUAL) {
+            } else if (compound_value.first == FILTER_LESS_OR_EQUAL) {
                 condition.__set_condition_op("<=");
-            } else if (value.first == FILTER_IN) {
+            } else if (compound_value.first == FILTER_IN) {
                 condition.__set_condition_op("*=");
-            } else if (value.first == FILTER_NOT_IN) {
+            } else if (compound_value.first == FILTER_NOT_IN) {
                 condition.__set_condition_op("!*=");
             }
-            condition.condition_values.push_back(
-                    cast_to_string<primitive_type, CppType>(value.second, 
_scale));
+            for (const auto& value : compound_value.second) {
+                condition.condition_values.push_back(
+                        cast_to_string<primitive_type, CppType>(value, 
_scale));
+            }
             if (condition.condition_values.size() != 0) {
                 filters.push_back(std::move(condition));
             }
@@ -446,7 +448,7 @@ private:
                                                   primitive_type == 
PrimitiveType::TYPE_DATETIMEV2;
 
     // range value except leaf node of and node in compound expr tree
-    std::set<std::pair<SQLFilterOp, CppType>> _compound_values;
+    std::map<SQLFilterOp, std::set<CppType>> _compound_values;
     bool _marked_runtime_filter_predicate = false;
 };
 
@@ -598,8 +600,7 @@ Status 
ColumnValueRange<primitive_type>::add_fixed_value(const CppType& value) {
 
 template <PrimitiveType primitive_type>
 Status ColumnValueRange<primitive_type>::add_compound_value(SQLFilterOp op, 
CppType value) {
-    std::pair<SQLFilterOp, CppType> val_with_op(op, value);
-    _compound_values.insert(val_with_op);
+    _compound_values[op].insert(value);
     _contain_null = false;
 
     _high_value = TYPE_MIN;
diff --git a/be/src/exec/olap_utils.h b/be/src/exec/olap_utils.h
index 2e101b1270f..4e5c08677a2 100644
--- a/be/src/exec/olap_utils.h
+++ b/be/src/exec/olap_utils.h
@@ -117,6 +117,10 @@ inline SQLFilterOp to_olap_filter_type(const std::string& 
function_name, bool op
         return opposite ? FILTER_NOT_IN : FILTER_IN;
     } else if (function_name == "ne") {
         return opposite ? FILTER_IN : FILTER_NOT_IN;
+    } else if (function_name == "in_list") {
+        return opposite ? FILTER_NOT_IN : FILTER_IN;
+    } else if (function_name == "not_in_list") {
+        return opposite ? FILTER_IN : FILTER_NOT_IN;
     } else {
         DCHECK(false) << "Function Name: " << function_name;
         return FILTER_IN;
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index d549540e5c7..75c1eb2ccd7 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -36,7 +36,7 @@ namespace doris {
 class Schema;
 
 struct PredicateParams {
-    std::string value;
+    std::vector<std::string> values;
     bool marked_by_runtime_filter = false;
 };
 
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index d4ae8cf3324..3dbb8b694ae 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -741,10 +741,17 @@ Status 
SegmentIterator::_execute_predicates_except_leafnode_of_andnode(
         _column_predicate_info->column_name = expr->expr_name();
     } else if (_is_literal_node(node_type)) {
         auto v_literal_expr = 
std::dynamic_pointer_cast<doris::vectorized::VLiteral>(expr);
-        _column_predicate_info->query_value = v_literal_expr->value();
-    } else if (node_type == TExprNodeType::BINARY_PRED || node_type == 
TExprNodeType::MATCH_PRED) {
+        
_column_predicate_info->query_values.push_back(v_literal_expr->value());
+    } else if (node_type == TExprNodeType::BINARY_PRED || node_type == 
TExprNodeType::MATCH_PRED ||
+               node_type == TExprNodeType::IN_PRED) {
         if (node_type == TExprNodeType::MATCH_PRED) {
             _column_predicate_info->query_op = "match";
+        } else if (node_type == TExprNodeType::IN_PRED) {
+            if (expr->op() == TExprOpcode::type::FILTER_IN) {
+                _column_predicate_info->query_op = "in_list";
+            } else {
+                _column_predicate_info->query_op = "not_in_list";
+            }
         } else {
             _column_predicate_info->query_op = expr->fn().name.function_name;
         }
@@ -869,7 +876,9 @@ Status 
SegmentIterator::_apply_index_except_leafnode_of_andnode() {
         bool is_support = pred_type == PredicateType::EQ || pred_type == 
PredicateType::NE ||
                           pred_type == PredicateType::LT || pred_type == 
PredicateType::LE ||
                           pred_type == PredicateType::GT || pred_type == 
PredicateType::GE ||
-                          pred_type == PredicateType::MATCH;
+                          pred_type == PredicateType::MATCH ||
+                          pred_type == PredicateType::IN_LIST ||
+                          pred_type == PredicateType::NOT_IN_LIST;
         if (!is_support) {
             continue;
         }
@@ -950,7 +959,8 @@ std::string 
SegmentIterator::_gen_predicate_result_sign(ColumnPredicate* predica
     auto pred_type = predicate->type();
     auto predicate_params = predicate->predicate_params();
     pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + 
column_desc->name() + "_" +
-                       predicate->pred_type_string(pred_type) + "_" + 
predicate_params->value;
+                       predicate->pred_type_string(pred_type) + "_" +
+                       join(predicate_params->values, ",");
 
     return pred_result_sign;
 }
@@ -958,7 +968,7 @@ std::string 
SegmentIterator::_gen_predicate_result_sign(ColumnPredicate* predica
 std::string SegmentIterator::_gen_predicate_result_sign(ColumnPredicateInfo* 
predicate_info) {
     std::string pred_result_sign;
     pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + 
predicate_info->column_name + "_" +
-                       predicate_info->query_op + "_" + 
predicate_info->query_value;
+                       predicate_info->query_op + "_" + 
join(predicate_info->query_values, ",");
     return pred_result_sign;
 }
 
@@ -1007,7 +1017,7 @@ Status 
SegmentIterator::_apply_inverted_index_on_column_predicate(
         }
 
         auto pred_type = pred->type();
-        if (pred_type == PredicateType::MATCH) {
+        if (pred_type == PredicateType::MATCH || pred_type == 
PredicateType::IN_LIST) {
             std::string pred_result_sign = _gen_predicate_result_sign(pred);
             _rowid_result_for_index.emplace(pred_result_sign, 
std::make_pair(false, _row_bitmap));
         }
@@ -2630,10 +2640,16 @@ void 
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
         }
     } else if (_is_literal_node(node_type)) {
         auto v_literal_expr = static_cast<const 
doris::vectorized::VLiteral*>(expr.get());
-        _column_predicate_info->query_value = v_literal_expr->value();
+        
_column_predicate_info->query_values.push_back(v_literal_expr->value());
     } else {
         if (node_type == TExprNodeType::MATCH_PRED) {
             _column_predicate_info->query_op = "match";
+        } else if (node_type == TExprNodeType::IN_PRED) {
+            if (expr->op() == TExprOpcode::type::FILTER_IN) {
+                _column_predicate_info->query_op = "in_list";
+            } else {
+                _column_predicate_info->query_op = "not_in_list";
+            }
         } else if (node_type != TExprNodeType::COMPOUND_PRED) {
             _column_predicate_info->query_op = expr->fn().name.function_name;
         }
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h 
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 84c10f3b8b2..50c195fb720 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -76,18 +76,20 @@ struct ColumnPredicateInfo {
     std::string debug_string() const {
         std::stringstream ss;
         ss << "column_name=" << column_name << ", query_op=" << query_op
-           << ", query_value=" << query_value;
+           << ", query_value=" << join(query_values, ",");
         return ss.str();
     }
 
-    bool is_empty() const { return column_name.empty() && query_value.empty() 
&& query_op.empty(); }
+    bool is_empty() const {
+        return column_name.empty() && query_values.empty() && query_op.empty();
+    }
 
     bool is_equal(const ColumnPredicateInfo& column_pred_info) const {
         if (column_pred_info.column_name != column_name) {
             return false;
         }
 
-        if (column_pred_info.query_value != query_value) {
+        if (column_pred_info.query_values != query_values) {
             return false;
         }
 
@@ -99,7 +101,7 @@ struct ColumnPredicateInfo {
     }
 
     std::string column_name;
-    std::string query_value;
+    std::vector<std::string> query_values;
     std::string query_op;
 };
 
diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp
index 7b40ff4eae1..808dddd9b08 100644
--- a/be/src/olap/tablet_reader.cpp
+++ b/be/src/olap/tablet_reader.cpp
@@ -497,7 +497,7 @@ Status TabletReader::_init_conditions_param(const 
ReaderParams& read_params) {
             // record condition value into predicate_params in order to 
pushdown segment_iterator,
             // _gen_predicate_result_sign will build predicate result unique 
sign with condition value
             auto predicate_params = predicate->predicate_params();
-            predicate_params->value = condition.condition_values[0];
+            predicate_params->values = condition.condition_values;
             predicate_params->marked_by_runtime_filter = 
condition.marked_by_runtime_filter;
             if (column.aggregation() != 
FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {
                 _value_col_predicates.push_back(predicate);
@@ -569,7 +569,7 @@ Status 
TabletReader::_init_conditions_param_except_leafnode_of_andnode(
         if (predicate != nullptr) {
             auto predicate_params = predicate->predicate_params();
             predicate_params->marked_by_runtime_filter = 
condition.marked_by_runtime_filter;
-            predicate_params->value = condition.condition_values[0];
+            predicate_params->values = condition.condition_values;
             _col_preds_except_leafnode_of_andnode.push_back(predicate);
         }
     }
diff --git a/be/src/vec/exec/scan/vscan_node.cpp 
b/be/src/vec/exec/scan/vscan_node.cpp
index 68895b47739..a37287dcda4 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -1018,7 +1018,9 @@ Status VScanNode::_normalize_compound_predicate(
         auto children_num = expr->children().size();
         for (auto i = 0; i < children_num; ++i) {
             auto* child_expr = expr->children()[i].get();
-            if (TExprNodeType::BINARY_PRED == child_expr->node_type()) {
+            if (TExprNodeType::BINARY_PRED == child_expr->node_type() ||
+                TExprNodeType::IN_PRED == child_expr->node_type() ||
+                TExprNodeType::MATCH_PRED == child_expr->node_type()) {
                 SlotDescriptor* slot = nullptr;
                 ColumnValueRangeType* range_on_slot = nullptr;
                 if (_is_predicate_acting_on_slot(child_expr, 
in_predicate_checker, &slot,
@@ -1033,30 +1035,16 @@ Status VScanNode::_normalize_compound_predicate(
                                     value_range.mark_runtime_filter_predicate(
                                             _is_runtime_filter_predicate);
                                 }};
-                                
static_cast<void>(_normalize_binary_in_compound_predicate(
-                                        child_expr, expr_ctx, slot, 
value_range, pdt));
-                            },
-                            active_range);
-
-                    _compound_value_ranges.emplace_back(active_range);
-                }
-            } else if (TExprNodeType::MATCH_PRED == child_expr->node_type()) {
-                SlotDescriptor* slot = nullptr;
-                ColumnValueRangeType* range_on_slot = nullptr;
-                if (_is_predicate_acting_on_slot(child_expr, 
in_predicate_checker, &slot,
-                                                 &range_on_slot) ||
-                    _is_predicate_acting_on_slot(child_expr, 
eq_predicate_checker, &slot,
-                                                 &range_on_slot)) {
-                    ColumnValueRangeType active_range =
-                            *range_on_slot; // copy, in order not to affect 
the range in the _colname_to_value_range
-                    std::visit(
-                            [&](auto& value_range) {
-                                Defer mark_runtime_filter_flag {[&]() {
-                                    value_range.mark_runtime_filter_predicate(
-                                            _is_runtime_filter_predicate);
-                                }};
-                                
static_cast<void>(_normalize_match_in_compound_predicate(
-                                        child_expr, expr_ctx, slot, 
value_range, pdt));
+                                if (TExprNodeType::BINARY_PRED == 
child_expr->node_type()) {
+                                    
static_cast<void>(_normalize_binary_compound_predicate(
+                                            child_expr, expr_ctx, slot, 
value_range, pdt));
+                                } else if (TExprNodeType::IN_PRED == 
child_expr->node_type()) {
+                                    
static_cast<void>(_normalize_in_and_not_in_compound_predicate(
+                                            child_expr, expr_ctx, slot, 
value_range, pdt));
+                                } else {
+                                    
static_cast<void>(_normalize_match_compound_predicate(
+                                            child_expr, expr_ctx, slot, 
value_range, pdt));
+                                }
                             },
                             active_range);
 
@@ -1074,11 +1062,10 @@ Status VScanNode::_normalize_compound_predicate(
 }
 
 template <PrimitiveType T>
-Status VScanNode::_normalize_binary_in_compound_predicate(vectorized::VExpr* 
expr,
-                                                          VExprContext* 
expr_ctx,
-                                                          SlotDescriptor* slot,
-                                                          ColumnValueRange<T>& 
range,
-                                                          PushDownType* pdt) {
+Status VScanNode::_normalize_binary_compound_predicate(vectorized::VExpr* expr,
+                                                       VExprContext* expr_ctx, 
SlotDescriptor* slot,
+                                                       ColumnValueRange<T>& 
range,
+                                                       PushDownType* pdt) {
     DCHECK(expr->children().size() == 2);
     if (TExprNodeType::BINARY_PRED == expr->node_type()) {
         auto eq_checker = [](const std::string& fn_name) { return fn_name == 
"eq"; };
@@ -1132,11 +1119,60 @@ Status 
VScanNode::_normalize_binary_in_compound_predicate(vectorized::VExpr* exp
 }
 
 template <PrimitiveType T>
-Status VScanNode::_normalize_match_in_compound_predicate(vectorized::VExpr* 
expr,
-                                                         VExprContext* 
expr_ctx,
-                                                         SlotDescriptor* slot,
-                                                         ColumnValueRange<T>& 
range,
-                                                         PushDownType* pdt) {
+Status 
VScanNode::_normalize_in_and_not_in_compound_predicate(vectorized::VExpr* expr,
+                                                              VExprContext* 
expr_ctx,
+                                                              SlotDescriptor* 
slot,
+                                                              
ColumnValueRange<T>& range,
+                                                              PushDownType* 
pdt) {
+    if (TExprNodeType::IN_PRED == expr->node_type()) {
+        std::string fn_name =
+                expr->op() == TExprOpcode::type::FILTER_IN ? "in_list" : 
"not_in_list";
+
+        HybridSetBase::IteratorBase* iter = nullptr;
+        auto hybrid_set = expr->get_set_func();
+
+        if (hybrid_set != nullptr) {
+            if (hybrid_set->size() <= _max_pushdown_conditions_per_column) {
+                iter = hybrid_set->begin();
+            } else {
+                _filter_predicates.in_filters.emplace_back(slot->col_name(), 
expr->get_set_func());
+                *pdt = PushDownType::ACCEPTABLE;
+                return Status::OK();
+            }
+        } else {
+            VInPredicate* pred = static_cast<VInPredicate*>(expr);
+
+            InState* state = reinterpret_cast<InState*>(
+                    expr_ctx->fn_context(pred->fn_context_index())
+                            
->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+
+            if (!state->use_set) {
+                return Status::OK();
+            }
+
+            iter = state->hybrid_set->begin();
+        }
+
+        while (iter->has_next()) {
+            if (nullptr == iter->get_value()) {
+                iter->next();
+                continue;
+            }
+            auto value = const_cast<void*>(iter->get_value());
+            RETURN_IF_ERROR(_change_value_range<false>(
+                    range, value, 
ColumnValueRange<T>::add_compound_value_range, fn_name, 0));
+            iter->next();
+        }
+        *pdt = PushDownType::ACCEPTABLE;
+    }
+    return Status::OK();
+}
+
+template <PrimitiveType T>
+Status VScanNode::_normalize_match_compound_predicate(vectorized::VExpr* expr,
+                                                      VExprContext* expr_ctx, 
SlotDescriptor* slot,
+                                                      ColumnValueRange<T>& 
range,
+                                                      PushDownType* pdt) {
     DCHECK(expr->children().size() == 2);
     if (TExprNodeType::MATCH_PRED == expr->node_type()) {
         RETURN_IF_ERROR(_normalize_match_predicate(expr, expr_ctx, slot, 
range, pdt));
diff --git a/be/src/vec/exec/scan/vscan_node.h 
b/be/src/vec/exec/scan/vscan_node.h
index f93536327b1..04bc738fda4 100644
--- a/be/src/vec/exec/scan/vscan_node.h
+++ b/be/src/vec/exec/scan/vscan_node.h
@@ -442,14 +442,20 @@ private:
                     eq_predicate_checker);
 
     template <PrimitiveType T>
-    Status _normalize_binary_in_compound_predicate(vectorized::VExpr* expr, 
VExprContext* expr_ctx,
-                                                   SlotDescriptor* slot, 
ColumnValueRange<T>& range,
-                                                   PushDownType* pdt);
+    Status _normalize_binary_compound_predicate(vectorized::VExpr* expr, 
VExprContext* expr_ctx,
+                                                SlotDescriptor* slot, 
ColumnValueRange<T>& range,
+                                                PushDownType* pdt);
 
     template <PrimitiveType T>
-    Status _normalize_match_in_compound_predicate(vectorized::VExpr* expr, 
VExprContext* expr_ctx,
-                                                  SlotDescriptor* slot, 
ColumnValueRange<T>& range,
-                                                  PushDownType* pdt);
+    Status _normalize_in_and_not_in_compound_predicate(vectorized::VExpr* expr,
+                                                       VExprContext* expr_ctx, 
SlotDescriptor* slot,
+                                                       ColumnValueRange<T>& 
range,
+                                                       PushDownType* pdt);
+
+    template <PrimitiveType T>
+    Status _normalize_match_compound_predicate(vectorized::VExpr* expr, 
VExprContext* expr_ctx,
+                                               SlotDescriptor* slot, 
ColumnValueRange<T>& range,
+                                               PushDownType* pdt);
 
     template <PrimitiveType T>
     Status _normalize_is_null_predicate(vectorized::VExpr* expr, VExprContext* 
expr_ctx,
diff --git a/regression-test/data/inverted_index_p0/test_compound_inlist.out 
b/regression-test/data/inverted_index_p0/test_compound_inlist.out
new file mode 100644
index 00000000000..04db42cd9f4
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_compound_inlist.out
@@ -0,0 +1,61 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+17
+
+-- !sql --
+17
+
+-- !sql --
+8
+
+-- !sql --
+8
+
+-- !sql --
+8
+
+-- !sql --
+8
+
+-- !sql --
+17
+
+-- !sql --
+17
+
+-- !sql --
+160
+
+-- !sql --
+160
+
+-- !sql --
+861
+
+-- !sql --
+861
+
+-- !sql --
+77
+
+-- !sql --
+77
+
+-- !sql --
+68
+
+-- !sql --
+68
+
+-- !sql --
+68
+
+-- !sql --
+68
+
+-- !sql --
+68
+
+-- !sql --
+68
+
diff --git 
a/regression-test/suites/inverted_index_p0/test_compound_inlist.groovy 
b/regression-test/suites/inverted_index_p0/test_compound_inlist.groovy
new file mode 100644
index 00000000000..f3883819f49
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_compound_inlist.groovy
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_compound_inlist", "p0"){
+    def indexTbName1 = "test_compound_inlist_1"
+    def indexTbName2 = "test_compound_inlist_2"
+
+    sql "DROP TABLE IF EXISTS ${indexTbName1}"
+    sql """
+      CREATE TABLE ${indexTbName1} (
+        `@timestamp` int(11) NULL COMMENT "",
+        `clientip` varchar(20) NULL COMMENT "",
+        `request` text NULL COMMENT "",
+        `status` int(11) NULL COMMENT "",
+        `size` int(11) NULL COMMENT "",
+        INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = 
"english", "support_phrase" = "true") COMMENT '',
+        INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = 
"english", "support_phrase" = "true") COMMENT ''
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`@timestamp`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "disable_auto_compaction" = "true"
+      );
+    """
+
+    sql "DROP TABLE IF EXISTS ${indexTbName2}"
+    sql """
+      CREATE TABLE ${indexTbName2} (
+        `@timestamp` int(11) NULL COMMENT "",
+        `clientip` varchar(20) NULL COMMENT "",
+        `request` text NULL COMMENT "",
+        `status` int(11) NULL COMMENT "",
+        `size` int(11) NULL COMMENT "",
+        INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = 
"english", "support_phrase" = "true") COMMENT '',
+        INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = 
"english", "support_phrase" = "true") COMMENT '',
+        INDEX status_idx (`status`) USING INVERTED COMMENT ''
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`@timestamp`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "disable_auto_compaction" = "true"
+      );
+    """
+
+    def load_httplogs_data = {table_name, label, read_flag, format_flag, 
file_name, ignore_failure=false,
+                        expected_succ_rows = -1, load_to_single_tablet = 
'true' ->
+        
+        // load the json data
+        streamLoad {
+            table "${table_name}"
+            
+            // set http request header params
+            set 'label', label + "_" + UUID.randomUUID().toString()
+            set 'read_json_by_line', read_flag
+            set 'format', format_flag
+            file file_name // import json file
+            time 10000 // limit inflight 10s
+            if (expected_succ_rows >= 0) {
+                set 'max_filter_ratio', '1'
+            }
+
+            // if declared a check callback, the default check condition will 
ignore.
+            // So you must check all condition
+            check { result, exception, startTime, endTime ->
+                       if (ignore_failure && expected_succ_rows < 0) { return }
+                    if (exception != null) {
+                        throw exception
+                    }
+                    log.info("Stream load result: ${result}".toString())
+                    def json = parseJson(result)
+                    assertEquals("success", json.Status.toLowerCase())
+                    if (expected_succ_rows >= 0) {
+                        assertEquals(json.NumberLoadedRows, expected_succ_rows)
+                    } else {
+                        assertEquals(json.NumberTotalRows, 
json.NumberLoadedRows + json.NumberUnselectedRows)
+                        assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes 
> 0)
+                }
+            }
+        }
+    }
+
+    try {
+        load_httplogs_data.call(indexTbName1, 'test_compound_list_1', 'true', 
'json', 'documents-1000.json')
+        load_httplogs_data.call(indexTbName2, 'test_compound_list_2', 'true', 
'json', 'documents-1000.json')
+
+        sql "sync"
+
+        qt_sql """ select count() from ${indexTbName1} where (((request 
match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 
'english' and clientip match_phrase '4')) and status in (1, 2, 304)); """
+        qt_sql """ select count() from ${indexTbName2} where (((request 
match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 
'english' and clientip match_phrase '4')) and status in (1, 2, 304)); """
+        
+        qt_sql """ select count() from ${indexTbName1} where (((request 
match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 
'english' and clientip match_phrase '4')) and status in (1, 2, 200)); """
+        qt_sql """ select count() from ${indexTbName2} where (((request 
match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 
'english' and clientip match_phrase '4')) and status in (1, 2, 200)); """
+
+        qt_sql """ select count() from ${indexTbName1} where (((request 
match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 
'english' and clientip match_phrase '4')) and status not in (1, 2, 304)); """
+        qt_sql """ select count() from ${indexTbName2} where (((request 
match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 
'english' and clientip match_phrase '4')) and status not in (1, 2, 304)); """
+
+        qt_sql """ select count() from ${indexTbName1} where (((request 
match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 
'english' and clientip match_phrase '4')) and status not in (1, 2, 200)); """
+        qt_sql """ select count() from ${indexTbName2} where (((request 
match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 
'english' and clientip match_phrase '4')) and status not in (1, 2, 200)); """
+
+        qt_sql """ select count() from ${indexTbName1} where (((request 
match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 
'english' and clientip match_phrase '4')) or status in (1, 2, 304)); """
+        qt_sql """ select count() from ${indexTbName2} where (((request 
match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 
'english' and clientip match_phrase '4')) or status in (1, 2, 304)); """
+
+        qt_sql """ select count() from ${indexTbName1} where (((request 
match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 
'english' and clientip match_phrase '4')) or status in (1, 2, 200)); """
+        qt_sql """ select count() from ${indexTbName2} where (((request 
match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 
'english' and clientip match_phrase '4')) or status in (1, 2, 200)); """
+      
+        qt_sql """ select count() from ${indexTbName1} where ((request 
match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' 
and clientip match_phrase '2')) or (((request match_phrase 'images' and 
clientip match_phrase '3') or (request match_phrase 'english' and clientip 
match_phrase '4')) and status in (1, 2, 304)); """
+        qt_sql """ select count() from ${indexTbName2} where ((request 
match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' 
and clientip match_phrase '2')) or (((request match_phrase 'images' and 
clientip match_phrase '3') or (request match_phrase 'english' and clientip 
match_phrase '4')) and status in (1, 2, 304)); """
+
+        qt_sql """ select count() from ${indexTbName1} where ((request 
match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' 
and clientip match_phrase '2')) or (((request match_phrase 'images' and 
clientip match_phrase '3') or (request match_phrase 'english' and clientip 
match_phrase '4')) and status in (1, 2, 200)); """
+        qt_sql """ select count() from ${indexTbName2} where ((request 
match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' 
and clientip match_phrase '2')) or (((request match_phrase 'images' and 
clientip match_phrase '3') or (request match_phrase 'english' and clientip 
match_phrase '4')) and status in (1, 2, 200)); """
+
+        qt_sql """ select count() from ${indexTbName1} where ((request 
match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' 
and clientip match_phrase '2')) or (((request match_phrase 'images' and 
clientip match_phrase '3') or (request match_phrase 'english' and clientip 
match_phrase '4')) and status not in (1, 2, 304)); """
+        qt_sql """ select count() from ${indexTbName2} where ((request 
match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' 
and clientip match_phrase '2')) or (((request match_phrase 'images' and 
clientip match_phrase '3') or (request match_phrase 'english' and clientip 
match_phrase '4')) and status not in (1, 2, 304)); """
+
+        qt_sql """ select count() from ${indexTbName1} where ((request 
match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' 
and clientip match_phrase '2')) or (((request match_phrase 'images' and 
clientip match_phrase '3') or (request match_phrase 'english' and clientip 
match_phrase '4')) and status in (1, 2, 304, 200) and status not in (1, 2, 
304)); """
+        qt_sql """ select count() from ${indexTbName2} where ((request 
match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' 
and clientip match_phrase '2')) or (((request match_phrase 'images' and 
clientip match_phrase '3') or (request match_phrase 'english' and clientip 
match_phrase '4')) and status in (1, 2, 304, 200) and status not in (1, 2, 
304)); """
+
+    } finally {
+        //try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to