This is an automated email from the ASF dual-hosted git repository. lide pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push: new 00bd7afb93 [Feature](add bitmap udaf) add the bitmap intersection and difference set for mixed calculation of udaf (#15588) (#19778) 00bd7afb93 is described below commit 00bd7afb93e128950418f3d721aa4c2e9d014601 Author: yuxuan-luo <119841515+yuxuan-...@users.noreply.github.com> AuthorDate: Thu May 18 14:59:26 2023 +0800 [Feature](add bitmap udaf) add the bitmap intersection and difference set for mixed calculation of udaf (#15588) (#19778) Co-authored-by: zhbinbin <16679214+zhbin...@users.noreply.github.com> Co-authored-by: zhangbinbin05 <zhangbinbi...@baidu.com> (cherry picked from commit ff9e03e2bf8ca5b4cdd6fe44b129db868e84ddb4) --- be/src/util/bitmap_expr_calculation.h | 216 +++++++++++++++++++++ be/src/util/bitmap_intersect.h | 5 +- .../aggregate_function_orthogonal_bitmap.cpp | 19 ++ .../aggregate_function_orthogonal_bitmap.h | 97 +++++++++ .../apache/doris/analysis/FunctionCallExpr.java | 4 +- .../apache/doris/catalog/AggregateFunction.java | 1 + .../java/org/apache/doris/catalog/FunctionSet.java | 43 ++++ 7 files changed, 381 insertions(+), 4 deletions(-) diff --git a/be/src/util/bitmap_expr_calculation.h b/be/src/util/bitmap_expr_calculation.h new file mode 100644 index 0000000000..24784cc957 --- /dev/null +++ b/be/src/util/bitmap_expr_calculation.h @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once +#include <stack> +#include <string> + +#include "util/bitmap_intersect.h" + +namespace doris { + +// Compute the intersection union difference set of two or more bitmaps +// Usage: orthogonal_bitmap_parse_calculate(bitmap_column, filter_column, input_string) +// Example: orthogonal_bitmap_expr_calculate(user_id, event, '(A|B)&(C-D)'), meaning find the intersection union difference set of user_id in all A/B/C/D 4 bitmaps +// Operation symbol: +// the operator '|' stands for union, the operator '&' stands for intersection, the operator '-' indicates the difference set, the operator '^' stands for xor +class BitmapExprCalculation : public BitmapIntersect<std::string> { +public: + BitmapExprCalculation() = default; + + explicit BitmapExprCalculation(const char* src) { deserialize(src); } + + void bitmap_calculation_init(std::string& input_str) { + _polish = reverse_polish(input_str); + std::string bitmap_key; + for (int i = 0; i < _polish.length(); i++) { + char c = _polish.at(i); + if (c != '&' && c != '|' && c != '^' && c != '-' && c != ' ' && c != '\\') { + bitmap_key += c; + } else if (i != 0 && _polish.at(i - 1) == '\\') { + bitmap_key += c; + } else if (c == '\\') { + continue; + } else { + if (bitmap_key.length() > 0) { + add_key(bitmap_key); + bitmap_key.clear(); + } + } + } + if (bitmap_key.length() > 0) { + add_key(bitmap_key); + bitmap_key.clear(); + } + } + + BitmapValue bitmap_calculate() { + std::stack<BitmapValue> values; + std::string bitmap_key; + for (int i = 0; i < _polish.length(); i++) { + char c = _polish.at(i); + if (c == ' ') { + if (bitmap_key.length() > 0) { + values.push(_bitmaps[bitmap_key]); + bitmap_key.clear(); + } + } else if (c != '&' && c != '|' && c != '^' && c != '-' && c != '\\') { + bitmap_key += c; + } else if (i != 0 && _polish.at(i - 1) == '\\') { + bitmap_key += c; + } else if (c == '\\') { + continue; + } else { + if (bitmap_key.length() > 0) { + values.push(_bitmaps[bitmap_key]); + bitmap_key.clear(); + } + if (values.size() >= 2) { + BitmapValue op_a = values.top(); + values.pop(); + BitmapValue op_b = values.top(); + values.pop(); + BitmapValue cal_result; + bitmap_calculate(op_a, op_b, c, cal_result); + values.push(cal_result); + } + } + } + BitmapValue result; + if (bitmap_key.length() > 0) { + result |= _bitmaps[bitmap_key]; + } else if (!values.empty()) { + result |= values.top(); + } + return result; + } + + // calculate the bitmap value by expr bitmap calculate + int64_t bitmap_calculate_count() { + if (_bitmaps.empty()) { + return 0; + } + return bitmap_calculate().cardinality(); + } + +private: + constexpr int priority(char c) { + switch (c) { + case '&': + return 1; + case '|': + return 1; + case '^': + return 1; + case '-': + return 1; + default: + return 0; + } + } + + template <class T> + std::string print_stack(std::stack<T>& stack) { + std::string result; + while (!stack.empty()) { + result = stack.top() + result; + stack.pop(); + } + return result; + } + + std::string reverse_polish(const std::string& input_str) { + std::stack<char> polish; + std::stack<char> op_stack; + bool last_is_char = false; + for (int i = 0; i < input_str.length(); i++) { + char cur_char = input_str.at(i); + if (cur_char != '&' && cur_char != '|' && cur_char != '^' && cur_char != '-' && + cur_char != '(' && cur_char != ')' && cur_char != ' ' && cur_char != '\t') { + if (!last_is_char) { + polish.push(' '); + } + polish.push(cur_char); + last_is_char = true; + continue; + } else if (i != 0 && input_str.at(i - 1) == '\\') { + polish.push(cur_char); + last_is_char = true; + continue; + } else if (cur_char == ' ' || cur_char == '\t') { + last_is_char = false; + continue; + } else if (cur_char == '(') { + op_stack.push(cur_char); + } else if (!op_stack.empty() && cur_char == ')') { + while (!op_stack.empty() && op_stack.top() != '(') { + polish.push(op_stack.top()); + op_stack.pop(); + } + op_stack.pop(); + } else { + if (!op_stack.empty() && op_stack.top() == '(') { + op_stack.push(cur_char); + } else { + if (!op_stack.empty() && priority(cur_char) > priority(op_stack.top())) { + op_stack.push(cur_char); + } else { + while (!op_stack.empty()) { + if (op_stack.top() == '(') { + break; + } + if (priority(cur_char) <= priority(op_stack.top())) { + polish.push(op_stack.top()); + op_stack.pop(); + } else { + break; + } + } + op_stack.push(cur_char); + } + } + } + last_is_char = false; + } + + while (!op_stack.empty()) { + polish.push(op_stack.top()); + op_stack.pop(); + } + return print_stack(polish); + } + + void bitmap_calculate(BitmapValue& op_a, BitmapValue& op_b, char op, BitmapValue& result) { + result |= op_b; + switch (op) { + case '&': + result &= op_a; + break; + case '|': + result |= op_a; + break; + case '-': + result -= op_a; + break; + case '^': + result ^= op_a; + break; + } + } + + std::string _polish; +}; +} // namespace doris diff --git a/be/src/util/bitmap_intersect.h b/be/src/util/bitmap_intersect.h index 94ff8dc283..a6f3428acb 100644 --- a/be/src/util/bitmap_intersect.h +++ b/be/src/util/bitmap_intersect.h @@ -172,7 +172,6 @@ inline void Helper::read_from<std::string>(const char** src, std::string* result *src += length; } // read_from end - } // namespace detail // Calculate the intersection of two or more bitmaps @@ -262,7 +261,7 @@ public: } } -private: +protected: std::map<T, BitmapValue> _bitmaps; }; @@ -349,7 +348,7 @@ public: } } -private: +protected: phmap::flat_hash_map<std::string, BitmapValue> _bitmaps; }; diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp index 4194befae2..f3327eff1f 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp @@ -68,6 +68,18 @@ AggregateFunctionPtr create_aggregate_function_orthogonal_bitmap_intersect_count name, argument_types, parameters, result_is_nullable); } +AggregateFunctionPtr create_aggregate_function_orthogonal_bitmap_expr_calculate( + const std::string& name, const DataTypes& argument_types, const Array& parameters, bool result_is_nullable) { + return create_aggregate_function_orthogonal<AggOrthBitMapExprCal>(name, argument_types, parameters, + result_is_nullable); +} + +AggregateFunctionPtr create_aggregate_function_orthogonal_bitmap_expr_calculate_count( + const std::string& name, const DataTypes& argument_types, const Array& parameters, bool result_is_nullable) { + return create_aggregate_function_orthogonal<AggOrthBitMapExprCalCount>(name, argument_types, parameters, + result_is_nullable); +} + AggregateFunctionPtr create_aggregate_function_intersect_count(const std::string& name, const DataTypes& argument_types, const Array& parameters, @@ -94,5 +106,12 @@ void register_aggregate_function_orthogonal_bitmap(AggregateFunctionSimpleFactor create_aggregate_function_orthogonal_bitmap_union_count); factory.register_function("intersect_count", create_aggregate_function_intersect_count); + + factory.register_function("orthogonal_bitmap_expr_calculate", + create_aggregate_function_orthogonal_bitmap_expr_calculate); + + factory.register_function( + "orthogonal_bitmap_expr_calculate_count", + create_aggregate_function_orthogonal_bitmap_expr_calculate_count); } } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h index 0c8b8b9b20..1dd5f6a93d 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h +++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h @@ -18,6 +18,7 @@ #pragma once #include "exprs/bitmap_function.h" +#include "util/bitmap_expr_calculation.h" #include "util/bitmap_intersect.h" #include "util/bitmap_value.h" #include "vec/aggregate_functions/aggregate_function.h" @@ -178,6 +179,102 @@ private: Int64 result = 0; }; +template <typename T> +struct AggOrthBitmapExprCalBaseData { +public: + using ColVecData = std::conditional_t<IsNumber<T>, ColumnVector<T>, ColumnString>; + + void add(const IColumn** columns, size_t row_num) { + const auto& bitmap_col = static_cast<const ColumnBitmap&>(*columns[0]); + const auto& data_col = static_cast<const ColVecData&>(*columns[1]); + const auto& bitmap_value = bitmap_col.get_element(row_num); + std::string update_key = data_col.get_data_at(row_num).to_string(); + bitmap_expr_cal.update(update_key, bitmap_value); + } + + void init_add_key(const IColumn** columns, size_t row_num, int argument_size) { + if (first_init) { + DCHECK(argument_size > 1); + const auto& col = static_cast<const ColVecData&>(*columns[2]); + std::string expr = col.get_data_at(row_num).to_string(); + bitmap_expr_cal.bitmap_calculation_init(expr); + first_init = false; + } + } + +protected: + doris::BitmapExprCalculation bitmap_expr_cal; + bool first_init = true; +}; + +template <typename T> +struct AggOrthBitMapExprCal : public AggOrthBitmapExprCalBaseData<T> { +public: + static constexpr auto name = "orthogonal_bitmap_expr_calculate"; + + static DataTypePtr get_return_type() { return std::make_shared<DataTypeBitMap>(); } + + void merge(const AggOrthBitMapExprCal& rhs) { + if (rhs.first_init) { + return; + } + result |= rhs.result; + } + + void write(BufferWritable& buf) { + write_binary(AggOrthBitmapExprCalBaseData<T>::first_init, buf); + result = AggOrthBitmapExprCalBaseData<T>::bitmap_expr_cal.bitmap_calculate(); + DataTypeBitMap::serialize_as_stream(result, buf); + } + + void read(BufferReadable& buf) { + read_binary(AggOrthBitmapExprCalBaseData<T>::first_init, buf); + DataTypeBitMap::deserialize_as_stream(result, buf); + } + + void get(IColumn& to) const { + auto& column = static_cast<ColumnBitmap&>(to); + column.get_data().emplace_back(result); + } + +private: + BitmapValue result; +}; + +template <typename T> +struct AggOrthBitMapExprCalCount : public AggOrthBitmapExprCalBaseData<T> { +public: + static constexpr auto name = "orthogonal_bitmap_expr_calculate_count"; + + static DataTypePtr get_return_type() { return std::make_shared<DataTypeInt64>(); } + + void merge(const AggOrthBitMapExprCalCount& rhs) { + if (rhs.first_init) { + return; + } + result += rhs.result; + } + + void write(BufferWritable& buf) { + write_binary(AggOrthBitmapExprCalBaseData<T>::first_init, buf); + result = AggOrthBitmapExprCalBaseData<T>::bitmap_expr_cal.bitmap_calculate_count(); + write_binary(result, buf); + } + + void read(BufferReadable& buf) { + read_binary(AggOrthBitmapExprCalBaseData<T>::first_init, buf); + read_binary(result, buf); + } + + void get(IColumn& to) const { + auto& column = static_cast<ColumnVector<Int64>&>(to); + column.get_data().emplace_back(result); + } + +private: + int64_t result = 0; +}; + template <typename T> struct OrthBitmapUnionCountData { static constexpr auto name = "orthogonal_bitmap_union_count"; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 8ef4b36c88..c9f25a3f8b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -797,7 +797,9 @@ public class FunctionCallExpr extends Expr { if (fnName.getFunction().equalsIgnoreCase(FunctionSet.INTERSECT_COUNT) || fnName.getFunction() .equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_INTERSECT) || fnName.getFunction() - .equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_INTERSECT_COUNT)) { + .equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_INTERSECT_COUNT) || fnName.getFunction() + .equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_EXPR_CALCULATE_COUNT) || fnName.getFunction() + .equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_EXPR_CALCULATE)) { if (children.size() <= 2) { throw new AnalysisException(fnName + "(bitmap_column, column_to_filter, filter_values) " + "function requires at least three parameters"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java index 09cb22b59e..5e2a44ea23 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java @@ -50,6 +50,7 @@ public class AggregateFunction extends Function { "dense_rank", "multi_distinct_count", "multi_distinct_sum", FunctionSet.HLL_UNION_AGG, FunctionSet.HLL_UNION, FunctionSet.HLL_RAW_AGG, FunctionSet.BITMAP_UNION, FunctionSet.BITMAP_INTERSECT, FunctionSet.ORTHOGONAL_BITMAP_INTERSECT, FunctionSet.ORTHOGONAL_BITMAP_INTERSECT_COUNT, + FunctionSet.ORTHOGONAL_BITMAP_EXPR_CALCULATE_COUNT, FunctionSet.ORTHOGONAL_BITMAP_EXPR_CALCULATE, FunctionSet.INTERSECT_COUNT, FunctionSet.ORTHOGONAL_BITMAP_UNION_COUNT, FunctionSet.COUNT, "approx_count_distinct", "ndv", FunctionSet.BITMAP_UNION_INT, FunctionSet.BITMAP_UNION_COUNT, "ndv_no_finalize", FunctionSet.WINDOW_FUNNEL, FunctionSet.RETENTION, diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index b6c69c7708..079fa823e9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -925,6 +925,8 @@ public class FunctionSet<T> { public static final String ORTHOGONAL_BITMAP_INTERSECT = "orthogonal_bitmap_intersect"; public static final String ORTHOGONAL_BITMAP_INTERSECT_COUNT = "orthogonal_bitmap_intersect_count"; public static final String ORTHOGONAL_BITMAP_UNION_COUNT = "orthogonal_bitmap_union_count"; + public static final String ORTHOGONAL_BITMAP_EXPR_CALCULATE_COUNT = "orthogonal_bitmap_expr_calculate_count"; + public static final String ORTHOGONAL_BITMAP_EXPR_CALCULATE = "orthogonal_bitmap_expr_calculate"; public static final String QUANTILE_UNION = "quantile_union"; //TODO(weixiang): is quantile_percent can be replaced by approx_percentile? @@ -2394,6 +2396,47 @@ public class FunctionSet<T> { Lists.newArrayList(Type.BITMAP, t, t), Type.BIGINT, Type.BITMAP, true, "", "", "", "", "", "", "", true, false, true, true)); } + + Type[] ntypes = {Type.CHAR, Type.VARCHAR, Type.STRING}; + for (Type t : ntypes) { + addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_EXPR_CALCULATE, + Lists.newArrayList(Type.BITMAP, t, Type.STRING), + Type.BITMAP, + Type.VARCHAR, + true, + "_ZN5doris15BitmapFunctions37orthogonal_bitmap_expr_calculate_initEPN9doris_udf15FunctionContextEPNS1_9StringValE", + "_ZN5doris15BitmapFunctions39orthogonal_bitmap_expr_calculate_updateEPN9doris_udf15FunctionContextERKNS1_9StringValES6_iPS5_S7_", + "_ZN5doris15BitmapFunctions12bitmap_unionEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_", + "_ZN5doris15BitmapFunctions42orthogonal_bitmap_expr_calculate_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE", + "", + "", + "_ZN5doris15BitmapFunctions16bitmap_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE", + true, false, true)); + + addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_EXPR_CALCULATE_COUNT, + Lists.newArrayList(Type.BITMAP, t, Type.STRING), + Type.BIGINT, + Type.VARCHAR, + true, + "_ZN5doris15BitmapFunctions43orthogonal_bitmap_expr_calculate_count_initEPN9doris_udf15FunctionContextEPNS1_9StringValE", + "_ZN5doris15BitmapFunctions39orthogonal_bitmap_expr_calculate_updateEPN9doris_udf15FunctionContextERKNS1_9StringValES6_iPS5_S7_", + "_ZN5doris15BitmapFunctions29orthogonal_bitmap_count_mergeEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_", + "_ZN5doris15BitmapFunctions48orthogonal_bitmap_expr_calculate_count_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE", + "", + "", + "_ZN5doris15BitmapFunctions32orthogonal_bitmap_count_finalizeEPN9doris_udf15FunctionContextERKNS1_9StringValE", + true, false, true)); + + //vec ORTHOGONAL_BITMAP_EXPR_CALCULATE and ORTHOGONAL_BITMAP_EXPR_CALCULATE_COUNT + addBuiltin( + AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_EXPR_CALCULATE, Lists.newArrayList(Type.BITMAP, t, Type.STRING), + Type.BITMAP, Type.BITMAP, true, "", "", "", "", "", "", "", true, false, true, true)); + + addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_EXPR_CALCULATE_COUNT, + Lists.newArrayList(Type.BITMAP, t, Type.STRING), Type.BIGINT, Type.BITMAP, true, "", "", "", "", "", "", "", + true, false, true, true)); + } + // bitmap addBuiltin(AggregateFunction.createBuiltin(BITMAP_UNION, Lists.newArrayList(Type.BITMAP), Type.BITMAP, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org